aboutsummaryrefslogtreecommitdiff
path: root/pdf-as-lib/src/main/java/at/gv/egiz/pdfas/impl/vfilter/helper/VerificationFilterHelper.java
diff options
context:
space:
mode:
Diffstat (limited to 'pdf-as-lib/src/main/java/at/gv/egiz/pdfas/impl/vfilter/helper/VerificationFilterHelper.java')
-rw-r--r--pdf-as-lib/src/main/java/at/gv/egiz/pdfas/impl/vfilter/helper/VerificationFilterHelper.java162
1 files changed, 162 insertions, 0 deletions
diff --git a/pdf-as-lib/src/main/java/at/gv/egiz/pdfas/impl/vfilter/helper/VerificationFilterHelper.java b/pdf-as-lib/src/main/java/at/gv/egiz/pdfas/impl/vfilter/helper/VerificationFilterHelper.java
new file mode 100644
index 0000000..69803e7
--- /dev/null
+++ b/pdf-as-lib/src/main/java/at/gv/egiz/pdfas/impl/vfilter/helper/VerificationFilterHelper.java
@@ -0,0 +1,162 @@
+/**
+ * <copyright> Copyright 2006 by Know-Center, Graz, Austria </copyright>
+ * PDF-AS has been contracted by the E-Government Innovation Center EGIZ, a
+ * joint initiative of the Federal Chancellery Austria and Graz University of
+ * Technology.
+ *
+ * Licensed under the EUPL, Version 1.1 or - as soon they will be approved by
+ * the European Commission - subsequent versions of the EUPL (the "Licence");
+ * You may not use this work except in compliance with the Licence.
+ * You may obtain a copy of the Licence at:
+ * http://www.osor.eu/eupl/
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the Licence is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the Licence for the specific language governing permissions and
+ * limitations under the Licence.
+ *
+ * This product combines work with different licenses. See the "NOTICE" text
+ * file for details on the various modules and licenses.
+ * The "NOTICE" text file is part of the distribution. Any derivative works
+ * that you distribute must include a readable copy of the "NOTICE" text file.
+ */
+package at.gv.egiz.pdfas.impl.vfilter.helper;
+
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+
+import at.gv.egiz.pdfas.exceptions.framework.VerificationFilterException;
+import at.gv.egiz.pdfas.impl.input.helper.DataSourceHelper;
+import at.gv.egiz.pdfas.impl.vfilter.Partition;
+import at.gv.egiz.pdfas.impl.vfilter.partition.BinaryPartition;
+import at.gv.egiz.pdfas.impl.vfilter.partition.TextPartition;
+import at.gv.egiz.pdfas.framework.input.PdfDataSource;
+import at.knowcenter.wag.exactparser.parsing.results.FooterParseResult;
+
+/**
+ * Contains helpful methods used by the VerificationFilter.
+ *
+ * @author wprinz
+ */
+public final class VerificationFilterHelper
+{
+ /**
+ * Partitions the list of Incremental Update blocks into text and binary
+ * partitions.
+ *
+ * <p>
+ * A partition is a sequence of Incremental Update blocks of the same type.
+ * </p>
+ * <p>
+ * An Incremental Update block is considered to have the type "binary" if it
+ * contains an egiz dictionary. A block not containing an egiz dictionary is
+ * considert to have the type "text".
+ * </p>
+ *
+ * @param pdf
+ * The PDF.
+ * @param blocks
+ * The Incremental Update blocks.
+ * @return Returns the partitioning of the blocks.
+ * @throws VerificationFilterException
+ * Thrown if something goes wrong.
+ */
+ public static List partition(PdfDataSource pdf, List blocks) throws VerificationFilterException
+ {
+ List partitions = new ArrayList(blocks.size());
+
+ Iterator it = blocks.iterator();
+ while (it.hasNext())
+ {
+ FooterParseResult fpr = (FooterParseResult) it.next();
+
+ byte[] data = DataSourceHelper.convertDataSourceToByteArray(pdf);
+ if (VerificationFilterBinaryHelper.containsEGIZDict(data, fpr))
+ {
+ BinaryPartition bp = null;
+ if (partitions.isEmpty() || ((Partition) partitions.get(partitions.size() - 1)).isTextPartition())
+ {
+ bp = new BinaryPartition();
+ bp.blocks = new ArrayList(blocks.size());
+ partitions.add(bp);
+ }
+ else
+ {
+ bp = (BinaryPartition) partitions.get(partitions.size() - 1);
+ }
+ assert bp != null;
+
+ bp.blocks.add(fpr);
+ }
+ else
+ {
+ TextPartition tp = null;
+ if (partitions.isEmpty() || !((Partition) partitions.get(partitions.size() - 1)).isTextPartition())
+ {
+ tp = new TextPartition();
+ tp.blocks = new ArrayList(blocks.size());
+ partitions.add(tp);
+ }
+ else
+ {
+ tp = (TextPartition) partitions.get(partitions.size() - 1);
+ }
+ assert tp != null;
+
+ tp.blocks.add(fpr);
+ }
+ }
+
+ assert partitions.size() >= 1 : "There must be at least one partition";
+
+ return partitions;
+ }
+
+ /**
+ * Determines the end of the given partiton.
+ *
+ * @param partition
+ * The partition.
+ * @return Returns the end index of the given partition.
+ */
+ public static int getEndOfPartition(Partition partition)
+ {
+ List blocks = null;
+ if (partition instanceof TextPartition)
+ {
+ blocks = ((TextPartition) partition).blocks;
+ }
+ else
+ {
+ blocks = ((BinaryPartition) partition).blocks;
+ }
+
+ return ((FooterParseResult) blocks.get(blocks.size() - 1)).next_index;
+ }
+
+ /**
+ * Finds the last text partition in the given list of partitions.
+ *
+ * @param partitions
+ * The partitions.
+ * @return Returns the last TextPartition.
+ */
+ public static TextPartition findLastTextPartition(List partitions)
+ {
+ Partition lastTextPartition = (Partition) partitions.get(partitions.size() - 1);
+
+ if (!lastTextPartition.isTextPartition())
+ {
+ assert partitions.size() > 1 : "The only one partition cannot be a binary partition - where is the original document?";
+ Partition previousToLastPartition = (Partition) partitions.get(partitions.size() - 2);
+ assert previousToLastPartition.isTextPartition() : "The previous to last partition must be a text partition or something is wrong with the partitioning algorithm.";
+
+ lastTextPartition = previousToLastPartition;
+ }
+
+ return (TextPartition) lastTextPartition;
+ }
+
+}