aboutsummaryrefslogtreecommitdiff
path: root/src/main/java/at/gv/egiz/pdfas/impl/vfilter/helper
diff options
context:
space:
mode:
authornetconomy <netconomy@7b5415b0-85f9-ee4d-85bd-d5d0c3b42d1c>2007-08-17 06:10:56 +0000
committernetconomy <netconomy@7b5415b0-85f9-ee4d-85bd-d5d0c3b42d1c>2007-08-17 06:10:56 +0000
commit3d982813b34f6f230baf4a467cdc37ec92a77595 (patch)
tree85319d39cee2ded1bb7a2b2dd9e8ea37e3778248 /src/main/java/at/gv/egiz/pdfas/impl/vfilter/helper
parent07f6c8f33b2d700276fe6ec6339ff836c8710131 (diff)
downloadpdf-as-3-3d982813b34f6f230baf4a467cdc37ec92a77595.tar.gz
pdf-as-3-3d982813b34f6f230baf4a467cdc37ec92a77595.tar.bz2
pdf-as-3-3d982813b34f6f230baf4a467cdc37ec92a77595.zip
Performance
git-svn-id: https://joinup.ec.europa.eu/svn/pdf-as/trunk@167 7b5415b0-85f9-ee4d-85bd-d5d0c3b42d1c
Diffstat (limited to 'src/main/java/at/gv/egiz/pdfas/impl/vfilter/helper')
-rw-r--r--src/main/java/at/gv/egiz/pdfas/impl/vfilter/helper/VerificationFilterBinaryHelper.java152
-rw-r--r--src/main/java/at/gv/egiz/pdfas/impl/vfilter/helper/VerificationFilterHelper.java142
-rw-r--r--src/main/java/at/gv/egiz/pdfas/impl/vfilter/helper/VerificationFilterTextHelper.java15
3 files changed, 309 insertions, 0 deletions
diff --git a/src/main/java/at/gv/egiz/pdfas/impl/vfilter/helper/VerificationFilterBinaryHelper.java b/src/main/java/at/gv/egiz/pdfas/impl/vfilter/helper/VerificationFilterBinaryHelper.java
new file mode 100644
index 0000000..b7f36d1
--- /dev/null
+++ b/src/main/java/at/gv/egiz/pdfas/impl/vfilter/helper/VerificationFilterBinaryHelper.java
@@ -0,0 +1,152 @@
+/**
+ *
+ */
+package at.gv.egiz.pdfas.impl.vfilter.helper;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+import at.gv.egiz.pdfas.exceptions.ErrorCode;
+
+import at.knowcenter.wag.egov.egiz.PdfASID;
+import at.knowcenter.wag.egov.egiz.exceptions.InvalidIDException;
+import at.knowcenter.wag.egov.egiz.exceptions.PDFDocumentException;
+import at.knowcenter.wag.egov.egiz.pdf.BinarySignature;
+import at.knowcenter.wag.egov.egiz.pdf.Placeholder;
+import at.knowcenter.wag.egov.egiz.pdf.StringInfo;
+import at.knowcenter.wag.exactparser.parsing.IndirectObjectReference;
+import at.knowcenter.wag.exactparser.parsing.PDFUtils;
+import at.knowcenter.wag.exactparser.parsing.results.ArrayParseResult;
+import at.knowcenter.wag.exactparser.parsing.results.DictionaryParseResult;
+import at.knowcenter.wag.exactparser.parsing.results.FooterParseResult;
+import at.knowcenter.wag.exactparser.parsing.results.IndirectObjectReferenceParseResult;
+import at.knowcenter.wag.exactparser.parsing.results.NumberParseResult;
+import at.knowcenter.wag.exactparser.parsing.results.ObjectParseResult;
+
+/**
+ * Contains helpful methods used by the VerificationFilter to analyze the PDF for binary signatures.
+ *
+ * <p>
+ * All members are static; the class carries no state of its own.
+ * </p>
+ *
+ * @author wprinz
+ */
+public final class VerificationFilterBinaryHelper
+{
+  /**
+   * The name of the egiz dict key.
+   */
+  public static final byte[] EGIZ_DICT_NAME = { 'E', 'G', 'I', 'Z', 'S', 'i', 'g', 'D', 'i', 'c', 't' };
+
+  /**
+   * The name of the ID (SIG_KZ) property in the egiz dict.
+   */
+  public static final byte[] EGIZ_KZ_NAME = { 'I', 'D' };
+
+  /**
+   * Tells, if the given incremental update block contains a binary signature.
+   *
+   * <p>
+   * The decision is made by looking up {@link #EGIZ_DICT_NAME} among the names
+   * of the dictionary held in <code>block.tpr.dpr</code>.
+   * </p>
+   * <p>
+   * According to definition, a binary block cannot contain any other
+   * signatures than this one.
+   * </p>
+   *
+   * @param pdf
+   *          The pdf the block was parsed from.
+   * @param block
+   *          The incremental update block.
+   * @return Returns true, if this block is a binary signature block, false
+   *         otherwise.
+   */
+  public static boolean containsEGIZDict(final byte[] pdf, final FooterParseResult block)
+  {
+    int dict_index = PDFUtils.indexOfName(pdf, block.tpr.dpr.names, EGIZ_DICT_NAME);
+
+    // Only a negative index means "not found". Index 0 is a valid position:
+    // extractKZFromEGIZBlock rejects negative indices only and uses the index
+    // directly to fetch the value, so both methods must agree on this.
+    return dict_index >= 0;
+  }
+
+  /**
+   * Extracts the PDF AS ID of the egiz block.
+   *
+   * <p>
+   * The egiz dict entry of the block is expected to be an indirect object
+   * reference. It is resolved through the block's xref data
+   * (<code>block.xpr</code>), the referenced object is parsed as a dictionary,
+   * and its {@link #EGIZ_KZ_NAME} entry - expected to be an array - is turned
+   * back into the ID string by {@link #restoreKZ(byte[], ArrayParseResult)}.
+   * Entries of any other type cause a ClassCastException.
+   * </p>
+   *
+   * @param pdf
+   *          The pdf.
+   * @param block
+   *          The IU block.
+   * @return Returns the extracted PDF AS ID.
+   * @throws PDFDocumentException
+   *           Thrown with code 301 if the block has no egiz dict entry or the
+   *           egiz dict has no ID entry; otherwise a forwarded exception.
+   * @throws InvalidIDException
+   *           Forwarded exception.
+   */
+  public static PdfASID extractKZFromEGIZBlock(final byte[] pdf, final FooterParseResult block) throws PDFDocumentException, InvalidIDException
+  {
+    int egiz_index = PDFUtils.indexOfName(pdf, block.tpr.dpr.names, EGIZ_DICT_NAME);
+    if (egiz_index < 0)
+    {
+      throw new PDFDocumentException(301, "egiz_index = " + egiz_index);
+    }
+
+    IndirectObjectReferenceParseResult egiz_dict_iorpr = (IndirectObjectReferenceParseResult) block.tpr.dpr.values.get(egiz_index);
+    IndirectObjectReference ior = egiz_dict_iorpr.ior;
+
+    // Locate and parse the object the reference points to.
+    final int egiz_dict_offset = PDFUtils.getObjectOffsetFromXRefByIndirectObjectReference(block.xpr, ior);
+    ObjectParseResult obj = PDFUtils.parseObject(pdf, egiz_dict_offset);
+    DictionaryParseResult egiz_dict = (DictionaryParseResult) obj.object;
+
+    int kz_index = PDFUtils.indexOfName(pdf, egiz_dict.names, EGIZ_KZ_NAME);
+    if (kz_index < 0)
+    {
+      throw new PDFDocumentException(301, "kz_index = " + kz_index);
+    }
+    ArrayParseResult kz_apr = (ArrayParseResult) egiz_dict.values.get(kz_index);
+
+    String kz_string = restoreKZ(pdf, kz_apr);
+    return new PdfASID(kz_string);
+  }
+
+  /**
+   * Restores the Kennzeichnung String from an Array.
+   *
+   * <p>
+   * The array elements are read as consecutive (start, length) number pairs.
+   * Each pair becomes one {@link StringInfo}; the resulting list is handed to
+   * {@link Placeholder#reconstructStringFromPartition} together with
+   * {@link BinarySignature#ENCODING_WIN}.
+   * </p>
+   *
+   * @param pdf
+   *          The PDF.
+   * @param kz_apr
+   *          The Array, as parsed from the EGIZ Dict.
+   * @return Returns the restored KZ.
+   * @throws PDFDocumentException
+   *           Thrown with {@link ErrorCode#DOCUMENT_CANNOT_BE_READ} if an
+   *           IOException occurs while restoring the string.
+   */
+  public static String restoreKZ(byte[] pdf, ArrayParseResult kz_apr) throws PDFDocumentException
+  {
+    try
+    {
+      // Integer division: a trailing element without a partner is ignored.
+      final int pair_count = kz_apr.elements.size() / 2;
+      List partition = new ArrayList(pair_count);
+
+      for (int i = 0; i < pair_count; i++)
+      {
+        NumberParseResult start_npr = (NumberParseResult) kz_apr.elements.get(i * 2);
+        NumberParseResult length_npr = (NumberParseResult) kz_apr.elements.get(i * 2 + 1);
+
+        StringInfo si = new StringInfo();
+        si.string_start = start_npr.number;
+        si.string_length = length_npr.number;
+
+        partition.add(si);
+      }
+
+      return Placeholder.reconstructStringFromPartition(pdf, partition, BinarySignature.ENCODING_WIN);
+    }
+    catch (IOException e1)
+    {
+      throw new PDFDocumentException(ErrorCode.DOCUMENT_CANNOT_BE_READ, e1);
+    }
+  }
+
+}
diff --git a/src/main/java/at/gv/egiz/pdfas/impl/vfilter/helper/VerificationFilterHelper.java b/src/main/java/at/gv/egiz/pdfas/impl/vfilter/helper/VerificationFilterHelper.java
new file mode 100644
index 0000000..67af129
--- /dev/null
+++ b/src/main/java/at/gv/egiz/pdfas/impl/vfilter/helper/VerificationFilterHelper.java
@@ -0,0 +1,142 @@
+/**
+ *
+ */
+package at.gv.egiz.pdfas.impl.vfilter.helper;
+
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+
+import at.gv.egiz.pdfas.exceptions.framework.VerificationFilterException;
+import at.gv.egiz.pdfas.impl.input.helper.DataSourceHelper;
+import at.gv.egiz.pdfas.impl.vfilter.Partition;
+import at.gv.egiz.pdfas.impl.vfilter.partition.BinaryPartition;
+import at.gv.egiz.pdfas.impl.vfilter.partition.TextPartition;
+import at.gv.egiz.pdfas.framework.input.PdfDataSource;
+import at.knowcenter.wag.exactparser.parsing.results.FooterParseResult;
+
+/**
+ * Contains helpful methods used by the VerificationFilter.
+ *
+ * <p>
+ * All members are static; the class carries no state of its own.
+ * </p>
+ *
+ * @author wprinz
+ */
+public final class VerificationFilterHelper
+{
+  /**
+   * Partitions the list of Incremental Update blocks into text and binary
+   * partitions.
+   *
+   * <p>
+   * A partition is a sequence of Incremental Update blocks of the same type.
+   * </p>
+   * <p>
+   * An Incremental Update block is considered to have the type "binary" if it
+   * contains an egiz dictionary. A block not containing an egiz dictionary is
+   * considered to have the type "text".
+   * </p>
+   * <p>
+   * The blocks are processed in list order; a block is appended to the most
+   * recent partition if that partition has the same type, otherwise a new
+   * partition is started.
+   * </p>
+   *
+   * @param pdf
+   *          The PDF.
+   * @param blocks
+   *          The Incremental Update blocks ({@link FooterParseResult}
+   *          elements).
+   * @return Returns the partitioning of the blocks as a list of
+   *         {@link Partition} objects.
+   * @throws VerificationFilterException
+   *           Thrown if something goes wrong.
+   */
+  public static List partition(PdfDataSource pdf, List blocks) throws VerificationFilterException
+  {
+    List partitions = new ArrayList(blocks.size());
+
+    // The byte representation of the PDF does not change between blocks, so it
+    // is converted at most once. The conversion is done lazily so that an
+    // empty block list never touches the data source.
+    byte[] data = null;
+
+    Iterator it = blocks.iterator();
+    while (it.hasNext())
+    {
+      FooterParseResult fpr = (FooterParseResult) it.next();
+
+      if (data == null)
+      {
+        data = DataSourceHelper.convertDataSourceToByteArray(pdf);
+      }
+
+      // The most recently created partition, or null if there is none yet.
+      Partition last = partitions.isEmpty() ? null : (Partition) partitions.get(partitions.size() - 1);
+
+      if (VerificationFilterBinaryHelper.containsEGIZDict(data, fpr))
+      {
+        BinaryPartition bp;
+        if (last == null || last.isTextPartition())
+        {
+          // Type changes from text to binary (or first block): open a new partition.
+          bp = new BinaryPartition();
+          bp.blocks = new ArrayList(blocks.size());
+          partitions.add(bp);
+        }
+        else
+        {
+          bp = (BinaryPartition) last;
+        }
+
+        bp.blocks.add(fpr);
+      }
+      else
+      {
+        TextPartition tp;
+        if (last == null || !last.isTextPartition())
+        {
+          // Type changes from binary to text (or first block): open a new partition.
+          tp = new TextPartition();
+          tp.blocks = new ArrayList(blocks.size());
+          partitions.add(tp);
+        }
+        else
+        {
+          tp = (TextPartition) last;
+        }
+
+        tp.blocks.add(fpr);
+      }
+    }
+
+    assert partitions.size() >= 1 : "There must be at least one partition";
+
+    return partitions;
+  }
+
+  /**
+   * Determines the end of the given partition.
+   *
+   * <p>
+   * The end is the <code>next_index</code> of the last block of the
+   * partition. Any partition that is not a {@link TextPartition} is treated as
+   * a {@link BinaryPartition}.
+   * </p>
+   *
+   * @param partition
+   *          The partition.
+   * @return Returns the end index of the given partition.
+   */
+  public static int getEndOfPartition(Partition partition)
+  {
+    List blocks;
+    if (partition instanceof TextPartition)
+    {
+      blocks = ((TextPartition) partition).blocks;
+    }
+    else
+    {
+      blocks = ((BinaryPartition) partition).blocks;
+    }
+
+    FooterParseResult last_block = (FooterParseResult) blocks.get(blocks.size() - 1);
+    return last_block.next_index;
+  }
+
+  /**
+   * Finds the last text partition in the given list of partitions.
+   *
+   * <p>
+   * Only the last and the previous to last partition are inspected: if the
+   * last partition is not a text partition, the one before it is returned.
+   * That this one is a text partition is only checked by assertions.
+   * </p>
+   *
+   * @param partitions
+   *          The partitions. Must not be empty.
+   * @return Returns the last TextPartition.
+   */
+  public static TextPartition findLastTextPartition(List partitions)
+  {
+    Partition lastTextPartition = (Partition) partitions.get(partitions.size() - 1);
+
+    if (!lastTextPartition.isTextPartition())
+    {
+      assert partitions.size() > 1 : "The only one partition cannot be a binary partition - where is the original document?";
+      Partition previousToLastPartition = (Partition) partitions.get(partitions.size() - 2);
+      assert previousToLastPartition.isTextPartition() : "The previous to last partition must be a text partition or something is wrong with the partitioning algorithm.";
+
+      lastTextPartition = previousToLastPartition;
+    }
+
+    return (TextPartition) lastTextPartition;
+  }
+
+}
diff --git a/src/main/java/at/gv/egiz/pdfas/impl/vfilter/helper/VerificationFilterTextHelper.java b/src/main/java/at/gv/egiz/pdfas/impl/vfilter/helper/VerificationFilterTextHelper.java
new file mode 100644
index 0000000..f9a79b0
--- /dev/null
+++ b/src/main/java/at/gv/egiz/pdfas/impl/vfilter/helper/VerificationFilterTextHelper.java
@@ -0,0 +1,15 @@
+/**
+ *
+ */
+package at.gv.egiz.pdfas.impl.vfilter.helper;
+
+/**
+ * Contains helpful methods used by the VerificationFilter to analyze text and
+ * find text signatures.
+ *
+ * <p>
+ * In this revision the class declares no members.
+ * </p>
+ *
+ * @author wprinz
+ */
+public final class VerificationFilterTextHelper
+{
+
+}