From 3d982813b34f6f230baf4a467cdc37ec92a77595 Mon Sep 17 00:00:00 2001 From: netconomy Date: Fri, 17 Aug 2007 06:10:56 +0000 Subject: Performance git-svn-id: https://joinup.ec.europa.eu/svn/pdf-as/trunk@167 7b5415b0-85f9-ee4d-85bd-d5d0c3b42d1c --- .../helper/VerificationFilterBinaryHelper.java | 152 +++++++++++++++++++++ .../vfilter/helper/VerificationFilterHelper.java | 142 +++++++++++++++++++ .../helper/VerificationFilterTextHelper.java | 15 ++ 3 files changed, 309 insertions(+) create mode 100644 src/main/java/at/gv/egiz/pdfas/impl/vfilter/helper/VerificationFilterBinaryHelper.java create mode 100644 src/main/java/at/gv/egiz/pdfas/impl/vfilter/helper/VerificationFilterHelper.java create mode 100644 src/main/java/at/gv/egiz/pdfas/impl/vfilter/helper/VerificationFilterTextHelper.java (limited to 'src/main/java/at/gv/egiz/pdfas/impl/vfilter/helper') diff --git a/src/main/java/at/gv/egiz/pdfas/impl/vfilter/helper/VerificationFilterBinaryHelper.java b/src/main/java/at/gv/egiz/pdfas/impl/vfilter/helper/VerificationFilterBinaryHelper.java new file mode 100644 index 0000000..b7f36d1 --- /dev/null +++ b/src/main/java/at/gv/egiz/pdfas/impl/vfilter/helper/VerificationFilterBinaryHelper.java @@ -0,0 +1,152 @@ +/** + * + */ +package at.gv.egiz.pdfas.impl.vfilter.helper; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +import at.gv.egiz.pdfas.exceptions.ErrorCode; + +import at.knowcenter.wag.egov.egiz.PdfASID; +import at.knowcenter.wag.egov.egiz.exceptions.InvalidIDException; +import at.knowcenter.wag.egov.egiz.exceptions.PDFDocumentException; +import at.knowcenter.wag.egov.egiz.pdf.BinarySignature; +import at.knowcenter.wag.egov.egiz.pdf.Placeholder; +import at.knowcenter.wag.egov.egiz.pdf.StringInfo; +import at.knowcenter.wag.exactparser.parsing.IndirectObjectReference; +import at.knowcenter.wag.exactparser.parsing.PDFUtils; +import at.knowcenter.wag.exactparser.parsing.results.ArrayParseResult; +import at.knowcenter.wag.exactparser.parsing.results.DictionaryParseResult; +import at.knowcenter.wag.exactparser.parsing.results.FooterParseResult; +import at.knowcenter.wag.exactparser.parsing.results.IndirectObjectReferenceParseResult; +import at.knowcenter.wag.exactparser.parsing.results.NumberParseResult; +import at.knowcenter.wag.exactparser.parsing.results.ObjectParseResult; + +/** + * Contains helpful methods used by the VerificationFilter to analyze the PDF for binary signatures. + * + * @author wprinz + */ +public final class VerificationFilterBinaryHelper +{ + /** + * The name of the egiz dict key. + */ + public static final byte[] EGIZ_DICT_NAME = { 'E', 'G', 'I', 'Z', 'S', 'i', 'g', 'D', 'i', 'c', 't' }; + + /** + * The name of the ID (SIG_KZ) property in the egiz dict. + */ + public static final byte[] EGIZ_KZ_NAME = { 'I', 'D' }; + + /** + * Tells, if the given incremental update block contains a binary signature. + * + *

+ * According to definition, if a block is a binary block, it must/cannot + * contain other signatures than this one. + *

+ * + * @param block + * The incremental update block. + * @return Returns true, if this block is a binary signature block, false + * otherwise. + */ + public static boolean containsEGIZDict(final byte[] pdf, final FooterParseResult block) + { + int dict_index = PDFUtils.indexOfName(pdf, block.tpr.dpr.names, EGIZ_DICT_NAME); + if (dict_index <= 0) + { + return false; + } + + return true; + } + + /** + * Extracts the PDF AS ID of the egiz block. + * + * @param pdf + * The pdf. + * @param block + * The IU block. + * @return Returns the extracted PDF AS ID. + * @throws PDFDocumentException + * Forwarded exception. + * @throws InvalidIDException + * Forwarded exception. + */ + public static PdfASID extractKZFromEGIZBlock(final byte[] pdf, final FooterParseResult block) throws PDFDocumentException, InvalidIDException + { + int egiz_index = PDFUtils.indexOfName(pdf, block.tpr.dpr.names, EGIZ_DICT_NAME); + if (egiz_index < 0) + { + throw new PDFDocumentException(301, "egiz_index = " + egiz_index); + } + + IndirectObjectReferenceParseResult egiz_dict_iorpr = (IndirectObjectReferenceParseResult) block.tpr.dpr.values.get(egiz_index); + // logger_.debug("egiz_dict_ir = " + egiz_dict_iorpr.ior.object_number + // + " " + egiz_dict_iorpr.ior.generation_number); + + IndirectObjectReference ior = egiz_dict_iorpr.ior; + + final int egiz_dict_offset = PDFUtils.getObjectOffsetFromXRefByIndirectObjectReference(block.xpr, ior); + // logger_.debug("egiz_dict_offset = " + egiz_dict_offset); + + ObjectParseResult obj = PDFUtils.parseObject(pdf, egiz_dict_offset); + DictionaryParseResult egiz_dict = (DictionaryParseResult) obj.object; + + int kz_index = PDFUtils.indexOfName(pdf, egiz_dict.names, EGIZ_KZ_NAME); + if (kz_index < 0) + { + throw new PDFDocumentException(301, "kz_index = " + kz_index); + } + ArrayParseResult kz_apr = (ArrayParseResult) egiz_dict.values.get(kz_index); + + String kz_string = restoreKZ(pdf, kz_apr); + PdfASID kz = new PdfASID(kz_string); + + return kz; + } + + /** + * Restores the Kennzeichnung String from an Array. + * + * @param pdf + * The PDF. + * @param kz_apr + * The Array, as parsed from the EGIZ Dict. + * @return Returns the restored KZ. + * @throws PDFDocumentException + * Forwarded exception. + */ + public static String restoreKZ(byte[] pdf, ArrayParseResult kz_apr) throws PDFDocumentException + { + try + { + List partition = new ArrayList(); + + for (int i = 0; i < kz_apr.elements.size() / 2; i++) + { + NumberParseResult start_npr = (NumberParseResult) kz_apr.elements.get(i * 2); + NumberParseResult length_npr = (NumberParseResult) kz_apr.elements.get(i * 2 + 1); + + StringInfo si = new StringInfo(); + si.string_start = start_npr.number; + si.string_length = length_npr.number; + + partition.add(si); + } + + String KZ = Placeholder.reconstructStringFromPartition(pdf, partition, BinarySignature.ENCODING_WIN); + return KZ; + } + catch (IOException e1) + { + throw new PDFDocumentException(ErrorCode.DOCUMENT_CANNOT_BE_READ, e1); + } + } + +} diff --git a/src/main/java/at/gv/egiz/pdfas/impl/vfilter/helper/VerificationFilterHelper.java b/src/main/java/at/gv/egiz/pdfas/impl/vfilter/helper/VerificationFilterHelper.java new file mode 100644 index 0000000..67af129 --- /dev/null +++ b/src/main/java/at/gv/egiz/pdfas/impl/vfilter/helper/VerificationFilterHelper.java @@ -0,0 +1,142 @@ +/** + * + */ +package at.gv.egiz.pdfas.impl.vfilter.helper; + +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; + +import at.gv.egiz.pdfas.exceptions.framework.VerificationFilterException; +import at.gv.egiz.pdfas.impl.input.helper.DataSourceHelper; +import at.gv.egiz.pdfas.impl.vfilter.Partition; +import at.gv.egiz.pdfas.impl.vfilter.partition.BinaryPartition; +import at.gv.egiz.pdfas.impl.vfilter.partition.TextPartition; +import at.gv.egiz.pdfas.framework.input.PdfDataSource; +import at.knowcenter.wag.exactparser.parsing.results.FooterParseResult; + +/** + * Contains helpful methods used by the VerificationFilter. + * + * @author wprinz + */ +public final class VerificationFilterHelper +{ + /** + * Partitions the list of Incremental Update blocks into text and binary + * partitions. + * + *

+ * A partition is a sequence of Incremental Update blocks of the same type. + *

+ *

+ * An Incremental Update block is considered to have the type "binary" if it + * contains an egiz dictionary. A block not containing an egiz dictionary is + * considert to have the type "text". + *

+ * + * @param pdf + * The PDF. + * @param blocks + * The Incremental Update blocks. + * @return Returns the partitioning of the blocks. + * @throws VerificationFilterException + * Thrown if something goes wrong. + */ + public static List partition(PdfDataSource pdf, List blocks) throws VerificationFilterException + { + List partitions = new ArrayList(blocks.size()); + + Iterator it = blocks.iterator(); + while (it.hasNext()) + { + FooterParseResult fpr = (FooterParseResult) it.next(); + + byte[] data = DataSourceHelper.convertDataSourceToByteArray(pdf); + if (VerificationFilterBinaryHelper.containsEGIZDict(data, fpr)) + { + BinaryPartition bp = null; + if (partitions.isEmpty() || ((Partition) partitions.get(partitions.size() - 1)).isTextPartition()) + { + bp = new BinaryPartition(); + bp.blocks = new ArrayList(blocks.size()); + partitions.add(bp); + } + else + { + bp = (BinaryPartition) partitions.get(partitions.size() - 1); + } + assert bp != null; + + bp.blocks.add(fpr); + } + else + { + TextPartition tp = null; + if (partitions.isEmpty() || !((Partition) partitions.get(partitions.size() - 1)).isTextPartition()) + { + tp = new TextPartition(); + tp.blocks = new ArrayList(blocks.size()); + partitions.add(tp); + } + else + { + tp = (TextPartition) partitions.get(partitions.size() - 1); + } + assert tp != null; + + tp.blocks.add(fpr); + } + } + + assert partitions.size() >= 1 : "There must be at least one partition"; + + return partitions; + } + + /** + * Determines the end of the given partiton. + * + * @param partition + * The partition. + * @return Returns the end index of the given partition. + */ + public static int getEndOfPartition(Partition partition) + { + List blocks = null; + if (partition instanceof TextPartition) + { + blocks = ((TextPartition) partition).blocks; + } + else + { + blocks = ((BinaryPartition) partition).blocks; + } + + return ((FooterParseResult) blocks.get(blocks.size() - 1)).next_index; + } + + /** + * Finds the last text partition in the given list of partitions. + * + * @param partitions + * The partitions. + * @return Returns the last TextPartition. + */ + public static TextPartition findLastTextPartition(List partitions) + { + Partition lastTextPartition = (Partition) partitions.get(partitions.size() - 1); + + if (!lastTextPartition.isTextPartition()) + { + assert partitions.size() > 1 : "The only one partition cannot be a binary partition - where is the original document?"; + Partition previousToLastPartition = (Partition) partitions.get(partitions.size() - 2); + assert previousToLastPartition.isTextPartition() : "The previous to last partition must be a text partition or something is wrong with the partitioning algorithm."; + + lastTextPartition = previousToLastPartition; + } + + return (TextPartition) lastTextPartition; + } + +} diff --git a/src/main/java/at/gv/egiz/pdfas/impl/vfilter/helper/VerificationFilterTextHelper.java b/src/main/java/at/gv/egiz/pdfas/impl/vfilter/helper/VerificationFilterTextHelper.java new file mode 100644 index 0000000..f9a79b0 --- /dev/null +++ b/src/main/java/at/gv/egiz/pdfas/impl/vfilter/helper/VerificationFilterTextHelper.java @@ -0,0 +1,15 @@ +/** + * + */ +package at.gv.egiz.pdfas.impl.vfilter.helper; + +/** + * Contains helpful methods used by the VerificationFilter to analyze text and + * find text signatures. + * + * @author wprinz + */ +public final class VerificationFilterTextHelper +{ + +} -- cgit v1.2.3