From 3d982813b34f6f230baf4a467cdc37ec92a77595 Mon Sep 17 00:00:00 2001 From: netconomy Date: Fri, 17 Aug 2007 06:10:56 +0000 Subject: Performance git-svn-id: https://joinup.ec.europa.eu/svn/pdf-as/trunk@167 7b5415b0-85f9-ee4d-85bd-d5d0c3b42d1c --- .../at/gv/egiz/pdfas/impl/vfilter/Partition.java | 9 + .../pdfas/impl/vfilter/VerificationFilterImpl.java | 575 +++++++++++++++++++++ .../vfilter/VerificationFilterParametersImpl.java | 67 +++ .../helper/VerificationFilterBinaryHelper.java | 152 ++++++ .../vfilter/helper/VerificationFilterHelper.java | 142 +++++ .../helper/VerificationFilterTextHelper.java | 15 + .../impl/vfilter/partition/BinaryPartition.java | 19 + .../impl/vfilter/partition/TextPartition.java | 20 + 8 files changed, 999 insertions(+) create mode 100644 src/main/java/at/gv/egiz/pdfas/impl/vfilter/Partition.java create mode 100644 src/main/java/at/gv/egiz/pdfas/impl/vfilter/VerificationFilterImpl.java create mode 100644 src/main/java/at/gv/egiz/pdfas/impl/vfilter/VerificationFilterParametersImpl.java create mode 100644 src/main/java/at/gv/egiz/pdfas/impl/vfilter/helper/VerificationFilterBinaryHelper.java create mode 100644 src/main/java/at/gv/egiz/pdfas/impl/vfilter/helper/VerificationFilterHelper.java create mode 100644 src/main/java/at/gv/egiz/pdfas/impl/vfilter/helper/VerificationFilterTextHelper.java create mode 100644 src/main/java/at/gv/egiz/pdfas/impl/vfilter/partition/BinaryPartition.java create mode 100644 src/main/java/at/gv/egiz/pdfas/impl/vfilter/partition/TextPartition.java (limited to 'src/main/java/at/gv/egiz/pdfas/impl/vfilter') diff --git a/src/main/java/at/gv/egiz/pdfas/impl/vfilter/Partition.java b/src/main/java/at/gv/egiz/pdfas/impl/vfilter/Partition.java new file mode 100644 index 0000000..6fe90e5 --- /dev/null +++ b/src/main/java/at/gv/egiz/pdfas/impl/vfilter/Partition.java @@ -0,0 +1,9 @@ +/** + * + */ +package at.gv.egiz.pdfas.impl.vfilter; + +public interface Partition +{ + public boolean isTextPartition(); +} \ No newline at end 
of file diff --git a/src/main/java/at/gv/egiz/pdfas/impl/vfilter/VerificationFilterImpl.java b/src/main/java/at/gv/egiz/pdfas/impl/vfilter/VerificationFilterImpl.java new file mode 100644 index 0000000..981b868 --- /dev/null +++ b/src/main/java/at/gv/egiz/pdfas/impl/vfilter/VerificationFilterImpl.java @@ -0,0 +1,575 @@ +/** + * + */ +package at.gv.egiz.pdfas.impl.vfilter; + +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; + +import at.gv.egiz.pdfas.exceptions.framework.VerificationFilterException; +import at.gv.egiz.pdfas.framework.SignatureHolderHelper; +import at.gv.egiz.pdfas.framework.VerificatorFactory; +import at.gv.egiz.pdfas.framework.input.PdfDataSource; +import at.gv.egiz.pdfas.framework.input.TextDataSource; +import at.gv.egiz.pdfas.framework.verificator.Verificator; +import at.gv.egiz.pdfas.framework.vfilter.VerificationFilter; +import at.gv.egiz.pdfas.framework.vfilter.VerificationFilterParameters; +import at.gv.egiz.pdfas.impl.input.DelimitedInputStream; +import at.gv.egiz.pdfas.impl.input.helper.DataSourceHelper; +import at.gv.egiz.pdfas.impl.vfilter.helper.VerificationFilterBinaryHelper; +import at.gv.egiz.pdfas.impl.vfilter.helper.VerificationFilterHelper; +import at.gv.egiz.pdfas.impl.vfilter.partition.BinaryPartition; +import at.gv.egiz.pdfas.impl.vfilter.partition.TextPartition; +import at.knowcenter.wag.egov.egiz.PdfAS; +import at.knowcenter.wag.egov.egiz.PdfASID; +import at.knowcenter.wag.egov.egiz.exceptions.NormalizeException; +import at.knowcenter.wag.egov.egiz.exceptions.PDFDocumentException; +import at.knowcenter.wag.egov.egiz.exceptions.PresentableException; +import at.knowcenter.wag.egov.egiz.exceptions.SignatureException; +import at.knowcenter.wag.egov.egiz.exceptions.SignatureTypesException; +import at.knowcenter.wag.egov.egiz.pdf.AbsoluteTextSignature; +import at.knowcenter.wag.egov.egiz.pdf.EGIZDate; +import 
at.knowcenter.wag.egov.egiz.pdf.SignatureHolder; +import at.knowcenter.wag.egov.egiz.pdf.TextualSignatureHolder; +import at.knowcenter.wag.exactparser.parsing.results.FooterParseResult; + +/** + * @author wprinz + */ +public class VerificationFilterImpl implements VerificationFilter +{ + + /** + * The log. + */ + private static final Log log = LogFactory.getLog(VerificationFilterImpl.class); + + /** + * @see at.gv.egiz.pdfas.framework.vfilter.VerificationFilter#extractSignatureHolders(at.gv.egiz.pdfas.framework.input.PdfDataSource, + * java.util.List, + * at.gv.egiz.pdfas.framework.vfilter.VerificationFilterParameters) + */ + public List extractSignatureHolders(final PdfDataSource pdf, List blocks, final VerificationFilterParameters parameters) throws VerificationFilterException + { + log.trace("extractSignaturHolders:"); + + if (log.isDebugEnabled()) + { + log.debug("Original IU blocks: " + blocks.size()); + debugIUBlocks(blocks); + } + + unrollLinearization(blocks); + + if (log.isDebugEnabled()) + { + log.debug("IU blocks without linearization: " + blocks.size()); + debugIUBlocks(blocks); + } + + List signatureHolderChain = null; + + if (parameters.extractBinarySignaturesOnly()) + { + log.debug("Extracting only binary signatures. Binary-only mode."); + + signatureHolderChain = performBinaryOnly(pdf, blocks); + } + else + { + List partitions = VerificationFilterHelper.partition(pdf, blocks); + if (log.isDebugEnabled()) + { + debugPartitions(partitions); + } + + if (parameters.assumeOnlySignatureUpdateBlocks()) + { + log.debug("Assuming that there are only signature Incremental Update blocks. Semi-conservative mode."); + + signatureHolderChain = performSemiConservative(pdf, parameters.scanForOldSignatures(), blocks, partitions); + } + else + { + log.debug("Scanning complete document. 
Conservative mode."); + + signatureHolderChain = performFullConservative(pdf, parameters.scanForOldSignatures(), blocks, partitions); + } + + } + + log.trace("extractSignaturHolders finished."); + return signatureHolderChain; + } + + /** + * @see at.gv.egiz.pdfas.framework.vfilter.VerificationFilter#extractSignaturHolders(at.gv.egiz.pdfas.framework.input.TextDataSource, + * at.gv.egiz.pdfas.framework.vfilter.VerificationFilterParameters) + */ + public List extractSignaturHolders(TextDataSource text, VerificationFilterParameters parameters) throws VerificationFilterException + { + if (parameters.extractBinarySignaturesOnly()) + { + log + .warn("A free text signature extraction was issued although the VerificationFilter was configured to detect only binary signatures (binary-only mode). The result is of course that no signatures can be found."); + + return new ArrayList(); + } + + String freetext = text.getText(); + String normalizedText = normalizeText(freetext); + + List foundSignatures = null; + if (parameters.scanForOldSignatures()) + { + log.debug("Extracting old and new signatures from text."); + + foundSignatures = extractNewAndOldSignaturesFromText(normalizedText); + } + else + { + log.debug("Extracting new signatures from text (not extracting old ones)."); + + foundSignatures = extractNewSignaturesFromText(normalizedText); + } + + List textOnlySignatures = filterOutBinarySignatures(foundSignatures); + + return textOnlySignatures; + } + + protected String normalizeText(String freetext) throws VerificationFilterException + { + try + { + return PdfAS.normalizeText(freetext); + } + catch (NormalizeException e) + { + throw new VerificationFilterException(e); + } + } + + /** + * Removes the linearization footer from the list of update blocks. + * + * @param blocks + * The list of FooterParseResult objects in \prev order. 
+ */ + protected void unrollLinearization(List blocks) + { + int linearization_index = -1; + for (int i = 0; i < blocks.size(); i++) + { + FooterParseResult bpr = (FooterParseResult) blocks.get(i); + + if (bpr.sxpr.xref_index == 0) + { + if (linearization_index >= 0) + { + throw new RuntimeException("There is more than one linearization block! index = " + i); + } + linearization_index = i; + } + } + + if (linearization_index >= 0) + { + // logger_.debug("The document is linearized - unrolling + // linearization block " + linearization_index); + blocks.remove(linearization_index); + } + } + + protected List performBinaryOnly(PdfDataSource pdf, List blocks) throws VerificationFilterException + { + return extractBinarySignaturesOnly(pdf, blocks); + } + + protected List performSemiConservative(PdfDataSource pdf, boolean scanForOldSignatures, List blocks, List partitions) throws VerificationFilterException + { + List binarySignatures = extractBinarySignaturesOnly(pdf, blocks); + + TextPartition lastTextPartition = VerificationFilterHelper.findLastTextPartition(partitions); + List extractedSignatures = null; + if (scanForOldSignatures) + { + SignaturesAndOld sao = extractSignaturesFromPartitionAndOld(pdf, lastTextPartition); + extractedSignatures = sao.newSignatures; + if (sao.oldSignature != null) + { + extractedSignatures.add(0, sao.oldSignature); + } + } + else + { + extractedSignatures = extractSignaturesFromPartition(pdf, lastTextPartition); + } + + List signatureHolderChain = intermingleSignatures(binarySignatures, extractedSignatures); + + return signatureHolderChain; + } + + protected List performFullConservative(PdfDataSource pdf, boolean scanForOldSignatures, List blocks, List partitions) throws VerificationFilterException + { + List binarySignatures = extractBinarySignaturesOnly(pdf, blocks); + + SignatureHolder oldSignature = null; + + List partitionResults = new ArrayList(partitions.size()); + for (int i = 0; i < partitions.size(); i++) + { + Partition p = 
(Partition) partitions.get(i); + + if (p instanceof TextPartition) + { + TextPartition tp = (TextPartition) p; + + List partitionResult = null; + + boolean scanThisPartitionForOldSignature = (i == 0) && scanForOldSignatures; + if (scanThisPartitionForOldSignature) + { + SignaturesAndOld sao = extractSignaturesFromPartitionAndOld(pdf, tp); + partitionResult = sao.newSignatures; + oldSignature = sao.oldSignature; + } + else + { + partitionResult = extractSignaturesFromPartition(pdf, tp); + } + + partitionResults.add(partitionResult); + } + } + + List extractedSignatures = new ArrayList(); + Iterator it = partitionResults.iterator(); + List prevPartitionResult = null; + while (it.hasNext()) + { + List partitionResult = (List) it.next(); + + if (prevPartitionResult == null) + { + extractedSignatures.addAll(partitionResult); + } + else + { + assert partitionResult.size() >= prevPartitionResult.size(); + + for (int i = prevPartitionResult.size(); i < partitionResult.size(); i++) + { + SignatureHolder sh = (SignatureHolder) partitionResult.get(i); + extractedSignatures.add(sh); + } + } + + prevPartitionResult = partitionResult; + } + + List signatureHolderChain = intermingleSignatures(binarySignatures, extractedSignatures); + + if (oldSignature != null) + { + signatureHolderChain.add(0, oldSignature); + } + + return signatureHolderChain; + } + + protected String extractText(PdfDataSource pdf, int endOfDocument) throws PresentableException + { + DelimitedInputStream dis = new DelimitedInputStream(pdf.createInputStream(), endOfDocument); + return PdfAS.extractNormalizedTextTextual(dis); + } + + protected List extractNewSignaturesFromText(String text) throws VerificationFilterException + { + try + { + return AbsoluteTextSignature.extractSignatureHoldersFromText(text); + } + catch (PresentableException e) + { + throw new VerificationFilterException(e); + } + } + + protected List extractNewAndOldSignaturesFromText(String text) throws VerificationFilterException + { + 
SignaturesAndOld sao = extractSignaturesAndOld(text); + if (sao.oldSignature != null) + { + sao.newSignatures.add(0, sao.oldSignature); + } + + return sao.newSignatures; + } + + protected List extractOldSignaturesFromText(String text) throws PresentableException + { + return PdfAS.extractSignatureHoldersTextual(text, true); + } + + protected List intermingleSignatures(List binarySignatures, List extractedSignatures) + { + List textualSignatures = filterOutBinarySignatures(extractedSignatures); + + List intermingled = new ArrayList(binarySignatures.size() + textualSignatures.size()); + intermingled.addAll(binarySignatures); + intermingled.addAll(textualSignatures); + + sortSignatures(intermingled); + + return intermingled; + } + + protected List filterOutBinarySignatures(List signatures) + { + List textOnly = new ArrayList(signatures.size()); + + Iterator it = signatures.iterator(); + while (it.hasNext()) + { + SignatureHolder sh = (SignatureHolder) it.next(); + if (sh.getSignatureObject().isTextual()) + { + textOnly.add(sh); + } + } + + return textOnly; + } + + protected void sortSignatures(List signatures) + { + SignatureHolderHelper.sortByDate(signatures); + } + + protected void debugIUBlocks(List blocks) + { + Iterator it = blocks.iterator(); + while (it.hasNext()) + { + FooterParseResult fpr = (FooterParseResult) it.next(); + log.debug("footer: " + fpr.start_index + " to " + fpr.next_index + ", has predecessor = " + fpr.tpr.has_predecessor); + } + } + + protected void debugPartitions(List partitions) + { + Iterator it = partitions.iterator(); + while (it.hasNext()) + { + Object o = it.next(); + assert o instanceof Partition; + + List blocks = null; + if (o instanceof TextPartition) + { + TextPartition tp = (TextPartition) o; + + blocks = tp.blocks; + + log.debug("text partition with " + tp.blocks.size() + " blocks:"); + } + else + { + BinaryPartition bp = (BinaryPartition) o; + + blocks = bp.blocks; + + log.debug("binary partition: with " + bp.blocks.size() + " 
blocks:"); + + } + debugIUBlocks(blocks); + log.debug("partition finished."); + } + } + + /** + * Extracts the binary singatures from the given PDF. + * + *

+ * IU blocks without an egiz dict are not considered. + *

+ * + * @param pdf + * @param blocks + * @return Returns the List of signature holders. + * @throws PresentableException + */ + protected List extractBinarySignaturesOnly(PdfDataSource pdf, List blocks) throws VerificationFilterException + { + try + { + // PERF: extract binary signatures needs byte array + byte[] data = DataSourceHelper.convertDataSourceToByteArray(pdf); + + List binarySignatures = new ArrayList(blocks.size()); + + Iterator it = blocks.iterator(); + int prev_end = 0; + while (it.hasNext()) + { + FooterParseResult fpr = (FooterParseResult) it.next(); + assert fpr.next_index > prev_end; + + if (VerificationFilterBinaryHelper.containsEGIZDict(data, fpr)) + { + PdfASID kz = VerificationFilterBinaryHelper.extractKZFromEGIZBlock(data, fpr); + + Verificator verificator = VerificatorFactory.createBinaryVerificator(kz); + List binary_holders = verificator.parseBlock(pdf, data, fpr, prev_end); + + binarySignatures.addAll(binary_holders); + } + + prev_end = fpr.next_index; + } + + return binarySignatures; + } + catch (PresentableException e) + { + throw new VerificationFilterException(e); + } + } + + protected List extractSignatures(PdfDataSource pdf, int endOfDocument) throws VerificationFilterException + { + try + { + log.debug("Extracting text from 0 to " + endOfDocument + " (total document size = " + pdf.getLength() + "):"); + String extractedText = extractText(pdf, endOfDocument); + log.debug("Extracting text finished."); + + log.debug("Extracting signatures:"); + List extractedSignatures = extractNewSignaturesFromText(extractedText); + log.debug("Extracting signatures finished."); + + return extractedSignatures; + } + catch (PresentableException e) + { + throw new VerificationFilterException(e); + } + } + + protected String determineRestText(List newSignatures, String extractedText) + { + if (newSignatures.isEmpty()) + { + return extractedText; + } + + // note that even if the oldest signature is a binary signature, + // the rest text is the text of 
this binary signature, which was extracted + // like a text signature. + TextualSignatureHolder oldestSignature = (TextualSignatureHolder) newSignatures.get(0); + return oldestSignature.getSignedText(); + } + + protected List extractSignaturesFromPartition(PdfDataSource pdf, Partition partition) throws VerificationFilterException + { + assert partition.isTextPartition(); + + int endOfDocument = VerificationFilterHelper.getEndOfPartition(partition); + return extractSignatures(pdf, endOfDocument); + } + + protected SignaturesAndOld extractSignaturesFromPartitionAndOld(PdfDataSource pdf, Partition partition) throws VerificationFilterException + { + assert partition.isTextPartition(); + + try + { + int endOfDocument = VerificationFilterHelper.getEndOfPartition(partition); + + log.debug("Extracting text from 0 to " + endOfDocument + " (total document size = " + pdf.getLength() + "):"); + String extractedText = extractText(pdf, endOfDocument); + log.debug("Extracting text finished."); + + SignaturesAndOld sao = extractSignaturesAndOld(extractedText); + + return sao; + } + catch (PresentableException e) + { + throw new VerificationFilterException(e); + } + } + + protected static class SignaturesAndOld + { + public List newSignatures = null; + + public SignatureHolder oldSignature = null; + } + + protected SignaturesAndOld extractSignaturesAndOld(String text) throws VerificationFilterException + { + try + { + log.debug("Extracting signatures:"); + List extractedSignatures = extractNewSignaturesFromText(text); + log.debug("Extracting signatures finished."); + + log.debug("Extracting old signatures:"); + SignatureHolder oldSignature = extractOldSignature(text, extractedSignatures); + log.debug("Extracting old signatures finished."); + + SignaturesAndOld sao = new SignaturesAndOld(); + sao.newSignatures = extractedSignatures; + sao.oldSignature = oldSignature; + + return sao; + } + catch (PresentableException e) + { + throw new VerificationFilterException(e); + } + } + + /** 
+ * Extracts the old signature from the text, but only if it is older than the + * oldest signature of the new signatueres. + * + * @param extractedText + * @param newSignatures + * @return + * @throws PDFDocumentException + * @throws SignatureException + * @throws NormalizeException + * @throws SignatureTypesException + */ + protected SignatureHolder extractOldSignature(String extractedText, List newSignatures) throws PDFDocumentException, SignatureException, NormalizeException, SignatureTypesException + { + SignatureHolder oldSignature = null; + + String restText = determineRestText(newSignatures, extractedText); + + List oldSignatures = PdfAS.extractSignatureHoldersTextual(restText, true); + if (!oldSignatures.isEmpty()) + { + oldSignature = (SignatureHolder) oldSignatures.get(0); + if (!newSignatures.isEmpty()) + { + SignatureHolder oldestNewSignature = (SignatureHolder) newSignatures.get(0); + EGIZDate oldDate = EGIZDate.parseFromString(oldSignature.getSignatureObject().getSignationDate()); + EGIZDate newDate = EGIZDate.parseFromString(oldestNewSignature.getSignatureObject().getSignationDate()); + if (newDate.compareTo(oldDate) <= 0) + { + oldSignature = null; + } + } + } + return oldSignature; + } +} diff --git a/src/main/java/at/gv/egiz/pdfas/impl/vfilter/VerificationFilterParametersImpl.java b/src/main/java/at/gv/egiz/pdfas/impl/vfilter/VerificationFilterParametersImpl.java new file mode 100644 index 0000000..292e816 --- /dev/null +++ b/src/main/java/at/gv/egiz/pdfas/impl/vfilter/VerificationFilterParametersImpl.java @@ -0,0 +1,67 @@ +/** + * + */ +package at.gv.egiz.pdfas.impl.vfilter; + +import java.io.Serializable; + +import at.gv.egiz.pdfas.framework.vfilter.VerificationFilterParameters; + +/** + * @author wprinz + * + */ +public class VerificationFilterParametersImpl implements VerificationFilterParameters, Serializable +{ + /** + * SVUID. 
+ */ + private static final long serialVersionUID = -7118403150485416046L; + + protected boolean extractBinarySignaturesOnly = false; + + protected boolean assumeOnlySignatureUpdateBlocks = false; + + protected boolean scanForOldSignatures = true; + + public VerificationFilterParametersImpl(boolean extractBinarySignaturesOnly, boolean assumeOnlySignatureUpdateBlocks, boolean scanForOldSignatures) + { + this.extractBinarySignaturesOnly = extractBinarySignaturesOnly; + this.assumeOnlySignatureUpdateBlocks = assumeOnlySignatureUpdateBlocks; + this.scanForOldSignatures = scanForOldSignatures; + } + + /** + * @see at.gv.egiz.pdfas.framework.vfilter.VerificationFilterParameters#extractBinarySignaturesOnly() + */ + public boolean extractBinarySignaturesOnly() + { + return this.extractBinarySignaturesOnly; + } + + /** + * @see at.gv.egiz.pdfas.framework.vfilter.VerificationFilterParameters#assumeOnlySignatureUpdateBlocks() + */ + public boolean assumeOnlySignatureUpdateBlocks() + { + return this.assumeOnlySignatureUpdateBlocks; + } + + + /** + * @see at.gv.egiz.pdfas.framework.vfilter.VerificationFilterParameters#scanForOldSignatures() + */ + public boolean scanForOldSignatures() + { + return this.scanForOldSignatures; + } + + /** + * @see java.lang.Object#toString() + */ + // @override + public String toString() + { + return "{VerificationFilterParametersImpl: extractBinarySignaturesOnly = " + extractBinarySignaturesOnly() + ", assumeOnlySignatureUpdateBlocks = " + assumeOnlySignatureUpdateBlocks() + "}"; + } +} diff --git a/src/main/java/at/gv/egiz/pdfas/impl/vfilter/helper/VerificationFilterBinaryHelper.java b/src/main/java/at/gv/egiz/pdfas/impl/vfilter/helper/VerificationFilterBinaryHelper.java new file mode 100644 index 0000000..b7f36d1 --- /dev/null +++ b/src/main/java/at/gv/egiz/pdfas/impl/vfilter/helper/VerificationFilterBinaryHelper.java @@ -0,0 +1,152 @@ +/** + * + */ +package at.gv.egiz.pdfas.impl.vfilter.helper; + +import java.io.IOException; +import 
java.util.ArrayList; +import java.util.List; + +import at.gv.egiz.pdfas.exceptions.ErrorCode; + +import at.knowcenter.wag.egov.egiz.PdfASID; +import at.knowcenter.wag.egov.egiz.exceptions.InvalidIDException; +import at.knowcenter.wag.egov.egiz.exceptions.PDFDocumentException; +import at.knowcenter.wag.egov.egiz.pdf.BinarySignature; +import at.knowcenter.wag.egov.egiz.pdf.Placeholder; +import at.knowcenter.wag.egov.egiz.pdf.StringInfo; +import at.knowcenter.wag.exactparser.parsing.IndirectObjectReference; +import at.knowcenter.wag.exactparser.parsing.PDFUtils; +import at.knowcenter.wag.exactparser.parsing.results.ArrayParseResult; +import at.knowcenter.wag.exactparser.parsing.results.DictionaryParseResult; +import at.knowcenter.wag.exactparser.parsing.results.FooterParseResult; +import at.knowcenter.wag.exactparser.parsing.results.IndirectObjectReferenceParseResult; +import at.knowcenter.wag.exactparser.parsing.results.NumberParseResult; +import at.knowcenter.wag.exactparser.parsing.results.ObjectParseResult; + +/** + * Contains helpful methods used by the VerificationFilter to analyze the PDF for binary signatures. + * + * @author wprinz + */ +public final class VerificationFilterBinaryHelper +{ + /** + * The name of the egiz dict key. + */ + public static final byte[] EGIZ_DICT_NAME = { 'E', 'G', 'I', 'Z', 'S', 'i', 'g', 'D', 'i', 'c', 't' }; + + /** + * The name of the ID (SIG_KZ) property in the egiz dict. + */ + public static final byte[] EGIZ_KZ_NAME = { 'I', 'D' }; + + /** + * Tells, if the given incremental update block contains a binary signature. + * + *

+ * By definition, a binary block cannot contain any signature other than + * this one. + *

+ * @param block + * The incremental update block. + * @return Returns true, if this block is a binary signature block, false + * otherwise. Any non-negative name index (including 0) counts as found. + */ + public static boolean containsEGIZDict(final byte[] pdf, final FooterParseResult block) + { + int dict_index = PDFUtils.indexOfName(pdf, block.tpr.dpr.names, EGIZ_DICT_NAME); + if (dict_index < 0) + { + return false; + } + + return true; + } + + /** + * Extracts the PDF AS ID of the egiz block. + * + * @param pdf + * The pdf. + * @param block + * The IU block. + * @return Returns the extracted PDF AS ID. + * @throws PDFDocumentException + * Forwarded exception. + * @throws InvalidIDException + * Forwarded exception. + */ + public static PdfASID extractKZFromEGIZBlock(final byte[] pdf, final FooterParseResult block) throws PDFDocumentException, InvalidIDException + { + int egiz_index = PDFUtils.indexOfName(pdf, block.tpr.dpr.names, EGIZ_DICT_NAME); + if (egiz_index < 0) + { + throw new PDFDocumentException(301, "egiz_index = " + egiz_index); + } + + IndirectObjectReferenceParseResult egiz_dict_iorpr = (IndirectObjectReferenceParseResult) block.tpr.dpr.values.get(egiz_index); + // logger_.debug("egiz_dict_ir = " + egiz_dict_iorpr.ior.object_number + // + " " + egiz_dict_iorpr.ior.generation_number); + + IndirectObjectReference ior = egiz_dict_iorpr.ior; + + final int egiz_dict_offset = PDFUtils.getObjectOffsetFromXRefByIndirectObjectReference(block.xpr, ior); + // logger_.debug("egiz_dict_offset = " + egiz_dict_offset); + + ObjectParseResult obj = PDFUtils.parseObject(pdf, egiz_dict_offset); + DictionaryParseResult egiz_dict = (DictionaryParseResult) obj.object; + + int kz_index = PDFUtils.indexOfName(pdf, egiz_dict.names, EGIZ_KZ_NAME); + if (kz_index < 0) + { + throw new PDFDocumentException(301, "kz_index = " + kz_index); + } + ArrayParseResult kz_apr = (ArrayParseResult) egiz_dict.values.get(kz_index); + + String kz_string = restoreKZ(pdf, kz_apr); + PdfASID kz = new PdfASID(kz_string); + + return kz; + } + + /** 
+ * Restores the Kennzeichnung String from an Array. + * + * @param pdf + * The PDF. + * @param kz_apr + * The Array, as parsed from the EGIZ Dict. + * @return Returns the restored KZ. + * @throws PDFDocumentException + * Forwarded exception. + */ + public static String restoreKZ(byte[] pdf, ArrayParseResult kz_apr) throws PDFDocumentException + { + try + { + List partition = new ArrayList(); + + for (int i = 0; i < kz_apr.elements.size() / 2; i++) + { + NumberParseResult start_npr = (NumberParseResult) kz_apr.elements.get(i * 2); + NumberParseResult length_npr = (NumberParseResult) kz_apr.elements.get(i * 2 + 1); + + StringInfo si = new StringInfo(); + si.string_start = start_npr.number; + si.string_length = length_npr.number; + + partition.add(si); + } + + String KZ = Placeholder.reconstructStringFromPartition(pdf, partition, BinarySignature.ENCODING_WIN); + return KZ; + } + catch (IOException e1) + { + throw new PDFDocumentException(ErrorCode.DOCUMENT_CANNOT_BE_READ, e1); + } + } + +} diff --git a/src/main/java/at/gv/egiz/pdfas/impl/vfilter/helper/VerificationFilterHelper.java b/src/main/java/at/gv/egiz/pdfas/impl/vfilter/helper/VerificationFilterHelper.java new file mode 100644 index 0000000..67af129 --- /dev/null +++ b/src/main/java/at/gv/egiz/pdfas/impl/vfilter/helper/VerificationFilterHelper.java @@ -0,0 +1,142 @@ +/** + * + */ +package at.gv.egiz.pdfas.impl.vfilter.helper; + +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; + +import at.gv.egiz.pdfas.exceptions.framework.VerificationFilterException; +import at.gv.egiz.pdfas.impl.input.helper.DataSourceHelper; +import at.gv.egiz.pdfas.impl.vfilter.Partition; +import at.gv.egiz.pdfas.impl.vfilter.partition.BinaryPartition; +import at.gv.egiz.pdfas.impl.vfilter.partition.TextPartition; +import at.gv.egiz.pdfas.framework.input.PdfDataSource; +import at.knowcenter.wag.exactparser.parsing.results.FooterParseResult; + +/** + * Contains helpful methods used by the VerificationFilter. 
+ * + * @author wprinz + */ +public final class VerificationFilterHelper +{ + /** + * Partitions the list of Incremental Update blocks into text and binary + * partitions. + * + *

+ * A partition is a sequence of Incremental Update blocks of the same type. + *

+ *

+ * An Incremental Update block is considered to have the type "binary" if it + * contains an egiz dictionary. A block not containing an egiz dictionary is + * considered to have the type "text". + *

+ * + * @param pdf + * The PDF. + * @param blocks + * The Incremental Update blocks. + * @return Returns the partitioning of the blocks. + * @throws VerificationFilterException + * Thrown if something goes wrong. + */ + public static List partition(PdfDataSource pdf, List blocks) throws VerificationFilterException + { + List partitions = new ArrayList(blocks.size()); + + /* PERF: convert the data source once up front - pdf is not modified inside the loop. */ + byte[] data = DataSourceHelper.convertDataSourceToByteArray(pdf); + Iterator it = blocks.iterator(); + while (it.hasNext()) + { + FooterParseResult fpr = (FooterParseResult) it.next(); + if (VerificationFilterBinaryHelper.containsEGIZDict(data, fpr)) + { + BinaryPartition bp = null; + if (partitions.isEmpty() || ((Partition) partitions.get(partitions.size() - 1)).isTextPartition()) + { + bp = new BinaryPartition(); + bp.blocks = new ArrayList(blocks.size()); + partitions.add(bp); + } + else + { + bp = (BinaryPartition) partitions.get(partitions.size() - 1); + } + assert bp != null; + + bp.blocks.add(fpr); + } + else + { + TextPartition tp = null; + if (partitions.isEmpty() || !((Partition) partitions.get(partitions.size() - 1)).isTextPartition()) + { + tp = new TextPartition(); + tp.blocks = new ArrayList(blocks.size()); + partitions.add(tp); + } + else + { + tp = (TextPartition) partitions.get(partitions.size() - 1); + } + assert tp != null; + + tp.blocks.add(fpr); + } + } + + assert partitions.size() >= 1 : "There must be at least one partition"; + + return partitions; + } + + /** + * Determines the end of the given partition. + * + * @param partition + * The partition. + * @return Returns the end index of the given partition. 
+ */ + public static int getEndOfPartition(Partition partition) + { + List blocks = null; + if (partition instanceof TextPartition) + { + blocks = ((TextPartition) partition).blocks; + } + else + { + blocks = ((BinaryPartition) partition).blocks; + } + + return ((FooterParseResult) blocks.get(blocks.size() - 1)).next_index; + } + + /** + * Finds the last text partition in the given list of partitions. + * + * @param partitions + * The partitions. + * @return Returns the last TextPartition. + */ + public static TextPartition findLastTextPartition(List partitions) + { + Partition lastTextPartition = (Partition) partitions.get(partitions.size() - 1); + + if (!lastTextPartition.isTextPartition()) + { + assert partitions.size() > 1 : "The only one partition cannot be a binary partition - where is the original document?"; + Partition previousToLastPartition = (Partition) partitions.get(partitions.size() - 2); + assert previousToLastPartition.isTextPartition() : "The previous to last partition must be a text partition or something is wrong with the partitioning algorithm."; + + lastTextPartition = previousToLastPartition; + } + + return (TextPartition) lastTextPartition; + } + +} diff --git a/src/main/java/at/gv/egiz/pdfas/impl/vfilter/helper/VerificationFilterTextHelper.java b/src/main/java/at/gv/egiz/pdfas/impl/vfilter/helper/VerificationFilterTextHelper.java new file mode 100644 index 0000000..f9a79b0 --- /dev/null +++ b/src/main/java/at/gv/egiz/pdfas/impl/vfilter/helper/VerificationFilterTextHelper.java @@ -0,0 +1,15 @@ +/** + * + */ +package at.gv.egiz.pdfas.impl.vfilter.helper; + +/** + * Contains helpful methods used by the VerificationFilter to analyze text and + * find text signatures. 
+ * + * @author wprinz + */ +public final class VerificationFilterTextHelper +{ + +} diff --git a/src/main/java/at/gv/egiz/pdfas/impl/vfilter/partition/BinaryPartition.java b/src/main/java/at/gv/egiz/pdfas/impl/vfilter/partition/BinaryPartition.java new file mode 100644 index 0000000..520f3b1 --- /dev/null +++ b/src/main/java/at/gv/egiz/pdfas/impl/vfilter/partition/BinaryPartition.java @@ -0,0 +1,19 @@ +/** + * + */ +package at.gv.egiz.pdfas.impl.vfilter.partition; + +import java.util.List; + +import at.gv.egiz.pdfas.impl.vfilter.Partition; + + +public class BinaryPartition implements Partition +{ + public List blocks = null; + + public boolean isTextPartition() + { + return false; + } +} \ No newline at end of file diff --git a/src/main/java/at/gv/egiz/pdfas/impl/vfilter/partition/TextPartition.java b/src/main/java/at/gv/egiz/pdfas/impl/vfilter/partition/TextPartition.java new file mode 100644 index 0000000..bf17633 --- /dev/null +++ b/src/main/java/at/gv/egiz/pdfas/impl/vfilter/partition/TextPartition.java @@ -0,0 +1,20 @@ +/** + * + */ +package at.gv.egiz.pdfas.impl.vfilter.partition; + +import java.util.List; + +import at.gv.egiz.pdfas.impl.vfilter.Partition; + + +public class TextPartition implements Partition +{ + public List blocks = null; + + public boolean isTextPartition() + { + return true; + } + +} \ No newline at end of file -- cgit v1.2.3