/** * */ package at.gv.egiz.pdfas.impl.vfilter; import java.util.ArrayList; import java.util.Iterator; import java.util.List; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import at.gv.egiz.pdfas.exceptions.framework.VerificationFilterException; import at.gv.egiz.pdfas.framework.SignatureHolderHelper; import at.gv.egiz.pdfas.framework.VerificatorFactory; import at.gv.egiz.pdfas.framework.input.PdfDataSource; import at.gv.egiz.pdfas.framework.input.TextDataSource; import at.gv.egiz.pdfas.framework.verificator.Verificator; import at.gv.egiz.pdfas.framework.vfilter.VerificationFilter; import at.gv.egiz.pdfas.framework.vfilter.VerificationFilterParameters; import at.gv.egiz.pdfas.impl.input.DelimitedInputStream; import at.gv.egiz.pdfas.impl.input.helper.DataSourceHelper; import at.gv.egiz.pdfas.impl.vfilter.helper.VerificationFilterBinaryHelper; import at.gv.egiz.pdfas.impl.vfilter.helper.VerificationFilterHelper; import at.gv.egiz.pdfas.impl.vfilter.partition.BinaryPartition; import at.gv.egiz.pdfas.impl.vfilter.partition.TextPartition; import at.knowcenter.wag.egov.egiz.PdfAS; import at.knowcenter.wag.egov.egiz.PdfASID; import at.knowcenter.wag.egov.egiz.exceptions.NormalizeException; import at.knowcenter.wag.egov.egiz.exceptions.PDFDocumentException; import at.knowcenter.wag.egov.egiz.exceptions.PresentableException; import at.knowcenter.wag.egov.egiz.exceptions.SignatureException; import at.knowcenter.wag.egov.egiz.exceptions.SignatureTypesException; import at.knowcenter.wag.egov.egiz.pdf.AbsoluteTextSignature; import at.knowcenter.wag.egov.egiz.pdf.EGIZDate; import at.knowcenter.wag.egov.egiz.pdf.SignatureHolder; import at.knowcenter.wag.egov.egiz.pdf.TextualSignatureHolder; import at.knowcenter.wag.exactparser.parsing.results.FooterParseResult; /** * @author wprinz */ public class VerificationFilterImpl implements VerificationFilter { /** * The log. */ private static final Log log = LogFactory.getLog(VerificationFilterImpl.class); /** * @see at.gv.egiz.pdfas.framework.vfilter.VerificationFilter#extractSignatureHolders(at.gv.egiz.pdfas.framework.input.PdfDataSource, * java.util.List, * at.gv.egiz.pdfas.framework.vfilter.VerificationFilterParameters) */ public List extractSignatureHolders(final PdfDataSource pdf, List blocks, final VerificationFilterParameters parameters) throws VerificationFilterException { log.trace("extractSignaturHolders:"); if (log.isDebugEnabled()) { log.debug("Original IU blocks: " + blocks.size()); debugIUBlocks(blocks); } unrollLinearization(blocks); if (log.isDebugEnabled()) { log.debug("IU blocks without linearization: " + blocks.size()); debugIUBlocks(blocks); } List signatureHolderChain = null; if (parameters.extractBinarySignaturesOnly()) { log.debug("Extracting only binary signatures. Binary-only mode."); signatureHolderChain = performBinaryOnly(pdf, blocks); } else { List partitions = VerificationFilterHelper.partition(pdf, blocks); if (log.isDebugEnabled()) { debugPartitions(partitions); } if (parameters.assumeOnlySignatureUpdateBlocks()) { log.debug("Assuming that there are only signature Incremental Update blocks. Semi-conservative mode."); signatureHolderChain = performSemiConservative(pdf, parameters.scanForOldSignatures(), blocks, partitions); } else { log.debug("Scanning complete document. Conservative mode."); signatureHolderChain = performFullConservative(pdf, parameters.scanForOldSignatures(), blocks, partitions); } } log.trace("extractSignaturHolders finished."); return signatureHolderChain; } /** * @see at.gv.egiz.pdfas.framework.vfilter.VerificationFilter#extractSignaturHolders(at.gv.egiz.pdfas.framework.input.TextDataSource, * at.gv.egiz.pdfas.framework.vfilter.VerificationFilterParameters) */ public List extractSignaturHolders(TextDataSource text, VerificationFilterParameters parameters) throws VerificationFilterException { if (parameters.extractBinarySignaturesOnly()) { log .warn("A free text signature extraction was issued although the VerificationFilter was configured to detect only binary signatures (binary-only mode). The result is of course that no signatures can be found."); return new ArrayList(); } String freetext = text.getText(); String normalizedText = normalizeText(freetext); List foundSignatures = null; if (parameters.scanForOldSignatures()) { log.debug("Extracting old and new signatures from text."); foundSignatures = extractNewAndOldSignaturesFromText(normalizedText); } else { log.debug("Extracting new signatures from text (not extracting old ones)."); foundSignatures = extractNewSignaturesFromText(normalizedText); } List textOnlySignatures = filterOutBinarySignatures(foundSignatures); return textOnlySignatures; } protected String normalizeText(String freetext) throws VerificationFilterException { try { return PdfAS.normalizeText(freetext); } catch (NormalizeException e) { throw new VerificationFilterException(e); } } /** * Removes the linearization footer from the list of update blocks. * * @param blocks * The list of FooterParseResult objects in \prev order. */ protected void unrollLinearization(List blocks) { int linearization_index = -1; for (int i = 0; i < blocks.size(); i++) { FooterParseResult bpr = (FooterParseResult) blocks.get(i); if (bpr.sxpr.xref_index == 0) { if (linearization_index >= 0) { throw new RuntimeException("There is more than one linearization block! index = " + i); } linearization_index = i; } } if (linearization_index >= 0) { // logger_.debug("The document is linearized - unrolling // linearization block " + linearization_index); blocks.remove(linearization_index); } } protected List performBinaryOnly(PdfDataSource pdf, List blocks) throws VerificationFilterException { return extractBinarySignaturesOnly(pdf, blocks); } protected List performSemiConservative(PdfDataSource pdf, boolean scanForOldSignatures, List blocks, List partitions) throws VerificationFilterException { List binarySignatures = extractBinarySignaturesOnly(pdf, blocks); TextPartition lastTextPartition = VerificationFilterHelper.findLastTextPartition(partitions); List extractedSignatures = null; if (scanForOldSignatures) { SignaturesAndOld sao = extractSignaturesFromPartitionAndOld(pdf, lastTextPartition); extractedSignatures = sao.newSignatures; if (sao.oldSignature != null) { extractedSignatures.add(0, sao.oldSignature); } } else { extractedSignatures = extractSignaturesFromPartition(pdf, lastTextPartition); } List signatureHolderChain = intermingleSignatures(binarySignatures, extractedSignatures); return signatureHolderChain; } protected List performFullConservative(PdfDataSource pdf, boolean scanForOldSignatures, List blocks, List partitions) throws VerificationFilterException { List binarySignatures = extractBinarySignaturesOnly(pdf, blocks); SignatureHolder oldSignature = null; List partitionResults = new ArrayList(partitions.size()); for (int i = 0; i < partitions.size(); i++) { Partition p = (Partition) partitions.get(i); if (p instanceof TextPartition) { TextPartition tp = (TextPartition) p; List partitionResult = null; boolean scanThisPartitionForOldSignature = (i == 0) && scanForOldSignatures; if (scanThisPartitionForOldSignature) { SignaturesAndOld sao = extractSignaturesFromPartitionAndOld(pdf, tp); partitionResult = sao.newSignatures; oldSignature = sao.oldSignature; } else { partitionResult = extractSignaturesFromPartition(pdf, tp); } partitionResults.add(partitionResult); } } List extractedSignatures = new ArrayList(); Iterator it = partitionResults.iterator(); List prevPartitionResult = null; while (it.hasNext()) { List partitionResult = (List) it.next(); if (prevPartitionResult == null) { extractedSignatures.addAll(partitionResult); } else { assert partitionResult.size() >= prevPartitionResult.size(); for (int i = prevPartitionResult.size(); i < partitionResult.size(); i++) { SignatureHolder sh = (SignatureHolder) partitionResult.get(i); extractedSignatures.add(sh); } } prevPartitionResult = partitionResult; } List signatureHolderChain = intermingleSignatures(binarySignatures, extractedSignatures); if (oldSignature != null) { signatureHolderChain.add(0, oldSignature); } return signatureHolderChain; } protected String extractText(PdfDataSource pdf, int endOfDocument) throws PresentableException { DelimitedInputStream dis = new DelimitedInputStream(pdf.createInputStream(), endOfDocument); return PdfAS.extractNormalizedTextTextual(dis); } protected List extractNewSignaturesFromText(String text) throws VerificationFilterException { try { return AbsoluteTextSignature.extractSignatureHoldersFromText(text); } catch (PresentableException e) { throw new VerificationFilterException(e); } } protected List extractNewAndOldSignaturesFromText(String text) throws VerificationFilterException { SignaturesAndOld sao = extractSignaturesAndOld(text); if (sao.oldSignature != null) { sao.newSignatures.add(0, sao.oldSignature); } return sao.newSignatures; } protected List extractOldSignaturesFromText(String text) throws PresentableException { return PdfAS.extractSignatureHoldersTextual(text, true); } protected List intermingleSignatures(List binarySignatures, List extractedSignatures) { List textualSignatures = filterOutBinarySignatures(extractedSignatures); List intermingled = new ArrayList(binarySignatures.size() + textualSignatures.size()); intermingled.addAll(binarySignatures); intermingled.addAll(textualSignatures); sortSignatures(intermingled); return intermingled; } protected List filterOutBinarySignatures(List signatures) { List textOnly = new ArrayList(signatures.size()); Iterator it = signatures.iterator(); while (it.hasNext()) { SignatureHolder sh = (SignatureHolder) it.next(); if (sh.getSignatureObject().isTextual()) { textOnly.add(sh); } } return textOnly; } protected void sortSignatures(List signatures) { SignatureHolderHelper.sortByDate(signatures); } protected void debugIUBlocks(List blocks) { Iterator it = blocks.iterator(); while (it.hasNext()) { FooterParseResult fpr = (FooterParseResult) it.next(); log.debug("footer: " + fpr.start_index + " to " + fpr.next_index + ", has predecessor = " + fpr.tpr.has_predecessor); } } protected void debugPartitions(List partitions) { Iterator it = partitions.iterator(); while (it.hasNext()) { Object o = it.next(); assert o instanceof Partition; List blocks = null; if (o instanceof TextPartition) { TextPartition tp = (TextPartition) o; blocks = tp.blocks; log.debug("text partition with " + tp.blocks.size() + " blocks:"); } else { BinaryPartition bp = (BinaryPartition) o; blocks = bp.blocks; log.debug("binary partition: with " + bp.blocks.size() + " blocks:"); } debugIUBlocks(blocks); log.debug("partition finished."); } } /** * Extracts the binary singatures from the given PDF. * *

* IU blocks without an egiz dict are not considered. *

* * @param pdf * @param blocks * @return Returns the List of signature holders. * @throws PresentableException */ protected List extractBinarySignaturesOnly(PdfDataSource pdf, List blocks) throws VerificationFilterException { try { // PERF: extract binary signatures needs byte array byte[] data = DataSourceHelper.convertDataSourceToByteArray(pdf); List binarySignatures = new ArrayList(blocks.size()); Iterator it = blocks.iterator(); int prev_end = 0; while (it.hasNext()) { FooterParseResult fpr = (FooterParseResult) it.next(); assert fpr.next_index > prev_end; if (VerificationFilterBinaryHelper.containsEGIZDict(data, fpr)) { PdfASID kz = VerificationFilterBinaryHelper.extractKZFromEGIZBlock(data, fpr); Verificator verificator = VerificatorFactory.createBinaryVerificator(kz); List binary_holders = verificator.parseBlock(pdf, data, fpr, prev_end); binarySignatures.addAll(binary_holders); } prev_end = fpr.next_index; } return binarySignatures; } catch (PresentableException e) { throw new VerificationFilterException(e); } } protected List extractSignatures(PdfDataSource pdf, int endOfDocument) throws VerificationFilterException { try { log.debug("Extracting text from 0 to " + endOfDocument + " (total document size = " + pdf.getLength() + "):"); String extractedText = extractText(pdf, endOfDocument); log.debug("Extracting text finished."); log.debug("Extracting signatures:"); List extractedSignatures = extractNewSignaturesFromText(extractedText); log.debug("Extracting signatures finished."); return extractedSignatures; } catch (PresentableException e) { throw new VerificationFilterException(e); } } protected String determineRestText(List newSignatures, String extractedText) { if (newSignatures.isEmpty()) { return extractedText; } // note that even if the oldest signature is a binary signature, // the rest text is the text of this binary signature, which was extracted // like a text signature. TextualSignatureHolder oldestSignature = (TextualSignatureHolder) newSignatures.get(0); return oldestSignature.getSignedText(); } protected List extractSignaturesFromPartition(PdfDataSource pdf, Partition partition) throws VerificationFilterException { assert partition.isTextPartition(); int endOfDocument = VerificationFilterHelper.getEndOfPartition(partition); return extractSignatures(pdf, endOfDocument); } protected SignaturesAndOld extractSignaturesFromPartitionAndOld(PdfDataSource pdf, Partition partition) throws VerificationFilterException { assert partition.isTextPartition(); try { int endOfDocument = VerificationFilterHelper.getEndOfPartition(partition); log.debug("Extracting text from 0 to " + endOfDocument + " (total document size = " + pdf.getLength() + "):"); String extractedText = extractText(pdf, endOfDocument); log.debug("Extracting text finished."); SignaturesAndOld sao = extractSignaturesAndOld(extractedText); return sao; } catch (PresentableException e) { throw new VerificationFilterException(e); } } protected static class SignaturesAndOld { public List newSignatures = null; public SignatureHolder oldSignature = null; } protected SignaturesAndOld extractSignaturesAndOld(String text) throws VerificationFilterException { try { log.debug("Extracting signatures:"); List extractedSignatures = extractNewSignaturesFromText(text); log.debug("Extracting signatures finished."); log.debug("Extracting old signatures:"); SignatureHolder oldSignature = extractOldSignature(text, extractedSignatures); log.debug("Extracting old signatures finished."); SignaturesAndOld sao = new SignaturesAndOld(); sao.newSignatures = extractedSignatures; sao.oldSignature = oldSignature; return sao; } catch (PresentableException e) { throw new VerificationFilterException(e); } } /** * Extracts the old signature from the text, but only if it is older than the * oldest signature of the new signatueres. * * @param extractedText * @param newSignatures * @return * @throws PDFDocumentException * @throws SignatureException * @throws NormalizeException * @throws SignatureTypesException */ protected SignatureHolder extractOldSignature(String extractedText, List newSignatures) throws PDFDocumentException, SignatureException, NormalizeException, SignatureTypesException { SignatureHolder oldSignature = null; String restText = determineRestText(newSignatures, extractedText); List oldSignatures = PdfAS.extractSignatureHoldersTextual(restText, true); if (!oldSignatures.isEmpty()) { oldSignature = (SignatureHolder) oldSignatures.get(0); if (!newSignatures.isEmpty()) { SignatureHolder oldestNewSignature = (SignatureHolder) newSignatures.get(0); EGIZDate oldDate = EGIZDate.parseFromString(oldSignature.getSignatureObject().getSignationDate()); EGIZDate newDate = EGIZDate.parseFromString(oldestNewSignature.getSignatureObject().getSignationDate()); if (newDate.compareTo(oldDate) <= 0) { oldSignature = null; } } } return oldSignature; } }