aboutsummaryrefslogtreecommitdiff
path: root/src/main/java/at/gv/egiz/pdfas/impl/vfilter/VerificationFilterImpl.java
diff options
context:
space:
mode:
Diffstat (limited to 'src/main/java/at/gv/egiz/pdfas/impl/vfilter/VerificationFilterImpl.java')
-rw-r--r--src/main/java/at/gv/egiz/pdfas/impl/vfilter/VerificationFilterImpl.java575
1 files changed, 575 insertions, 0 deletions
diff --git a/src/main/java/at/gv/egiz/pdfas/impl/vfilter/VerificationFilterImpl.java b/src/main/java/at/gv/egiz/pdfas/impl/vfilter/VerificationFilterImpl.java
new file mode 100644
index 0000000..981b868
--- /dev/null
+++ b/src/main/java/at/gv/egiz/pdfas/impl/vfilter/VerificationFilterImpl.java
@@ -0,0 +1,575 @@
+/**
+ *
+ */
+package at.gv.egiz.pdfas.impl.vfilter;
+
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+
+import at.gv.egiz.pdfas.exceptions.framework.VerificationFilterException;
+import at.gv.egiz.pdfas.framework.SignatureHolderHelper;
+import at.gv.egiz.pdfas.framework.VerificatorFactory;
+import at.gv.egiz.pdfas.framework.input.PdfDataSource;
+import at.gv.egiz.pdfas.framework.input.TextDataSource;
+import at.gv.egiz.pdfas.framework.verificator.Verificator;
+import at.gv.egiz.pdfas.framework.vfilter.VerificationFilter;
+import at.gv.egiz.pdfas.framework.vfilter.VerificationFilterParameters;
+import at.gv.egiz.pdfas.impl.input.DelimitedInputStream;
+import at.gv.egiz.pdfas.impl.input.helper.DataSourceHelper;
+import at.gv.egiz.pdfas.impl.vfilter.helper.VerificationFilterBinaryHelper;
+import at.gv.egiz.pdfas.impl.vfilter.helper.VerificationFilterHelper;
+import at.gv.egiz.pdfas.impl.vfilter.partition.BinaryPartition;
+import at.gv.egiz.pdfas.impl.vfilter.partition.TextPartition;
+import at.knowcenter.wag.egov.egiz.PdfAS;
+import at.knowcenter.wag.egov.egiz.PdfASID;
+import at.knowcenter.wag.egov.egiz.exceptions.NormalizeException;
+import at.knowcenter.wag.egov.egiz.exceptions.PDFDocumentException;
+import at.knowcenter.wag.egov.egiz.exceptions.PresentableException;
+import at.knowcenter.wag.egov.egiz.exceptions.SignatureException;
+import at.knowcenter.wag.egov.egiz.exceptions.SignatureTypesException;
+import at.knowcenter.wag.egov.egiz.pdf.AbsoluteTextSignature;
+import at.knowcenter.wag.egov.egiz.pdf.EGIZDate;
+import at.knowcenter.wag.egov.egiz.pdf.SignatureHolder;
+import at.knowcenter.wag.egov.egiz.pdf.TextualSignatureHolder;
+import at.knowcenter.wag.exactparser.parsing.results.FooterParseResult;
+
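+/**
+ * Implementation of {@link VerificationFilter} that extracts signature
+ * holders either from a PDF document (binary-only, semi-conservative or
+ * conservative mode) or from free text.
+ *
+ * @author wprinz
+ */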
+public class VerificationFilterImpl implements VerificationFilter
+{
+
+ /**
+ * The commons-logging logger used by this class for trace, debug and
+ * warning output.
+ */
+ private static final Log log = LogFactory.getLog(VerificationFilterImpl.class);
+
+ /**
+  * Extracts the chain of signature holders from a PDF document.
+  *
+  * <p>
+  * The linearization block (if any) is removed from <code>blocks</code>
+  * first; note that this modifies the list passed in by the caller. Depending
+  * on the parameters, the extraction then runs in binary-only mode,
+  * semi-conservative mode (only signature update blocks are assumed) or
+  * conservative mode (the complete document is scanned).
+  * </p>
+  *
+  * @param pdf
+  *          The PDF document to examine.
+  * @param blocks
+  *          The list of FooterParseResult objects describing the Incremental
+  *          Update blocks. The list is modified by this method.
+  * @param parameters
+  *          The parameters selecting the extraction mode.
+  * @return Returns the list of signature holders found.
+  * @throws VerificationFilterException
+  *           Forwarded from the mode specific extraction.
+  *
+  * @see at.gv.egiz.pdfas.framework.vfilter.VerificationFilter#extractSignatureHolders(at.gv.egiz.pdfas.framework.input.PdfDataSource,
+  *      java.util.List,
+  *      at.gv.egiz.pdfas.framework.vfilter.VerificationFilterParameters)
+  */
+ public List extractSignatureHolders(final PdfDataSource pdf, List blocks, final VerificationFilterParameters parameters) throws VerificationFilterException
+ {
+   log.trace("extractSignaturHolders:");
+
+   if (log.isDebugEnabled())
+   {
+     log.debug("Original IU blocks: " + blocks.size());
+     debugIUBlocks(blocks);
+   }
+
+   // Drop the linearization block so that only real update blocks remain.
+   unrollLinearization(blocks);
+
+   if (log.isDebugEnabled())
+   {
+     log.debug("IU blocks without linearization: " + blocks.size());
+     debugIUBlocks(blocks);
+   }
+
+   final List chain;
+   if (parameters.extractBinarySignaturesOnly())
+   {
+     log.debug("Extracting only binary signatures. Binary-only mode.");
+
+     chain = performBinaryOnly(pdf, blocks);
+   }
+   else
+   {
+     final List partitions = VerificationFilterHelper.partition(pdf, blocks);
+     if (log.isDebugEnabled())
+     {
+       debugPartitions(partitions);
+     }
+
+     if (!parameters.assumeOnlySignatureUpdateBlocks())
+     {
+       log.debug("Scanning complete document. Conservative mode.");
+
+       final boolean scanOld = parameters.scanForOldSignatures();
+       chain = performFullConservative(pdf, scanOld, blocks, partitions);
+     }
+     else
+     {
+       log.debug("Assuming that there are only signature Incremental Update blocks. Semi-conservative mode.");
+
+       final boolean scanOld = parameters.scanForOldSignatures();
+       chain = performSemiConservative(pdf, scanOld, blocks, partitions);
+     }
+   }
+
+   log.trace("extractSignaturHolders finished.");
+   return chain;
+ }
+
+ /**
+  * Extracts the textual signature holders from free text.
+  *
+  * <p>
+  * In binary-only mode nothing can be found in free text, so a warning is
+  * logged and an empty list is returned. Otherwise the text is normalized,
+  * the signatures are extracted (optionally including an old signature) and
+  * every non-textual holder is removed from the result.
+  * </p>
+  *
+  * @param text
+  *          The free text to examine.
+  * @param parameters
+  *          The parameters controlling the extraction.
+  * @return Returns the list of textual signature holders.
+  * @throws VerificationFilterException
+  *           Thrown if normalization or extraction fails.
+  *
+  * @see at.gv.egiz.pdfas.framework.vfilter.VerificationFilter#extractSignaturHolders(at.gv.egiz.pdfas.framework.input.TextDataSource,
+  *      at.gv.egiz.pdfas.framework.vfilter.VerificationFilterParameters)
+  */
+ public List extractSignaturHolders(TextDataSource text, VerificationFilterParameters parameters) throws VerificationFilterException
+ {
+   if (parameters.extractBinarySignaturesOnly())
+   {
+     log.warn("A free text signature extraction was issued although the VerificationFilter was configured to detect only binary signatures (binary-only mode). The result is of course that no signatures can be found.");
+
+     return new ArrayList();
+   }
+
+   final String normalized = normalizeText(text.getText());
+
+   final List candidates;
+   if (!parameters.scanForOldSignatures())
+   {
+     log.debug("Extracting new signatures from text (not extracting old ones).");
+
+     candidates = extractNewSignaturesFromText(normalized);
+   }
+   else
+   {
+     log.debug("Extracting old and new signatures from text.");
+
+     candidates = extractNewAndOldSignaturesFromText(normalized);
+   }
+
+   return filterOutBinarySignatures(candidates);
+ }
+
+ /**
+  * Normalizes the given free text using {@link PdfAS#normalizeText}.
+  *
+  * @param freetext
+  *          The text to normalize.
+  * @return Returns the normalized text.
+  * @throws VerificationFilterException
+  *           Wraps the NormalizeException raised by the normalizer.
+  */
+ protected String normalizeText(String freetext) throws VerificationFilterException
+ {
+   String normalized;
+   try
+   {
+     normalized = PdfAS.normalizeText(freetext);
+   }
+   catch (NormalizeException cause)
+   {
+     throw new VerificationFilterException(cause);
+   }
+   return normalized;
+ }
+
+ /**
+  * Removes the linearization footer from the list of update blocks.
+  *
+  * <p>
+  * A block whose <code>sxpr.xref_index</code> is 0 is treated as the
+  * linearization block. At most one such block may exist; it is removed from
+  * the given list in place.
+  * </p>
+  *
+  * @param blocks
+  *          The list of FooterParseResult objects in \prev order.
+  * @throws RuntimeException
+  *           Thrown if more than one linearization block is found.
+  */
+ protected void unrollLinearization(List blocks)
+ {
+   int linearizedAt = -1;
+
+   int position = 0;
+   for (Iterator it = blocks.iterator(); it.hasNext(); position++)
+   {
+     FooterParseResult footer = (FooterParseResult) it.next();
+     if (footer.sxpr.xref_index != 0)
+     {
+       continue;
+     }
+
+     if (linearizedAt >= 0)
+     {
+       throw new RuntimeException("There is more than one linearization block! index = " + position);
+     }
+     linearizedAt = position;
+   }
+
+   if (linearizedAt < 0)
+   {
+     // Not linearized - nothing to unroll.
+     return;
+   }
+
+   // The document is linearized - unroll (drop) the linearization block.
+   blocks.remove(linearizedAt);
+ }
+
+ protected List performBinaryOnly(PdfDataSource pdf, List blocks) throws VerificationFilterException
+ {
+ return extractBinarySignaturesOnly(pdf, blocks);
+ }
+
+ protected List performSemiConservative(PdfDataSource pdf, boolean scanForOldSignatures, List blocks, List partitions) throws VerificationFilterException
+ {
+ List binarySignatures = extractBinarySignaturesOnly(pdf, blocks);
+
+ TextPartition lastTextPartition = VerificationFilterHelper.findLastTextPartition(partitions);
+ List extractedSignatures = null;
+ if (scanForOldSignatures)
+ {
+ SignaturesAndOld sao = extractSignaturesFromPartitionAndOld(pdf, lastTextPartition);
+ extractedSignatures = sao.newSignatures;
+ if (sao.oldSignature != null)
+ {
+ extractedSignatures.add(0, sao.oldSignature);
+ }
+ }
+ else
+ {
+ extractedSignatures = extractSignaturesFromPartition(pdf, lastTextPartition);
+ }
+
+ List signatureHolderChain = intermingleSignatures(binarySignatures, extractedSignatures);
+
+ return signatureHolderChain;
+ }
+
+ protected List performFullConservative(PdfDataSource pdf, boolean scanForOldSignatures, List blocks, List partitions) throws VerificationFilterException
+ {
+ List binarySignatures = extractBinarySignaturesOnly(pdf, blocks);
+
+ SignatureHolder oldSignature = null;
+
+ List partitionResults = new ArrayList(partitions.size());
+ for (int i = 0; i < partitions.size(); i++)
+ {
+ Partition p = (Partition) partitions.get(i);
+
+ if (p instanceof TextPartition)
+ {
+ TextPartition tp = (TextPartition) p;
+
+ List partitionResult = null;
+
+ boolean scanThisPartitionForOldSignature = (i == 0) && scanForOldSignatures;
+ if (scanThisPartitionForOldSignature)
+ {
+ SignaturesAndOld sao = extractSignaturesFromPartitionAndOld(pdf, tp);
+ partitionResult = sao.newSignatures;
+ oldSignature = sao.oldSignature;
+ }
+ else
+ {
+ partitionResult = extractSignaturesFromPartition(pdf, tp);
+ }
+
+ partitionResults.add(partitionResult);
+ }
+ }
+
+ List extractedSignatures = new ArrayList();
+ Iterator it = partitionResults.iterator();
+ List prevPartitionResult = null;
+ while (it.hasNext())
+ {
+ List partitionResult = (List) it.next();
+
+ if (prevPartitionResult == null)
+ {
+ extractedSignatures.addAll(partitionResult);
+ }
+ else
+ {
+ assert partitionResult.size() >= prevPartitionResult.size();
+
+ for (int i = prevPartitionResult.size(); i < partitionResult.size(); i++)
+ {
+ SignatureHolder sh = (SignatureHolder) partitionResult.get(i);
+ extractedSignatures.add(sh);
+ }
+ }
+
+ prevPartitionResult = partitionResult;
+ }
+
+ List signatureHolderChain = intermingleSignatures(binarySignatures, extractedSignatures);
+
+ if (oldSignature != null)
+ {
+ signatureHolderChain.add(0, oldSignature);
+ }
+
+ return signatureHolderChain;
+ }
+
+ protected String extractText(PdfDataSource pdf, int endOfDocument) throws PresentableException
+ {
+ DelimitedInputStream dis = new DelimitedInputStream(pdf.createInputStream(), endOfDocument);
+ return PdfAS.extractNormalizedTextTextual(dis);
+ }
+
+ /**
+  * Extracts the signature holders from the given text using
+  * {@link AbsoluteTextSignature#extractSignatureHoldersFromText}.
+  *
+  * @param text
+  *          The text to search.
+  * @return Returns the list of signature holders found in the text.
+  * @throws VerificationFilterException
+  *           Wraps the PresentableException raised by the extraction.
+  */
+ protected List extractNewSignaturesFromText(String text) throws VerificationFilterException
+ {
+   List holders;
+   try
+   {
+     holders = AbsoluteTextSignature.extractSignatureHoldersFromText(text);
+   }
+   catch (PresentableException cause)
+   {
+     throw new VerificationFilterException(cause);
+   }
+   return holders;
+ }
+
+ protected List extractNewAndOldSignaturesFromText(String text) throws VerificationFilterException
+ {
+ SignaturesAndOld sao = extractSignaturesAndOld(text);
+ if (sao.oldSignature != null)
+ {
+ sao.newSignatures.add(0, sao.oldSignature);
+ }
+
+ return sao.newSignatures;
+ }
+
+ protected List extractOldSignaturesFromText(String text) throws PresentableException
+ {
+ return PdfAS.extractSignatureHoldersTextual(text, true);
+ }
+
+ protected List intermingleSignatures(List binarySignatures, List extractedSignatures)
+ {
+ List textualSignatures = filterOutBinarySignatures(extractedSignatures);
+
+ List intermingled = new ArrayList(binarySignatures.size() + textualSignatures.size());
+ intermingled.addAll(binarySignatures);
+ intermingled.addAll(textualSignatures);
+
+ sortSignatures(intermingled);
+
+ return intermingled;
+ }
+
+ protected List filterOutBinarySignatures(List signatures)
+ {
+ List textOnly = new ArrayList(signatures.size());
+
+ Iterator it = signatures.iterator();
+ while (it.hasNext())
+ {
+ SignatureHolder sh = (SignatureHolder) it.next();
+ if (sh.getSignatureObject().isTextual())
+ {
+ textOnly.add(sh);
+ }
+ }
+
+ return textOnly;
+ }
+
+ /**
+ * Sorts the given list of signature holders in place by delegating to
+ * SignatureHolderHelper.sortByDate.
+ *
+ * @param signatures
+ * The list of signature holders to sort.
+ */
+ protected void sortSignatures(List signatures)
+ {
+ SignatureHolderHelper.sortByDate(signatures);
+ }
+
+ protected void debugIUBlocks(List blocks)
+ {
+ Iterator it = blocks.iterator();
+ while (it.hasNext())
+ {
+ FooterParseResult fpr = (FooterParseResult) it.next();
+ log.debug("footer: " + fpr.start_index + " to " + fpr.next_index + ", has predecessor = " + fpr.tpr.has_predecessor);
+ }
+ }
+
+ protected void debugPartitions(List partitions)
+ {
+ Iterator it = partitions.iterator();
+ while (it.hasNext())
+ {
+ Object o = it.next();
+ assert o instanceof Partition;
+
+ List blocks = null;
+ if (o instanceof TextPartition)
+ {
+ TextPartition tp = (TextPartition) o;
+
+ blocks = tp.blocks;
+
+ log.debug("text partition with " + tp.blocks.size() + " blocks:");
+ }
+ else
+ {
+ BinaryPartition bp = (BinaryPartition) o;
+
+ blocks = bp.blocks;
+
+ log.debug("binary partition: with " + bp.blocks.size() + " blocks:");
+
+ }
+ debugIUBlocks(blocks);
+ log.debug("partition finished.");
+ }
+ }
+
+ /**
+ * Extracts the binary singatures from the given PDF.
+ *
+ * <p>
+ * IU blocks without an egiz dict are not considered.
+ * </p>
+ *
+ * @param pdf
+ * @param blocks
+ * @return Returns the List of signature holders.
+ * @throws PresentableException
+ */
+ protected List extractBinarySignaturesOnly(PdfDataSource pdf, List blocks) throws VerificationFilterException
+ {
+ try
+ {
+ // PERF: extract binary signatures needs byte array
+ byte[] data = DataSourceHelper.convertDataSourceToByteArray(pdf);
+
+ List binarySignatures = new ArrayList(blocks.size());
+
+ Iterator it = blocks.iterator();
+ int prev_end = 0;
+ while (it.hasNext())
+ {
+ FooterParseResult fpr = (FooterParseResult) it.next();
+ assert fpr.next_index > prev_end;
+
+ if (VerificationFilterBinaryHelper.containsEGIZDict(data, fpr))
+ {
+ PdfASID kz = VerificationFilterBinaryHelper.extractKZFromEGIZBlock(data, fpr);
+
+ Verificator verificator = VerificatorFactory.createBinaryVerificator(kz);
+ List binary_holders = verificator.parseBlock(pdf, data, fpr, prev_end);
+
+ binarySignatures.addAll(binary_holders);
+ }
+
+ prev_end = fpr.next_index;
+ }
+
+ return binarySignatures;
+ }
+ catch (PresentableException e)
+ {
+ throw new VerificationFilterException(e);
+ }
+ }
+
+ protected List extractSignatures(PdfDataSource pdf, int endOfDocument) throws VerificationFilterException
+ {
+ try
+ {
+ log.debug("Extracting text from 0 to " + endOfDocument + " (total document size = " + pdf.getLength() + "):");
+ String extractedText = extractText(pdf, endOfDocument);
+ log.debug("Extracting text finished.");
+
+ log.debug("Extracting signatures:");
+ List extractedSignatures = extractNewSignaturesFromText(extractedText);
+ log.debug("Extracting signatures finished.");
+
+ return extractedSignatures;
+ }
+ catch (PresentableException e)
+ {
+ throw new VerificationFilterException(e);
+ }
+ }
+
+ protected String determineRestText(List newSignatures, String extractedText)
+ {
+ if (newSignatures.isEmpty())
+ {
+ return extractedText;
+ }
+
+ // note that even if the oldest signature is a binary signature,
+ // the rest text is the text of this binary signature, which was extracted
+ // like a text signature.
+ TextualSignatureHolder oldestSignature = (TextualSignatureHolder) newSignatures.get(0);
+ return oldestSignature.getSignedText();
+ }
+
+ /**
+  * Extracts the signature holders from the document text up to the end of
+  * the given partition.
+  *
+  * @param pdf
+  *          The PDF document.
+  * @param partition
+  *          The partition whose end delimits the text; asserted to be a text
+  *          partition.
+  * @return Returns the list of signature holders found.
+  * @throws VerificationFilterException
+  *           Forwarded from the extraction.
+  */
+ protected List extractSignaturesFromPartition(PdfDataSource pdf, Partition partition) throws VerificationFilterException
+ {
+   assert partition.isTextPartition();
+
+   return extractSignatures(pdf, VerificationFilterHelper.getEndOfPartition(partition));
+ }
+
+ protected SignaturesAndOld extractSignaturesFromPartitionAndOld(PdfDataSource pdf, Partition partition) throws VerificationFilterException
+ {
+ assert partition.isTextPartition();
+
+ try
+ {
+ int endOfDocument = VerificationFilterHelper.getEndOfPartition(partition);
+
+ log.debug("Extracting text from 0 to " + endOfDocument + " (total document size = " + pdf.getLength() + "):");
+ String extractedText = extractText(pdf, endOfDocument);
+ log.debug("Extracting text finished.");
+
+ SignaturesAndOld sao = extractSignaturesAndOld(extractedText);
+
+ return sao;
+ }
+ catch (PresentableException e)
+ {
+ throw new VerificationFilterException(e);
+ }
+ }
+
+ /**
+  * Simple holder for the result of a combined extraction: the list of new
+  * signatures plus the (optional) old signature.
+  */
+ protected static class SignaturesAndOld
+ {
+   /**
+    * The list of new signature holders; null until assigned.
+    */
+   public List newSignatures;
+
+   /**
+    * The old signature holder, or null if there is none.
+    */
+   public SignatureHolder oldSignature;
+ }
+
+ protected SignaturesAndOld extractSignaturesAndOld(String text) throws VerificationFilterException
+ {
+ try
+ {
+ log.debug("Extracting signatures:");
+ List extractedSignatures = extractNewSignaturesFromText(text);
+ log.debug("Extracting signatures finished.");
+
+ log.debug("Extracting old signatures:");
+ SignatureHolder oldSignature = extractOldSignature(text, extractedSignatures);
+ log.debug("Extracting old signatures finished.");
+
+ SignaturesAndOld sao = new SignaturesAndOld();
+ sao.newSignatures = extractedSignatures;
+ sao.oldSignature = oldSignature;
+
+ return sao;
+ }
+ catch (PresentableException e)
+ {
+ throw new VerificationFilterException(e);
+ }
+ }
+
+ /**
+ * Extracts the old signature from the text, but only if it is older than the
+ * oldest signature of the new signatueres.
+ *
+ * @param extractedText
+ * @param newSignatures
+ * @return
+ * @throws PDFDocumentException
+ * @throws SignatureException
+ * @throws NormalizeException
+ * @throws SignatureTypesException
+ */
+ protected SignatureHolder extractOldSignature(String extractedText, List newSignatures) throws PDFDocumentException, SignatureException, NormalizeException, SignatureTypesException
+ {
+ SignatureHolder oldSignature = null;
+
+ String restText = determineRestText(newSignatures, extractedText);
+
+ List oldSignatures = PdfAS.extractSignatureHoldersTextual(restText, true);
+ if (!oldSignatures.isEmpty())
+ {
+ oldSignature = (SignatureHolder) oldSignatures.get(0);
+ if (!newSignatures.isEmpty())
+ {
+ SignatureHolder oldestNewSignature = (SignatureHolder) newSignatures.get(0);
+ EGIZDate oldDate = EGIZDate.parseFromString(oldSignature.getSignatureObject().getSignationDate());
+ EGIZDate newDate = EGIZDate.parseFromString(oldestNewSignature.getSignatureObject().getSignationDate());
+ if (newDate.compareTo(oldDate) <= 0)
+ {
+ oldSignature = null;
+ }
+ }
+ }
+ return oldSignature;
+ }
+}