aboutsummaryrefslogtreecommitdiff
path: root/pdf-as-lib/src/main/java/at/gv/egiz/pdfas/impl/vfilter
diff options
context:
space:
mode:
Diffstat (limited to 'pdf-as-lib/src/main/java/at/gv/egiz/pdfas/impl/vfilter')
-rw-r--r--pdf-as-lib/src/main/java/at/gv/egiz/pdfas/impl/vfilter/Partition.java29
-rw-r--r--pdf-as-lib/src/main/java/at/gv/egiz/pdfas/impl/vfilter/VerificationFilterImpl.java964
-rw-r--r--pdf-as-lib/src/main/java/at/gv/egiz/pdfas/impl/vfilter/VerificationFilterParametersImpl.java98
-rw-r--r--pdf-as-lib/src/main/java/at/gv/egiz/pdfas/impl/vfilter/helper/VerificationFilterBinaryHelper.java190
-rw-r--r--pdf-as-lib/src/main/java/at/gv/egiz/pdfas/impl/vfilter/helper/VerificationFilterHelper.java162
-rw-r--r--pdf-as-lib/src/main/java/at/gv/egiz/pdfas/impl/vfilter/helper/VerificationFilterTextHelper.java35
-rw-r--r--pdf-as-lib/src/main/java/at/gv/egiz/pdfas/impl/vfilter/partition/BinaryPartition.java39
-rw-r--r--pdf-as-lib/src/main/java/at/gv/egiz/pdfas/impl/vfilter/partition/TextPartition.java40
8 files changed, 1557 insertions, 0 deletions
diff --git a/pdf-as-lib/src/main/java/at/gv/egiz/pdfas/impl/vfilter/Partition.java b/pdf-as-lib/src/main/java/at/gv/egiz/pdfas/impl/vfilter/Partition.java
new file mode 100644
index 0000000..f4e91bd
--- /dev/null
+++ b/pdf-as-lib/src/main/java/at/gv/egiz/pdfas/impl/vfilter/Partition.java
@@ -0,0 +1,29 @@
+/**
+ * <copyright> Copyright 2006 by Know-Center, Graz, Austria </copyright>
+ * PDF-AS has been contracted by the E-Government Innovation Center EGIZ, a
+ * joint initiative of the Federal Chancellery Austria and Graz University of
+ * Technology.
+ *
+ * Licensed under the EUPL, Version 1.1 or - as soon they will be approved by
+ * the European Commission - subsequent versions of the EUPL (the "Licence");
+ * You may not use this work except in compliance with the Licence.
+ * You may obtain a copy of the Licence at:
+ * http://www.osor.eu/eupl/
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the Licence is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the Licence for the specific language governing permissions and
+ * limitations under the Licence.
+ *
+ * This product combines work with different licenses. See the "NOTICE" text
+ * file for details on the various modules and licenses.
+ * The "NOTICE" text file is part of the distribution. Any derivative works
+ * that you distribute must include a readable copy of the "NOTICE" text file.
+ */
+package at.gv.egiz.pdfas.impl.vfilter;
+/** Common type of the partitions (text or binary) that the verification filter groups update blocks into. */
+public interface Partition {
+  /** Tells whether this partition is a text partition; callers assert this before extracting text signatures from it. */
+  boolean isTextPartition();
+}
diff --git a/pdf-as-lib/src/main/java/at/gv/egiz/pdfas/impl/vfilter/VerificationFilterImpl.java b/pdf-as-lib/src/main/java/at/gv/egiz/pdfas/impl/vfilter/VerificationFilterImpl.java
new file mode 100644
index 0000000..3f0f482
--- /dev/null
+++ b/pdf-as-lib/src/main/java/at/gv/egiz/pdfas/impl/vfilter/VerificationFilterImpl.java
@@ -0,0 +1,964 @@
+/**
+ * <copyright> Copyright 2006 by Know-Center, Graz, Austria </copyright>
+ * PDF-AS has been contracted by the E-Government Innovation Center EGIZ, a
+ * joint initiative of the Federal Chancellery Austria and Graz University of
+ * Technology.
+ *
+ * Licensed under the EUPL, Version 1.1 or - as soon they will be approved by
+ * the European Commission - subsequent versions of the EUPL (the "Licence");
+ * You may not use this work except in compliance with the Licence.
+ * You may obtain a copy of the Licence at:
+ * http://www.osor.eu/eupl/
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the Licence is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the Licence for the specific language governing permissions and
+ * limitations under the Licence.
+ *
+ * This product combines work with different licenses. See the "NOTICE" text
+ * file for details on the various modules and licenses.
+ * The "NOTICE" text file is part of the distribution. Any derivative works
+ * that you distribute must include a readable copy of the "NOTICE" text file.
+ */
+package at.gv.egiz.pdfas.impl.vfilter;
+
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Set;
+
+import org.apache.commons.lang.time.StopWatch;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+
+import at.gv.egiz.pdfas.exceptions.ErrorCode;
+import at.gv.egiz.pdfas.exceptions.framework.VerificationFilterException;
+import at.gv.egiz.pdfas.framework.SignatureHolderHelper;
+import at.gv.egiz.pdfas.framework.VerificatorFactory;
+import at.gv.egiz.pdfas.framework.input.PdfDataSource;
+import at.gv.egiz.pdfas.framework.input.TextDataSource;
+import at.gv.egiz.pdfas.framework.verificator.Verificator;
+import at.gv.egiz.pdfas.framework.vfilter.VerificationFilter;
+import at.gv.egiz.pdfas.framework.vfilter.VerificationFilterParameters;
+import at.gv.egiz.pdfas.impl.input.DelimitedPdfDataSource;
+import at.gv.egiz.pdfas.impl.input.helper.DataSourceHelper;
+import at.gv.egiz.pdfas.impl.vfilter.helper.VerificationFilterBinaryHelper;
+import at.gv.egiz.pdfas.impl.vfilter.helper.VerificationFilterHelper;
+import at.gv.egiz.pdfas.impl.vfilter.partition.BinaryPartition;
+import at.gv.egiz.pdfas.impl.vfilter.partition.TextPartition;
+import at.knowcenter.wag.egov.egiz.PdfAS;
+import at.knowcenter.wag.egov.egiz.PdfASID;
+import at.knowcenter.wag.egov.egiz.cfg.SettingsReader;
+import at.knowcenter.wag.egov.egiz.exceptions.NormalizeException;
+import at.knowcenter.wag.egov.egiz.exceptions.PDFDocumentException;
+import at.knowcenter.wag.egov.egiz.exceptions.PresentableException;
+import at.knowcenter.wag.egov.egiz.exceptions.SettingsException;
+import at.knowcenter.wag.egov.egiz.exceptions.SignatureException;
+import at.knowcenter.wag.egov.egiz.exceptions.SignatureTypesException;
+import at.knowcenter.wag.egov.egiz.pdf.AbsoluteTextSignature;
+import at.knowcenter.wag.egov.egiz.pdf.EGIZDate;
+import at.knowcenter.wag.egov.egiz.pdf.NoSignatureHolder;
+import at.knowcenter.wag.egov.egiz.pdf.SignatureHolder;
+import at.knowcenter.wag.egov.egiz.pdf.TextualSignatureHolder;
+import at.knowcenter.wag.exactparser.parsing.results.FooterParseResult;
+
+/**
+ * @author wprinz
+ */
+public class VerificationFilterImpl implements VerificationFilter
+{
+
+ /**
+ * The log.
+ */
+ private static final Log log = LogFactory.getLog(VerificationFilterImpl.class);
+ // Keys of the settings this filter reads via SettingsReader.getSetting(key, "false"); the values are compared case-insensitively.
+ // NOTE(review): the literal of SUPRESS_EXCEPTION_WHEN_LAST_UIBLOCK_IS_NO_SIGNATURE spells "iublock" while the constant name says "UIBLOCK"; the literal is what gets looked up.
+ public static final String CHECK_DOCUMENT = "check_document";
+ public static final String SUPRESS_EXCEPTION_WHEN_LAST_UIBLOCK_IS_NO_SIGNATURE = "supress_exception_when_last_iublock_is_no_signature";
+ public static final String BINARY_ONLY = "binary_only";
+ public static final String ASSUME_ONLY_SIGNATURE_BLOCKS = "assume_only_signature_blocks";
+
+  /**
+   * Extracts the chain of signature holders from the given PDF.
+   * A linearization footer, if present, is removed from {@code blocks} in place.
+   * The document is then processed in binary-only, semi-conservative or
+   * conservative mode, as selected by {@code parameters}.
+   *
+   * @see at.gv.egiz.pdfas.framework.vfilter.VerificationFilter#extractSignatureHolders(at.gv.egiz.pdfas.framework.input.PdfDataSource,
+   *      java.util.List,
+   *      at.gv.egiz.pdfas.framework.vfilter.VerificationFilterParameters)
+   */
+  public List extractSignatureHolders(final PdfDataSource pdf, List blocks, final VerificationFilterParameters parameters) throws VerificationFilterException
+  {
+    log.trace("extractSignaturHolders:");
+    final StopWatch stopWatch = new StopWatch();
+    stopWatch.start();
+
+    if (log.isDebugEnabled())
+    {
+      log.debug("Original IU blocks: " + blocks.size());
+      debugIUBlocks(blocks);
+    }
+
+    unrollLinearization(blocks);
+
+    if (log.isDebugEnabled())
+    {
+      log.debug("IU blocks without linearization: " + blocks.size());
+      debugIUBlocks(blocks);
+    }
+
+    final SettingsReader settingsReader;
+    try {
+      settingsReader = SettingsReader.getInstance();
+    } catch (SettingsException settingsProblem) {
+      throw new VerificationFilterException(settingsProblem);
+    }
+    final boolean checkDocument = "true".equalsIgnoreCase(settingsReader.getSetting(CHECK_DOCUMENT, "false"));
+
+    // The check for textual signatures is only performed in binary-only mode.
+    if (!checkDocument || !parameters.extractBinarySignaturesOnly())
+    {
+      log.debug("Skipping checkDocument for textual sigs.");
+    }
+    else
+    {
+      checkBinaryOnly(pdf, parameters.scanForOldSignatures());
+      log.debug("checkDocument: " + stopWatch.getTime() + "ms.");
+    }
+
+    final List signatureHolderChain;
+    if (parameters.extractBinarySignaturesOnly())
+    {
+      log.debug("Extracting only binary signatures. Binary-only mode.");
+      signatureHolderChain = performBinaryOnly(pdf, blocks);
+    }
+    else
+    {
+      final List partitions = VerificationFilterHelper.partition(pdf, blocks);
+      if (log.isDebugEnabled())
+      {
+        debugPartitions(partitions);
+      }
+
+      if (parameters.assumeOnlySignatureUpdateBlocks())
+      {
+        log.debug("Assuming that there are only signature Incremental Update blocks. Semi-conservative mode.");
+        signatureHolderChain = performSemiConservative(pdf, parameters.scanForOldSignatures(), blocks, partitions);
+      }
+      else
+      {
+        log.debug("Scanning complete document. Conservative mode.");
+        signatureHolderChain = performFullConservative(pdf, parameters.scanForOldSignatures(), blocks, partitions);
+      }
+    }
+
+    final int holderCount = (signatureHolderChain == null) ? 0 : signatureHolderChain.size();
+    log.trace("extractSignaturHolders finished (" + holderCount + " elements).");
+    stopWatch.stop();
+    log.debug("extractSignatureHolders: " + stopWatch.getTime() + "ms.");
+
+    return signatureHolderChain;
+  }
+
+  /**
+   * Extracts the textual signature holders from free text. In binary-only mode
+   * nothing is extracted and an empty list is returned.
+   *
+   * @see at.gv.egiz.pdfas.framework.vfilter.VerificationFilter#extractSignaturHolders(at.gv.egiz.pdfas.framework.input.TextDataSource,
+   *      at.gv.egiz.pdfas.framework.vfilter.VerificationFilterParameters)
+   */
+  public List extractSignaturHolders(TextDataSource text, VerificationFilterParameters parameters) throws VerificationFilterException
+  {
+    if (parameters.extractBinarySignaturesOnly())
+    {
+      // Free text cannot carry binary signatures, so there is nothing to look for.
+      log.warn("A free text signature extraction was issued although the VerificationFilter was configured to detect only binary signatures (binary-only mode). The result is of course that no signatures can be found.");
+      return new ArrayList();
+    }
+
+    // The signature search works on the normalized form of the text.
+    final String freetext = text.getText();
+    final String normalizedText = normalizeText(freetext);
+
+    final List candidates;
+    if (!parameters.scanForOldSignatures())
+    {
+      log.debug("Extracting new signatures from text (not extracting old ones).");
+      candidates = extractNewSignaturesFromText(normalizedText);
+    }
+    else
+    {
+      log.debug("Extracting old and new signatures from text.");
+      candidates = extractNewAndOldSignaturesFromText(normalizedText);
+    }
+
+    // Only holders whose signature object is textual are returned.
+    return filterOutBinarySignatures(candidates);
+  }
+
+ protected String normalizeText(String freetext) throws VerificationFilterException
+ {
+ try
+ {
+ return PdfAS.normalizeText(freetext);
+ }
+ catch (NormalizeException e)
+ {
+ throw new VerificationFilterException(e);
+ }
+ }
+
+  /**
+   * Removes the linearization footer from the list of update blocks.
+   *
+   * A block whose {@code sxpr.xref_index} is 0 is treated as the linearization
+   * block; at most one may exist, and it is removed from the list in place.
+   *
+   * @param blocks The list of FooterParseResult objects in \prev order.
+   * @throws IllegalStateException Thrown if more than one linearization block is found.
+   */
+  protected void unrollLinearization(List blocks)
+  {
+    int linearizationIndex = -1;
+    for (int i = 0; i < blocks.size(); i++)
+    {
+      FooterParseResult footer = (FooterParseResult) blocks.get(i);
+      if (footer.sxpr.xref_index == 0)
+      {
+        if (linearizationIndex >= 0)
+        {
+          throw new IllegalStateException("There is more than one linearization block! index = " + i);
+        }
+        linearizationIndex = i;
+      }
+    }
+    if (linearizationIndex >= 0)
+    {
+      log.debug("The document is linearized - unrolling linearization block " + linearizationIndex);
+      blocks.remove(linearizationIndex);
+    }
+  }
+
+ protected List performBinaryOnly(PdfDataSource pdf, List blocks) throws VerificationFilterException
+ {
+ return extractBinarySignaturesOnly(pdf, blocks);
+ }
+
+ protected List performSemiConservative(PdfDataSource pdf, boolean scanForOldSignatures, List blocks, List partitions) throws VerificationFilterException
+ {
+ log.debug("perform semiConservative()...");
+ List binarySignatures = extractBinarySignaturesOnly(pdf, blocks);
+
+ log.debug("determining last partition...");
+ TextPartition lastTextPartition = VerificationFilterHelper.findLastTextPartition(partitions);
+ List extractedSignatures = null;
+ if (scanForOldSignatures)
+ {
+ SignaturesAndOld sao = extractSignaturesFromPartitionAndOld(pdf, lastTextPartition);
+ extractedSignatures = sao.newSignatures;
+ if (sao.oldSignature != null)
+ {
+ extractedSignatures.add(0, sao.oldSignature);
+ }
+ }
+ else
+ {
+ log.debug("extracting signatures from last partition...");
+ extractedSignatures = extractSignaturesFromPartition(pdf, lastTextPartition);
+ }
+
+
+ List signatureHolderChain = intermingleSignatures(binarySignatures, extractedSignatures);
+
+ return signatureHolderChain;
+ }
+ /** Conservative mode: scans every text partition for text signatures, records update blocks that add no signature as NoSignatureHolders, and returns the sorted chain of binary and text signatures followed by those NoSignatureHolders. */
+ protected List performFullConservative(PdfDataSource pdf, boolean scanForOldSignatures, List blocks, List partitions) throws VerificationFilterException
+ {
+ List binarySignatures = extractBinarySignaturesOnly(pdf, blocks);
+
+ // extract signature values of found binary signature blocks and store these values in a Set
+ // this set is later used to filter out the binary signatures that are recognized as text
+ // signatures.
+ Set binarySigValues = new HashSet();
+ Iterator iterator = binarySignatures.iterator();
+ while(iterator.hasNext()) {
+
+ SignatureHolder sh = (SignatureHolder)iterator.next();
+
+ String sigVal = sh.getSignatureObject().getSignationValue();
+ binarySigValues.add(sigVal);
+ }
+
+ SignatureHolder oldSignature = null;
+
+ //List originalPartitions = partitions;
+ // This gives every IU block its own text partition.
+ // This allows text signatures to be found correctly if there are
+ // IU blocks with disturbing text after them.
+ // On the other hand, this requires extra text extractions and
+ // signature searches and thereby is slow.
+ List flattedOutPartitions = flattenOutTextPartitions(partitions, blocks);
+ partitions = flattedOutPartitions;
+
+ SettingsReader settings;
+ try {
+ settings = SettingsReader.getInstance();
+ } catch (SettingsException e) {
+ throw new VerificationFilterException(e);
+ }
+ String check_doc = settings.getSetting(CHECK_DOCUMENT, "false");
+ boolean supressException = "true".equalsIgnoreCase(settings.getSetting(SUPRESS_EXCEPTION_WHEN_LAST_UIBLOCK_IS_NO_SIGNATURE, "false"));
+
+ // flag indicating that the last IU-block of the document is a non-signature IU-block
+ boolean lastBlockWasModified = false;
+
+ // counter of all signatures (textual and binary) of this document
+ int signatureCounter = 0;
+
+ // number of textual signatures found in the previous text partition (copied from txtSigsThisPartition at the start of every iteration)
+ int txtSigsSoFar = 0;
+
+ // number of textual (non-binary) signatures found in the most recently processed text partition
+ int txtSigsThisPartition = 0;
+
+ List partitionResults = new ArrayList(partitions.size());
+ List nshList = new ArrayList();
+ // becomes true as soon as any signature, textual or binary, has been found
+ boolean sigFound = false;
+
+ for (int i = 0; i < partitions.size(); i++)
+ {
+ Partition p = (Partition) partitions.get(i);
+ // NOTE(review): for a binary partition the flag below stays true, so a binary partition is never reported as a non-signature block
+ // updating flag and counter
+ boolean partitionContainsNewTextSignatures = true;
+ txtSigsSoFar = txtSigsThisPartition;
+
+ if (p instanceof TextPartition)
+ {
+ TextPartition tp = (TextPartition) p;
+
+ List partitionResult = null;
+ // only the first partition is scanned for an old signature
+ boolean scanThisPartitionForOldSignature = (i == 0) && scanForOldSignatures;
+ if (scanThisPartitionForOldSignature)
+ {
+ SignaturesAndOld sao = extractSignaturesFromPartitionAndOld(pdf, tp);
+ partitionResult = sao.newSignatures;
+ oldSignature = sao.oldSignature;
+ }
+ else
+ {
+ partitionResult = extractSignaturesFromPartition(pdf, tp);
+ }
+
+ // binary signature blocks that have been detected as well are identified by comparing their signature values
+ // with those stored in our Set above and are not considered for our IU-check
+ List onlyTextSignatures = new ArrayList();
+ Iterator iter = partitionResult.iterator();
+ while(iter.hasNext()) {
+
+ SignatureHolder sh = (SignatureHolder)iter.next();
+ if(!binarySigValues.contains(sh.getSignatureObject().getSignationValue())) {
+
+ onlyTextSignatures.add(sh);
+ }
+ }
+
+ // update signature counters
+ txtSigsThisPartition = onlyTextSignatures.size();
+ int newTextSignatures = txtSigsThisPartition - txtSigsSoFar;
+ signatureCounter = signatureCounter + newTextSignatures;
+
+ // update sigFound flag
+ if(txtSigsThisPartition > 0) {
+
+ sigFound = true;
+ }
+
+ // TextPartition is only valid, if at least one more text signature has been found than in the previous text partition
+ if(!(newTextSignatures > 0)) {
+
+ partitionContainsNewTextSignatures = false;
+ }
+
+ partitionResults.add(partitionResult);
+ } else {
+ // should be binary partition
+ if(p instanceof BinaryPartition) {
+
+ BinaryPartition binpart = (BinaryPartition)p;
+
+ // updating counter and flag
+ signatureCounter = signatureCounter + binpart.blocks.size();
+ sigFound = true;
+
+ }
+ }
+
+ // if document checking is enabled and at least one signature has been found so far, a partition
+ // without new text signatures is a non-signature IU-block
+ if ((check_doc.equalsIgnoreCase("true"))&& (sigFound && !partitionContainsNewTextSignatures)) {
+
+ nshList.add(new NoSignatureHolder(signatureCounter));
+ lastBlockWasModified = true;
+
+ } else {
+
+ lastBlockWasModified = false;
+ }
+
+ }
+
+ // throw an exception if the last update block does not contain a signature and signatures have been found in this document
+ if (lastBlockWasModified) {
+ if (!supressException) {
+ throw new VerificationFilterException(ErrorCode.MODIFIED_AFTER_SIGNATION, "The document has been modified after being signed.");
+ } else {
+ log.debug("The document has been modified after being signed. According to the configuration, no exception is thrown.");
+ }
+ }
+ // the first partition result is taken as a whole; every later one contributes only the holders not contained in its predecessor
+ List extractedSignatures = new ArrayList();
+ Iterator it = partitionResults.iterator();
+ List prevPartitionResult = null;
+ while (it.hasNext())
+ {
+ List partitionResult = (List) it.next();
+
+ if (prevPartitionResult == null)
+ {
+ extractedSignatures.addAll(partitionResult);
+ }
+ else
+ {
+ assert partitionResult.size() >= prevPartitionResult.size();
+
+// for (int i = prevPartitionResult.size(); i < partitionResult.size(); i++)
+// {
+// SignatureHolder sh = (SignatureHolder) partitionResult.get(i);
+// extractedSignatures.add(sh);
+// }
+ mergeSignatures(prevPartitionResult, partitionResult, extractedSignatures);
+ }
+
+ prevPartitionResult = partitionResult;
+ }
+
+ List signatureHolderChain = intermingleSignatures(binarySignatures, extractedSignatures);
+
+ if (oldSignature != null)
+ {
+ signatureHolderChain.add(0, oldSignature);
+ }
+
+ // add the created NoSignatureHolders (appended after sorting, so they always come last)
+ signatureHolderChain.addAll(nshList);
+
+ return signatureHolderChain;
+ }
+  /**
+   * Appends to {@code result} every signature holder of {@code newList} whose
+   * signature value does not occur in {@code oldList}.
+   *
+   * @param oldList
+   *          The signature holders to compare against.
+   * @param newList
+   *          The signature holders to be checked.
+   * @param result
+   *          The list that receives the holders not present in oldList.
+   */
+  private void mergeSignatures(List oldList, List newList, List result)
+  {
+    for (int i = 0; i < newList.size(); i++)
+    {
+      SignatureHolder candidate = (SignatureHolder) newList.get(i);
+      String candidateValue = candidate.getSignatureObject().getSignationValue();
+
+      boolean alreadyKnown = false;
+      for (int j = 0; j < oldList.size() && !alreadyKnown; j++)
+      {
+        SignatureHolder known = (SignatureHolder) oldList.get(j);
+        // The first match settles the question, so the scan stops there.
+        alreadyKnown = candidateValue.equals(known.getSignatureObject().getSignationValue());
+      }
+
+      if (!alreadyKnown)
+      {
+        result.add(candidate);
+      }
+    }
+  }
+
+
+ protected List flattenOutTextPartitions (List partitions, List blocks)
+ {
+
+ List blockPartitions = new ArrayList(blocks.size());
+ Iterator it = partitions.iterator();
+ while (it.hasNext())
+ {
+ Partition p = (Partition)it.next();
+ if (p instanceof TextPartition)
+ {
+ TextPartition tp = (TextPartition)p;
+ Iterator blockIt = tp.blocks.iterator();
+ while (blockIt.hasNext())
+ {
+ FooterParseResult fpr = (FooterParseResult)blockIt.next();
+ TextPartition newPt = new TextPartition();
+ newPt.blocks = new ArrayList(1);
+ newPt.blocks.add(fpr);
+ blockPartitions.add(newPt);
+ }
+ }
+ else
+ {
+ // binary partition
+ blockPartitions.add(p);
+ }
+ }
+
+ // note: successive binary blocks are still combined to one binary partition
+ assert blockPartitions.size() <= blocks.size();
+
+ return blockPartitions;
+ }
+ /** Extracts the text up to endOfDocument by delegating to the three-argument variant with the encoding "utf8". */
+ protected String extractText(PdfDataSource pdf, int endOfDocument) throws PresentableException {
+ return extractText(pdf, endOfDocument, "utf8");
+ }
+
+ protected String extractText(PdfDataSource pdf, int endOfDocument, String encoding) throws PresentableException
+ {
+
+ log.debug("EXTRACTING TEXT (" + encoding + ")... end index = " + endOfDocument);
+
+ DelimitedPdfDataSource dds = new DelimitedPdfDataSource(pdf, endOfDocument);
+ //DelimitedInputStream dis = new DelimitedInputStream(pdf.createInputStream(), endOfDocument);
+ return PdfAS.extractNormalizedTextTextual(dds, encoding);
+ }
+
+
+ protected List extractNewSignaturesFromText(String text) throws VerificationFilterException
+ {
+ try
+ {
+ return AbsoluteTextSignature.extractSignatureHoldersFromText(text);
+ }
+ catch (PresentableException e)
+ {
+ throw new VerificationFilterException(e);
+ }
+ }
+
+ protected List extractNewAndOldSignaturesFromText(String text) throws VerificationFilterException
+ {
+ SignaturesAndOld sao = extractSignaturesAndOld(text);
+ if (sao.oldSignature != null)
+ {
+ sao.newSignatures.add(0, sao.oldSignature);
+ }
+
+ return sao.newSignatures;
+ }
+ /** Delegates to PdfAS.extractSignatureHoldersTextual(text, true). NOTE(review): no caller is visible in this class - possibly kept for subclasses. */
+ protected List extractOldSignaturesFromText(String text) throws PresentableException
+ {
+ return PdfAS.extractSignatureHoldersTextual(text, true);
+ }
+
+ protected List intermingleSignatures(List binarySignatures, List extractedSignatures)
+ {
+ List textualSignatures = filterOutBinarySignatures(extractedSignatures);
+
+ List intermingled = new ArrayList(binarySignatures.size() + textualSignatures.size());
+ intermingled.addAll(binarySignatures);
+ intermingled.addAll(textualSignatures);
+
+ sortSignatures(intermingled);
+
+ return intermingled;
+ }
+
+ protected List filterOutBinarySignatures(List signatures)
+ {
+ List textOnly = new ArrayList(signatures.size());
+
+ Iterator it = signatures.iterator();
+ while (it.hasNext())
+ {
+ SignatureHolder sh = (SignatureHolder) it.next();
+ if (sh.getSignatureObject().isTextual())
+ {
+ textOnly.add(sh);
+ }
+ }
+
+ return textOnly;
+ }
+ /** Sorts the given signature holders by delegating to SignatureHolderHelper.sortByDate (presumably in place, as nothing is returned). */
+ protected void sortSignatures(List signatures)
+ {
+ SignatureHolderHelper.sortByDate(signatures);
+ }
+
+ protected void debugIUBlocks(List blocks)
+ {
+ Iterator it = blocks.iterator();
+ while (it.hasNext())
+ {
+ FooterParseResult fpr = (FooterParseResult) it.next();
+ log.debug("footer: " + fpr.start_index + " to " + fpr.next_index + ", has predecessor = " + fpr.tpr.has_predecessor);
+ }
+ }
+
+ protected void debugPartitions(List partitions)
+ {
+ Iterator it = partitions.iterator();
+ while (it.hasNext())
+ {
+ Object o = it.next();
+ assert o instanceof Partition;
+
+ List blocks = null;
+ if (o instanceof TextPartition)
+ {
+ TextPartition tp = (TextPartition) o;
+
+ blocks = tp.blocks;
+
+ log.debug("text partition with " + tp.blocks.size() + " blocks:");
+ }
+ else
+ {
+ BinaryPartition bp = (BinaryPartition) o;
+
+ blocks = bp.blocks;
+
+ log.debug("binary partition: with " + bp.blocks.size() + " blocks:");
+
+ }
+ debugIUBlocks(blocks);
+ log.debug("partition finished.");
+ }
+ }
+
+ /**
+ * Extracts the binary singatures from the given PDF.
+ *
+ * <p>
+ * IU blocks without an egiz dict are not considered.
+ * </p>
+ *
+ * @param pdf
+ * @param blocks
+ * @return Returns the List of signature holders.
+ * @throws PresentableException
+ */
+ protected List extractBinarySignaturesOnly(PdfDataSource pdf, List blocks) throws VerificationFilterException
+ {
+ SettingsReader settings;
+ try {
+ settings = SettingsReader.getInstance();
+ } catch (SettingsException e) {
+ throw new VerificationFilterException(e);
+ }
+ String check_doc = settings.getSetting(CHECK_DOCUMENT, "false");
+ String binary_only = settings.getSetting(BINARY_ONLY, "false");
+ String assume_sigs_only = settings.getSetting(ASSUME_ONLY_SIGNATURE_BLOCKS, "false");
+ boolean supressException = "true".equalsIgnoreCase(settings.getSetting(SUPRESS_EXCEPTION_WHEN_LAST_UIBLOCK_IS_NO_SIGNATURE, "false"));
+
+ try
+ {
+ // PERF: extract binary signatures needs byte array
+ byte[] data = DataSourceHelper.convertDataSourceToByteArray(pdf);
+
+ List binarySignatures = new ArrayList(blocks.size());
+
+ Iterator it = blocks.iterator();
+ int prev_end = 0;
+ boolean sig_detected = false;
+ while (it.hasNext())
+ {
+ FooterParseResult fpr = (FooterParseResult) it.next();
+ assert fpr.next_index > prev_end;
+
+ if (VerificationFilterBinaryHelper.containsEGIZDict(data, fpr))
+ {
+ PdfASID kz = VerificationFilterBinaryHelper.extractKZFromEGIZBlock(data, fpr);
+
+ // TODO dferbas hack baik test
+ //kz = new PdfASID("urn:pdfsigfilter:bka.gv.at:binaer:v1.1.0");
+
+ Verificator verificator = VerificatorFactory.createBinaryVerificator(kz);
+ List binary_holders = verificator.parseBlock(pdf, data, fpr, prev_end);
+
+ binarySignatures.addAll(binary_holders);
+ if(binary_holders.size() > 0) {
+ sig_detected = true;
+ }
+ } else {
+ // an Exception is thrown here if:
+ // 1) check_document is activated
+ // 2) assume_only_signature_blocks is false - otherwise we permit updates
+ // 3) binary_only is true - otherwise updates are handled in method performFullConservative().
+ // when binary-only is true, we can be sure that a block that contains no egiz-dict is no textual
+ // signature either but an illegal update, otherwise an Exception (doc contains textual sig) would have been thrown before
+ // 4) a binary signature has been detected in a previous block
+ if(check_doc.equalsIgnoreCase("true") &&
+ binary_only.equalsIgnoreCase("true") &&
+ assume_sigs_only.equalsIgnoreCase("false") &&
+ sig_detected) {
+
+ if (!supressException) {
+ throw new VerificationFilterException(ErrorCode.MODIFIED_AFTER_SIGNATION, "The document has been modified after being signed.");
+ } else {
+ log.debug("The document has been modified after being signed. According to the configuration, no exception is thrown.");
+ }
+
+ }
+ }
+
+ prev_end = fpr.next_index;
+ }
+
+ return binarySignatures;
+ }
+ catch (PresentableException e)
+ {
+ throw new VerificationFilterException(e);
+ }
+ }
+  /**
+   * Extracts the signatures from the text of the PDF up to the given end index.
+   *
+   * If signatures of the old cp1252 version are found, text and signatures are
+   * extracted again with cp1252 encoding and the affected holders are replaced by
+   * their cp1252 counterparts. The replacement is skipped if the second run
+   * yields a different number of signatures, because positions then cannot be matched.
+   *
+   * @throws VerificationFilterException Thrown if text or signature extraction fails.
+   */
+  protected List extractSignatures(PdfDataSource pdf, int endOfDocument) throws VerificationFilterException
+  {
+    try {
+      log.debug("Extracting text from 0 to " + endOfDocument + " (total document size = " + pdf.getLength() + "):");
+      String extractedText = extractText(pdf, endOfDocument);
+      log.debug("Extracting text finished.");
+      log.debug("extracted text: " + extractedText);
+
+      log.debug("Extracting signatures:");
+      List extractedSignatures = extractNewSignaturesFromText(extractedText);
+      log.debug("Extracting signatures finished.");
+      log.debug("Number of found signatures: " + extractedSignatures.size());
+
+      // Remember the positions of all signatures of the old cp1252 version.
+      List cp1252SignaturesPositions = new ArrayList();
+      for (int i = 0; i < extractedSignatures.size(); i++) {
+        SignatureHolder sh = (SignatureHolder) extractedSignatures.get(i);
+        PdfASID kzid = sh.getSignatureObject().getKZ();
+        if (kzid != null && kzid.isOldCp1252Version()) {
+          log.debug("found cp1252 signature");
+          cp1252SignaturesPositions.add(new Integer(i));
+        }
+      }
+
+      if (!cp1252SignaturesPositions.isEmpty()) {
+        log.debug("redo text and signature extraction with cp1252 encoding");
+        extractedText = extractText(pdf, endOfDocument, "cp1252");
+        log.debug("Extracting text finished.");
+
+        log.debug("Extracting signatures:");
+        List cp1252ExtractedSignatures = extractNewSignaturesFromText(extractedText);
+        log.debug("Extracting signatures finished.");
+        log.debug("Number of found signatures: " + cp1252ExtractedSignatures.size());
+
+        if (cp1252ExtractedSignatures.size() != extractedSignatures.size()) {
+          // Positions of both runs cannot be matched; keep the results of the first run.
+          log.error("Invalid cp1252 signatures found. Skipping cp1252 compatibility.");
+        } else {
+          for (int i = 0; i < cp1252SignaturesPositions.size(); i++) {
+            int replaceIndex = ((Integer) cp1252SignaturesPositions.get(i)).intValue();
+            extractedSignatures.set(replaceIndex, cp1252ExtractedSignatures.get(replaceIndex));
+          }
+        }
+      }
+
+      if (log.isDebugEnabled()) {
+        log.debug("extracted signatures:");
+        for (int i = 0; i < extractedSignatures.size(); i++) {
+          SignatureHolder sh = (SignatureHolder) extractedSignatures.get(i);
+          String dateStr = sh.getSignatureObject().getSignationDate();
+          EGIZDate ed = EGIZDate.parseFromString(dateStr);
+          log.debug("#" + i + ": dateStr = " + dateStr + ", egizDate = " + ed.toString());
+        }
+      }
+
+      return extractedSignatures;
+    } catch (PresentableException e) {
+      throw new VerificationFilterException(e);
+    }
+  }
+
+ protected String determineRestText(List newSignatures, String extractedText)
+ {
+ if (newSignatures.isEmpty())
+ {
+ return extractedText;
+ }
+
+ // note that even if the oldest signature is a binary signature,
+ // the rest text is the text of this binary signature, which was extracted
+ // like a text signature.
+ TextualSignatureHolder oldestSignature = (TextualSignatureHolder) newSignatures.get(0);
+ return oldestSignature.getSignedText();
+ }
+
+ protected List extractSignaturesFromPartition(PdfDataSource pdf, Partition partition) throws VerificationFilterException
+ {
+ assert partition.isTextPartition();
+
+ int endOfDocument = VerificationFilterHelper.getEndOfPartition(partition);
+ List extractedSigs = extractSignatures(pdf, endOfDocument);
+ TextualSignatureHolder.mulitSetUiBlockEndPos(extractedSigs, endOfDocument);
+ return extractedSigs;
+ }
+
+ protected SignaturesAndOld extractSignaturesFromPartitionAndOld(PdfDataSource pdf, Partition partition) throws VerificationFilterException
+ {
+ assert partition.isTextPartition();
+
+ try
+ {
+ int endOfDocument = VerificationFilterHelper.getEndOfPartition(partition);
+
+// log.debug("Extracting text from 0 to " + endOfDocument + " (total document size = " + pdf.getLength() + "):");
+ String extractedText = extractText(pdf, endOfDocument);
+// log.debug("Extracting text finished.");
+// log.debug("extracted text: " + extractedText);
+
+ SignaturesAndOld sao = extractSignaturesAndOld(extractedText);
+ TextualSignatureHolder.trySetUiBlockEndPos(sao.oldSignature, endOfDocument);
+ TextualSignatureHolder.mulitSetUiBlockEndPos(sao.newSignatures, endOfDocument);
+
+ return sao;
+ }
+ catch (PresentableException e)
+ {
+ throw new VerificationFilterException(e);
+ }
+ }
+  /**
+   * Verifies that the complete document contains no non-binary signature.
+   *
+   * @param considerOldSigs If true, old signatures are searched as well.
+   * @throws VerificationFilterException Thrown (NON_BINARY_SIGNATURES_PRESENT) if a non-binary signature is found, or if extraction fails.
+   */
+  protected void checkBinaryOnly(PdfDataSource pdf, boolean considerOldSigs) throws VerificationFilterException {
+    List sigs = new ArrayList();
+    if (considerOldSigs) {
+      // The text is only needed on this path; extractSignatures() below extracts the text on its own.
+      String text;
+      try {
+        DelimitedPdfDataSource dds = new DelimitedPdfDataSource(pdf, pdf.getLength());
+        text = PdfAS.extractNormalizedTextTextual(dds, "utf-8");
+      } catch (PresentableException e) {
+        throw new VerificationFilterException(e);
+      }
+      SignaturesAndOld sao = extractSignaturesAndOld(text);
+      if (sao != null && sao.newSignatures != null) {
+        sigs.addAll(sao.newSignatures);
+      }
+      if (sao != null && sao.oldSignature != null) {
+        sigs.add(sao.oldSignature);
+      }
+    } else {
+      List signatures = extractSignatures(pdf, pdf.getLength());
+      if (signatures != null) {
+        sigs.addAll(signatures);
+      }
+    }
+
+    for (Iterator it = sigs.iterator(); it.hasNext();) {
+      SignatureHolder current = (SignatureHolder) it.next();
+      if ((current != null) && (!current.getSignatureObject().isBinary())) {
+        throw new VerificationFilterException(ErrorCode.NON_BINARY_SIGNATURES_PRESENT, "The document contains non-binary signatures.");
+      }
+    }
+  }
+  /** Result of a signature search: the list of new signatures plus an optional old signature. */
+  protected static class SignaturesAndOld
+  {
+    /** The signature holders found by the search for new signatures. */
+    public List newSignatures;
+    /** The old signature, or null if none was found or accepted. */
+    public SignatureHolder oldSignature;
+  }
+
+ protected SignaturesAndOld extractSignaturesAndOld(String text) throws VerificationFilterException
+ {
+ try
+ {
+ log.debug("Extracting signatures:");
+ List extractedSignatures = extractNewSignaturesFromText(text);
+ log.debug("Extracting signatures finished.");
+
+ log.debug("Extracting old signatures:");
+ SignatureHolder oldSignature = extractOldSignature(text, extractedSignatures);
+ log.debug("Extracting old signatures finished.");
+ log.debug("oldSignature = null: " + (oldSignature==null));
+
+ SignaturesAndOld sao = new SignaturesAndOld();
+ sao.newSignatures = extractedSignatures;
+ sao.oldSignature = oldSignature;
+
+ return sao;
+ }
+ catch (PresentableException e)
+ {
+ throw new VerificationFilterException(e);
+ }
+ }
+
+ /**
+ * Extracts the old signature from the text, but only if it is older than the
+ * oldest signature of the new signatueres.
+ *
+ * @param extractedText
+ * @param newSignatures
+ * @return
+ * @throws PDFDocumentException
+ * @throws SignatureException
+ * @throws NormalizeException
+ * @throws SignatureTypesException
+ */
+ protected SignatureHolder extractOldSignature(String extractedText, List newSignatures) throws PDFDocumentException, SignatureException, NormalizeException, SignatureTypesException
+ {
+ SignatureHolder oldSignature = null;
+
+ String restText = determineRestText(newSignatures, extractedText);
+
+ List oldSignatures = PdfAS.extractSignatureHoldersTextual(restText, true);
+ if (!oldSignatures.isEmpty())
+ {
+ oldSignature = (SignatureHolder) oldSignatures.get(0);
+ if (!newSignatures.isEmpty())
+ {
+ SignatureHolder oldestNewSignature = (SignatureHolder) newSignatures.get(0);
+ EGIZDate oldDate = EGIZDate.parseFromString(oldSignature.getSignatureObject().getSignationDate());
+ EGIZDate newDate = EGIZDate.parseFromString(oldestNewSignature.getSignatureObject().getSignationDate());
+ if (newDate.compareTo(oldDate) <= 0)
+ {
+ oldSignature = null;
+ }
+ }
+ }
+ return oldSignature;
+ }
+}
diff --git a/pdf-as-lib/src/main/java/at/gv/egiz/pdfas/impl/vfilter/VerificationFilterParametersImpl.java b/pdf-as-lib/src/main/java/at/gv/egiz/pdfas/impl/vfilter/VerificationFilterParametersImpl.java
new file mode 100644
index 0000000..635dc99
--- /dev/null
+++ b/pdf-as-lib/src/main/java/at/gv/egiz/pdfas/impl/vfilter/VerificationFilterParametersImpl.java
@@ -0,0 +1,98 @@
+/**
+ * <copyright> Copyright 2006 by Know-Center, Graz, Austria </copyright>
+ * PDF-AS has been contracted by the E-Government Innovation Center EGIZ, a
+ * joint initiative of the Federal Chancellery Austria and Graz University of
+ * Technology.
+ *
+ * Licensed under the EUPL, Version 1.1 or - as soon they will be approved by
+ * the European Commission - subsequent versions of the EUPL (the "Licence");
+ * You may not use this work except in compliance with the Licence.
+ * You may obtain a copy of the Licence at:
+ * http://www.osor.eu/eupl/
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the Licence is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the Licence for the specific language governing permissions and
+ * limitations under the Licence.
+ *
+ * This product combines work with different licenses. See the "NOTICE" text
+ * file for details on the various modules and licenses.
+ * The "NOTICE" text file is part of the distribution. Any derivative works
+ * that you distribute must include a readable copy of the "NOTICE" text file.
+ */
+package at.gv.egiz.pdfas.impl.vfilter;
+
+import java.io.Serializable;
+
+import at.gv.egiz.pdfas.framework.vfilter.VerificationFilterParameters;
+
+/**
+ * Simple serializable holder for the settings exposed through
+ * {@link VerificationFilterParameters}.
+ *
+ * <p>
+ * The three filter flags are set once via the constructor; the
+ * "has been corrected" marker can be changed afterwards through
+ * {@link #setBeenCorrected(boolean)}.
+ * </p>
+ *
+ * @author wprinz
+ */
+public class VerificationFilterParametersImpl implements VerificationFilterParameters, Serializable
+{
+  /**
+   * SVUID.
+   */
+  private static final long serialVersionUID = -7118403150485416046L;
+
+  /**
+   * Value returned by {@link #extractBinarySignaturesOnly()}.
+   */
+  protected boolean extractBinarySignaturesOnly = false;
+
+  /**
+   * Value returned by {@link #assumeOnlySignatureUpdateBlocks()}.
+   */
+  protected boolean assumeOnlySignatureUpdateBlocks = false;
+
+  /**
+   * Value returned by {@link #scanForOldSignatures()}.
+   */
+  protected boolean scanForOldSignatures = true;
+
+  /**
+   * Value returned by {@link #hasBeenCorrected()}; initially false.
+   */
+  protected boolean hasBeenCorrected = false;
+
+  /**
+   * Creates the parameters with the given flag values.
+   *
+   * @param extractBinarySignaturesOnly
+   *          The value for the "extract binary signatures only" flag.
+   * @param assumeOnlySignatureUpdateBlocks
+   *          The value for the "assume only signature update blocks" flag.
+   * @param scanForOldSignatures
+   *          The value for the "scan for old signatures" flag.
+   */
+  public VerificationFilterParametersImpl(boolean extractBinarySignaturesOnly, boolean assumeOnlySignatureUpdateBlocks, boolean scanForOldSignatures)
+  {
+    this.extractBinarySignaturesOnly = extractBinarySignaturesOnly;
+    this.assumeOnlySignatureUpdateBlocks = assumeOnlySignatureUpdateBlocks;
+    this.scanForOldSignatures = scanForOldSignatures;
+  }
+
+  /**
+   * @see at.gv.egiz.pdfas.framework.vfilter.VerificationFilterParameters#extractBinarySignaturesOnly()
+   */
+  public boolean extractBinarySignaturesOnly()
+  {
+    return this.extractBinarySignaturesOnly;
+  }
+
+  /**
+   * @see at.gv.egiz.pdfas.framework.vfilter.VerificationFilterParameters#assumeOnlySignatureUpdateBlocks()
+   */
+  public boolean assumeOnlySignatureUpdateBlocks()
+  {
+    return this.assumeOnlySignatureUpdateBlocks;
+  }
+
+  /**
+   * @see at.gv.egiz.pdfas.framework.vfilter.VerificationFilterParameters#scanForOldSignatures()
+   */
+  public boolean scanForOldSignatures()
+  {
+    return this.scanForOldSignatures;
+  }
+
+  /**
+   * Lists all settings held by this object, including the
+   * scanForOldSignatures flag and the hasBeenCorrected marker, so that log
+   * output reflects the complete state.
+   *
+   * @see java.lang.Object#toString()
+   */
+  // @override
+  public String toString()
+  {
+    return "{VerificationFilterParametersImpl: extractBinarySignaturesOnly = " + extractBinarySignaturesOnly()
+        + ", assumeOnlySignatureUpdateBlocks = " + assumeOnlySignatureUpdateBlocks()
+        + ", scanForOldSignatures = " + scanForOldSignatures()
+        + ", hasBeenCorrected = " + hasBeenCorrected() + "}";
+  }
+
+  /**
+   * Tells whether the "corrected" marker has been set.
+   *
+   * @return Returns the value last passed to
+   *         {@link #setBeenCorrected(boolean)}, or false if it was never
+   *         called.
+   */
+  public boolean hasBeenCorrected()
+  {
+    return this.hasBeenCorrected;
+  }
+
+  /**
+   * Sets the "corrected" marker.
+   *
+   * @param corrected
+   *          The new value of the marker.
+   */
+  public void setBeenCorrected(boolean corrected)
+  {
+    this.hasBeenCorrected = corrected;
+  }
+}
diff --git a/pdf-as-lib/src/main/java/at/gv/egiz/pdfas/impl/vfilter/helper/VerificationFilterBinaryHelper.java b/pdf-as-lib/src/main/java/at/gv/egiz/pdfas/impl/vfilter/helper/VerificationFilterBinaryHelper.java
new file mode 100644
index 0000000..735b874
--- /dev/null
+++ b/pdf-as-lib/src/main/java/at/gv/egiz/pdfas/impl/vfilter/helper/VerificationFilterBinaryHelper.java
@@ -0,0 +1,190 @@
+/**
+ * <copyright> Copyright 2006 by Know-Center, Graz, Austria </copyright>
+ * PDF-AS has been contracted by the E-Government Innovation Center EGIZ, a
+ * joint initiative of the Federal Chancellery Austria and Graz University of
+ * Technology.
+ *
+ * Licensed under the EUPL, Version 1.1 or - as soon they will be approved by
+ * the European Commission - subsequent versions of the EUPL (the "Licence");
+ * You may not use this work except in compliance with the Licence.
+ * You may obtain a copy of the Licence at:
+ * http://www.osor.eu/eupl/
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the Licence is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the Licence for the specific language governing permissions and
+ * limitations under the Licence.
+ *
+ * This product combines work with different licenses. See the "NOTICE" text
+ * file for details on the various modules and licenses.
+ * The "NOTICE" text file is part of the distribution. Any derivative works
+ * that you distribute must include a readable copy of the "NOTICE" text file.
+ */
+package at.gv.egiz.pdfas.impl.vfilter.helper;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+
+import at.gv.egiz.pdfas.exceptions.ErrorCode;
+import at.knowcenter.wag.egov.egiz.PdfASID;
+import at.knowcenter.wag.egov.egiz.exceptions.InvalidIDException;
+import at.knowcenter.wag.egov.egiz.exceptions.PDFDocumentException;
+import at.knowcenter.wag.egov.egiz.pdf.BinarySignature;
+import at.knowcenter.wag.egov.egiz.pdf.Placeholder;
+import at.knowcenter.wag.egov.egiz.pdf.StringInfo;
+import at.knowcenter.wag.exactparser.parsing.IndirectObjectReference;
+import at.knowcenter.wag.exactparser.parsing.PDFUtils;
+import at.knowcenter.wag.exactparser.parsing.results.ArrayParseResult;
+import at.knowcenter.wag.exactparser.parsing.results.DictionaryParseResult;
+import at.knowcenter.wag.exactparser.parsing.results.FooterParseResult;
+import at.knowcenter.wag.exactparser.parsing.results.IndirectObjectReferenceParseResult;
+import at.knowcenter.wag.exactparser.parsing.results.NumberParseResult;
+import at.knowcenter.wag.exactparser.parsing.results.ObjectParseResult;
+
+/**
+ * Contains helpful methods used by the VerificationFilter to analyze the PDF for binary signatures.
+ *
+ * @author wprinz
+ */
+public final class VerificationFilterBinaryHelper
+{
+  /**
+   * The name of the egiz dict key.
+   */
+  public static final byte[] EGIZ_DICT_NAME = { 'E', 'G', 'I', 'Z', 'S', 'i', 'g', 'D', 'i', 'c', 't' };
+
+  /**
+   * The name of the ID (SIG_KZ) property in the egiz dict.
+   */
+  public static final byte[] EGIZ_KZ_NAME = { 'I', 'D' };
+
+  /**
+   * The log.
+   */
+  private static final Log log = LogFactory.getLog(VerificationFilterBinaryHelper.class);
+
+  /**
+   * Tells, if the given incremental update block contains a binary signature.
+   *
+   * <p>
+   * According to definition, if a block is a binary block, it must/cannot
+   * contain other signatures than this one.
+   * </p>
+   * <p>
+   * The block counts as binary when the egiz dict name is found among the
+   * names of <code>block.tpr.dpr</code>. Any non-negative index is a hit,
+   * including index 0; this matches the "not found" check used by
+   * {@link #extractKZFromEGIZBlock(byte[], FooterParseResult)}.
+   * </p>
+   *
+   * @param pdf
+   *          The pdf.
+   * @param block
+   *          The incremental update block.
+   * @return Returns true, if this block is a binary signature block, false
+   *         otherwise.
+   */
+  public static boolean containsEGIZDict(final byte[] pdf, final FooterParseResult block)
+  {
+    int dict_index = PDFUtils.indexOfName(pdf, block.tpr.dpr.names, EGIZ_DICT_NAME);
+    // Only a negative index means "not found"; 0 is a valid position.
+    return dict_index >= 0;
+  }
+
+  /**
+   * Extracts the PDF AS ID of the egiz block.
+   *
+   * <p>
+   * The egiz dict is located through the indirect reference stored under
+   * {@link #EGIZ_DICT_NAME} in <code>block.tpr.dpr</code>, parsed, and its
+   * {@link #EGIZ_KZ_NAME} array is turned back into the ID string.
+   * </p>
+   *
+   * @param pdf
+   *          The pdf.
+   * @param block
+   *          The IU block.
+   * @return Returns the extracted PDF AS ID.
+   * @throws PDFDocumentException
+   *           Thrown with code 301 if the egiz dict entry or its ID entry
+   *           cannot be found; otherwise a forwarded exception.
+   * @throws InvalidIDException
+   *           Forwarded exception.
+   */
+  public static PdfASID extractKZFromEGIZBlock(final byte[] pdf, final FooterParseResult block) throws PDFDocumentException, InvalidIDException
+  {
+    int egiz_index = PDFUtils.indexOfName(pdf, block.tpr.dpr.names, EGIZ_DICT_NAME);
+    if (egiz_index < 0)
+    {
+      throw new PDFDocumentException(301, "egiz_index = " + egiz_index);
+    }
+
+    IndirectObjectReferenceParseResult egiz_dict_iorpr = (IndirectObjectReferenceParseResult) block.tpr.dpr.values.get(egiz_index);
+    // logger_.debug("egiz_dict_ir = " + egiz_dict_iorpr.ior.object_number
+    // + " " + egiz_dict_iorpr.ior.generation_number);
+
+    IndirectObjectReference ior = egiz_dict_iorpr.ior;
+
+    // Resolve the reference to a byte offset using this block's xref data.
+    final int egiz_dict_offset = PDFUtils.getObjectOffsetFromXRefByIndirectObjectReference(block.xpr, ior);
+    // logger_.debug("egiz_dict_offset = " + egiz_dict_offset);
+
+    ObjectParseResult obj = PDFUtils.parseObject(pdf, egiz_dict_offset);
+    DictionaryParseResult egiz_dict = (DictionaryParseResult) obj.object;
+
+    int kz_index = PDFUtils.indexOfName(pdf, egiz_dict.names, EGIZ_KZ_NAME);
+    if (kz_index < 0)
+    {
+      throw new PDFDocumentException(301, "kz_index = " + kz_index);
+    }
+    ArrayParseResult kz_apr = (ArrayParseResult) egiz_dict.values.get(kz_index);
+
+    String kz_string = restoreKZ(pdf, kz_apr);
+    PdfASID kz = new PdfASID(kz_string);
+
+    return kz;
+  }
+
+  /**
+   * Restores the Kennzeichnung String from an Array.
+   *
+   * <p>
+   * The array elements are read in pairs: element <code>2*i</code> is taken
+   * as the start and element <code>2*i + 1</code> as the length of the i-th
+   * string part. A trailing unpaired element is ignored.
+   * </p>
+   *
+   * @param pdf
+   *          The PDF.
+   * @param kz_apr
+   *          The Array, as parsed from the EGIZ Dict.
+   * @return Returns the restored KZ.
+   * @throws PDFDocumentException
+   *           Thrown with {@link ErrorCode#DOCUMENT_CANNOT_BE_READ} if
+   *           reconstructing the string fails with an IOException.
+   */
+  public static String restoreKZ(byte[] pdf, ArrayParseResult kz_apr) throws PDFDocumentException
+  {
+    try
+    {
+      List partition = new ArrayList();
+
+      int linesToProcess = (kz_apr.elements.size() / 2);
+      log.trace("Lines to process for KZ: " + linesToProcess);
+      /*
+      if (linesToProcess > 1) {
+        log.debug("Multiple KZHOTFIX: forcing single line to process");
+        linesToProcess = 1;
+      }
+      */
+      for (int i = 0; i < linesToProcess; i++)
+      {
+        NumberParseResult start_npr = (NumberParseResult) kz_apr.elements.get(i * 2);
+        NumberParseResult length_npr = (NumberParseResult) kz_apr.elements.get(i * 2 + 1);
+
+        StringInfo si = new StringInfo();
+        si.string_start = start_npr.number;
+        si.string_length = length_npr.number;
+        si.pdf = pdf;
+
+        log.trace("Adding KZ: " + si.toString());
+
+        partition.add(si);
+      }
+
+      String KZ = Placeholder.reconstructStringFromPartition(pdf, partition, BinarySignature.ENCODING_WIN);
+      return KZ;
+    }
+    catch (IOException e1)
+    {
+      throw new PDFDocumentException(ErrorCode.DOCUMENT_CANNOT_BE_READ, e1);
+    }
+  }
+
+}
diff --git a/pdf-as-lib/src/main/java/at/gv/egiz/pdfas/impl/vfilter/helper/VerificationFilterHelper.java b/pdf-as-lib/src/main/java/at/gv/egiz/pdfas/impl/vfilter/helper/VerificationFilterHelper.java
new file mode 100644
index 0000000..69803e7
--- /dev/null
+++ b/pdf-as-lib/src/main/java/at/gv/egiz/pdfas/impl/vfilter/helper/VerificationFilterHelper.java
@@ -0,0 +1,162 @@
+/**
+ * <copyright> Copyright 2006 by Know-Center, Graz, Austria </copyright>
+ * PDF-AS has been contracted by the E-Government Innovation Center EGIZ, a
+ * joint initiative of the Federal Chancellery Austria and Graz University of
+ * Technology.
+ *
+ * Licensed under the EUPL, Version 1.1 or - as soon they will be approved by
+ * the European Commission - subsequent versions of the EUPL (the "Licence");
+ * You may not use this work except in compliance with the Licence.
+ * You may obtain a copy of the Licence at:
+ * http://www.osor.eu/eupl/
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the Licence is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the Licence for the specific language governing permissions and
+ * limitations under the Licence.
+ *
+ * This product combines work with different licenses. See the "NOTICE" text
+ * file for details on the various modules and licenses.
+ * The "NOTICE" text file is part of the distribution. Any derivative works
+ * that you distribute must include a readable copy of the "NOTICE" text file.
+ */
+package at.gv.egiz.pdfas.impl.vfilter.helper;
+
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+
+import at.gv.egiz.pdfas.exceptions.framework.VerificationFilterException;
+import at.gv.egiz.pdfas.impl.input.helper.DataSourceHelper;
+import at.gv.egiz.pdfas.impl.vfilter.Partition;
+import at.gv.egiz.pdfas.impl.vfilter.partition.BinaryPartition;
+import at.gv.egiz.pdfas.impl.vfilter.partition.TextPartition;
+import at.gv.egiz.pdfas.framework.input.PdfDataSource;
+import at.knowcenter.wag.exactparser.parsing.results.FooterParseResult;
+
+/**
+ * Contains helpful methods used by the VerificationFilter.
+ *
+ * @author wprinz
+ */
+public final class VerificationFilterHelper
+{
+  /**
+   * Partitions the list of Incremental Update blocks into text and binary
+   * partitions.
+   *
+   * <p>
+   * A partition is a sequence of Incremental Update blocks of the same type.
+   * </p>
+   * <p>
+   * An Incremental Update block is considered to have the type "binary" if it
+   * contains an egiz dictionary. A block not containing an egiz dictionary is
+   * considered to have the type "text".
+   * </p>
+   * <p>
+   * The data source is converted to a byte array only once, when the first
+   * block is examined, and the result is reused for all further blocks.
+   * </p>
+   *
+   * @param pdf
+   *          The PDF.
+   * @param blocks
+   *          The Incremental Update blocks (FooterParseResult objects).
+   * @return Returns the partitioning of the blocks.
+   * @throws VerificationFilterException
+   *           Thrown if something goes wrong.
+   */
+  public static List partition(PdfDataSource pdf, List blocks) throws VerificationFilterException
+  {
+    List partitions = new ArrayList(blocks.size());
+
+    // Converted lazily so that an empty block list triggers no conversion.
+    byte[] data = null;
+
+    Iterator it = blocks.iterator();
+    while (it.hasNext())
+    {
+      FooterParseResult fpr = (FooterParseResult) it.next();
+
+      if (data == null)
+      {
+        data = DataSourceHelper.convertDataSourceToByteArray(pdf);
+      }
+
+      if (VerificationFilterBinaryHelper.containsEGIZDict(data, fpr))
+      {
+        // Binary block: extend the current binary partition or open a new one.
+        BinaryPartition bp = null;
+        if (partitions.isEmpty() || ((Partition) partitions.get(partitions.size() - 1)).isTextPartition())
+        {
+          bp = new BinaryPartition();
+          bp.blocks = new ArrayList(blocks.size());
+          partitions.add(bp);
+        }
+        else
+        {
+          bp = (BinaryPartition) partitions.get(partitions.size() - 1);
+        }
+        assert bp != null;
+
+        bp.blocks.add(fpr);
+      }
+      else
+      {
+        // Text block: extend the current text partition or open a new one.
+        TextPartition tp = null;
+        if (partitions.isEmpty() || !((Partition) partitions.get(partitions.size() - 1)).isTextPartition())
+        {
+          tp = new TextPartition();
+          tp.blocks = new ArrayList(blocks.size());
+          partitions.add(tp);
+        }
+        else
+        {
+          tp = (TextPartition) partitions.get(partitions.size() - 1);
+        }
+        assert tp != null;
+
+        tp.blocks.add(fpr);
+      }
+    }
+
+    assert partitions.size() >= 1 : "There must be at least one partition";
+
+    return partitions;
+  }
+
+  /**
+   * Determines the end of the given partition.
+   *
+   * <p>
+   * The end is the <code>next_index</code> of the last block in the
+   * partition.
+   * </p>
+   *
+   * @param partition
+   *          The partition; either a TextPartition or a BinaryPartition.
+   * @return Returns the end index of the given partition.
+   */
+  public static int getEndOfPartition(Partition partition)
+  {
+    List blocks = null;
+    if (partition instanceof TextPartition)
+    {
+      blocks = ((TextPartition) partition).blocks;
+    }
+    else
+    {
+      blocks = ((BinaryPartition) partition).blocks;
+    }
+
+    return ((FooterParseResult) blocks.get(blocks.size() - 1)).next_index;
+  }
+
+  /**
+   * Finds the last text partition in the given list of partitions.
+   *
+   * <p>
+   * Only the last two partitions are inspected: if the last one is not a
+   * text partition, the one before it is returned. That this one is a text
+   * partition is checked by assertions only.
+   * </p>
+   *
+   * @param partitions
+   *          The partitions.
+   * @return Returns the last TextPartition.
+   */
+  public static TextPartition findLastTextPartition(List partitions)
+  {
+    Partition lastTextPartition = (Partition) partitions.get(partitions.size() - 1);
+
+    if (!lastTextPartition.isTextPartition())
+    {
+      assert partitions.size() > 1 : "The only one partition cannot be a binary partition - where is the original document?";
+      Partition previousToLastPartition = (Partition) partitions.get(partitions.size() - 2);
+      assert previousToLastPartition.isTextPartition() : "The previous to last partition must be a text partition or something is wrong with the partitioning algorithm.";
+
+      lastTextPartition = previousToLastPartition;
+    }
+
+    return (TextPartition) lastTextPartition;
+  }
+
+}
diff --git a/pdf-as-lib/src/main/java/at/gv/egiz/pdfas/impl/vfilter/helper/VerificationFilterTextHelper.java b/pdf-as-lib/src/main/java/at/gv/egiz/pdfas/impl/vfilter/helper/VerificationFilterTextHelper.java
new file mode 100644
index 0000000..87aa159
--- /dev/null
+++ b/pdf-as-lib/src/main/java/at/gv/egiz/pdfas/impl/vfilter/helper/VerificationFilterTextHelper.java
@@ -0,0 +1,35 @@
+/**
+ * <copyright> Copyright 2006 by Know-Center, Graz, Austria </copyright>
+ * PDF-AS has been contracted by the E-Government Innovation Center EGIZ, a
+ * joint initiative of the Federal Chancellery Austria and Graz University of
+ * Technology.
+ *
+ * Licensed under the EUPL, Version 1.1 or - as soon they will be approved by
+ * the European Commission - subsequent versions of the EUPL (the "Licence");
+ * You may not use this work except in compliance with the Licence.
+ * You may obtain a copy of the Licence at:
+ * http://www.osor.eu/eupl/
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the Licence is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the Licence for the specific language governing permissions and
+ * limitations under the Licence.
+ *
+ * This product combines work with different licenses. See the "NOTICE" text
+ * file for details on the various modules and licenses.
+ * The "NOTICE" text file is part of the distribution. Any derivative works
+ * that you distribute must include a readable copy of the "NOTICE" text file.
+ */
+package at.gv.egiz.pdfas.impl.vfilter.helper;
+
+/**
+ * Intended home for helper methods used by the VerificationFilter to analyze
+ * text and find text signatures.
+ *
+ * <p>
+ * The class currently declares no members.
+ * </p>
+ *
+ * @author wprinz
+ */
+public final class VerificationFilterTextHelper
+{
+
+}
diff --git a/pdf-as-lib/src/main/java/at/gv/egiz/pdfas/impl/vfilter/partition/BinaryPartition.java b/pdf-as-lib/src/main/java/at/gv/egiz/pdfas/impl/vfilter/partition/BinaryPartition.java
new file mode 100644
index 0000000..5b3c7e2
--- /dev/null
+++ b/pdf-as-lib/src/main/java/at/gv/egiz/pdfas/impl/vfilter/partition/BinaryPartition.java
@@ -0,0 +1,39 @@
+/**
+ * <copyright> Copyright 2006 by Know-Center, Graz, Austria </copyright>
+ * PDF-AS has been contracted by the E-Government Innovation Center EGIZ, a
+ * joint initiative of the Federal Chancellery Austria and Graz University of
+ * Technology.
+ *
+ * Licensed under the EUPL, Version 1.1 or - as soon they will be approved by
+ * the European Commission - subsequent versions of the EUPL (the "Licence");
+ * You may not use this work except in compliance with the Licence.
+ * You may obtain a copy of the Licence at:
+ * http://www.osor.eu/eupl/
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the Licence is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the Licence for the specific language governing permissions and
+ * limitations under the Licence.
+ *
+ * This product combines work with different licenses. See the "NOTICE" text
+ * file for details on the various modules and licenses.
+ * The "NOTICE" text file is part of the distribution. Any derivative works
+ * that you distribute must include a readable copy of the "NOTICE" text file.
+ */
+package at.gv.egiz.pdfas.impl.vfilter.partition;
+
+import java.util.List;
+
+import at.gv.egiz.pdfas.impl.vfilter.Partition;
+
+
+/**
+ * A {@link Partition} that reports itself as not being a text partition.
+ */
+public class BinaryPartition implements Partition
+{
+  /**
+   * The blocks belonging to this partition; null until assigned.
+   */
+  public List blocks;
+
+  /**
+   * @return Always returns false.
+   */
+  public boolean isTextPartition()
+  {
+    return false;
+  }
+}
diff --git a/pdf-as-lib/src/main/java/at/gv/egiz/pdfas/impl/vfilter/partition/TextPartition.java b/pdf-as-lib/src/main/java/at/gv/egiz/pdfas/impl/vfilter/partition/TextPartition.java
new file mode 100644
index 0000000..665a5ef
--- /dev/null
+++ b/pdf-as-lib/src/main/java/at/gv/egiz/pdfas/impl/vfilter/partition/TextPartition.java
@@ -0,0 +1,40 @@
+/**
+ * <copyright> Copyright 2006 by Know-Center, Graz, Austria </copyright>
+ * PDF-AS has been contracted by the E-Government Innovation Center EGIZ, a
+ * joint initiative of the Federal Chancellery Austria and Graz University of
+ * Technology.
+ *
+ * Licensed under the EUPL, Version 1.1 or - as soon they will be approved by
+ * the European Commission - subsequent versions of the EUPL (the "Licence");
+ * You may not use this work except in compliance with the Licence.
+ * You may obtain a copy of the Licence at:
+ * http://www.osor.eu/eupl/
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the Licence is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the Licence for the specific language governing permissions and
+ * limitations under the Licence.
+ *
+ * This product combines work with different licenses. See the "NOTICE" text
+ * file for details on the various modules and licenses.
+ * The "NOTICE" text file is part of the distribution. Any derivative works
+ * that you distribute must include a readable copy of the "NOTICE" text file.
+ */
+package at.gv.egiz.pdfas.impl.vfilter.partition;
+
+import java.util.List;
+
+import at.gv.egiz.pdfas.impl.vfilter.Partition;
+
+
+/**
+ * A {@link Partition} that reports itself as being a text partition.
+ */
+public class TextPartition implements Partition
+{
+  /**
+   * The blocks belonging to this partition; null until assigned.
+   */
+  public List blocks;
+
+  /**
+   * @return Always returns true.
+   */
+  public boolean isTextPartition()
+  {
+    return true;
+  }
+
+}