From 535a04fa05f739ec16dd81666e3b0f82dfbd442d Mon Sep 17 00:00:00 2001 From: tknall Date: Wed, 9 Jan 2013 15:41:29 +0000 Subject: pdf-as-lib maven project files moved to pdf-as-lib git-svn-id: https://joinup.ec.europa.eu/svn/pdf-as/pdf-as/trunk@926 7b5415b0-85f9-ee4d-85bd-d5d0c3b42d1c --- .../pdfas/impl/vfilter/VerificationFilterImpl.java | 964 +++++++++++++++++++++ 1 file changed, 964 insertions(+) create mode 100644 pdf-as-lib/src/main/java/at/gv/egiz/pdfas/impl/vfilter/VerificationFilterImpl.java (limited to 'pdf-as-lib/src/main/java/at/gv/egiz/pdfas/impl/vfilter/VerificationFilterImpl.java') diff --git a/pdf-as-lib/src/main/java/at/gv/egiz/pdfas/impl/vfilter/VerificationFilterImpl.java b/pdf-as-lib/src/main/java/at/gv/egiz/pdfas/impl/vfilter/VerificationFilterImpl.java new file mode 100644 index 0000000..3f0f482 --- /dev/null +++ b/pdf-as-lib/src/main/java/at/gv/egiz/pdfas/impl/vfilter/VerificationFilterImpl.java @@ -0,0 +1,964 @@ +/** + * Copyright 2006 by Know-Center, Graz, Austria + * PDF-AS has been contracted by the E-Government Innovation Center EGIZ, a + * joint initiative of the Federal Chancellery Austria and Graz University of + * Technology. + * + * Licensed under the EUPL, Version 1.1 or - as soon they will be approved by + * the European Commission - subsequent versions of the EUPL (the "Licence"); + * You may not use this work except in compliance with the Licence. + * You may obtain a copy of the Licence at: + * http://www.osor.eu/eupl/ + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the Licence is distributed on an "AS IS" basis, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the Licence for the specific language governing permissions and + * limitations under the Licence. + * + * This product combines work with different licenses. See the "NOTICE" text + * file for details on the various modules and licenses. 
+ * The "NOTICE" text file is part of the distribution. Any derivative works + * that you distribute must include a readable copy of the "NOTICE" text file. + */ +package at.gv.egiz.pdfas.impl.vfilter; + +import java.util.ArrayList; +import java.util.HashSet; +import java.util.Iterator; +import java.util.List; +import java.util.Set; + +import org.apache.commons.lang.time.StopWatch; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; + +import at.gv.egiz.pdfas.exceptions.ErrorCode; +import at.gv.egiz.pdfas.exceptions.framework.VerificationFilterException; +import at.gv.egiz.pdfas.framework.SignatureHolderHelper; +import at.gv.egiz.pdfas.framework.VerificatorFactory; +import at.gv.egiz.pdfas.framework.input.PdfDataSource; +import at.gv.egiz.pdfas.framework.input.TextDataSource; +import at.gv.egiz.pdfas.framework.verificator.Verificator; +import at.gv.egiz.pdfas.framework.vfilter.VerificationFilter; +import at.gv.egiz.pdfas.framework.vfilter.VerificationFilterParameters; +import at.gv.egiz.pdfas.impl.input.DelimitedPdfDataSource; +import at.gv.egiz.pdfas.impl.input.helper.DataSourceHelper; +import at.gv.egiz.pdfas.impl.vfilter.helper.VerificationFilterBinaryHelper; +import at.gv.egiz.pdfas.impl.vfilter.helper.VerificationFilterHelper; +import at.gv.egiz.pdfas.impl.vfilter.partition.BinaryPartition; +import at.gv.egiz.pdfas.impl.vfilter.partition.TextPartition; +import at.knowcenter.wag.egov.egiz.PdfAS; +import at.knowcenter.wag.egov.egiz.PdfASID; +import at.knowcenter.wag.egov.egiz.cfg.SettingsReader; +import at.knowcenter.wag.egov.egiz.exceptions.NormalizeException; +import at.knowcenter.wag.egov.egiz.exceptions.PDFDocumentException; +import at.knowcenter.wag.egov.egiz.exceptions.PresentableException; +import at.knowcenter.wag.egov.egiz.exceptions.SettingsException; +import at.knowcenter.wag.egov.egiz.exceptions.SignatureException; +import at.knowcenter.wag.egov.egiz.exceptions.SignatureTypesException; +import 
at.knowcenter.wag.egov.egiz.pdf.AbsoluteTextSignature; +import at.knowcenter.wag.egov.egiz.pdf.EGIZDate; +import at.knowcenter.wag.egov.egiz.pdf.NoSignatureHolder; +import at.knowcenter.wag.egov.egiz.pdf.SignatureHolder; +import at.knowcenter.wag.egov.egiz.pdf.TextualSignatureHolder; +import at.knowcenter.wag.exactparser.parsing.results.FooterParseResult; + +/** + * @author wprinz + */ +public class VerificationFilterImpl implements VerificationFilter +{ + + /** + * The log. + */ + private static final Log log = LogFactory.getLog(VerificationFilterImpl.class); + + + public static final String CHECK_DOCUMENT = "check_document"; + public static final String SUPRESS_EXCEPTION_WHEN_LAST_UIBLOCK_IS_NO_SIGNATURE = "supress_exception_when_last_iublock_is_no_signature"; + public static final String BINARY_ONLY = "binary_only"; + public static final String ASSUME_ONLY_SIGNATURE_BLOCKS = "assume_only_signature_blocks"; + + /** + * @see at.gv.egiz.pdfas.framework.vfilter.VerificationFilter#extractSignatureHolders(at.gv.egiz.pdfas.framework.input.PdfDataSource, + * java.util.List, + * at.gv.egiz.pdfas.framework.vfilter.VerificationFilterParameters) + */ + public List extractSignatureHolders(final PdfDataSource pdf, List blocks, final VerificationFilterParameters parameters) throws VerificationFilterException + { + log.trace("extractSignaturHolders:"); + StopWatch sw = new StopWatch(); + sw.start(); + + if (log.isDebugEnabled()) + { + log.debug("Original IU blocks: " + blocks.size()); + debugIUBlocks(blocks); + } + + unrollLinearization(blocks); + + if (log.isDebugEnabled()) + { + log.debug("IU blocks without linearization: " + blocks.size()); + debugIUBlocks(blocks); + } + + + SettingsReader settings; + try { + settings = SettingsReader.getInstance(); + } catch (SettingsException e) { + throw new VerificationFilterException(e); + } + String check_doc = settings.getSetting(CHECK_DOCUMENT, "false"); + + // check document for textual sigs here here if binary_only is set + if 
("true".equalsIgnoreCase(check_doc) && + parameters.extractBinarySignaturesOnly()) { + + checkBinaryOnly(pdf, parameters.scanForOldSignatures()); + log.debug("checkDocument: " + sw.getTime() + "ms."); + } else { + log.debug("Skipping checkDocument for textual sigs."); + } + // end add + + List signatureHolderChain = null; + + if (parameters.extractBinarySignaturesOnly()) + { + log.debug("Extracting only binary signatures. Binary-only mode."); + + signatureHolderChain = performBinaryOnly(pdf, blocks); + } + else + { + List partitions = VerificationFilterHelper.partition(pdf, blocks); + if (log.isDebugEnabled()) + { + debugPartitions(partitions); + } + + if (parameters.assumeOnlySignatureUpdateBlocks()) + { + log.debug("Assuming that there are only signature Incremental Update blocks. Semi-conservative mode."); + + signatureHolderChain = performSemiConservative(pdf, parameters.scanForOldSignatures(), blocks, partitions); + } + else + { + log.debug("Scanning complete document. Conservative mode."); + + signatureHolderChain = performFullConservative(pdf, parameters.scanForOldSignatures(), blocks, partitions); + } + + } + + log.trace("extractSignaturHolders finished (" + (signatureHolderChain != null ? signatureHolderChain.size() : 0) + " elements)."); + sw.stop(); + log.debug("extractSignatureHolders: " + sw.getTime() + "ms."); + + return signatureHolderChain; + } + + /** + * @see at.gv.egiz.pdfas.framework.vfilter.VerificationFilter#extractSignaturHolders(at.gv.egiz.pdfas.framework.input.TextDataSource, + * at.gv.egiz.pdfas.framework.vfilter.VerificationFilterParameters) + */ + public List extractSignaturHolders(TextDataSource text, VerificationFilterParameters parameters) throws VerificationFilterException + { + if (parameters.extractBinarySignaturesOnly()) + { + log + .warn("A free text signature extraction was issued although the VerificationFilter was configured to detect only binary signatures (binary-only mode). 
The result is of course that no signatures can be found."); + + return new ArrayList(); + } + + String freetext = text.getText(); + String normalizedText = normalizeText(freetext); + + List foundSignatures = null; + if (parameters.scanForOldSignatures()) + { + log.debug("Extracting old and new signatures from text."); + + foundSignatures = extractNewAndOldSignaturesFromText(normalizedText); + } + else + { + log.debug("Extracting new signatures from text (not extracting old ones)."); + + foundSignatures = extractNewSignaturesFromText(normalizedText); + } + + List textOnlySignatures = filterOutBinarySignatures(foundSignatures); + + return textOnlySignatures; + } + + protected String normalizeText(String freetext) throws VerificationFilterException + { + try + { + return PdfAS.normalizeText(freetext); + } + catch (NormalizeException e) + { + throw new VerificationFilterException(e); + } + } + + /** + * Removes the linearization footer from the list of update blocks. + * + * @param blocks + * The list of FooterParseResult objects in \prev order. + */ + protected void unrollLinearization(List blocks) + { + int linearization_index = -1; + for (int i = 0; i < blocks.size(); i++) + { + FooterParseResult bpr = (FooterParseResult) blocks.get(i); + + if (bpr.sxpr.xref_index == 0) + { + if (linearization_index >= 0) + { + throw new RuntimeException("There is more than one linearization block! 
index = " + i); + } + linearization_index = i; + } + } + + if (linearization_index >= 0) + { +// logger_.debug("The document is linearized - unrolling +// linearization block " + linearization_index); + blocks.remove(linearization_index); + } + } + + protected List performBinaryOnly(PdfDataSource pdf, List blocks) throws VerificationFilterException + { + return extractBinarySignaturesOnly(pdf, blocks); + } + + protected List performSemiConservative(PdfDataSource pdf, boolean scanForOldSignatures, List blocks, List partitions) throws VerificationFilterException + { + log.debug("perform semiConservative()..."); + List binarySignatures = extractBinarySignaturesOnly(pdf, blocks); + + log.debug("determining last partition..."); + TextPartition lastTextPartition = VerificationFilterHelper.findLastTextPartition(partitions); + List extractedSignatures = null; + if (scanForOldSignatures) + { + SignaturesAndOld sao = extractSignaturesFromPartitionAndOld(pdf, lastTextPartition); + extractedSignatures = sao.newSignatures; + if (sao.oldSignature != null) + { + extractedSignatures.add(0, sao.oldSignature); + } + } + else + { + log.debug("extracting signatures from last partition..."); + extractedSignatures = extractSignaturesFromPartition(pdf, lastTextPartition); + } + + + List signatureHolderChain = intermingleSignatures(binarySignatures, extractedSignatures); + + return signatureHolderChain; + } + + protected List performFullConservative(PdfDataSource pdf, boolean scanForOldSignatures, List blocks, List partitions) throws VerificationFilterException + { + List binarySignatures = extractBinarySignaturesOnly(pdf, blocks); + + // extract signature values of found binary signature blocks and store these values in a Set + // this set is later used to filter out the binary signatures that are recognized as text + // signatures. 
+ Set binarySigValues = new HashSet(); + Iterator iterator = binarySignatures.iterator(); + while(iterator.hasNext()) { + + SignatureHolder sh = (SignatureHolder)iterator.next(); + + String sigVal = sh.getSignatureObject().getSignationValue(); + binarySigValues.add(sigVal); + } + + SignatureHolder oldSignature = null; + + //List originalPartitions = partitions; + // This gives every IU block an own text partition + // This allows text signatures to be found correctly if there are + // IU blocks with disturbing text after them. + // On the other hand, these requires extra text extractions and + // signature searches and thereby is slow. + List flattedOutPartitions = flattenOutTextPartitions(partitions, blocks); + partitions = flattedOutPartitions; + + SettingsReader settings; + try { + settings = SettingsReader.getInstance(); + } catch (SettingsException e) { + throw new VerificationFilterException(e); + } + String check_doc = settings.getSetting(CHECK_DOCUMENT, "false"); + boolean supressException = "true".equalsIgnoreCase(settings.getSetting(SUPRESS_EXCEPTION_WHEN_LAST_UIBLOCK_IS_NO_SIGNATURE, "false")); + + // flag indicating that the last IU-block of the document is a non-signature IU-block + boolean lastBlockWasModified = false; + + // counter of all signatures (textual and binary) of this document + int signatureCounter = 0; + + // counter of all textual signatures in this document + int txtSigsSoFar = 0; + + // counter of all textual signatures in the current partition + int txtSigsThisPartition = 0; + + List partitionResults = new ArrayList(partitions.size()); + List nshList = new ArrayList(); + + boolean sigFound = false; + + for (int i = 0; i < partitions.size(); i++) + { + Partition p = (Partition) partitions.get(i); + + // updating flag and counter + boolean partitionContainsNewTextSignatures = true; + txtSigsSoFar = txtSigsThisPartition; + + if (p instanceof TextPartition) + { + TextPartition tp = (TextPartition) p; + + List partitionResult = null; + + 
boolean scanThisPartitionForOldSignature = (i == 0) && scanForOldSignatures; + if (scanThisPartitionForOldSignature) + { + SignaturesAndOld sao = extractSignaturesFromPartitionAndOld(pdf, tp); + partitionResult = sao.newSignatures; + oldSignature = sao.oldSignature; + } + else + { + partitionResult = extractSignaturesFromPartition(pdf, tp); + } + + // binary signature blocks that have been detected as well are identified by comparing their signature values + // with those stored in our Set above and are not considered for our IU-check + List onlyTextSignatures = new ArrayList(); + Iterator iter = partitionResult.iterator(); + while(iter.hasNext()) { + + SignatureHolder sh = (SignatureHolder)iter.next(); + if(!binarySigValues.contains(sh.getSignatureObject().getSignationValue())) { + + onlyTextSignatures.add(sh); + } + } + + // update signature counters + txtSigsThisPartition = onlyTextSignatures.size(); + int newTextSignatures = txtSigsThisPartition - txtSigsSoFar; + signatureCounter = signatureCounter + newTextSignatures; + + // update sigFound flag + if(txtSigsThisPartition > 0) { + + sigFound = true; + } + + // TextPartition is only valid, if at least one more text signature has been found than in the previous text partition + if(!(newTextSignatures > 0)) { + + partitionContainsNewTextSignatures = false; + } + + partitionResults.add(partitionResult); + } else { + // should be binary partition + if(p instanceof BinaryPartition) { + + BinaryPartition binpart = (BinaryPartition)p; + + // updating counter and flag + signatureCounter = signatureCounter + binpart.blocks.size(); + sigFound = true; + + } + } + + // if document checking is enabled, at least one signature has been found so far, we are dealing with a + // non-signature IU-block + if ((check_doc.equalsIgnoreCase("true"))&& (sigFound && !partitionContainsNewTextSignatures)) { + + nshList.add(new NoSignatureHolder(signatureCounter)); + lastBlockWasModified = true; + + } else { + + lastBlockWasModified = 
false; + } + + } + + // throw an exception if the last update block does not contain a signature and signatures have been found in this document + if (lastBlockWasModified) { + if (!supressException) { + throw new VerificationFilterException(ErrorCode.MODIFIED_AFTER_SIGNATION, "The document has been modified after being signed."); + } else { + log.debug("The document has been modified after being signed. According to the configuration, no exception is thrown."); + } + } + + List extractedSignatures = new ArrayList(); + Iterator it = partitionResults.iterator(); + List prevPartitionResult = null; + while (it.hasNext()) + { + List partitionResult = (List) it.next(); + + if (prevPartitionResult == null) + { + extractedSignatures.addAll(partitionResult); + } + else + { + assert partitionResult.size() >= prevPartitionResult.size(); + +// for (int i = prevPartitionResult.size(); i < partitionResult.size(); i++) +// { +// SignatureHolder sh = (SignatureHolder) partitionResult.get(i); +// extractedSignatures.add(sh); +// } + mergeSignatures(prevPartitionResult, partitionResult, extractedSignatures); + } + + prevPartitionResult = partitionResult; + } + + List signatureHolderChain = intermingleSignatures(binarySignatures, extractedSignatures); + + if (oldSignature != null) + { + signatureHolderChain.add(0, oldSignature); + } + + // add the created NoSignatureHolders + signatureHolderChain.addAll(nshList); + + return signatureHolderChain; + } + + private void mergeSignatures(List oldList, List newList, List result) { + + for(int i=0; i < newList.size(); i++) { + + SignatureHolder currentNewSh = (SignatureHolder)newList.get(i); + + boolean shAlreadyPresentInOldList = false; + int pos = -1; + + for(int j=0; j add + result.add(currentNewSh); + } + + } + + + return; + } + + + protected List flattenOutTextPartitions (List partitions, List blocks) + { + + List blockPartitions = new ArrayList(blocks.size()); + Iterator it = partitions.iterator(); + while (it.hasNext()) + { + 
Partition p = (Partition)it.next(); + if (p instanceof TextPartition) + { + TextPartition tp = (TextPartition)p; + Iterator blockIt = tp.blocks.iterator(); + while (blockIt.hasNext()) + { + FooterParseResult fpr = (FooterParseResult)blockIt.next(); + TextPartition newPt = new TextPartition(); + newPt.blocks = new ArrayList(1); + newPt.blocks.add(fpr); + blockPartitions.add(newPt); + } + } + else + { + // binary partition + blockPartitions.add(p); + } + } + + // note: successive binary blocks are still combined to one binary partition + assert blockPartitions.size() <= blocks.size(); + + return blockPartitions; + } + + protected String extractText(PdfDataSource pdf, int endOfDocument) throws PresentableException { + return extractText(pdf, endOfDocument, "utf8"); + } + + protected String extractText(PdfDataSource pdf, int endOfDocument, String encoding) throws PresentableException + { + + log.debug("EXTRACTING TEXT (" + encoding + ")... end index = " + endOfDocument); + + DelimitedPdfDataSource dds = new DelimitedPdfDataSource(pdf, endOfDocument); + //DelimitedInputStream dis = new DelimitedInputStream(pdf.createInputStream(), endOfDocument); + return PdfAS.extractNormalizedTextTextual(dds, encoding); + } + + + protected List extractNewSignaturesFromText(String text) throws VerificationFilterException + { + try + { + return AbsoluteTextSignature.extractSignatureHoldersFromText(text); + } + catch (PresentableException e) + { + throw new VerificationFilterException(e); + } + } + + protected List extractNewAndOldSignaturesFromText(String text) throws VerificationFilterException + { + SignaturesAndOld sao = extractSignaturesAndOld(text); + if (sao.oldSignature != null) + { + sao.newSignatures.add(0, sao.oldSignature); + } + + return sao.newSignatures; + } + + protected List extractOldSignaturesFromText(String text) throws PresentableException + { + return PdfAS.extractSignatureHoldersTextual(text, true); + } + + protected List intermingleSignatures(List 
binarySignatures, List extractedSignatures) + { + List textualSignatures = filterOutBinarySignatures(extractedSignatures); + + List intermingled = new ArrayList(binarySignatures.size() + textualSignatures.size()); + intermingled.addAll(binarySignatures); + intermingled.addAll(textualSignatures); + + sortSignatures(intermingled); + + return intermingled; + } + + protected List filterOutBinarySignatures(List signatures) + { + List textOnly = new ArrayList(signatures.size()); + + Iterator it = signatures.iterator(); + while (it.hasNext()) + { + SignatureHolder sh = (SignatureHolder) it.next(); + if (sh.getSignatureObject().isTextual()) + { + textOnly.add(sh); + } + } + + return textOnly; + } + + protected void sortSignatures(List signatures) + { + SignatureHolderHelper.sortByDate(signatures); + } + + protected void debugIUBlocks(List blocks) + { + Iterator it = blocks.iterator(); + while (it.hasNext()) + { + FooterParseResult fpr = (FooterParseResult) it.next(); + log.debug("footer: " + fpr.start_index + " to " + fpr.next_index + ", has predecessor = " + fpr.tpr.has_predecessor); + } + } + + protected void debugPartitions(List partitions) + { + Iterator it = partitions.iterator(); + while (it.hasNext()) + { + Object o = it.next(); + assert o instanceof Partition; + + List blocks = null; + if (o instanceof TextPartition) + { + TextPartition tp = (TextPartition) o; + + blocks = tp.blocks; + + log.debug("text partition with " + tp.blocks.size() + " blocks:"); + } + else + { + BinaryPartition bp = (BinaryPartition) o; + + blocks = bp.blocks; + + log.debug("binary partition: with " + bp.blocks.size() + " blocks:"); + + } + debugIUBlocks(blocks); + log.debug("partition finished."); + } + } + + /** + * Extracts the binary singatures from the given PDF. + * + *

+ * <p>IU blocks without an egiz dict are not considered.</p> + *

+ * + * @param pdf + * @param blocks + * @return Returns the List of signature holders. + * @throws PresentableException + */ + protected List extractBinarySignaturesOnly(PdfDataSource pdf, List blocks) throws VerificationFilterException + { + SettingsReader settings; + try { + settings = SettingsReader.getInstance(); + } catch (SettingsException e) { + throw new VerificationFilterException(e); + } + String check_doc = settings.getSetting(CHECK_DOCUMENT, "false"); + String binary_only = settings.getSetting(BINARY_ONLY, "false"); + String assume_sigs_only = settings.getSetting(ASSUME_ONLY_SIGNATURE_BLOCKS, "false"); + boolean supressException = "true".equalsIgnoreCase(settings.getSetting(SUPRESS_EXCEPTION_WHEN_LAST_UIBLOCK_IS_NO_SIGNATURE, "false")); + + try + { + // PERF: extract binary signatures needs byte array + byte[] data = DataSourceHelper.convertDataSourceToByteArray(pdf); + + List binarySignatures = new ArrayList(blocks.size()); + + Iterator it = blocks.iterator(); + int prev_end = 0; + boolean sig_detected = false; + while (it.hasNext()) + { + FooterParseResult fpr = (FooterParseResult) it.next(); + assert fpr.next_index > prev_end; + + if (VerificationFilterBinaryHelper.containsEGIZDict(data, fpr)) + { + PdfASID kz = VerificationFilterBinaryHelper.extractKZFromEGIZBlock(data, fpr); + + // TODO dferbas hack baik test + //kz = new PdfASID("urn:pdfsigfilter:bka.gv.at:binaer:v1.1.0"); + + Verificator verificator = VerificatorFactory.createBinaryVerificator(kz); + List binary_holders = verificator.parseBlock(pdf, data, fpr, prev_end); + + binarySignatures.addAll(binary_holders); + if(binary_holders.size() > 0) { + sig_detected = true; + } + } else { + // an Exception is thrown here if: + // 1) check_document is activated + // 2) assume_only_signature_blocks is false - otherwise we permit updates + // 3) binary_only is true - otherwise updates are handled in method performFullConservative(). 
+ // when binary-only is true, we can be sure that a block that contains no egiz-dict is no textual + // signature either but an illegal update, otherwise an Exception (doc contains textual sig) would have been thrown before + // 4) a binary signature has been detected in a previous block + if(check_doc.equalsIgnoreCase("true") && + binary_only.equalsIgnoreCase("true") && + assume_sigs_only.equalsIgnoreCase("false") && + sig_detected) { + + if (!supressException) { + throw new VerificationFilterException(ErrorCode.MODIFIED_AFTER_SIGNATION, "The document has been modified after being signed."); + } else { + log.debug("The document has been modified after being signed. According to the configuration, no exception is thrown."); + } + + } + } + + prev_end = fpr.next_index; + } + + return binarySignatures; + } + catch (PresentableException e) + { + throw new VerificationFilterException(e); + } + } + + protected List extractSignatures(PdfDataSource pdf, int endOfDocument) throws VerificationFilterException + { + try + { + log.debug("Extracting text from 0 to " + endOfDocument + " (total document size = " + pdf.getLength() + "):"); + String extractedText = extractText(pdf, endOfDocument); + log.debug("Extracting text finished."); + log.debug("extracted text: " + extractedText); + + log.debug("Extracting signatures:"); + List extractedSignatures = extractNewSignaturesFromText(extractedText); + log.debug("Extracting signatures finished."); + log.debug("Number of found signatures: " + extractedSignatures.size()); + + if (extractedSignatures.size() > 0) { + List cp1252SignaturesPositions = new ArrayList(); + //boolean iscp1252Sig = false; + for (int i = 0; i < extractedSignatures.size(); i++) { + SignatureHolder sh = (SignatureHolder)extractedSignatures.get(i); + PdfASID kzid = sh.getSignatureObject().getKZ(); + if (kzid != null && kzid.isOldCp1252Version()) { + log.debug("found cp1252 signature"); + cp1252SignaturesPositions.add(new Integer(i)); + //iscp1252Sig = true; + 
//break; + } + } + if (cp1252SignaturesPositions.size() > 0) { + log.debug("redo text and signature extraction with cp1252 encoding"); + extractedText = extractText(pdf, endOfDocument, "cp1252"); + log.debug("Extracting text finished."); + + log.debug("Extracting signatures:"); + List cp1252ExtractedSignatures = extractNewSignaturesFromText(extractedText); + log.debug("Extracting signatures finished."); + log.debug("Number of found signatures: " + extractedSignatures.size()); + + if (cp1252ExtractedSignatures.size() != extractedSignatures.size()) { + log.error("Invalid cp1252 signatures found. Skipping cp1252 compatibility."); + } + // merge signature holders + for (int i = 0; i < cp1252SignaturesPositions.size(); i++) { + int replaceIndex = ((Integer)cp1252SignaturesPositions.get(i)).intValue(); + extractedSignatures.remove(replaceIndex); + extractedSignatures.add(replaceIndex, cp1252ExtractedSignatures.get(replaceIndex)); + } + } + + } + + if (log.isDebugEnabled()) + { + log.debug("extracted signatures:"); + for (int i = 0; i < extractedSignatures.size(); i++) + { + SignatureHolder sh = (SignatureHolder)extractedSignatures.get(i); + String dateStr = sh.getSignatureObject().getSignationDate(); + EGIZDate ed = EGIZDate.parseFromString(dateStr); + log.debug("#" + i + ": dateStr = " + dateStr + ", egizDate = " + ed.toString()); + } + } + + return extractedSignatures; + } + catch (PresentableException e) + { + throw new VerificationFilterException(e); + } + } + + protected String determineRestText(List newSignatures, String extractedText) + { + if (newSignatures.isEmpty()) + { + return extractedText; + } + + // note that even if the oldest signature is a binary signature, + // the rest text is the text of this binary signature, which was extracted + // like a text signature. 
+ TextualSignatureHolder oldestSignature = (TextualSignatureHolder) newSignatures.get(0); + return oldestSignature.getSignedText(); + } + + protected List extractSignaturesFromPartition(PdfDataSource pdf, Partition partition) throws VerificationFilterException + { + assert partition.isTextPartition(); + + int endOfDocument = VerificationFilterHelper.getEndOfPartition(partition); + List extractedSigs = extractSignatures(pdf, endOfDocument); + TextualSignatureHolder.mulitSetUiBlockEndPos(extractedSigs, endOfDocument); + return extractedSigs; + } + + protected SignaturesAndOld extractSignaturesFromPartitionAndOld(PdfDataSource pdf, Partition partition) throws VerificationFilterException + { + assert partition.isTextPartition(); + + try + { + int endOfDocument = VerificationFilterHelper.getEndOfPartition(partition); + +// log.debug("Extracting text from 0 to " + endOfDocument + " (total document size = " + pdf.getLength() + "):"); + String extractedText = extractText(pdf, endOfDocument); +// log.debug("Extracting text finished."); +// log.debug("extracted text: " + extractedText); + + SignaturesAndOld sao = extractSignaturesAndOld(extractedText); + TextualSignatureHolder.trySetUiBlockEndPos(sao.oldSignature, endOfDocument); + TextualSignatureHolder.mulitSetUiBlockEndPos(sao.newSignatures, endOfDocument); + + return sao; + } + catch (PresentableException e) + { + throw new VerificationFilterException(e); + } + } + + protected void checkBinaryOnly(PdfDataSource pdf, boolean considerOldSigs) throws VerificationFilterException { + + DelimitedPdfDataSource dds = new DelimitedPdfDataSource(pdf, pdf.getLength()); + String text = null; + try { + text = PdfAS.extractNormalizedTextTextual(dds, "utf-8"); + } catch (PresentableException e) { + throw new VerificationFilterException(e); + } + + List sigs = new ArrayList(); + + if(considerOldSigs) { + SignaturesAndOld sao = extractSignaturesAndOld(text); + if(sao != null) { + if(sao.newSignatures != null) { + 
sigs.addAll(sao.newSignatures); + } + if(sao.oldSignature != null) { + sigs.add(sao.oldSignature); + } + } + } else { + List signatures = extractSignatures(pdf, pdf.getLength()); + if(signatures != null) { + sigs.addAll(signatures); + } + } + + Iterator it = sigs.iterator(); + while(it.hasNext()) { + SignatureHolder current = (SignatureHolder)it.next(); + if((current != null)&&(!current.getSignatureObject().isBinary())) { + throw new VerificationFilterException(ErrorCode.NON_BINARY_SIGNATURES_PRESENT, "The document contains non-binary signatures."); + } + } + } + + + protected static class SignaturesAndOld + { + public List newSignatures = null; + + public SignatureHolder oldSignature = null; + } + + protected SignaturesAndOld extractSignaturesAndOld(String text) throws VerificationFilterException + { + try + { + log.debug("Extracting signatures:"); + List extractedSignatures = extractNewSignaturesFromText(text); + log.debug("Extracting signatures finished."); + + log.debug("Extracting old signatures:"); + SignatureHolder oldSignature = extractOldSignature(text, extractedSignatures); + log.debug("Extracting old signatures finished."); + log.debug("oldSignature = null: " + (oldSignature==null)); + + SignaturesAndOld sao = new SignaturesAndOld(); + sao.newSignatures = extractedSignatures; + sao.oldSignature = oldSignature; + + return sao; + } + catch (PresentableException e) + { + throw new VerificationFilterException(e); + } + } + + /** + * Extracts the old signature from the text, but only if it is older than the + * oldest signature of the new signatueres. 
+ * + * @param extractedText + * @param newSignatures + * @return + * @throws PDFDocumentException + * @throws SignatureException + * @throws NormalizeException + * @throws SignatureTypesException + */ + protected SignatureHolder extractOldSignature(String extractedText, List newSignatures) throws PDFDocumentException, SignatureException, NormalizeException, SignatureTypesException + { + SignatureHolder oldSignature = null; + + String restText = determineRestText(newSignatures, extractedText); + + List oldSignatures = PdfAS.extractSignatureHoldersTextual(restText, true); + if (!oldSignatures.isEmpty()) + { + oldSignature = (SignatureHolder) oldSignatures.get(0); + if (!newSignatures.isEmpty()) + { + SignatureHolder oldestNewSignature = (SignatureHolder) newSignatures.get(0); + EGIZDate oldDate = EGIZDate.parseFromString(oldSignature.getSignatureObject().getSignationDate()); + EGIZDate newDate = EGIZDate.parseFromString(oldestNewSignature.getSignatureObject().getSignationDate()); + if (newDate.compareTo(oldDate) <= 0) + { + oldSignature = null; + } + } + } + return oldSignature; + } +} -- cgit v1.2.3