From 535a04fa05f739ec16dd81666e3b0f82dfbd442d Mon Sep 17 00:00:00 2001 From: tknall Date: Wed, 9 Jan 2013 15:41:29 +0000 Subject: pdf-as-lib maven project files moved to pdf-as-lib git-svn-id: https://joinup.ec.europa.eu/svn/pdf-as/pdf-as/trunk@926 7b5415b0-85f9-ee4d-85bd-d5d0c3b42d1c --- .../egov/egiz/framework/VerificationFilter.java | 569 +++++++++++++++++++++ 1 file changed, 569 insertions(+) create mode 100644 pdf-as-lib/src/main/java/at/knowcenter/wag/egov/egiz/framework/VerificationFilter.java (limited to 'pdf-as-lib/src/main/java/at/knowcenter/wag/egov/egiz/framework/VerificationFilter.java') diff --git a/pdf-as-lib/src/main/java/at/knowcenter/wag/egov/egiz/framework/VerificationFilter.java b/pdf-as-lib/src/main/java/at/knowcenter/wag/egov/egiz/framework/VerificationFilter.java new file mode 100644 index 0000000..0bab96f --- /dev/null +++ b/pdf-as-lib/src/main/java/at/knowcenter/wag/egov/egiz/framework/VerificationFilter.java @@ -0,0 +1,569 @@ +/** + * Copyright 2006 by Know-Center, Graz, Austria + * PDF-AS has been contracted by the E-Government Innovation Center EGIZ, a + * joint initiative of the Federal Chancellery Austria and Graz University of + * Technology. + * + * Licensed under the EUPL, Version 1.1 or - as soon they will be approved by + * the European Commission - subsequent versions of the EUPL (the "Licence"); + * You may not use this work except in compliance with the Licence. + * You may obtain a copy of the Licence at: + * http://www.osor.eu/eupl/ + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the Licence is distributed on an "AS IS" basis, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the Licence for the specific language governing permissions and + * limitations under the Licence. + * + * This product combines work with different licenses. See the "NOTICE" text + * file for details on the various modules and licenses. + * The "NOTICE" text file is part of the distribution. Any derivative works + * that you distribute must include a readable copy of the "NOTICE" text file. + * + * $Id: VerificationFilter.java,v 1.5 2006/10/31 08:07:20 wprinz Exp $ + */ +package at.knowcenter.wag.egov.egiz.framework; + +import java.io.IOException; +import java.io.PrintStream; +import java.util.ArrayList; +import java.util.List; + +import org.apache.log4j.Logger; + +import at.gv.egiz.pdfas.exceptions.ErrorCode; +import at.gv.egiz.pdfas.impl.vfilter.VerificationFilterImpl; +import at.knowcenter.wag.egov.egiz.PdfAS; +import at.knowcenter.wag.egov.egiz.PdfASID; +import at.knowcenter.wag.egov.egiz.cfg.ConfigLogger; +import at.knowcenter.wag.egov.egiz.cfg.SettingsReader; +import at.knowcenter.wag.egov.egiz.exceptions.InvalidIDException; +import at.knowcenter.wag.egov.egiz.exceptions.NormalizeException; +import at.knowcenter.wag.egov.egiz.exceptions.PDFDocumentException; +import at.knowcenter.wag.egov.egiz.exceptions.PresentableException; +import at.knowcenter.wag.egov.egiz.exceptions.SignatureException; +import at.knowcenter.wag.egov.egiz.exceptions.SignatureTypesException; +import at.knowcenter.wag.egov.egiz.framework.verificators.BinaryVerificator_1_0_0; +import at.knowcenter.wag.egov.egiz.framework.verificators.TextualVerificator_1_0_0; +import at.knowcenter.wag.egov.egiz.pdf.AbsoluteTextSignature; +import at.knowcenter.wag.egov.egiz.pdf.BinarySignature; +import at.knowcenter.wag.egov.egiz.pdf.Placeholder; +import at.knowcenter.wag.egov.egiz.pdf.SignatureHolder; +import at.knowcenter.wag.egov.egiz.pdf.StringInfo; +import at.knowcenter.wag.egov.egiz.pdf.TextualSignatureHolder; +import at.knowcenter.wag.exactparser.ParseDocument; +import at.knowcenter.wag.exactparser.parsing.IndirectObjectReference; +import at.knowcenter.wag.exactparser.parsing.PDFUtils; +import at.knowcenter.wag.exactparser.parsing.results.ArrayParseResult; +import at.knowcenter.wag.exactparser.parsing.results.DictionaryParseResult; +import at.knowcenter.wag.exactparser.parsing.results.FooterParseResult; +import at.knowcenter.wag.exactparser.parsing.results.IndirectObjectReferenceParseResult; +import at.knowcenter.wag.exactparser.parsing.results.NumberParseResult; +import at.knowcenter.wag.exactparser.parsing.results.ObjectParseResult; + + +/** + * This filter transforms an arbitrary input pdf into an ordered List of + * SignatureHolders for verification. + * + *

+ * The pdf document is parsed and the signature blocks (textual, binary, etc.) + * are extracted as verifyable SignatureHolder objects in the order they appear + * in the document. + *

+ * + * @deprecated use the new at.gv.egiz.framework instead + * + * @author wprinz + * @author amavriqi + */ +public class VerificationFilter +{ + public static final byte[] EGIZ_DICT_NAME = { 'E', 'G', 'I', 'Z', 'S', 'i', + 'g', 'D', 'i', 'c', 't' }; + + public static final byte[] EGIZ_KZ_NAME = { 'I', 'D' }; + + //tzefferer: flag for accepting post-sign modifications + public static final String ALLOW_POST_SIGN_MODIFICATIONS = "allow_post_sign_modifications"; + + /** + * The logger definition. + */ + private static final Logger logger_ = ConfigLogger.getLogger(VerificationFilter.class); + + /** + * Default constructor. + */ + public VerificationFilter() + { + // empty block. + } + + /** + * Extracts the List of SignatureHolders from the given PDF document. + * + * @param pdf + * The PDF document. + * @return Returns the ordered List of SignatureHolder objects (the first + * signature will be at index 0) extracted from the document or an + * empty list, if none could be found. + * @throws PresentableException + */ + public List extractSignaturesFromPdf(final byte[] pdf) throws PresentableException + { + return extractSignaturesFromPdf(pdf, false); + } + + //@deprecated + public List extractSignaturesFromPdf(final byte[] pdf, boolean ignorePostSignModificationsRestriction) throws PresentableException + { + + // tzefferer: get allow_post_sign_modifications property from property file + SettingsReader settings = SettingsReader.getInstance(); + String allow_post_sign_mods = settings.getSetting(ALLOW_POST_SIGN_MODIFICATIONS, "false"); + boolean supressException = "true".equalsIgnoreCase(settings.getSetting(VerificationFilterImpl.SUPRESS_EXCEPTION_WHEN_LAST_UIBLOCK_IS_NO_SIGNATURE, "false")); + + List holders = new ArrayList(); + + List blocks = null; + try + { + blocks = ParseDocument.parseDocument(pdf); + } + catch (Exception e) + { + logger_.debug("Error while parsing Document.", e); + throw new PDFDocumentException(201, e); + } + +// for (int i = 0; i < blocks.size(); i++) +// { +// FooterParseResult bpr = (FooterParseResult) blocks.get(i); +// // logger_.debug("block[" + i + "] from " + bpr.start_index + " to +// // " + bpr.next_index); +// } + + unrollLinearization(blocks); + + boolean signature_block_detected = false; + + for (int i = 0; i < blocks.size(); i++) + { + boolean current_block_contains_signature = false; + FooterParseResult bpr = (FooterParseResult) blocks.get(i); + + int prev_end = 0; + if (i > 0) + { + FooterParseResult prev_bpr = (FooterParseResult) blocks.get(i - 1); + prev_end = prev_bpr.next_index; + } + + // logger_.debug("block from " + prev_end + " to " + + // bpr.next_index); + + if (containsEGIZDict(pdf, bpr)) + { + logger_.debug("Parsing Binary Sig:"); + + PdfASID kz = extractKZFromEGIZBlock(pdf, bpr); + + if (!kz.toString().equals(BinaryVerificator_1_0_0.MY_ID.toString())) + { + logger_.debug("Warning: Binary Kennzeichnung not recognized:" + kz.toString()); + } + + Verificator verificator = new BinaryVerificator_1_0_0(); + List binary_holders = verificator.parseBlock(pdf, bpr, prev_end); + + holders.addAll(binary_holders); + + // tzefferer: check if signatures have been detected in current block + if(binary_holders.size() > 0) + { + signature_block_detected = true; + current_block_contains_signature = true; + } + + logger_.debug(":Parsing Binary Sig END - holders.size = " + holders.size()); + } + else + { + // TODO: make better - already deprecated + //amavriqi: skip checking for old sigs becouse of performance issues + String old_text_sigs = SettingsReader.getInstance().getSetting("check_old_textual_sigs", "false"); + + //amavriqi: if old textual signatures not checked for then + // no need to check Incremental Block 0 + if(old_text_sigs.equalsIgnoreCase("false") && (prev_end == 0)) + { + continue; + } + logger_.debug("Extracting text for: " + prev_end + " to " + bpr.next_index); + + Verificator verificator = new TextualVerificator_1_0_0(); + List text_holders = verificator.parseBlock(pdf, bpr, prev_end); + logger_.debug("text_holders = " + text_holders.size()); + + // tzefferer: check if signatures have been detected in current block + if(text_holders.size() > 0) + { + signature_block_detected = true; + current_block_contains_signature = true; + } + + // TODO: make better - already deprecated + //amavriqi: only if old textual signatures are checked for + if(old_text_sigs.equalsIgnoreCase("true")){ + if (prev_end == 0) + { + String rest_text = null; + if (!text_holders.isEmpty()) + { + TextualSignatureHolder first_holder = (TextualSignatureHolder) text_holders.get(0); + rest_text = first_holder.getSignedText(); + } + else + { + //a.m. + logger_.debug("Incemental block" + prev_end + " and there are " + text_holders.size() + " text holders"); + logger_.debug("Checking for older textual Signatures"); + logger_.debug("Extracting text to " + bpr.next_index); + rest_text = PdfAS.extractNormalizedTextTextual(pdf, bpr.next_index); + } + + List old_holders = PdfAS.extractSignatureHoldersTextual(rest_text, true); + + logger_.debug("Found old holders = " + old_holders.size()); + if (!old_holders.isEmpty()) + { + // there must be only one old holder. + holders.add(0, old_holders.get(0)); + } + } + } + if (!text_holders.isEmpty()) + { + List actual_text_holders = throwOutBinHolders(text_holders); + holders.addAll(actual_text_holders); + } + + logger_.debug(":Extracting tex END - holders.size = " + holders.size()); + } + + // tzefferer: check if illegal modifications have been performed on the document after performing a signation + if (!("true".equalsIgnoreCase(allow_post_sign_mods)) && !ignorePostSignModificationsRestriction) + { + if (signature_block_detected && !current_block_contains_signature) + { + if (!supressException) { + throw new PDFDocumentException(ErrorCode.MODIFIED_AFTER_SIGNATION, "Das Dokument wurde nach erfolgter Signierung verändert."); + } + + } + } + } + + for (int i = 0; i < holders.size(); i++) + { + SignatureHolder holder = (SignatureHolder) holders.get(i); + + PdfASID kz = holder.getSignatureObject().getKZ(); + if (kz != null) + { + checkKZ(kz); + } + } + + return holders; + } + + /** + * Checks, if the given KZ is recognized by this application or logs a warning if it isn't. + * @param kz The Kennzeichnung. + */ + protected void checkKZ (PdfASID kz) + { + if (!kz.getVendor().equals(SignatorFactory.VENDOR)) + { + logger_.warn("The vendor " + kz.getVendor() + " isn't known by this application."); + } + if (!kz.getVersion().equals(SignatorFactory.VERSION_1_0_0)) + { + logger_.warn("The version " + kz.getVersion() + " istn't supported by this application. This might cause problems."); + } + } + + /** + * Throws out SignatureHolders with a binary KZ. + * + * @param text_holders + * The List of SignatureHolder objects. + * @return Returns the List of SignatureHolder objects, where no object has + * binary KZ. + */ + private List throwOutBinHolders(List text_holders) + { + List actual_text_holders = new ArrayList(); + for (int i = 0; i < text_holders.size(); i++) + { + SignatureHolder sh = (SignatureHolder) text_holders.get(i); + PdfASID kz = null; + try + { + kz = sh.getSignatureObject().getKZ(); + } + catch (InvalidIDException e) + { + logger_.error(e.getMessage(), e); + } + if (kz != null && kz.getType().equals(SignatorFactory.TYPE_BINARY)) + { + logger_.info("Throwing out binary signature: " + kz); + continue; + } + actual_text_holders.add(sh); + } + return actual_text_holders; + } + + /** + * Removes the linearization footer from the list of update blocks. + * + * @param blocks + * The list of FooterParseResult objects in \prev order. + */ + protected void unrollLinearization(List blocks) + { + int linearization_index = -1; + for (int i = 0; i < blocks.size(); i++) + { + FooterParseResult bpr = (FooterParseResult) blocks.get(i); + + if (bpr.sxpr.xref_index == 0) + { + if (linearization_index >= 0) + { + throw new RuntimeException("There is more than one linearization block! index = " + i); + } + linearization_index = i; + } + } + + if (linearization_index >= 0) + { + // logger_.debug("The document is linearized - unrolling + // linearization block " + linearization_index); + blocks.remove(linearization_index); + } + } + + /** + * Extracts the List of SignatureHolders from the given plain text document. + * + *

+ * Note that this can only extract text signatures. + *

+ * + * @param raw_text + * The plain text document. + * @return Returns the ordered List of SignatureHolder objects (the first + * signature will be at index 0) extracted from the document or an + * empty list, if none could be found. + * @throws SignatureException + * @throws PDFDocumentException + * @throws SignatureTypesException + * @throws NormalizeException + */ + public List extractSignaturesFromPlainText(final String raw_text) throws PDFDocumentException, SignatureException, SignatureTypesException, NormalizeException + { + String normalized_text = PdfAS.normalizeText(raw_text); + + //List text_holders = PdfAS.extractSignatureHoldersTextual(normalized_text, false); + List text_holders = AbsoluteTextSignature.extractSignatureHoldersFromText(normalized_text); + + String rest_text = normalized_text; + if (!text_holders.isEmpty()) + { + TextualSignatureHolder holder = (TextualSignatureHolder) text_holders.get(0); + rest_text = holder.getSignedText(); + } + + List old_holders = PdfAS.extractSignatureHoldersTextual(rest_text, true); + if (!old_holders.isEmpty()) + { + text_holders.addAll(0, old_holders); + } + + List actual_text_holders = throwOutBinHolders(text_holders); + + return actual_text_holders; + } + + /** + * Tells, if the given incremental update block contains a binary signature. + * + *

+ * According to definition, if a block is a binary block, it must/cannot + * contain other signatures than this one. + *

+ * + * @param block + * The incremental update block. + * @return Returns true, if this block is a binary signature block, false + * otherwise. + */ + protected boolean containsEGIZDict(final byte[] pdf, + final FooterParseResult block) + { + int dict_index = PDFUtils.indexOfName(pdf, block.tpr.dpr.names, EGIZ_DICT_NAME); + if (dict_index <= 0) + { + return false; + } + + return true; + } + + /** + * Extracts the PDF AS ID of the egiz block. + * + * @param pdf + * The pdf. + * @param block + * The IU block. + * @return Returns the extracted PDF AS ID. + * @throws PDFDocumentException + * Forwarded exception. + * @throws InvalidIDException + * Forwarded exception. + */ + protected PdfASID extractKZFromEGIZBlock(final byte[] pdf, + final FooterParseResult block) throws PDFDocumentException, InvalidIDException + { + int egiz_index = PDFUtils.indexOfName(pdf, block.tpr.dpr.names, VerificationFilter.EGIZ_DICT_NAME); + if (egiz_index < 0) + { + throw new PDFDocumentException(301, "egiz_index = " + egiz_index); + } + + IndirectObjectReferenceParseResult egiz_dict_iorpr = (IndirectObjectReferenceParseResult) block.tpr.dpr.values.get(egiz_index); + // logger_.debug("egiz_dict_ir = " + egiz_dict_iorpr.ior.object_number + // + " " + egiz_dict_iorpr.ior.generation_number); + + IndirectObjectReference ior = egiz_dict_iorpr.ior; + + final int egiz_dict_offset = PDFUtils.getObjectOffsetFromXRefByIndirectObjectReference(block.xpr, ior); + // logger_.debug("egiz_dict_offset = " + egiz_dict_offset); + + ObjectParseResult obj = PDFUtils.parseObject(pdf, egiz_dict_offset); + DictionaryParseResult egiz_dict = (DictionaryParseResult) obj.object; + + int kz_index = PDFUtils.indexOfName(pdf, egiz_dict.names, EGIZ_KZ_NAME); + if (kz_index < 0) + { + throw new PDFDocumentException(301, "kz_index = " + kz_index); + } + ArrayParseResult kz_apr = (ArrayParseResult) egiz_dict.values.get(kz_index); + + String kz_string = restoreKZ(pdf, kz_apr); + PdfASID kz = new PdfASID(kz_string); + + return kz; + } + + /** + * Restores the Kennzeichnung String from an Array. + * + * @param pdf + * The PDF. + * @param kz_apr + * The Array, as parsed from the EGIZ Dict. + * @return Returns the restored KZ. + * @throws PDFDocumentException + * Forwarded exception. + */ + public static String restoreKZ(byte[] pdf, ArrayParseResult kz_apr) throws PDFDocumentException + { + try + { + List partition = new ArrayList(); + + for (int i = 0; i < kz_apr.elements.size() / 2; i++) + { + NumberParseResult start_npr = (NumberParseResult) kz_apr.elements.get(i * 2); + NumberParseResult length_npr = (NumberParseResult) kz_apr.elements.get(i * 2 + 1); + + StringInfo si = new StringInfo(); + si.string_start = start_npr.number; + si.string_length = length_npr.number; + + partition.add(si); + } + + String KZ = Placeholder.reconstructStringFromPartition(pdf, partition, BinarySignature.ENCODING_WIN); + return KZ; + } + catch (IOException e1) + { + throw new PDFDocumentException(ErrorCode.DOCUMENT_CANNOT_BE_READ, e1); + } + } + + + protected static void printFoundHolders (String list_caption, List found_holders, PrintStream writer) + { + writer.println("------------------------------------"); + writer.println(list_caption + ": #=" + found_holders.size()); + + for (int i = 0; i < found_holders.size(); i++) + { + SignatureHolder holder = (SignatureHolder) found_holders.get(i); + String kz = "invalid"; + try + { + PdfASID kz_id = holder.getSignatureObject().getKZ(); + if (kz_id == null) + { + kz = "old signature"; + } + else + { + kz = kz_id.toString(); + } + } + catch (InvalidIDException e) + { + logger_.error(e.getMessage(), e); + } + writer.println(" holder[" + i + "]: " + holder.getSignatureObject().getSignationType() + ", KZ=" + kz); + } + + writer.println(":" + list_caption); + writer.println("------------------------------------"); + + } + + // TODO old code - remove +// public static void main(String[] args) throws IOException, PresentableException +// { +// SettingsReader.initializeForCommandLine(); +// +// File in = new File(args[0]); +// FileInputStream fis = new FileInputStream(in); +// byte[] pdf = new byte[(int) in.length()]; +// fis.read(pdf); +// fis.close(); +// +// String text = PdfAS.extractNormalizedTextTextual(pdf, pdf.length);; +// +// VerificationFilter vf = new VerificationFilter(); +// //List found = vf.extractSignaturesFromPdf(pdf); +// +// List found = vf.extractSignaturesFromPlainText(text); +// +// printFoundHolders("Final Holders", found, System.out); +// } +} -- cgit v1.2.3