/** * Copyright (c) 2006 by Know-Center, Graz, Austria * * This software is the confidential and proprietary information of Know-Center, * Graz, Austria. You shall not disclose such Confidential Information and shall * use it only in accordance with the terms of the license agreement you entered * into with Know-Center. * * KNOW-CENTER MAKES NO REPRESENTATIONS OR WARRANTIES ABOUT THE SUITABILITY OF * THE SOFTWARE, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE * IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, OR * NON-INFRINGEMENT. KNOW-CENTER SHALL NOT BE LIABLE FOR ANY DAMAGES SUFFERED BY * LICENSEE AS A RESULT OF USING, MODIFYING OR DISTRIBUTING THIS SOFTWARE OR ITS * DERIVATIVES. * * $Id: VerificationFilter.java,v 1.5 2006/10/31 08:07:20 wprinz Exp $ */ package at.knowcenter.wag.egov.egiz.framework; import java.io.File; import java.io.FileInputStream; import java.io.IOException; import java.io.PrintStream; import java.util.ArrayList; import java.util.List; import org.apache.log4j.Logger; import at.knowcenter.wag.egov.egiz.PdfAS; import at.knowcenter.wag.egov.egiz.PdfASID; import at.knowcenter.wag.egov.egiz.cfg.ConfigLogger; import at.knowcenter.wag.egov.egiz.cfg.SettingsReader; import at.knowcenter.wag.egov.egiz.exceptions.InvalidIDException; import at.knowcenter.wag.egov.egiz.exceptions.NormalizeException; import at.knowcenter.wag.egov.egiz.exceptions.PDFDocumentException; import at.knowcenter.wag.egov.egiz.exceptions.PresentableException; import at.knowcenter.wag.egov.egiz.exceptions.SignatureException; import at.knowcenter.wag.egov.egiz.exceptions.SignatureTypesException; import at.knowcenter.wag.egov.egiz.framework.verificators.BinaryVerificator_1_0_0; import at.knowcenter.wag.egov.egiz.framework.verificators.TextualVerificator_1_0_0; import at.knowcenter.wag.egov.egiz.pdf.AbsoluteTextSignature; import at.knowcenter.wag.egov.egiz.pdf.BinarySignature; import at.knowcenter.wag.egov.egiz.pdf.Placeholder; import at.knowcenter.wag.egov.egiz.pdf.SignatureHolder; import at.knowcenter.wag.egov.egiz.pdf.StringInfo; import at.knowcenter.wag.exactparser.ParseDocument; import at.knowcenter.wag.exactparser.parsing.IndirectObjectReference; import at.knowcenter.wag.exactparser.parsing.PDFUtils; import at.knowcenter.wag.exactparser.parsing.results.ArrayParseResult; import at.knowcenter.wag.exactparser.parsing.results.DictionaryParseResult; import at.knowcenter.wag.exactparser.parsing.results.FooterParseResult; import at.knowcenter.wag.exactparser.parsing.results.IndirectObjectReferenceParseResult; import at.knowcenter.wag.exactparser.parsing.results.NumberParseResult; import at.knowcenter.wag.exactparser.parsing.results.ObjectParseResult; /** * This filter transforms an arbitrary input pdf into an ordered List of * SignatureHolders for verification. * *

* The pdf document is parsed and the signature blocks (textual, binary, etc.) * are extracted as verifyable SignatureHolder objects in the order they appear * in the document. *

* * @author wprinz */ public class VerificationFilter { public static final byte[] EGIZ_DICT_NAME = { 'E', 'G', 'I', 'Z', 'S', 'i', 'g', 'D', 'i', 'c', 't' }; public static final byte[] EGIZ_KZ_NAME = { 'I', 'D' }; /** * The logger definition. */ private static final Logger logger_ = ConfigLogger.getLogger(VerificationFilter.class); /** * Default constructor. */ public VerificationFilter() { // empty block. } /** * Extracts the List of SignatureHolders from the given PDF document. * * @param pdf * The PDF document. * @return Returns the ordered List of SignatureHolder objects (the first * signature will be at index 0) extracted from the document or an * empty list, if none could be found. * @throws PresentableException */ public List extractSignaturesFromPdf(final byte[] pdf) throws PresentableException { List holders = new ArrayList(); List blocks = null; try { blocks = ParseDocument.parseDocument(pdf); } catch (Exception e) { throw new PDFDocumentException(201); } // for (int i = 0; i < blocks.size(); i++) // { // FooterParseResult bpr = (FooterParseResult) blocks.get(i); // // logger_.debug("block[" + i + "] from " + bpr.start_index + " to // // " + bpr.next_index); // } unrollLinearization(blocks); for (int i = 0; i < blocks.size(); i++) { FooterParseResult bpr = (FooterParseResult) blocks.get(i); int prev_end = 0; if (i > 0) { FooterParseResult prev_bpr = (FooterParseResult) blocks.get(i - 1); prev_end = prev_bpr.next_index; } // logger_.debug("block from " + prev_end + " to " + // bpr.next_index); if (containsEGIZDict(pdf, bpr)) { logger_.debug("Parsing Binary Sig:"); PdfASID kz = extractKZFromEGIZBlock(pdf, bpr); if (!kz.toString().equals(BinaryVerificator_1_0_0.MY_ID.toString())) { logger_.debug("Warning: Binary Kennzeichnung not recognized:" + kz.toString()); } Verificator verificator = new BinaryVerificator_1_0_0(); List binary_holders = verificator.parseBlock(pdf, bpr, prev_end); holders.addAll(binary_holders); logger_.debug(":Parsing Binary Sig END - holders.size = " + holders.size()); } else { logger_.debug("Extracting text for: " + prev_end + " to " + bpr.next_index); Verificator verificator = new TextualVerificator_1_0_0(); List text_holders = verificator.parseBlock(pdf, bpr, prev_end); logger_.debug("text_holders = " + text_holders.size()); if (prev_end == 0) { String rest_text = null; if (!text_holders.isEmpty()) { SignatureHolder first_holder = (SignatureHolder) text_holders.get(0); rest_text = first_holder.getSignedText(); } else { rest_text = PdfAS.extractNormalizedTextTextual(pdf, bpr.next_index); } List old_holders = PdfAS.extractSignatureHoldersTextual(rest_text, true); logger_.debug("Found old holders = " + old_holders.size()); if (!old_holders.isEmpty()) { // there must be only one old holder. holders.add(0, old_holders.get(0)); } } if (!text_holders.isEmpty()) { List actual_text_holders = throwOutBinHolders(text_holders); holders.addAll(actual_text_holders); } logger_.debug(":Extracting tex END - holders.size = " + holders.size()); } } for (int i = 0; i < holders.size(); i++) { SignatureHolder holder = (SignatureHolder) holders.get(i); PdfASID kz = holder.getSignatureObject().getKZ(); if (kz != null) { checkKZ(kz); } } return holders; } /** * Checks, if the given KZ is recognized by this application or logs a warning if it isn't. * @param kz The Kennzeichnung. */ protected void checkKZ (PdfASID kz) { if (!kz.getVendor().equals(SignatorFactory.VENDOR)) { logger_.warn("The vendor " + kz.getVendor() + " isn't known by this application."); } if (!kz.getVersion().equals(SignatorFactory.VERSION_1_0_0)) { logger_.warn("The version " + kz.getVersion() + " istn't supported by this application. This might cause problems."); } } /** * Throws out SignatureHolders with a binary KZ. * * @param text_holders * The List of SignatureHolder objects. * @return Returns the List of SignatureHolder objects, where no object has * binary KZ. */ private List throwOutBinHolders(List text_holders) { List actual_text_holders = new ArrayList(); for (int i = 0; i < text_holders.size(); i++) { SignatureHolder sh = (SignatureHolder) text_holders.get(i); PdfASID kz = null; try { kz = sh.getSignatureObject().getKZ(); } catch (InvalidIDException e) { e.printStackTrace(); } if (kz != null && kz.getType().equals(SignatorFactory.TYPE_BINARY)) { logger_.info("Throwing out binary signature: " + kz); continue; } actual_text_holders.add(sh); } return actual_text_holders; } /** * Removes the linearization footer from the list of update blocks. * * @param blocks * The list of FooterParseResult objects in \prev order. */ protected void unrollLinearization(List blocks) { int linearization_index = -1; for (int i = 0; i < blocks.size(); i++) { FooterParseResult bpr = (FooterParseResult) blocks.get(i); if (bpr.sxpr.xref_index == 0) { if (linearization_index >= 0) { throw new RuntimeException("There is more than one linearization block! index = " + i); } linearization_index = i; } } if (linearization_index >= 0) { // logger_.debug("The document is linearized - unrolling // linearization block " + linearization_index); blocks.remove(linearization_index); } } /** * Extracts the List of SignatureHolders from the given plain text document. * *

* Note that this can only extract text signatures. *

* * @param raw_text * The plain text document. * @return Returns the ordered List of SignatureHolder objects (the first * signature will be at index 0) extracted from the document or an * empty list, if none could be found. * @throws SignatureException * @throws PDFDocumentException * @throws SignatureTypesException * @throws NormalizeException */ public List extractSignaturesFromPlainText(final String raw_text) throws PDFDocumentException, SignatureException, SignatureTypesException, NormalizeException { String normalized_text = PdfAS.normalizeText(raw_text); //List text_holders = PdfAS.extractSignatureHoldersTextual(normalized_text, false); List text_holders = AbsoluteTextSignature.extractSignatureHoldersFromText(normalized_text); String rest_text = normalized_text; if (!text_holders.isEmpty()) { SignatureHolder holder = (SignatureHolder) text_holders.get(0); rest_text = holder.getSignedText(); } List old_holders = PdfAS.extractSignatureHoldersTextual(rest_text, true); if (!old_holders.isEmpty()) { text_holders.addAll(0, old_holders); } List actual_text_holders = throwOutBinHolders(text_holders); return actual_text_holders; } /** * Tells, if the given incremental update block contains a binary signature. * *

* According to definition, if a block is a binary block, it must/cannot * contain other signatures than this one. *

* * @param block * The incremental update block. * @return Returns true, if this block is a binary signature block, false * otherwise. */ protected boolean containsEGIZDict(final byte[] pdf, final FooterParseResult block) { int dict_index = PDFUtils.indexOfName(pdf, block.tpr.dpr.names, EGIZ_DICT_NAME); if (dict_index <= 0) { return false; } return true; } /** * Extracts the PDF AS ID of the egiz block. * * @param pdf * The pdf. * @param block * The IU block. * @return Returns the extracted PDF AS ID. * @throws PDFDocumentException * Forwarded exception. * @throws InvalidIDException * Forwarded exception. */ protected PdfASID extractKZFromEGIZBlock(final byte[] pdf, final FooterParseResult block) throws PDFDocumentException, InvalidIDException { int egiz_index = PDFUtils.indexOfName(pdf, block.tpr.dpr.names, VerificationFilter.EGIZ_DICT_NAME); if (egiz_index < 0) { throw new PDFDocumentException(301, "egiz_index = " + egiz_index); } IndirectObjectReferenceParseResult egiz_dict_iorpr = (IndirectObjectReferenceParseResult) block.tpr.dpr.values.get(egiz_index); // logger_.debug("egiz_dict_ir = " + egiz_dict_iorpr.ior.object_number // + " " + egiz_dict_iorpr.ior.generation_number); IndirectObjectReference ior = egiz_dict_iorpr.ior; final int egiz_dict_offset = PDFUtils.getObjectOffsetFromXRefByIndirectObjectReference(block.xpr, ior); // logger_.debug("egiz_dict_offset = " + egiz_dict_offset); ObjectParseResult obj = PDFUtils.parseObject(pdf, egiz_dict_offset); DictionaryParseResult egiz_dict = (DictionaryParseResult) obj.object; int kz_index = PDFUtils.indexOfName(pdf, egiz_dict.names, EGIZ_KZ_NAME); if (kz_index < 0) { throw new PDFDocumentException(301, "kz_index = " + kz_index); } ArrayParseResult kz_apr = (ArrayParseResult) egiz_dict.values.get(kz_index); String kz_string = restoreKZ(pdf, kz_apr); PdfASID kz = new PdfASID(kz_string); return kz; } /** * Restores the Kennzeichnung String from an Array. * * @param pdf * The PDF. * @param kz_apr * The Array, as parsed from the EGIZ Dict. * @return Returns the restored KZ. * @throws PDFDocumentException * Forwarded exception. */ public static String restoreKZ(byte[] pdf, ArrayParseResult kz_apr) throws PDFDocumentException { try { List partition = new ArrayList(); for (int i = 0; i < kz_apr.elements.size() / 2; i++) { NumberParseResult start_npr = (NumberParseResult) kz_apr.elements.get(i * 2); NumberParseResult length_npr = (NumberParseResult) kz_apr.elements.get(i * 2 + 1); StringInfo si = new StringInfo(); si.string_start = start_npr.number; si.string_length = length_npr.number; partition.add(si); } String KZ = Placeholder.reconstructStringFromPartition(pdf, partition, BinarySignature.ENCODING_WIN); return KZ; } catch (IOException e1) { throw new PDFDocumentException(201); } } protected static void printFoundHolders (String list_caption, List found_holders, PrintStream writer) { writer.println("------------------------------------"); writer.println(list_caption + ": #=" + found_holders.size()); for (int i = 0; i < found_holders.size(); i++) { SignatureHolder holder = (SignatureHolder) found_holders.get(i); String kz = "invalid"; try { PdfASID kz_id = holder.getSignatureObject().getKZ(); if (kz_id == null) { kz = "old signature"; } else { kz = kz_id.toString(); } } catch (InvalidIDException e) { e.printStackTrace(); } writer.println(" holder[" + i + "]: " + holder.getSignatureObject().getSignationType() + ", KZ=" + kz); } writer.println(":" + list_caption); writer.println("------------------------------------"); } public static void main(String[] args) throws IOException, PresentableException { SettingsReader.initializeForCommandLine(); File in = new File(args[0]); FileInputStream fis = new FileInputStream(in); byte[] pdf = new byte[(int) in.length()]; fis.read(pdf); fis.close(); String text = PdfAS.extractNormalizedTextTextual(pdf, pdf.length);; VerificationFilter vf = new VerificationFilter(); //List found = vf.extractSignaturesFromPdf(pdf); List found = vf.extractSignaturesFromPlainText(text); printFoundHolders("Final Holders", found, System.out); } }