From 6025b6016517c6d898d8957d1d7e03ba71431912 Mon Sep 17 00:00:00 2001 From: tknall Date: Fri, 1 Dec 2006 12:20:24 +0000 Subject: Initial import of release 2.2. git-svn-id: https://joinup.ec.europa.eu/svn/pdf-as/trunk@4 7b5415b0-85f9-ee4d-85bd-d5d0c3b42d1c --- .../verificators/TextualVerificator_1_0_0.java | 136 +++++++++++++++++++++ 1 file changed, 136 insertions(+) create mode 100644 src/main/java/at/knowcenter/wag/egov/egiz/framework/verificators/TextualVerificator_1_0_0.java (limited to 'src/main/java/at/knowcenter/wag/egov/egiz/framework/verificators/TextualVerificator_1_0_0.java') diff --git a/src/main/java/at/knowcenter/wag/egov/egiz/framework/verificators/TextualVerificator_1_0_0.java b/src/main/java/at/knowcenter/wag/egov/egiz/framework/verificators/TextualVerificator_1_0_0.java new file mode 100644 index 0000000..bbcebef --- /dev/null +++ b/src/main/java/at/knowcenter/wag/egov/egiz/framework/verificators/TextualVerificator_1_0_0.java @@ -0,0 +1,136 @@ +/** + * Copyright (c) 2006 by Know-Center, Graz, Austria + * + * This software is the confidential and proprietary information of Know-Center, + * Graz, Austria. You shall not disclose such Confidential Information and shall + * use it only in accordance with the terms of the license agreement you entered + * into with Know-Center. + * + * KNOW-CENTER MAKES NO REPRESENTATIONS OR WARRANTIES ABOUT THE SUITABILITY OF + * THE SOFTWARE, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE + * IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, OR + * NON-INFRINGEMENT. KNOW-CENTER SHALL NOT BE LIABLE FOR ANY DAMAGES SUFFERED BY + * LICENSEE AS A RESULT OF USING, MODIFYING OR DISTRIBUTING THIS SOFTWARE OR ITS + * DERIVATIVES. + * + * $Id: TextualVerificator_1_0_0.java,v 1.4 2006/10/31 08:08:00 wprinz Exp $ + */ +package at.knowcenter.wag.egov.egiz.framework.verificators; + +import java.util.ArrayList; +import java.util.List; + +import org.apache.log4j.Logger; + +import at.knowcenter.wag.egov.egiz.PdfAS; +import at.knowcenter.wag.egov.egiz.PdfASID; +import at.knowcenter.wag.egov.egiz.cfg.ConfigLogger; +import at.knowcenter.wag.egov.egiz.exceptions.PresentableException; +import at.knowcenter.wag.egov.egiz.framework.SignatorFactory; +import at.knowcenter.wag.egov.egiz.framework.Verificator; +import at.knowcenter.wag.egov.egiz.pdf.AbsoluteTextSignature; +import at.knowcenter.wag.egov.egiz.pdf.SignatureHolder; +import at.knowcenter.wag.exactparser.parsing.results.FooterParseResult; + + +/** + * The textual verificator. + * + *

+ * All holders of the document so far including the block itself are extracted. + * If at least one has been found it is checked that this one doesn't belong to + * a previous block. + *

+ *

+ * Note that the KZ is not explicitely checked here, so this algorithm will find + * all blocks. + *

+ *

+ * Note that it will not find old style blocks as they don't have the SIG_KZ. + *

+ * + * @author wprinz + */ +public class TextualVerificator_1_0_0 implements Verificator +{ + /** + * The Pdf-AS ID of this Verificator. + */ + public static final PdfASID MY_ID = new PdfASID(SignatorFactory.VENDOR, SignatorFactory.TYPE_TEXTUAL, SignatorFactory.VERSION_1_0_0); + + /** + * The logger definition. + */ + private static final Logger logger_ = ConfigLogger.getLogger(TextualVerificator_1_0_0.class); + + /** + * Default constructor. + */ + public TextualVerificator_1_0_0() + { + // Default constructor. + } + + /** + * @see at.knowcenter.wag.egov.egiz.framework.Verificator#parseBlock(byte[], + * at.knowcenter.wag.exactparser.parsing.results.FooterParseResult, int) + */ + public List parseBlock(byte[] pdf, FooterParseResult block, + int start_of_whole_block) throws PresentableException + { + String block_text = PdfAS.extractNormalizedTextTextual(pdf, block.next_index); + + logger_.debug("Scanning block:"); + //List signature_holders = PdfAS.extractSignatureHoldersTextual(block_text, false); + List signature_holders = AbsoluteTextSignature.extractSignatureHoldersFromText(block_text); + logger_.debug(": end of Scanning block"); + + // logger_.debug("signature_holders = " + signature_holders.size()); + + if (signature_holders.isEmpty()) + { + return signature_holders; + } + + List text_holder_candidates = null; + if (start_of_whole_block > 0) + { + text_holder_candidates = new ArrayList(); + + String prev_text = PdfAS.extractNormalizedTextTextual(pdf, start_of_whole_block); + + logger_.debug("Scanning prev block:"); + //List prev_signature_holders = PdfAS.extractSignatureHoldersTextual(prev_text, false); + List prev_signature_holders = AbsoluteTextSignature.extractSignatureHoldersFromText(prev_text); + logger_.debug(": end of Scanning prev block"); + + // logger_.debug("prev_signature_holders = " + + // prev_signature_holders.size()); + + for (int i = prev_signature_holders.size(); i < signature_holders.size(); i++) + { + SignatureHolder holder = (SignatureHolder) signature_holders.get(i); + text_holder_candidates.add(holder); + } + } + else + { + logger_.debug("there is no prev - so all found signatures are possible candidates."); + text_holder_candidates = signature_holders; + } + + List text_holders = new ArrayList(); + for (int i = 0; i < text_holder_candidates.size(); i++) + { + SignatureHolder holder = (SignatureHolder) text_holder_candidates.get(i); + if (!holder.getSignatureObject().isTextual()) + { + logger_.debug("Skipping found signature block because it's not textual."); + continue; + } + text_holders.add(holder); + } + + return text_holders; + } +} -- cgit v1.2.3