From 535a04fa05f739ec16dd81666e3b0f82dfbd442d Mon Sep 17 00:00:00 2001 From: tknall Date: Wed, 9 Jan 2013 15:41:29 +0000 Subject: pdf-as-lib maven project files moved to pdf-as-lib git-svn-id: https://joinup.ec.europa.eu/svn/pdf-as/pdf-as/trunk@926 7b5415b0-85f9-ee4d-85bd-d5d0c3b42d1c --- .../verificators/TextualVerificator_1_0_0.java | 147 +++++++++++++++++++++ 1 file changed, 147 insertions(+) create mode 100644 pdf-as-lib/src/main/java/at/knowcenter/wag/egov/egiz/framework/verificators/TextualVerificator_1_0_0.java (limited to 'pdf-as-lib/src/main/java/at/knowcenter/wag/egov/egiz/framework/verificators/TextualVerificator_1_0_0.java') diff --git a/pdf-as-lib/src/main/java/at/knowcenter/wag/egov/egiz/framework/verificators/TextualVerificator_1_0_0.java b/pdf-as-lib/src/main/java/at/knowcenter/wag/egov/egiz/framework/verificators/TextualVerificator_1_0_0.java new file mode 100644 index 0000000..58302be --- /dev/null +++ b/pdf-as-lib/src/main/java/at/knowcenter/wag/egov/egiz/framework/verificators/TextualVerificator_1_0_0.java @@ -0,0 +1,147 @@ +/** + * Copyright 2006 by Know-Center, Graz, Austria + * PDF-AS has been contracted by the E-Government Innovation Center EGIZ, a + * joint initiative of the Federal Chancellery Austria and Graz University of + * Technology. + * + * Licensed under the EUPL, Version 1.1 or - as soon they will be approved by + * the European Commission - subsequent versions of the EUPL (the "Licence"); + * You may not use this work except in compliance with the Licence. + * You may obtain a copy of the Licence at: + * http://www.osor.eu/eupl/ + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the Licence is distributed on an "AS IS" basis, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the Licence for the specific language governing permissions and + * limitations under the Licence. + * + * This product combines work with different licenses. 
See the "NOTICE" text + * file for details on the various modules and licenses. + * The "NOTICE" text file is part of the distribution. Any derivative works + * that you distribute must include a readable copy of the "NOTICE" text file. + * + * $Id: TextualVerificator_1_0_0.java,v 1.5 2006/11/28 07:45:09 wprinz Exp $ + */ +package at.knowcenter.wag.egov.egiz.framework.verificators; + +import java.io.ByteArrayInputStream; +import java.util.ArrayList; +import java.util.List; + +import org.apache.log4j.Logger; + +import at.knowcenter.wag.egov.egiz.PdfAS; +import at.knowcenter.wag.egov.egiz.PdfASID; +import at.knowcenter.wag.egov.egiz.cfg.ConfigLogger; +import at.knowcenter.wag.egov.egiz.exceptions.PresentableException; +import at.knowcenter.wag.egov.egiz.framework.SignatorFactory; +import at.knowcenter.wag.egov.egiz.framework.Verificator; +import at.knowcenter.wag.egov.egiz.pdf.AbsoluteTextSignature; +import at.knowcenter.wag.egov.egiz.pdf.SignatureHolder; +import at.knowcenter.wag.exactparser.parsing.results.FooterParseResult; + + +/** + * The textual verificator. + * + *

+ * All holders of the document so far including the block itself are extracted. + * If at least one has been found it is checked that this one doesn't belong to + * a previous block. + *

+ *

+ * Note that the KZ is not explicitly checked here, so this algorithm will find + * all blocks. + *

+ *

+ * Note that it will not find old style blocks as they don't have the SIG_KZ. + *

+ * + * @deprecated moved to the new framework + * + * @author wprinz + */ +public class TextualVerificator_1_0_0 implements Verificator +{ + /** + * The Pdf-AS ID of this Verificator. + */ + public static final PdfASID MY_ID = new PdfASID(SignatorFactory.VENDOR, SignatorFactory.TYPE_TEXTUAL, SignatorFactory.VERSION_1_0_0); + + /** + * The logger definition. + */ + private static final Logger logger_ = ConfigLogger.getLogger(TextualVerificator_1_0_0.class); + + /** + * Default constructor. + */ + public TextualVerificator_1_0_0() + { + // Default constructor. + } + + /** + * @see at.knowcenter.wag.egov.egiz.framework.Verificator#parseBlock(byte[], + * at.knowcenter.wag.exactparser.parsing.results.FooterParseResult, int) + */ + public List parseBlock(byte[] pdf, FooterParseResult block, + int start_of_whole_block) throws PresentableException + { + String block_text = PdfAS.extractNormalizedTextTextual(pdf, block.next_index); + + logger_.debug("Scanning block:"); + //List signature_holders = PdfAS.extractSignatureHoldersTextual(block_text, false); + List signature_holders = AbsoluteTextSignature.extractSignatureHoldersFromText(block_text); + logger_.debug(": end of Scanning block"); + + // logger_.debug("signature_holders = " + signature_holders.size()); + + if (signature_holders.isEmpty()) + { + return signature_holders; + } + + List text_holder_candidates = null; + if (start_of_whole_block > 0) + { + text_holder_candidates = new ArrayList(); + + String prev_text = PdfAS.extractNormalizedTextTextual(pdf, start_of_whole_block); + + logger_.debug("Scanning prev block:"); + //List prev_signature_holders = PdfAS.extractSignatureHoldersTextual(prev_text, false); + List prev_signature_holders = AbsoluteTextSignature.extractSignatureHoldersFromText(prev_text); + logger_.debug(": end of Scanning prev block"); + + // logger_.debug("prev_signature_holders = " + + // prev_signature_holders.size()); + + for (int i = prev_signature_holders.size(); i < signature_holders.size(); 
i++) + { + SignatureHolder holder = (SignatureHolder) signature_holders.get(i); + text_holder_candidates.add(holder); + } + } + else + { + logger_.debug("there is no prev - so all found signatures are possible candidates."); + text_holder_candidates = signature_holders; + } + + List text_holders = new ArrayList(); + for (int i = 0; i < text_holder_candidates.size(); i++) + { + SignatureHolder holder = (SignatureHolder) text_holder_candidates.get(i); + if (!holder.getSignatureObject().isTextual()) + { + logger_.debug("Skipping found signature block because it's not textual."); + continue; + } + text_holders.add(holder); + } + + return text_holders; + } +} -- cgit v1.2.3