From 535a04fa05f739ec16dd81666e3b0f82dfbd442d Mon Sep 17 00:00:00 2001 From: tknall Date: Wed, 9 Jan 2013 15:41:29 +0000 Subject: pdf-as-lib maven project files moved to pdf-as-lib git-svn-id: https://joinup.ec.europa.eu/svn/pdf-as/pdf-as/trunk@926 7b5415b0-85f9-ee4d-85bd-d5d0c3b42d1c --- .../vfilter/helper/VerificationFilterHelper.java | 162 +++++++++++++++++++++ 1 file changed, 162 insertions(+) create mode 100644 pdf-as-lib/src/main/java/at/gv/egiz/pdfas/impl/vfilter/helper/VerificationFilterHelper.java (limited to 'pdf-as-lib/src/main/java/at/gv/egiz/pdfas/impl/vfilter/helper/VerificationFilterHelper.java') diff --git a/pdf-as-lib/src/main/java/at/gv/egiz/pdfas/impl/vfilter/helper/VerificationFilterHelper.java b/pdf-as-lib/src/main/java/at/gv/egiz/pdfas/impl/vfilter/helper/VerificationFilterHelper.java new file mode 100644 index 0000000..69803e7 --- /dev/null +++ b/pdf-as-lib/src/main/java/at/gv/egiz/pdfas/impl/vfilter/helper/VerificationFilterHelper.java @@ -0,0 +1,162 @@ +/** + * Copyright 2006 by Know-Center, Graz, Austria + * PDF-AS has been contracted by the E-Government Innovation Center EGIZ, a + * joint initiative of the Federal Chancellery Austria and Graz University of + * Technology. + * + * Licensed under the EUPL, Version 1.1 or - as soon they will be approved by + * the European Commission - subsequent versions of the EUPL (the "Licence"); + * You may not use this work except in compliance with the Licence. + * You may obtain a copy of the Licence at: + * http://www.osor.eu/eupl/ + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the Licence is distributed on an "AS IS" basis, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the Licence for the specific language governing permissions and + * limitations under the Licence. + * + * This product combines work with different licenses. See the "NOTICE" text + * file for details on the various modules and licenses. + * The "NOTICE" text file is part of the distribution. Any derivative works + * that you distribute must include a readable copy of the "NOTICE" text file. + */ +package at.gv.egiz.pdfas.impl.vfilter.helper; + +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; + +import at.gv.egiz.pdfas.exceptions.framework.VerificationFilterException; +import at.gv.egiz.pdfas.impl.input.helper.DataSourceHelper; +import at.gv.egiz.pdfas.impl.vfilter.Partition; +import at.gv.egiz.pdfas.impl.vfilter.partition.BinaryPartition; +import at.gv.egiz.pdfas.impl.vfilter.partition.TextPartition; +import at.gv.egiz.pdfas.framework.input.PdfDataSource; +import at.knowcenter.wag.exactparser.parsing.results.FooterParseResult; + +/** + * Contains helpful methods used by the VerificationFilter. + * + * @author wprinz + */ +public final class VerificationFilterHelper +{ + /** + * Partitions the list of Incremental Update blocks into text and binary + * partitions. + * + *

+ * A partition is a sequence of Incremental Update blocks of the same type. + *

+ *

+ * An Incremental Update block is considered to have the type "binary" if it + * contains an egiz dictionary. A block not containing an egiz dictionary is + * considert to have the type "text". + *

+ * + * @param pdf + * The PDF. + * @param blocks + * The Incremental Update blocks. + * @return Returns the partitioning of the blocks. + * @throws VerificationFilterException + * Thrown if something goes wrong. + */ + public static List partition(PdfDataSource pdf, List blocks) throws VerificationFilterException + { + List partitions = new ArrayList(blocks.size()); + + Iterator it = blocks.iterator(); + while (it.hasNext()) + { + FooterParseResult fpr = (FooterParseResult) it.next(); + + byte[] data = DataSourceHelper.convertDataSourceToByteArray(pdf); + if (VerificationFilterBinaryHelper.containsEGIZDict(data, fpr)) + { + BinaryPartition bp = null; + if (partitions.isEmpty() || ((Partition) partitions.get(partitions.size() - 1)).isTextPartition()) + { + bp = new BinaryPartition(); + bp.blocks = new ArrayList(blocks.size()); + partitions.add(bp); + } + else + { + bp = (BinaryPartition) partitions.get(partitions.size() - 1); + } + assert bp != null; + + bp.blocks.add(fpr); + } + else + { + TextPartition tp = null; + if (partitions.isEmpty() || !((Partition) partitions.get(partitions.size() - 1)).isTextPartition()) + { + tp = new TextPartition(); + tp.blocks = new ArrayList(blocks.size()); + partitions.add(tp); + } + else + { + tp = (TextPartition) partitions.get(partitions.size() - 1); + } + assert tp != null; + + tp.blocks.add(fpr); + } + } + + assert partitions.size() >= 1 : "There must be at least one partition"; + + return partitions; + } + + /** + * Determines the end of the given partiton. + * + * @param partition + * The partition. + * @return Returns the end index of the given partition. + */ + public static int getEndOfPartition(Partition partition) + { + List blocks = null; + if (partition instanceof TextPartition) + { + blocks = ((TextPartition) partition).blocks; + } + else + { + blocks = ((BinaryPartition) partition).blocks; + } + + return ((FooterParseResult) blocks.get(blocks.size() - 1)).next_index; + } + + /** + * Finds the last text partition in the given list of partitions. + * + * @param partitions + * The partitions. + * @return Returns the last TextPartition. + */ + public static TextPartition findLastTextPartition(List partitions) + { + Partition lastTextPartition = (Partition) partitions.get(partitions.size() - 1); + + if (!lastTextPartition.isTextPartition()) + { + assert partitions.size() > 1 : "The only one partition cannot be a binary partition - where is the original document?"; + Partition previousToLastPartition = (Partition) partitions.get(partitions.size() - 2); + assert previousToLastPartition.isTextPartition() : "The previous to last partition must be a text partition or something is wrong with the partitioning algorithm."; + + lastTextPartition = previousToLastPartition; + } + + return (TextPartition) lastTextPartition; + } + +} -- cgit v1.2.3