/** * Copyright 2006 by Know-Center, Graz, Austria * PDF-AS has been contracted by the E-Government Innovation Center EGIZ, a * joint initiative of the Federal Chancellery Austria and Graz University of * Technology. * * Licensed under the EUPL, Version 1.1 or - as soon they will be approved by * the European Commission - subsequent versions of the EUPL (the "Licence"); * You may not use this work except in compliance with the Licence. * You may obtain a copy of the Licence at: * http://www.osor.eu/eupl/ * * Unless required by applicable law or agreed to in writing, software * distributed under the Licence is distributed on an "AS IS" basis, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the Licence for the specific language governing permissions and * limitations under the Licence. * * This product combines work with different licenses. See the "NOTICE" text * file for details on the various modules and licenses. * The "NOTICE" text file is part of the distribution. Any derivative works * that you distribute must include a readable copy of the "NOTICE" text file. */ package at.gv.egiz.pdfas.impl.vfilter.helper; import java.util.ArrayList; import java.util.Iterator; import java.util.List; import at.gv.egiz.pdfas.exceptions.framework.VerificationFilterException; import at.gv.egiz.pdfas.impl.input.helper.DataSourceHelper; import at.gv.egiz.pdfas.impl.vfilter.Partition; import at.gv.egiz.pdfas.impl.vfilter.partition.BinaryPartition; import at.gv.egiz.pdfas.impl.vfilter.partition.TextPartition; import at.gv.egiz.pdfas.framework.input.PdfDataSource; import at.knowcenter.wag.exactparser.parsing.results.FooterParseResult; /** * Contains helpful methods used by the VerificationFilter. * * @author wprinz */ public final class VerificationFilterHelper { /** * Partitions the list of Incremental Update blocks into text and binary * partitions. * *

* A partition is a sequence of Incremental Update blocks of the same type. *

*

* An Incremental Update block is considered to have the type "binary" if it * contains an egiz dictionary. A block not containing an egiz dictionary is * considert to have the type "text". *

* * @param pdf * The PDF. * @param blocks * The Incremental Update blocks. * @return Returns the partitioning of the blocks. * @throws VerificationFilterException * Thrown if something goes wrong. */ public static List partition(PdfDataSource pdf, List blocks) throws VerificationFilterException { List partitions = new ArrayList(blocks.size()); Iterator it = blocks.iterator(); while (it.hasNext()) { FooterParseResult fpr = (FooterParseResult) it.next(); byte[] data = DataSourceHelper.convertDataSourceToByteArray(pdf); if (VerificationFilterBinaryHelper.containsEGIZDict(data, fpr)) { BinaryPartition bp = null; if (partitions.isEmpty() || ((Partition) partitions.get(partitions.size() - 1)).isTextPartition()) { bp = new BinaryPartition(); bp.blocks = new ArrayList(blocks.size()); partitions.add(bp); } else { bp = (BinaryPartition) partitions.get(partitions.size() - 1); } assert bp != null; bp.blocks.add(fpr); } else { TextPartition tp = null; if (partitions.isEmpty() || !((Partition) partitions.get(partitions.size() - 1)).isTextPartition()) { tp = new TextPartition(); tp.blocks = new ArrayList(blocks.size()); partitions.add(tp); } else { tp = (TextPartition) partitions.get(partitions.size() - 1); } assert tp != null; tp.blocks.add(fpr); } } assert partitions.size() >= 1 : "There must be at least one partition"; return partitions; } /** * Determines the end of the given partiton. * * @param partition * The partition. * @return Returns the end index of the given partition. */ public static int getEndOfPartition(Partition partition) { List blocks = null; if (partition instanceof TextPartition) { blocks = ((TextPartition) partition).blocks; } else { blocks = ((BinaryPartition) partition).blocks; } return ((FooterParseResult) blocks.get(blocks.size() - 1)).next_index; } /** * Finds the last text partition in the given list of partitions. * * @param partitions * The partitions. * @return Returns the last TextPartition. */ public static TextPartition findLastTextPartition(List partitions) { Partition lastTextPartition = (Partition) partitions.get(partitions.size() - 1); if (!lastTextPartition.isTextPartition()) { assert partitions.size() > 1 : "The only one partition cannot be a binary partition - where is the original document?"; Partition previousToLastPartition = (Partition) partitions.get(partitions.size() - 2); assert previousToLastPartition.isTextPartition() : "The previous to last partition must be a text partition or something is wrong with the partitioning algorithm."; lastTextPartition = previousToLastPartition; } return (TextPartition) lastTextPartition; } }