From cd02a128515400fdb97c276e70631d5bdb5ff509 Mon Sep 17 00:00:00 2001 From: netconomy Date: Mon, 17 Dec 2007 15:41:15 +0000 Subject: =?UTF-8?q?R=C3=BCckbau=20FULL=5FCONSERVATIVE=20partitioning?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit git-svn-id: https://joinup.ec.europa.eu/svn/pdf-as/trunk@238 7b5415b0-85f9-ee4d-85bd-d5d0c3b42d1c --- .../pdfas/impl/vfilter/VerificationFilterImpl.java | 43 ++++++++++++++++++++++ 1 file changed, 43 insertions(+) (limited to 'src/main/java/at/gv/egiz/pdfas/impl') diff --git a/src/main/java/at/gv/egiz/pdfas/impl/vfilter/VerificationFilterImpl.java b/src/main/java/at/gv/egiz/pdfas/impl/vfilter/VerificationFilterImpl.java index bd5a146..d192f7a 100644 --- a/src/main/java/at/gv/egiz/pdfas/impl/vfilter/VerificationFilterImpl.java +++ b/src/main/java/at/gv/egiz/pdfas/impl/vfilter/VerificationFilterImpl.java @@ -258,6 +258,15 @@ public class VerificationFilterImpl implements VerificationFilter List binarySignatures = extractBinarySignaturesOnly(pdf, blocks); SignatureHolder oldSignature = null; + + //List originalPartitions = partitions; + // This gives every IU block its own text partition. + // This allows text signatures to be found correctly if there are + // IU blocks with disturbing text after them. + // On the other hand, this requires extra text extractions and + // signature searches and is therefore slow.
+ List flattedOutPartitions = flattenOutTextPartitions(partitions, blocks); + partitions = flattedOutPartitions; List partitionResults = new ArrayList(partitions.size()); for (int i = 0; i < partitions.size(); i++) @@ -320,6 +329,40 @@ public class VerificationFilterImpl implements VerificationFilter return signatureHolderChain; } + /** Builds and returns a new list in which every TextPartition found in partitions is replaced by one new TextPartition per entry of its blocks list (each new partition holding exactly that one entry), while any partition that is not a TextPartition is carried over unchanged and in order; the blocks argument is used only for the result list's initial capacity and for the size assertion. */ + protected List flattenOutTextPartitions (List partitions, List blocks) + { + + List blockPartitions = new ArrayList(blocks.size()); + Iterator it = partitions.iterator(); + while (it.hasNext()) + { + Partition p = (Partition)it.next(); + if (p instanceof TextPartition) + { + TextPartition tp = (TextPartition)p; + Iterator blockIt = tp.blocks.iterator(); + while (blockIt.hasNext()) + { + FooterParseResult fpr = (FooterParseResult)blockIt.next(); + TextPartition newPt = new TextPartition(); + newPt.blocks = new ArrayList(1); + newPt.blocks.add(fpr); + blockPartitions.add(newPt); + } + } + else + { + // binary partition + blockPartitions.add(p); + } + } + + // note: successive binary blocks are still combined to one binary partition + assert blockPartitions.size() <= blocks.size(); + + return blockPartitions; + } protected String extractText(PdfDataSource pdf, int endOfDocument) throws PresentableException -- cgit v1.2.3