diff options
author | netconomy <netconomy@7b5415b0-85f9-ee4d-85bd-d5d0c3b42d1c> | 2007-12-17 15:41:15 +0000 |
---|---|---|
committer | netconomy <netconomy@7b5415b0-85f9-ee4d-85bd-d5d0c3b42d1c> | 2007-12-17 15:41:15 +0000 |
commit | cd02a128515400fdb97c276e70631d5bdb5ff509 (patch) | |
tree | 502bd301680327b3b50d16ad4cc20f4d14b4947f /src | |
parent | 8bf7db5feeba468ae6cbfe642b0aed899b00d535 (diff) | |
download | pdf-as-3-cd02a128515400fdb97c276e70631d5bdb5ff509.tar.gz pdf-as-3-cd02a128515400fdb97c276e70631d5bdb5ff509.tar.bz2 pdf-as-3-cd02a128515400fdb97c276e70631d5bdb5ff509.zip |
Rückbau FULL_CONSERVATIVE partitioning
git-svn-id: https://joinup.ec.europa.eu/svn/pdf-as/trunk@238 7b5415b0-85f9-ee4d-85bd-d5d0c3b42d1c
Diffstat (limited to 'src')
-rw-r--r-- | src/main/java/at/gv/egiz/pdfas/impl/vfilter/VerificationFilterImpl.java | 43 |
1 files changed, 43 insertions, 0 deletions
diff --git a/src/main/java/at/gv/egiz/pdfas/impl/vfilter/VerificationFilterImpl.java b/src/main/java/at/gv/egiz/pdfas/impl/vfilter/VerificationFilterImpl.java index bd5a146..d192f7a 100644 --- a/src/main/java/at/gv/egiz/pdfas/impl/vfilter/VerificationFilterImpl.java +++ b/src/main/java/at/gv/egiz/pdfas/impl/vfilter/VerificationFilterImpl.java @@ -258,6 +258,15 @@ public class VerificationFilterImpl implements VerificationFilter List binarySignatures = extractBinarySignaturesOnly(pdf, blocks);
SignatureHolder oldSignature = null;
+
+ //List originalPartitions = partitions;
+ // This gives every IU block its own text partition.
+ // This allows text signatures to be found correctly if there are
+ // IU blocks with disturbing text after them.
+ // On the other hand, this requires extra text extractions and
+ // signature searches and is therefore slow.
+ List flattedOutPartitions = flattenOutTextPartitions(partitions, blocks);
+ partitions = flattedOutPartitions;
List partitionResults = new ArrayList(partitions.size());
for (int i = 0; i < partitions.size(); i++)
@@ -320,6 +329,40 @@ public class VerificationFilterImpl implements VerificationFilter return signatureHolderChain;
}
+
+ protected List flattenOutTextPartitions (List partitions, List blocks)
+ {
+ // Builds a new list, in the same order as 'partitions', where each block of a TextPartition gets its own single-block TextPartition; the input list itself is not modified.
+ List blockPartitions = new ArrayList(blocks.size()); // 'blocks' is used only for this initial capacity and for the sanity check at the end
+ Iterator it = partitions.iterator();
+ while (it.hasNext())
+ {
+ Partition p = (Partition)it.next();
+ if (p instanceof TextPartition)
+ {
+ TextPartition tp = (TextPartition)p;
+ Iterator blockIt = tp.blocks.iterator();
+ while (blockIt.hasNext())
+ {
+ FooterParseResult fpr = (FooterParseResult)blockIt.next(); // entries of tp.blocks are cast to FooterParseResult
+ TextPartition newPt = new TextPartition(); // fresh partition that will hold exactly this one block
+ newPt.blocks = new ArrayList(1);
+ newPt.blocks.add(fpr);
+ blockPartitions.add(newPt);
+ }
+ }
+ else
+ {
+ // binary partition
+ blockPartitions.add(p); // added as-is: the same Partition object, not a copy
+ }
+ }
+
+ // note: successive binary blocks are still combined to one binary partition
+ assert blockPartitions.size() <= blocks.size(); // sanity check; evaluated only when assertions are enabled (-ea)
+
+ return blockPartitions;
+ }
protected String extractText(PdfDataSource pdf, int endOfDocument) throws PresentableException
|