about | summary | refs | log | tree | commit | diff
path: root/src/main/java/at/gv/egiz/pdfas/impl/vfilter/VerificationFilterImpl.java
diff options
context:
space:
mode:
author: ferbas <ferbas@7b5415b0-85f9-ee4d-85bd-d5d0c3b42d1c> 2010-01-05 14:35:20 +0000
committer: ferbas <ferbas@7b5415b0-85f9-ee4d-85bd-d5d0c3b42d1c> 2010-01-05 14:35:20 +0000
commit: 1b303f5abfdadd03a9e863ebd3cb8713c4d67cc6 (patch)
tree: 3d945fe4b5655d86635dc4804f12d810f527ce59 /src/main/java/at/gv/egiz/pdfas/impl/vfilter/VerificationFilterImpl.java
parent: ee4c98f1bb6e97c719be494238856299d821a054 (diff)
download: pdf-as-3-1b303f5abfdadd03a9e863ebd3cb8713c4d67cc6.tar.gz
pdf-as-3-1b303f5abfdadd03a9e863ebd3cb8713c4d67cc6.tar.bz2
pdf-as-3-1b303f5abfdadd03a9e863ebd3cb8713c4d67cc6.zip
added text sig version 1.2.0
fixed text extraction encoding bug

git-svn-id: https://joinup.ec.europa.eu/svn/pdf-as/trunk@529 7b5415b0-85f9-ee4d-85bd-d5d0c3b42d1c
Diffstat (limited to 'src/main/java/at/gv/egiz/pdfas/impl/vfilter/VerificationFilterImpl.java')
-rw-r--r-- src/main/java/at/gv/egiz/pdfas/impl/vfilter/VerificationFilterImpl.java | 46
1 files changed, 43 insertions, 3 deletions
diff --git a/src/main/java/at/gv/egiz/pdfas/impl/vfilter/VerificationFilterImpl.java b/src/main/java/at/gv/egiz/pdfas/impl/vfilter/VerificationFilterImpl.java
index acf6622..47f217a 100644
--- a/src/main/java/at/gv/egiz/pdfas/impl/vfilter/VerificationFilterImpl.java
+++ b/src/main/java/at/gv/egiz/pdfas/impl/vfilter/VerificationFilterImpl.java
@@ -504,14 +504,18 @@ public class VerificationFilterImpl implements VerificationFilter
return blockPartitions;
}
- protected String extractText(PdfDataSource pdf, int endOfDocument) throws PresentableException
+ protected String extractText(PdfDataSource pdf, int endOfDocument) throws PresentableException {
+ return extractText(pdf, endOfDocument, "utf8");
+ }
+ protected String extractText(PdfDataSource pdf, int endOfDocument, String encoding) throws PresentableException
{
+
log.debug("EXTRACTING TEXT... end index = " + endOfDocument);
DelimitedPdfDataSource dds = new DelimitedPdfDataSource(pdf, endOfDocument);
//DelimitedInputStream dis = new DelimitedInputStream(pdf.createInputStream(), endOfDocument);
- return PdfAS.extractNormalizedTextTextual(dds);
+ return PdfAS.extractNormalizedTextTextual(dds, encoding);
}
@@ -714,6 +718,42 @@ public class VerificationFilterImpl implements VerificationFilter
log.debug("Extracting signatures finished.");
log.debug("Number of found signatures: " + extractedSignatures.size());
+ if (extractedSignatures.size() > 0) {
+ List cp1252SignaturesPositions = new ArrayList();
+ //boolean iscp1252Sig = false;
+ for (int i = 0; i < extractedSignatures.size(); i++) {
+ SignatureHolder sh = (SignatureHolder)extractedSignatures.get(i);
+ PdfASID kzid = sh.getSignatureObject().getKZ();
+ if (kzid != null && kzid.isOldCp1252Version()) {
+ log.debug("found cp1252 signature");
+ cp1252SignaturesPositions.add(new Integer(i));
+ //iscp1252Sig = true;
+ //break;
+ }
+ }
+ if (cp1252SignaturesPositions.size() > 0) {
+ log.debug("redo text and signature extraction with cp1252 encoding");
+ extractedText = extractText(pdf, endOfDocument, "cp1252");
+ log.debug("Extracting text finished.");
+
+ log.debug("Extracting signatures:");
+ List cp1252ExtractedSignatures = extractNewSignaturesFromText(extractedText);
+ log.debug("Extracting signatures finished.");
+ log.debug("Number of found signatures: " + extractedSignatures.size());
+
+ if (cp1252ExtractedSignatures.size() != extractedSignatures.size()) {
+ log.error("Invalid cp1252 signatures found. Skipping cp1252 compatibility.");
+ }
+ // merge signature holders
+ for (int i = 0; i < cp1252SignaturesPositions.size(); i++) {
+ int replaceIndex = ((Integer)cp1252SignaturesPositions.get(i)).intValue();
+ extractedSignatures.remove(replaceIndex);
+ extractedSignatures.add(replaceIndex, cp1252ExtractedSignatures.get(replaceIndex));
+ }
+ }
+
+ }
+
if (log.isDebugEnabled())
{
log.debug("extracted signatures:");
@@ -788,7 +828,7 @@ public class VerificationFilterImpl implements VerificationFilter
DelimitedPdfDataSource dds = new DelimitedPdfDataSource(pdf, pdf.getLength());
String text = null;
try {
- text = PdfAS.extractNormalizedTextTextual(dds);
+ text = PdfAS.extractNormalizedTextTextual(dds, "utf-8");
} catch (PresentableException e) {
throw new VerificationFilterException(e);
}