about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author ferbas <ferbas@7b5415b0-85f9-ee4d-85bd-d5d0c3b42d1c> 2010-01-05 14:37:21 +0000
committer ferbas <ferbas@7b5415b0-85f9-ee4d-85bd-d5d0c3b42d1c> 2010-01-05 14:37:21 +0000
commit f7f25c895855b4fd4f3d778e26242385c58f0829 (patch)
tree 9a3505a77053256cfb6cd98735534814514152b2
parent 1b303f5abfdadd03a9e863ebd3cb8713c4d67cc6 (diff)
download pdf-as-3-f7f25c895855b4fd4f3d778e26242385c58f0829.tar.gz
pdf-as-3-f7f25c895855b4fd4f3d778e26242385c58f0829.tar.bz2
pdf-as-3-f7f25c895855b4fd4f3d778e26242385c58f0829.zip
added text sig version 1.2.0
fixed text extraction encoding bug

git-svn-id: https://joinup.ec.europa.eu/svn/pdf-as/trunk@530 7b5415b0-85f9-ee4d-85bd-d5d0c3b42d1c
-rw-r--r-- src/main/java/at/knowcenter/wag/egov/egiz/PdfAS.java 20
1 files changed, 17 insertions, 3 deletions
diff --git a/src/main/java/at/knowcenter/wag/egov/egiz/PdfAS.java b/src/main/java/at/knowcenter/wag/egov/egiz/PdfAS.java
index 78e4eec..57e003a 100644
--- a/src/main/java/at/knowcenter/wag/egov/egiz/PdfAS.java
+++ b/src/main/java/at/knowcenter/wag/egov/egiz/PdfAS.java
@@ -1099,10 +1099,24 @@ public abstract class PdfAS
return ObjectExtractor.extractNonTextInfo(pdfDs);
}
+
+ /**
+ * @deprecated
+ * Use version with explicit encoding {@link PdfAS#extractNormalizedTextTextual(PdfDataSource, String)}.
+ * This one uses cp1252.
+ *
+ * @param pdfDataSource
+ * @return
+ * @throws PresentableException
+ */
+ public static String extractNormalizedTextTextual(PdfDataSource pdfDataSource) throws PresentableException {
+ return extractNormalizedTextTextual(pdfDataSource, "cp1252");
+
+ }
- public static String extractNormalizedTextTextual(PdfDataSource pdfDataSource) throws PresentableException
+ public static String extractNormalizedTextTextual(PdfDataSource pdfDataSource, String encoding) throws PresentableException
{
- String raw_document_text = TextualSignature.extractTextTextual(pdfDataSource);
+ String raw_document_text = TextualSignature.extractTextTextual(pdfDataSource, encoding);
String document_text = normalizeText(raw_document_text);
return document_text;
}
@@ -1112,7 +1126,7 @@ public abstract class PdfAS
public static String extractNormalizedTextTextual(byte [] pdf, int length) throws PresentableException
{
ByteArrayPdfDataSourceImpl pdfDataSource = new ByteArrayPdfDataSourceImpl(pdf, length);
- String raw_document_text = TextualSignature.extractTextTextual(pdfDataSource);
+ String raw_document_text = TextualSignature.extractTextTextual(pdfDataSource, "cp1252");
String document_text = normalizeText(raw_document_text);
return document_text;
}