diff options
author | ferbas <ferbas@7b5415b0-85f9-ee4d-85bd-d5d0c3b42d1c> | 2010-01-05 14:37:21 +0000 |
---|---|---|
committer | ferbas <ferbas@7b5415b0-85f9-ee4d-85bd-d5d0c3b42d1c> | 2010-01-05 14:37:21 +0000 |
commit | f7f25c895855b4fd4f3d778e26242385c58f0829 (patch) | |
tree | 9a3505a77053256cfb6cd98735534814514152b2 /src/main/java/at/knowcenter/wag/egov | |
parent | 1b303f5abfdadd03a9e863ebd3cb8713c4d67cc6 (diff) | |
download | pdf-as-3-f7f25c895855b4fd4f3d778e26242385c58f0829.tar.gz pdf-as-3-f7f25c895855b4fd4f3d778e26242385c58f0829.tar.bz2 pdf-as-3-f7f25c895855b4fd4f3d778e26242385c58f0829.zip |
added text sig version 1.2.0
fixed text extraction encoding bug
git-svn-id: https://joinup.ec.europa.eu/svn/pdf-as/trunk@530 7b5415b0-85f9-ee4d-85bd-d5d0c3b42d1c
Diffstat (limited to 'src/main/java/at/knowcenter/wag/egov')
-rw-r--r-- | src/main/java/at/knowcenter/wag/egov/egiz/PdfAS.java | 20 |
1 file changed, 17 insertions, 3 deletions
diff --git a/src/main/java/at/knowcenter/wag/egov/egiz/PdfAS.java b/src/main/java/at/knowcenter/wag/egov/egiz/PdfAS.java index 78e4eec..57e003a 100644 --- a/src/main/java/at/knowcenter/wag/egov/egiz/PdfAS.java +++ b/src/main/java/at/knowcenter/wag/egov/egiz/PdfAS.java @@ -1099,10 +1099,24 @@ public abstract class PdfAS return ObjectExtractor.extractNonTextInfo(pdfDs);
}
+
+ /**
+ * @deprecated
+ * Use the overload with an explicit encoding, {@link PdfAS#extractNormalizedTextTextual(PdfDataSource, String)}, instead.
+ * This overload always uses cp1252.
+ *
+ * @param pdfDataSource
+ * @return
+ * @throws PresentableException
+ */
+ public static String extractNormalizedTextTextual(PdfDataSource pdfDataSource) throws PresentableException {
+ return extractNormalizedTextTextual(pdfDataSource, "cp1252");
+
+ }
- public static String extractNormalizedTextTextual(PdfDataSource pdfDataSource) throws PresentableException
+ public static String extractNormalizedTextTextual(PdfDataSource pdfDataSource, String encoding) throws PresentableException
{
- String raw_document_text = TextualSignature.extractTextTextual(pdfDataSource);
+ String raw_document_text = TextualSignature.extractTextTextual(pdfDataSource, encoding);
String document_text = normalizeText(raw_document_text);
return document_text;
}
@@ -1112,7 +1126,7 @@ public abstract class PdfAS public static String extractNormalizedTextTextual(byte [] pdf, int length) throws PresentableException
{
ByteArrayPdfDataSourceImpl pdfDataSource = new ByteArrayPdfDataSourceImpl(pdf, length);
- String raw_document_text = TextualSignature.extractTextTextual(pdfDataSource);
+ String raw_document_text = TextualSignature.extractTextTextual(pdfDataSource, "cp1252");
String document_text = normalizeText(raw_document_text);
return document_text;
}
|