diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/main/java/at/knowcenter/wag/egov/egiz/PdfAS.java | 20 |
1 files changed, 17 insertions, 3 deletions
diff --git a/src/main/java/at/knowcenter/wag/egov/egiz/PdfAS.java b/src/main/java/at/knowcenter/wag/egov/egiz/PdfAS.java index 78e4eec..57e003a 100644 --- a/src/main/java/at/knowcenter/wag/egov/egiz/PdfAS.java +++ b/src/main/java/at/knowcenter/wag/egov/egiz/PdfAS.java @@ -1099,10 +1099,24 @@ public abstract class PdfAS return ObjectExtractor.extractNonTextInfo(pdfDs);
}
+
+ /**
+ * Extracts the normalized text of the given PDF, passing "cp1252" as the encoding.
+ *
+ * @param pdfDataSource the PDF data source handed on to the two-argument overload
+ * @return the result of {@link PdfAS#extractNormalizedTextTextual(PdfDataSource, String)}
+ * @throws PresentableException if the two-argument overload throws it
+ * @deprecated Use the version with an explicit encoding,
+ * {@link PdfAS#extractNormalizedTextTextual(PdfDataSource, String)}.
+ */
+ public static String extractNormalizedTextTextual(PdfDataSource pdfDataSource) throws PresentableException {
+ return extractNormalizedTextTextual(pdfDataSource, "cp1252");
+
+ }
- public static String extractNormalizedTextTextual(PdfDataSource pdfDataSource) throws PresentableException
+ public static String extractNormalizedTextTextual(PdfDataSource pdfDataSource, String encoding) throws PresentableException
{
- String raw_document_text = TextualSignature.extractTextTextual(pdfDataSource);
+ String raw_document_text = TextualSignature.extractTextTextual(pdfDataSource, encoding); // the caller-supplied encoding is passed through unchanged
String document_text = normalizeText(raw_document_text);
return document_text;
}
@@ -1112,7 +1126,7 @@ public abstract class PdfAS public static String extractNormalizedTextTextual(byte [] pdf, int length) throws PresentableException
{
ByteArrayPdfDataSourceImpl pdfDataSource = new ByteArrayPdfDataSourceImpl(pdf, length);
- String raw_document_text = TextualSignature.extractTextTextual(pdfDataSource);
+ String raw_document_text = TextualSignature.extractTextTextual(pdfDataSource, "cp1252"); // this overload takes no encoding argument, so "cp1252" is fixed here
String document_text = normalizeText(raw_document_text);
return document_text;
}
|