diff options
Diffstat (limited to 'src/main/java')
| -rw-r--r-- | src/main/java/at/knowcenter/wag/egov/egiz/PdfAS.java | 20 | 
1 files changed, 17 insertions, 3 deletions
| diff --git a/src/main/java/at/knowcenter/wag/egov/egiz/PdfAS.java b/src/main/java/at/knowcenter/wag/egov/egiz/PdfAS.java index 78e4eec..57e003a 100644 --- a/src/main/java/at/knowcenter/wag/egov/egiz/PdfAS.java +++ b/src/main/java/at/knowcenter/wag/egov/egiz/PdfAS.java @@ -1099,10 +1099,24 @@ public abstract class PdfAS       return ObjectExtractor.extractNonTextInfo(pdfDs);
    }
 +
 +  /**
 +   * @deprecated
 +   * Use the overload with an explicit encoding, {@link PdfAS#extractNormalizedTextTextual(PdfDataSource, String)}, instead.
 +   * This overload always passes "cp1252" as the encoding.
 +   * 
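 +   * @param pdfDataSource the PDF data source whose text is extracted
 +   * @return the extracted document text after normalization
 +   * @throws PresentableException propagated from the explicit-encoding overload this method delegates to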
 +   */
 +  public static String extractNormalizedTextTextual(PdfDataSource pdfDataSource) throws PresentableException {
 +     return extractNormalizedTextTextual(pdfDataSource, "cp1252");
 +     
 +  }
 -  public static String extractNormalizedTextTextual(PdfDataSource pdfDataSource) throws PresentableException
 +  public static String extractNormalizedTextTextual(PdfDataSource pdfDataSource, String encoding) throws PresentableException
    {
 -     String raw_document_text = TextualSignature.extractTextTextual(pdfDataSource);
 +     String raw_document_text = TextualSignature.extractTextTextual(pdfDataSource, encoding);
       String document_text = normalizeText(raw_document_text);
       return document_text;   
    }
 @@ -1112,7 +1126,7 @@ public abstract class PdfAS    public static String extractNormalizedTextTextual(byte [] pdf,  int length) throws PresentableException
    {
      ByteArrayPdfDataSourceImpl pdfDataSource = new ByteArrayPdfDataSourceImpl(pdf, length);
 -    String raw_document_text = TextualSignature.extractTextTextual(pdfDataSource);
 +    String raw_document_text = TextualSignature.extractTextTextual(pdfDataSource, "cp1252");
      String document_text = normalizeText(raw_document_text);
      return document_text;   
    }
 | 
