diff options
| author | ferbas <ferbas@7b5415b0-85f9-ee4d-85bd-d5d0c3b42d1c> | 2010-01-05 14:37:21 +0000 | 
|---|---|---|
| committer | ferbas <ferbas@7b5415b0-85f9-ee4d-85bd-d5d0c3b42d1c> | 2010-01-05 14:37:21 +0000 | 
| commit | f7f25c895855b4fd4f3d778e26242385c58f0829 (patch) | |
| tree | 9a3505a77053256cfb6cd98735534814514152b2 /src/main/java | |
| parent | 1b303f5abfdadd03a9e863ebd3cb8713c4d67cc6 (diff) | |
| download | pdf-as-3-f7f25c895855b4fd4f3d778e26242385c58f0829.tar.gz pdf-as-3-f7f25c895855b4fd4f3d778e26242385c58f0829.tar.bz2 pdf-as-3-f7f25c895855b4fd4f3d778e26242385c58f0829.zip | |
added text sig version 1.2.0
fixed text extraction encoding bug
git-svn-id: https://joinup.ec.europa.eu/svn/pdf-as/trunk@530 7b5415b0-85f9-ee4d-85bd-d5d0c3b42d1c
Diffstat (limited to 'src/main/java')
| -rw-r--r-- | src/main/java/at/knowcenter/wag/egov/egiz/PdfAS.java | 20 | 
1 files changed, 17 insertions, 3 deletions
| diff --git a/src/main/java/at/knowcenter/wag/egov/egiz/PdfAS.java b/src/main/java/at/knowcenter/wag/egov/egiz/PdfAS.java index 78e4eec..57e003a 100644 --- a/src/main/java/at/knowcenter/wag/egov/egiz/PdfAS.java +++ b/src/main/java/at/knowcenter/wag/egov/egiz/PdfAS.java @@ -1099,10 +1099,24 @@ public abstract class PdfAS       return ObjectExtractor.extractNonTextInfo(pdfDs);
    }
 +
 +  /**
 +   * @deprecated
 +   * Use version with explicit encoding {@link PdfAS#extractNormalizedTextTextual(PdfDataSource, String)}.
 +   *  This one uses cp1252. 
 +   * 
 +   * @param pdfDataSource
 +   * @return
 +   * @throws PresentableException
 +   */
 +  public static String extractNormalizedTextTextual(PdfDataSource pdfDataSource) throws PresentableException {
 +     return extractNormalizedTextTextual(pdfDataSource, "cp1252");
 +     
 +  }
 -  public static String extractNormalizedTextTextual(PdfDataSource pdfDataSource) throws PresentableException
 +  public static String extractNormalizedTextTextual(PdfDataSource pdfDataSource, String encoding) throws PresentableException
    {
 -     String raw_document_text = TextualSignature.extractTextTextual(pdfDataSource);
 +     String raw_document_text = TextualSignature.extractTextTextual(pdfDataSource, encoding);
       String document_text = normalizeText(raw_document_text);
       return document_text;   
    }
 @@ -1112,7 +1126,7 @@ public abstract class PdfAS    public static String extractNormalizedTextTextual(byte [] pdf,  int length) throws PresentableException
    {
      ByteArrayPdfDataSourceImpl pdfDataSource = new ByteArrayPdfDataSourceImpl(pdf, length);
 -    String raw_document_text = TextualSignature.extractTextTextual(pdfDataSource);
 +    String raw_document_text = TextualSignature.extractTextTextual(pdfDataSource, "cp1252");
      String document_text = normalizeText(raw_document_text);
      return document_text;   
    }
 | 
