diff options
Diffstat (limited to 'src/main/java/at/knowcenter/wag/egov/egiz/PdfAS.java')
-rw-r--r-- | src/main/java/at/knowcenter/wag/egov/egiz/PdfAS.java | 30 |
1 files changed, 22 insertions, 8 deletions
diff --git a/src/main/java/at/knowcenter/wag/egov/egiz/PdfAS.java b/src/main/java/at/knowcenter/wag/egov/egiz/PdfAS.java index e2505ea..6fc7b84 100644 --- a/src/main/java/at/knowcenter/wag/egov/egiz/PdfAS.java +++ b/src/main/java/at/knowcenter/wag/egov/egiz/PdfAS.java @@ -28,6 +28,7 @@ import java.util.Vector; import at.gv.egiz.pdfas.commandline.CommandlineConnectorChooser;
import at.gv.egiz.pdfas.exceptions.ErrorCode;
+import at.gv.egiz.pdfas.impl.input.ByteArrayPdfDataSourceImpl;
import at.gv.egiz.pdfas.impl.input.DelimitedInputStream;
import at.gv.egiz.pdfas.impl.input.TextDataSourceImpl;
import at.gv.egiz.pdfas.impl.input.helper.DataSourceHelper;
@@ -1026,19 +1027,30 @@ public abstract class PdfAS // return extractNormalizedTextTextual(pdf, pdf.length);
// }
- public static String extractNormalizedTextTextual(InputStream pdfInputStream) throws PresentableException
+ public static String extractNormalizedTextTextual(PdfDataSource pdfDataSource) throws PresentableException
{
- String raw_document_text = TextualSignature.extractTextTextual(pdfInputStream);
+ String raw_document_text = TextualSignature.extractTextTextual(pdfDataSource);
String document_text = normalizeText(raw_document_text);
return document_text;
}
- public static String extractNormalizedTextTextual(InputStream pdfInputStream, int length) throws PresentableException
+ /**
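+ @deprecated Use {@link #extractNormalizedTextTextual(PdfDataSource)} instead; this overload only wraps the byte array in a ByteArrayPdfDataSourceImpl.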
+ */
+ public static String extractNormalizedTextTextual(byte [] pdf, int length) throws PresentableException
{
- DelimitedInputStream dis = new DelimitedInputStream(pdfInputStream, length);
- String raw_document_text = TextualSignature.extractTextTextual(dis);
+ ByteArrayPdfDataSourceImpl pdfDataSource = new ByteArrayPdfDataSourceImpl(pdf, length);
+ String raw_document_text = TextualSignature.extractTextTextual(pdfDataSource);
String document_text = normalizeText(raw_document_text);
return document_text;
}
+
+// public static String extractNormalizedTextTextual(InputStream pdfInputStream, int length) throws PresentableException
+// {
+// DelimitedInputStream dis = new DelimitedInputStream(pdfInputStream, length);
+// String raw_document_text = TextualSignature.extractTextTextual(dis);
+// String document_text = normalizeText(raw_document_text);
+// return document_text;
+// }
// /**
// * Extracts and normalizes the text from the pdf.
// *
@@ -1380,9 +1392,11 @@ public abstract class PdfAS {
try
{
- InputStream is = pdfDataSource.createInputStream();
- PdfReader reader = new PdfReader(is);
- is.close();
+ //InputStream is = pdfDataSource.createInputStream();
+ // PERF: pass the data source's byte array to PdfReader instead of an InputStream
+ byte [] pdf_data = pdfDataSource.getAsByteArray();
+ PdfReader reader = new PdfReader(pdf_data);
+ //is.close();
return reader;
}
catch (IOException e)
|