about summary refs log tree commit diff
path: root/src/main/java/at/knowcenter/wag/egov/egiz/PdfAS.java
diff options
context:
space:
mode:
Diffstat (limited to 'src/main/java/at/knowcenter/wag/egov/egiz/PdfAS.java')
-rw-r--r-- src/main/java/at/knowcenter/wag/egov/egiz/PdfAS.java 30
1 files changed, 22 insertions, 8 deletions
diff --git a/src/main/java/at/knowcenter/wag/egov/egiz/PdfAS.java b/src/main/java/at/knowcenter/wag/egov/egiz/PdfAS.java
index e2505ea..6fc7b84 100644
--- a/src/main/java/at/knowcenter/wag/egov/egiz/PdfAS.java
+++ b/src/main/java/at/knowcenter/wag/egov/egiz/PdfAS.java
@@ -28,6 +28,7 @@ import java.util.Vector;
import at.gv.egiz.pdfas.commandline.CommandlineConnectorChooser;
import at.gv.egiz.pdfas.exceptions.ErrorCode;
+import at.gv.egiz.pdfas.impl.input.ByteArrayPdfDataSourceImpl;
import at.gv.egiz.pdfas.impl.input.DelimitedInputStream;
import at.gv.egiz.pdfas.impl.input.TextDataSourceImpl;
import at.gv.egiz.pdfas.impl.input.helper.DataSourceHelper;
@@ -1026,19 +1027,30 @@ public abstract class PdfAS
// return extractNormalizedTextTextual(pdf, pdf.length);
// }
- public static String extractNormalizedTextTextual(InputStream pdfInputStream) throws PresentableException
+ public static String extractNormalizedTextTextual(PdfDataSource pdfDataSource) throws PresentableException
{
- String raw_document_text = TextualSignature.extractTextTextual(pdfInputStream);
+ String raw_document_text = TextualSignature.extractTextTextual(pdfDataSource);
String document_text = normalizeText(raw_document_text);
return document_text;
}
- public static String extractNormalizedTextTextual(InputStream pdfInputStream, int length) throws PresentableException
+ /**
+ @deprecated
+ */
+ public static String extractNormalizedTextTextual(byte [] pdf, int length) throws PresentableException
{
- DelimitedInputStream dis = new DelimitedInputStream(pdfInputStream, length);
- String raw_document_text = TextualSignature.extractTextTextual(dis);
+ ByteArrayPdfDataSourceImpl pdfDataSource = new ByteArrayPdfDataSourceImpl(pdf, length);
+ String raw_document_text = TextualSignature.extractTextTextual(pdfDataSource);
String document_text = normalizeText(raw_document_text);
return document_text;
}
+
+// public static String extractNormalizedTextTextual(InputStream pdfInputStream, int length) throws PresentableException
+// {
+// DelimitedInputStream dis = new DelimitedInputStream(pdfInputStream, length);
+// String raw_document_text = TextualSignature.extractTextTextual(dis);
+// String document_text = normalizeText(raw_document_text);
+// return document_text;
+// }
// /**
// * Extracts and normalizes the text from the pdf.
// *
@@ -1380,9 +1392,11 @@ public abstract class PdfAS
{
try
{
- InputStream is = pdfDataSource.createInputStream();
- PdfReader reader = new PdfReader(is);
- is.close();
+ //InputStream is = pdfDataSource.createInputStream();
+ // PERF: byte array instead of stream
+ byte [] pdf_data = pdfDataSource.getAsByteArray();
+ PdfReader reader = new PdfReader(pdf_data);
+ //is.close();
return reader;
}
catch (IOException e)