From 85e574618b04a34d5e41444d17ce7e6d5a93cc5b Mon Sep 17 00:00:00 2001 From: netconomy Date: Thu, 6 Sep 2007 12:18:45 +0000 Subject: =?UTF-8?q?Streaming=20R=C3=BCckbau?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit git-svn-id: https://joinup.ec.europa.eu/svn/pdf-as/trunk@210 7b5415b0-85f9-ee4d-85bd-d5d0c3b42d1c --- .../java/at/knowcenter/wag/egov/egiz/PdfAS.java | 30 ++++++++++++++++------ 1 file changed, 22 insertions(+), 8 deletions(-) (limited to 'src/main/java/at/knowcenter/wag/egov/egiz/PdfAS.java') diff --git a/src/main/java/at/knowcenter/wag/egov/egiz/PdfAS.java b/src/main/java/at/knowcenter/wag/egov/egiz/PdfAS.java index e2505ea..6fc7b84 100644 --- a/src/main/java/at/knowcenter/wag/egov/egiz/PdfAS.java +++ b/src/main/java/at/knowcenter/wag/egov/egiz/PdfAS.java @@ -28,6 +28,7 @@ import java.util.Vector; import at.gv.egiz.pdfas.commandline.CommandlineConnectorChooser; import at.gv.egiz.pdfas.exceptions.ErrorCode; +import at.gv.egiz.pdfas.impl.input.ByteArrayPdfDataSourceImpl; import at.gv.egiz.pdfas.impl.input.DelimitedInputStream; import at.gv.egiz.pdfas.impl.input.TextDataSourceImpl; import at.gv.egiz.pdfas.impl.input.helper.DataSourceHelper; @@ -1026,19 +1027,30 @@ public abstract class PdfAS // return extractNormalizedTextTextual(pdf, pdf.length); // } - public static String extractNormalizedTextTextual(InputStream pdfInputStream) throws PresentableException + public static String extractNormalizedTextTextual(PdfDataSource pdfDataSource) throws PresentableException { - String raw_document_text = TextualSignature.extractTextTextual(pdfInputStream); + String raw_document_text = TextualSignature.extractTextTextual(pdfDataSource); String document_text = normalizeText(raw_document_text); return document_text; } - public static String extractNormalizedTextTextual(InputStream pdfInputStream, int length) throws PresentableException + /** + @deprecated + */ + public static String extractNormalizedTextTextual(byte [] pdf, int length) throws PresentableException { - DelimitedInputStream dis = new DelimitedInputStream(pdfInputStream, length); - String raw_document_text = TextualSignature.extractTextTextual(dis); + ByteArrayPdfDataSourceImpl pdfDataSource = new ByteArrayPdfDataSourceImpl(pdf, length); + String raw_document_text = TextualSignature.extractTextTextual(pdfDataSource); String document_text = normalizeText(raw_document_text); return document_text; } + +// public static String extractNormalizedTextTextual(InputStream pdfInputStream, int length) throws PresentableException +// { +// DelimitedInputStream dis = new DelimitedInputStream(pdfInputStream, length); +// String raw_document_text = TextualSignature.extractTextTextual(dis); +// String document_text = normalizeText(raw_document_text); +// return document_text; +// } // /** // * Extracts and normalizes the text from the pdf. // * @@ -1380,9 +1392,11 @@ public abstract class PdfAS { try { - InputStream is = pdfDataSource.createInputStream(); - PdfReader reader = new PdfReader(is); - is.close(); + //InputStream is = pdfDataSource.createInputStream(); + // PERF: byte array instead of stream + byte [] pdf_data = pdfDataSource.getAsByteArray(); + PdfReader reader = new PdfReader(pdf_data); + //is.close(); return reader; } catch (IOException e) -- cgit v1.2.3