From 3d982813b34f6f230baf4a467cdc37ec92a77595 Mon Sep 17 00:00:00 2001 From: netconomy Date: Fri, 17 Aug 2007 06:10:56 +0000 Subject: Performance git-svn-id: https://joinup.ec.europa.eu/svn/pdf-as/trunk@167 7b5415b0-85f9-ee4d-85bd-d5d0c3b42d1c --- .../wag/egov/egiz/pdf/TextualSignature.java | 26 +++++++++++++++++----- 1 file changed, 20 insertions(+), 6 deletions(-) (limited to 'src/main/java/at/knowcenter/wag/egov/egiz/pdf/TextualSignature.java') diff --git a/src/main/java/at/knowcenter/wag/egov/egiz/pdf/TextualSignature.java b/src/main/java/at/knowcenter/wag/egov/egiz/pdf/TextualSignature.java index a52d6dd..668bbcb 100644 --- a/src/main/java/at/knowcenter/wag/egov/egiz/pdf/TextualSignature.java +++ b/src/main/java/at/knowcenter/wag/egov/egiz/pdf/TextualSignature.java @@ -23,6 +23,9 @@ import java.io.File; import java.io.IOException; import java.io.InputStream; +import at.gv.egiz.pdfas.performance.PerformanceCounters; +import at.gv.egiz.pdfas.exceptions.pdf.TextExtractionException; + import org.apache.log4j.Logger; import org.pdfbox.pdfparser.PDFParser; import org.pdfbox.pdmodel.PDDocument; @@ -31,7 +34,6 @@ import org.pdfbox.util.PDFTextStripper; import at.knowcenter.wag.egov.egiz.cfg.ConfigLogger; import at.knowcenter.wag.egov.egiz.cfg.SettingsReader; import at.knowcenter.wag.egov.egiz.exceptions.PresentableException; -import at.knowcenter.wag.egov.egiz.framework.signators.TextualSignator_1_0_0; import com.lowagie.text.Document; import com.lowagie.text.DocumentException; @@ -52,6 +54,7 @@ public class TextualSignature * The logger definition. */ private static final Logger logger_ = ConfigLogger.getLogger(TextualSignature.class); + /** * Extracts the document text from a given pdf. * @@ -61,8 +64,10 @@ public class TextualSignature * @throws PresentableException * Forwarded exception. 
*/ - public static String extractTextTextual(InputStream pdf_stream) throws PresentableException + public static String extractTextTextual(InputStream pdf_stream) throws TextExtractionException { + PerformanceCounters.textExtractions.increment(); + try { int first_page_rotation = 0; @@ -79,8 +84,10 @@ public class TextualSignature //iText PdfReader reader = new PdfReader(pdf_stream); + pdf_stream.close(); - ByteArrayOutputStream baos = new ByteArrayOutputStream(); + // PERF: PDF normalization needs byte array - this is costly + ByteArrayOutputStream baos = new ByteArrayOutputStream(4096); // For some reason the Reader -> ImportPage -> Writer mechanism produces // problems en mass. @@ -136,7 +143,7 @@ public class TextualSignature // } baos.close(); - byte[] normalizedPDF = baos.toByteArray(); + byte[] normalizedPDF = baos.toByteArray(); ByteArrayInputStream bais = new ByteArrayInputStream(normalizedPDF); //PDFBox-parser @@ -164,9 +171,13 @@ public class TextualSignature return text; } - catch (Exception e) + catch (IOException e) + { + throw new TextExtractionException(e); + } + catch (DocumentException e) { - throw new PresentableException(e); + throw new TextExtractionException(e); } } @@ -195,6 +206,9 @@ public class TextualSignature { //iText PdfReader reader = new PdfReader(input_pdf); + input_pdf.close(); + + // PERF: PDF Normalization needs byte array ByteArrayOutputStream baos = new ByteArrayOutputStream(); // For some reason the Reader -> ImportPage -> Writer mechanism produces // problems en mass. -- cgit v1.2.3