diff options
author | netconomy <netconomy@7b5415b0-85f9-ee4d-85bd-d5d0c3b42d1c> | 2007-08-17 06:10:56 +0000 |
---|---|---|
committer | netconomy <netconomy@7b5415b0-85f9-ee4d-85bd-d5d0c3b42d1c> | 2007-08-17 06:10:56 +0000 |
commit | 3d982813b34f6f230baf4a467cdc37ec92a77595 (patch) | |
tree | 85319d39cee2ded1bb7a2b2dd9e8ea37e3778248 /src/main/java/at/knowcenter/wag/egov/egiz/pdf/TextualSignature.java | |
parent | 07f6c8f33b2d700276fe6ec6339ff836c8710131 (diff) | |
download | pdf-as-3-3d982813b34f6f230baf4a467cdc37ec92a77595.tar.gz pdf-as-3-3d982813b34f6f230baf4a467cdc37ec92a77595.tar.bz2 pdf-as-3-3d982813b34f6f230baf4a467cdc37ec92a77595.zip |
Performance
git-svn-id: https://joinup.ec.europa.eu/svn/pdf-as/trunk@167 7b5415b0-85f9-ee4d-85bd-d5d0c3b42d1c
Diffstat (limited to 'src/main/java/at/knowcenter/wag/egov/egiz/pdf/TextualSignature.java')
-rw-r--r-- | src/main/java/at/knowcenter/wag/egov/egiz/pdf/TextualSignature.java | 26 |
1 files changed, 20 insertions, 6 deletions
diff --git a/src/main/java/at/knowcenter/wag/egov/egiz/pdf/TextualSignature.java b/src/main/java/at/knowcenter/wag/egov/egiz/pdf/TextualSignature.java index a52d6dd..668bbcb 100644 --- a/src/main/java/at/knowcenter/wag/egov/egiz/pdf/TextualSignature.java +++ b/src/main/java/at/knowcenter/wag/egov/egiz/pdf/TextualSignature.java @@ -23,6 +23,9 @@ import java.io.File; import java.io.IOException;
import java.io.InputStream;
+import at.gv.egiz.pdfas.performance.PerformanceCounters;
+import at.gv.egiz.pdfas.exceptions.pdf.TextExtractionException;
+
import org.apache.log4j.Logger;
import org.pdfbox.pdfparser.PDFParser;
import org.pdfbox.pdmodel.PDDocument;
@@ -31,7 +34,6 @@ import org.pdfbox.util.PDFTextStripper; import at.knowcenter.wag.egov.egiz.cfg.ConfigLogger;
import at.knowcenter.wag.egov.egiz.cfg.SettingsReader;
import at.knowcenter.wag.egov.egiz.exceptions.PresentableException;
-import at.knowcenter.wag.egov.egiz.framework.signators.TextualSignator_1_0_0;
import com.lowagie.text.Document;
import com.lowagie.text.DocumentException;
@@ -52,6 +54,7 @@ public class TextualSignature * The logger definition.
*/
private static final Logger logger_ = ConfigLogger.getLogger(TextualSignature.class);
+
/**
* Extracts the document text from a given pdf.
*
@@ -61,8 +64,10 @@ public class TextualSignature * @throws PresentableException
* Forwarded exception.
*/
- public static String extractTextTextual(InputStream pdf_stream) throws PresentableException
+ public static String extractTextTextual(InputStream pdf_stream) throws TextExtractionException
{
+ PerformanceCounters.textExtractions.increment();
+
try
{
int first_page_rotation = 0;
@@ -79,8 +84,10 @@ public class TextualSignature //iText
PdfReader reader = new PdfReader(pdf_stream);
+ pdf_stream.close();
- ByteArrayOutputStream baos = new ByteArrayOutputStream();
+ // PERF: PDF normalization needs byte array - this is costly
+ ByteArrayOutputStream baos = new ByteArrayOutputStream(4096);
// For some reason the Reader -> ImportPage -> Writer mechanism produces
// problems en masse.
@@ -136,7 +143,7 @@ public class TextualSignature // }
baos.close();
- byte[] normalizedPDF = baos.toByteArray();
+ byte[] normalizedPDF = baos.toByteArray();
ByteArrayInputStream bais = new ByteArrayInputStream(normalizedPDF);
//PDFBox-parser
@@ -164,9 +171,13 @@ public class TextualSignature return text;
}
- catch (Exception e)
+ catch (IOException e)
+ {
+ throw new TextExtractionException(e);
+ }
+ catch (DocumentException e)
{
- throw new PresentableException(e);
+ throw new TextExtractionException(e);
}
}
@@ -195,6 +206,9 @@ public class TextualSignature {
//iText
PdfReader reader = new PdfReader(input_pdf);
+ input_pdf.close();
+
+ // PERF: PDF Normalization needs byte array
ByteArrayOutputStream baos = new ByteArrayOutputStream();
// For some reason the Reader -> ImportPage -> Writer mechanism produces
// problems en masse.
|