aboutsummaryrefslogtreecommitdiff
path: root/src/main/java/at/knowcenter/wag/egov/egiz/pdf/TextualSignature.java
diff options
context:
space:
mode:
authornetconomy <netconomy@7b5415b0-85f9-ee4d-85bd-d5d0c3b42d1c>2007-08-17 06:10:56 +0000
committernetconomy <netconomy@7b5415b0-85f9-ee4d-85bd-d5d0c3b42d1c>2007-08-17 06:10:56 +0000
commit3d982813b34f6f230baf4a467cdc37ec92a77595 (patch)
tree85319d39cee2ded1bb7a2b2dd9e8ea37e3778248 /src/main/java/at/knowcenter/wag/egov/egiz/pdf/TextualSignature.java
parent07f6c8f33b2d700276fe6ec6339ff836c8710131 (diff)
downloadpdf-as-3-3d982813b34f6f230baf4a467cdc37ec92a77595.tar.gz
pdf-as-3-3d982813b34f6f230baf4a467cdc37ec92a77595.tar.bz2
pdf-as-3-3d982813b34f6f230baf4a467cdc37ec92a77595.zip
Performance
git-svn-id: https://joinup.ec.europa.eu/svn/pdf-as/trunk@167 7b5415b0-85f9-ee4d-85bd-d5d0c3b42d1c
Diffstat (limited to 'src/main/java/at/knowcenter/wag/egov/egiz/pdf/TextualSignature.java')
-rw-r--r--src/main/java/at/knowcenter/wag/egov/egiz/pdf/TextualSignature.java26
1 files changed, 20 insertions, 6 deletions
diff --git a/src/main/java/at/knowcenter/wag/egov/egiz/pdf/TextualSignature.java b/src/main/java/at/knowcenter/wag/egov/egiz/pdf/TextualSignature.java
index a52d6dd..668bbcb 100644
--- a/src/main/java/at/knowcenter/wag/egov/egiz/pdf/TextualSignature.java
+++ b/src/main/java/at/knowcenter/wag/egov/egiz/pdf/TextualSignature.java
@@ -23,6 +23,9 @@ import java.io.File;
import java.io.IOException;
import java.io.InputStream;
+import at.gv.egiz.pdfas.performance.PerformanceCounters;
+import at.gv.egiz.pdfas.exceptions.pdf.TextExtractionException;
+
import org.apache.log4j.Logger;
import org.pdfbox.pdfparser.PDFParser;
import org.pdfbox.pdmodel.PDDocument;
@@ -31,7 +34,6 @@ import org.pdfbox.util.PDFTextStripper;
import at.knowcenter.wag.egov.egiz.cfg.ConfigLogger;
import at.knowcenter.wag.egov.egiz.cfg.SettingsReader;
import at.knowcenter.wag.egov.egiz.exceptions.PresentableException;
-import at.knowcenter.wag.egov.egiz.framework.signators.TextualSignator_1_0_0;
import com.lowagie.text.Document;
import com.lowagie.text.DocumentException;
@@ -52,6 +54,7 @@ public class TextualSignature
* The logger definition.
*/
private static final Logger logger_ = ConfigLogger.getLogger(TextualSignature.class);
+
/**
* Extracts the document text from a given pdf.
*
@@ -61,8 +64,10 @@ public class TextualSignature
* @throws PresentableException
* Forwarded exception.
*/
- public static String extractTextTextual(InputStream pdf_stream) throws PresentableException
+ public static String extractTextTextual(InputStream pdf_stream) throws TextExtractionException
{
+ PerformanceCounters.textExtractions.increment();
+
try
{
int first_page_rotation = 0;
@@ -79,8 +84,10 @@ public class TextualSignature
//iText
PdfReader reader = new PdfReader(pdf_stream);
+ pdf_stream.close();
- ByteArrayOutputStream baos = new ByteArrayOutputStream();
+ // PERF: PDF normalization needs byte array - this is costly
+ ByteArrayOutputStream baos = new ByteArrayOutputStream(4096);
// For some reason the Reader -> ImportPage -> Writer mechanism produces
// problems en masse.
@@ -136,7 +143,7 @@ public class TextualSignature
// }
baos.close();
- byte[] normalizedPDF = baos.toByteArray();
+ byte[] normalizedPDF = baos.toByteArray();
ByteArrayInputStream bais = new ByteArrayInputStream(normalizedPDF);
//PDFBox-parser
@@ -164,9 +171,13 @@ public class TextualSignature
return text;
}
- catch (Exception e)
+ catch (IOException e)
+ {
+ throw new TextExtractionException(e);
+ }
+ catch (DocumentException e)
{
- throw new PresentableException(e);
+ throw new TextExtractionException(e);
}
}
@@ -195,6 +206,9 @@ public class TextualSignature
{
//iText
PdfReader reader = new PdfReader(input_pdf);
+ input_pdf.close();
+
+ // PERF: PDF Normalization needs byte array
ByteArrayOutputStream baos = new ByteArrayOutputStream();
// For some reason the Reader -> ImportPage -> Writer mechanism produces
// problems en masse.