From 3d982813b34f6f230baf4a467cdc37ec92a77595 Mon Sep 17 00:00:00 2001 From: netconomy Date: Fri, 17 Aug 2007 06:10:56 +0000 Subject: Performance git-svn-id: https://joinup.ec.europa.eu/svn/pdf-as/trunk@167 7b5415b0-85f9-ee4d-85bd-d5d0c3b42d1c --- .../gv/egiz/pdfas/framework/input/DataSource.java | 35 ++++++++++++ .../pdfas/framework/input/ExtractionStage.java | 66 ++++++++++++++++++++++ .../egiz/pdfas/framework/input/PdfDataSource.java | 21 +++++++ .../egiz/pdfas/framework/input/TextDataSource.java | 19 +++++++ 4 files changed, 141 insertions(+) create mode 100644 src/main/java/at/gv/egiz/pdfas/framework/input/DataSource.java create mode 100644 src/main/java/at/gv/egiz/pdfas/framework/input/ExtractionStage.java create mode 100644 src/main/java/at/gv/egiz/pdfas/framework/input/PdfDataSource.java create mode 100644 src/main/java/at/gv/egiz/pdfas/framework/input/TextDataSource.java (limited to 'src/main/java/at/gv/egiz/pdfas/framework/input') diff --git a/src/main/java/at/gv/egiz/pdfas/framework/input/DataSource.java b/src/main/java/at/gv/egiz/pdfas/framework/input/DataSource.java new file mode 100644 index 0000000..265cb0c --- /dev/null +++ b/src/main/java/at/gv/egiz/pdfas/framework/input/DataSource.java @@ -0,0 +1,35 @@ +/** + * + */ +package at.gv.egiz.pdfas.framework.input; + +import java.io.InputStream; + +/** + * The input document data source. + * + *

+ * Usually this is a PdfDataSource, but it may be a TextDataSource as well. + *

+ * + * @author wprinz + * + */ +public interface DataSource +{ + /** + * Creates a new InputStream that allows to read out the document's binary + * data from the beginning. + * + * @return Returns the InputStream with the binary data. + */ + public InputStream createInputStream(); + + /** + * Returns the length (number of bytes) of the stream. + * + * @return Returns the length (number of bytes) of the stream. + */ + public int getLength(); + +} diff --git a/src/main/java/at/gv/egiz/pdfas/framework/input/ExtractionStage.java b/src/main/java/at/gv/egiz/pdfas/framework/input/ExtractionStage.java new file mode 100644 index 0000000..36d9bd8 --- /dev/null +++ b/src/main/java/at/gv/egiz/pdfas/framework/input/ExtractionStage.java @@ -0,0 +1,66 @@ +/** + * + */ +package at.gv.egiz.pdfas.framework.input; + +import java.util.List; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; + +import at.gv.egiz.pdfas.exceptions.ErrorCode; +import at.gv.egiz.pdfas.exceptions.framework.VerificationFilterException; +import at.gv.egiz.pdfas.framework.vfilter.VerificationFilter; +import at.gv.egiz.pdfas.framework.vfilter.VerificationFilterParameters; +import at.gv.egiz.pdfas.impl.input.IncrementalUpdateParser; +import at.gv.egiz.pdfas.impl.vfilter.VerificationFilterImpl; +import at.knowcenter.wag.egov.egiz.exceptions.PDFDocumentException; +import at.knowcenter.wag.egov.egiz.exceptions.PresentableException; + +/** + * Extracts all signatures from a given input DataSource. + * + * @author wprinz + */ +public class ExtractionStage +{ + /** + * The log. 
+ */ + private static final Log log = LogFactory.getLog(ExtractionStage.class); + + public List extractSignatureHolders(final DataSource dataSource, VerificationFilterParameters parameters) throws PresentableException + { + if (dataSource instanceof PdfDataSource) + { + PdfDataSource pdfDataSource = (PdfDataSource) dataSource; + + List blocks = parsePdfIntoBlocks(pdfDataSource); + + VerificationFilter vf = new VerificationFilterImpl(); + List signatures = vf.extractSignatureHolders(pdfDataSource, blocks, parameters); + + return signatures; + } + + if (dataSource instanceof TextDataSource) + { + TextDataSource textDataSource = (TextDataSource) dataSource; + + VerificationFilter vf = new VerificationFilterImpl(); + List signatures = vf.extractSignaturHolders(textDataSource, parameters); + + return signatures; + } + + String msg = "The input DataSource is neither pdf nor text. class.name = " + dataSource.getClass().getName(); + log.error(msg); + throw new VerificationFilterException(ErrorCode.DOCUMENT_CANNOT_BE_READ, msg); + } + + protected List parsePdfIntoBlocks(PdfDataSource pdfDataSource) throws PDFDocumentException + { + List blocks = IncrementalUpdateParser.parsePdfIntoIUBlocks(pdfDataSource); + return blocks; + } +} diff --git a/src/main/java/at/gv/egiz/pdfas/framework/input/PdfDataSource.java b/src/main/java/at/gv/egiz/pdfas/framework/input/PdfDataSource.java new file mode 100644 index 0000000..b03a67e --- /dev/null +++ b/src/main/java/at/gv/egiz/pdfas/framework/input/PdfDataSource.java @@ -0,0 +1,21 @@ +/** + * + */ +package at.gv.egiz.pdfas.framework.input; + + +/** + * Represents the binary data of a PDF document. + * + *

+ * This interface allows Pdf data to be handled in an abstract way so that the + * storage (byte array, disk etc.) of pdf documents can be separated from the + * algorithms. + *

+ * + * @author wprinz + */ +public interface PdfDataSource extends DataSource +{ + // just a marker interface +} diff --git a/src/main/java/at/gv/egiz/pdfas/framework/input/TextDataSource.java b/src/main/java/at/gv/egiz/pdfas/framework/input/TextDataSource.java new file mode 100644 index 0000000..c5fd4b1 --- /dev/null +++ b/src/main/java/at/gv/egiz/pdfas/framework/input/TextDataSource.java @@ -0,0 +1,19 @@ +/** + * + */ +package at.gv.egiz.pdfas.framework.input; + +/** + * Represents a free-text input text to be processed. + * + * @author wprinz + */ +public interface TextDataSource extends DataSource +{ + /** + * Returns the text to be processed. + * @return Returns the text to be processed. + */ + public String getText(); + +} -- cgit v1.2.3