aboutsummaryrefslogtreecommitdiff
path: root/src/main/java/at/gv/egiz/pdfas/framework/input
diff options
context:
space:
mode:
authornetconomy <netconomy@7b5415b0-85f9-ee4d-85bd-d5d0c3b42d1c>2007-08-17 06:10:56 +0000
committernetconomy <netconomy@7b5415b0-85f9-ee4d-85bd-d5d0c3b42d1c>2007-08-17 06:10:56 +0000
commit3d982813b34f6f230baf4a467cdc37ec92a77595 (patch)
tree85319d39cee2ded1bb7a2b2dd9e8ea37e3778248 /src/main/java/at/gv/egiz/pdfas/framework/input
parent07f6c8f33b2d700276fe6ec6339ff836c8710131 (diff)
downloadpdf-as-3-3d982813b34f6f230baf4a467cdc37ec92a77595.tar.gz
pdf-as-3-3d982813b34f6f230baf4a467cdc37ec92a77595.tar.bz2
pdf-as-3-3d982813b34f6f230baf4a467cdc37ec92a77595.zip
Performance
git-svn-id: https://joinup.ec.europa.eu/svn/pdf-as/trunk@167 7b5415b0-85f9-ee4d-85bd-d5d0c3b42d1c
Diffstat (limited to 'src/main/java/at/gv/egiz/pdfas/framework/input')
-rw-r--r--src/main/java/at/gv/egiz/pdfas/framework/input/DataSource.java35
-rw-r--r--src/main/java/at/gv/egiz/pdfas/framework/input/ExtractionStage.java66
-rw-r--r--src/main/java/at/gv/egiz/pdfas/framework/input/PdfDataSource.java21
-rw-r--r--src/main/java/at/gv/egiz/pdfas/framework/input/TextDataSource.java19
4 files changed, 141 insertions, 0 deletions
diff --git a/src/main/java/at/gv/egiz/pdfas/framework/input/DataSource.java b/src/main/java/at/gv/egiz/pdfas/framework/input/DataSource.java
new file mode 100644
index 0000000..265cb0c
--- /dev/null
+++ b/src/main/java/at/gv/egiz/pdfas/framework/input/DataSource.java
@@ -0,0 +1,35 @@
+/**
+ *
+ */
+package at.gv.egiz.pdfas.framework.input;
+
+import java.io.InputStream;
+
+/**
+ * The input document data source.
+ *
+ * <p>
+ * Usually this is a {@link PdfDataSource}, but it may be a
+ * {@link TextDataSource} as well.
+ * </p>
+ *
+ * @author wprinz
+ */
+public interface DataSource
+{
+ /**
+ * Creates a new InputStream for reading the document's binary data from
+ * the beginning. NOTE(review): presumably the caller closes it - confirm.
+ *
+ * @return the InputStream with the binary data.
+ */
+ public InputStream createInputStream();
+
+ /**
+ * Returns the length (number of bytes) of the stream.
+ *
+ * @return the number of bytes; being an int, at most Integer.MAX_VALUE.
+ */
+ public int getLength();
+
+}
diff --git a/src/main/java/at/gv/egiz/pdfas/framework/input/ExtractionStage.java b/src/main/java/at/gv/egiz/pdfas/framework/input/ExtractionStage.java
new file mode 100644
index 0000000..36d9bd8
--- /dev/null
+++ b/src/main/java/at/gv/egiz/pdfas/framework/input/ExtractionStage.java
@@ -0,0 +1,66 @@
+/**
+ *
+ */
+package at.gv.egiz.pdfas.framework.input;
+
+import java.util.List;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+
+import at.gv.egiz.pdfas.exceptions.ErrorCode;
+import at.gv.egiz.pdfas.exceptions.framework.VerificationFilterException;
+import at.gv.egiz.pdfas.framework.vfilter.VerificationFilter;
+import at.gv.egiz.pdfas.framework.vfilter.VerificationFilterParameters;
+import at.gv.egiz.pdfas.impl.input.IncrementalUpdateParser;
+import at.gv.egiz.pdfas.impl.vfilter.VerificationFilterImpl;
+import at.knowcenter.wag.egov.egiz.exceptions.PDFDocumentException;
+import at.knowcenter.wag.egov.egiz.exceptions.PresentableException;
+
+/**
+ * Extracts all signature holders from a given input DataSource.
+ * @author wprinz
+ */
+public class ExtractionStage
+{
+ /** The log. */
+ private static final Log log = LogFactory.getLog(ExtractionStage.class);
+ /**
+ * Returns the signature holders a new VerificationFilterImpl extracts from a pdf (parsed into blocks first) or text source.
+ * @throws VerificationFilterException with DOCUMENT_CANNOT_BE_READ (after logging) if dataSource is neither of the two.
+ */
+ public List extractSignatureHolders(final DataSource dataSource, VerificationFilterParameters parameters) throws PresentableException
+ {
+ if (dataSource instanceof PdfDataSource)
+ {
+ PdfDataSource pdfDataSource = (PdfDataSource) dataSource;
+
+ List blocks = parsePdfIntoBlocks(pdfDataSource);
+
+ VerificationFilter vf = new VerificationFilterImpl();
+ List signatures = vf.extractSignatureHolders(pdfDataSource, blocks, parameters);
+
+ return signatures;
+ }
+
+ if (dataSource instanceof TextDataSource)
+ {
+ TextDataSource textDataSource = (TextDataSource) dataSource;
+ // NOTE(review): "extractSignaturHolders" is spelled without the 'e' used by the pdf variant above; presumably this matches VerificationFilter - verify.
+ VerificationFilter vf = new VerificationFilterImpl();
+ List signatures = vf.extractSignaturHolders(textDataSource, parameters);
+
+ return signatures;
+ }
+ // NOTE(review): a null dataSource fails both instanceof checks and then throws NullPointerException at getClass() below.
+ String msg = "The input DataSource is neither pdf nor text. class.name = " + dataSource.getClass().getName();
+ log.error(msg);
+ throw new VerificationFilterException(ErrorCode.DOCUMENT_CANNOT_BE_READ, msg);
+ }
+ /** Returns the block list produced by IncrementalUpdateParser.parsePdfIntoIUBlocks for the given pdf. */
+ protected List parsePdfIntoBlocks(PdfDataSource pdfDataSource) throws PDFDocumentException
+ {
+ List blocks = IncrementalUpdateParser.parsePdfIntoIUBlocks(pdfDataSource);
+ return blocks;
+ }
+}
diff --git a/src/main/java/at/gv/egiz/pdfas/framework/input/PdfDataSource.java b/src/main/java/at/gv/egiz/pdfas/framework/input/PdfDataSource.java
new file mode 100644
index 0000000..b03a67e
--- /dev/null
+++ b/src/main/java/at/gv/egiz/pdfas/framework/input/PdfDataSource.java
@@ -0,0 +1,21 @@
+/**
+ *
+ */
+package at.gv.egiz.pdfas.framework.input;
+
+
+/**
+ * Represents the binary data of a PDF document.
+ *
+ * <p>
+ * This interface allows PDF data to be handled in an abstract way so that the
+ * storage (byte array, disk etc.) of PDF documents can be separated from the
+ * algorithms.
+ * </p>
+ *
+ * @author wprinz
+ */
+public interface PdfDataSource extends DataSource
+{
+ // just a marker interface: it declares no members beyond those of DataSource
+}
diff --git a/src/main/java/at/gv/egiz/pdfas/framework/input/TextDataSource.java b/src/main/java/at/gv/egiz/pdfas/framework/input/TextDataSource.java
new file mode 100644
index 0000000..c5fd4b1
--- /dev/null
+++ b/src/main/java/at/gv/egiz/pdfas/framework/input/TextDataSource.java
@@ -0,0 +1,19 @@
+/**
+ *
+ */
+package at.gv.egiz.pdfas.framework.input;
+
+/**
+ * Represents free-text input to be processed.
+ *
+ * @author wprinz
+ */
+public interface TextDataSource extends DataSource
+{
+ /**
+ * Returns the text to be processed.
+ * @return the text to be processed.
+ */
+ public String getText();
+
+}