aboutsummaryrefslogtreecommitdiff
path: root/src/main/java/at/gv/egiz/pdfas/framework/vfilter
diff options
context:
space:
mode:
author netconomy <netconomy@7b5415b0-85f9-ee4d-85bd-d5d0c3b42d1c> 2007-08-17 06:10:56 +0000
committer netconomy <netconomy@7b5415b0-85f9-ee4d-85bd-d5d0c3b42d1c> 2007-08-17 06:10:56 +0000
commit 3d982813b34f6f230baf4a467cdc37ec92a77595 (patch)
tree 85319d39cee2ded1bb7a2b2dd9e8ea37e3778248 /src/main/java/at/gv/egiz/pdfas/framework/vfilter
parent 07f6c8f33b2d700276fe6ec6339ff836c8710131 (diff)
download pdf-as-3-3d982813b34f6f230baf4a467cdc37ec92a77595.tar.gz
pdf-as-3-3d982813b34f6f230baf4a467cdc37ec92a77595.tar.bz2
pdf-as-3-3d982813b34f6f230baf4a467cdc37ec92a77595.zip
Performance
git-svn-id: https://joinup.ec.europa.eu/svn/pdf-as/trunk@167 7b5415b0-85f9-ee4d-85bd-d5d0c3b42d1c
Diffstat (limited to 'src/main/java/at/gv/egiz/pdfas/framework/vfilter')
-rw-r--r-- src/main/java/at/gv/egiz/pdfas/framework/vfilter/VerificationFilter.java 52
-rw-r--r-- src/main/java/at/gv/egiz/pdfas/framework/vfilter/VerificationFilterParameters.java 76
2 files changed, 128 insertions, 0 deletions
diff --git a/src/main/java/at/gv/egiz/pdfas/framework/vfilter/VerificationFilter.java b/src/main/java/at/gv/egiz/pdfas/framework/vfilter/VerificationFilter.java
new file mode 100644
index 0000000..1633b09
--- /dev/null
+++ b/src/main/java/at/gv/egiz/pdfas/framework/vfilter/VerificationFilter.java
@@ -0,0 +1,52 @@
+/**
+ *
+ */
+package at.gv.egiz.pdfas.framework.vfilter;
+
+import java.util.List;
+
+import at.gv.egiz.pdfas.exceptions.framework.VerificationFilterException;
+import at.gv.egiz.pdfas.framework.input.PdfDataSource;
+import at.gv.egiz.pdfas.framework.input.TextDataSource;
+
+/**
+ * Extracts all signatures from a given PDF document or text.
+ *
+ * @see VerificationFilterParameters
+ *
+ * @author wprinz
+ */
+public interface VerificationFilter
+{
+
+ /**
+ * Extracts the signatures from the given PDF.
+ *
+ * @param pdf
+ * The PDF document to extract the signatures from.
+ * @param blocks
+ * The List of Incremental Update blocks. Usually this comes from a
+ * preprocessing step.
+ * @param parameters
+ * The parameters of the extraction algorithm.
+ * @return Returns a List of SignatureHolders containing the signatures. May
+ * be empty in case no signatures have been found.
+ * @throws VerificationFilterException
+ * Thrown if something goes wrong.
+ */
+ public List extractSignatureHolders(PdfDataSource pdf, List blocks, VerificationFilterParameters parameters) throws VerificationFilterException;
+
+ /**
+ * Extracts the text signatures from the given free-text. NOTE(review): name
+ * is spelled "Signatur" (cf. extractSignatureHolders); renaming breaks callers.
+ * @param text
+ * The free-text to extract the text signatures from.
+ * @param parameters
+ * The parameters of the extraction algorithm.
+ * @return Returns a List of SignatureHolders containing the signatures. May
+ * be empty in case no signatures have been found.
+ * @throws VerificationFilterException
+ * Thrown if something goes wrong.
+ */
+ public List extractSignaturHolders(TextDataSource text, VerificationFilterParameters parameters) throws VerificationFilterException;
+}
diff --git a/src/main/java/at/gv/egiz/pdfas/framework/vfilter/VerificationFilterParameters.java b/src/main/java/at/gv/egiz/pdfas/framework/vfilter/VerificationFilterParameters.java
new file mode 100644
index 0000000..c518fef
--- /dev/null
+++ b/src/main/java/at/gv/egiz/pdfas/framework/vfilter/VerificationFilterParameters.java
@@ -0,0 +1,76 @@
+/**
+ *
+ */
+package at.gv.egiz.pdfas.framework.vfilter;
+
+/**
+ * The parameters of the VerificationFilter algorithm.
+ *
+ * @author wprinz
+ */
+public interface VerificationFilterParameters
+{
+
+ /**
+ * Tells the VerificationFilter to extract binary signatures only.
+ *
+ * <p>
+ * Not scanning for textual signatures allows the algorithm to skip text
+ * extraction and signature extraction, which are both time and memory
+ * intensive processes.
+ * </p>
+ *
+ * @return Returns true if the VerificationFilter should extract binary
+ * signatures only.
+ */
+ public boolean extractBinarySignaturesOnly();
+
+ /**
+ * Tells the VerificationFilter to assume that there are only signatures (and
+ * their Incremental Update blocks) younger than the original document.
+ *
+ * <p>
+ * This is equivalent to saying that the document was not updated using an
+ * Incremental Update block other than a signature after being signed. The
+ * Incremental Update blocks after the original document contain only
+ * signatures (either text or binary).
+ * </p>
+ * <p>
+ * This is equivalent to saying that there exists no Incremental Update block
+ * that would render a text signature before it invalid.
+ * </p>
+ * <p>
+ * Under this assumption, the process of finding all text signatures
+ * simplifies to one text extraction of the whole document and one signature
+ * extraction. This is of course a massive performance gain.
+ * </p>
+ * <p>
+ * Actually the algorithm performs a text extraction of the whole document not
+ * including trailing binary signature Incremental Update blocks. This means
+ * that if the last n Incremental Update blocks of a document are binary,
+ * there is no use extracting text from them.
+ * </p>
+ * <p>
+ * Note that if there are Incremental Update blocks with text after a
+ * signature, and thus this assumption does not hold, the signatures older
+ * than this block will break.
+ * </p>
+ *
+ * @return Returns true if the VerificationFilter should assume that there
+ * are only signature blocks after the original document.
+ */
+ public boolean assumeOnlySignatureUpdateBlocks();
+
+ /**
+ * Tells the VerificationFilter to scan for old signatures in the rest text.
+ *
+ * <p>
+ * The rest text is the text of the oldest text signature or the original
+ * document text if there is no text signature.
+ * </p>
+ *
+ * @return Returns true if the VerificationFilter should scan for old text
+ * signatures in the rest text.
+ */
+ public boolean scanForOldSignatures();
+}