From 3d982813b34f6f230baf4a467cdc37ec92a77595 Mon Sep 17 00:00:00 2001 From: netconomy Date: Fri, 17 Aug 2007 06:10:56 +0000 Subject: Performance git-svn-id: https://joinup.ec.europa.eu/svn/pdf-as/trunk@167 7b5415b0-85f9-ee4d-85bd-d5d0c3b42d1c --- .../framework/vfilter/VerificationFilter.java | 52 +++++++++++++++ .../vfilter/VerificationFilterParameters.java | 76 ++++++++++++++++++++++ 2 files changed, 128 insertions(+) create mode 100644 src/main/java/at/gv/egiz/pdfas/framework/vfilter/VerificationFilter.java create mode 100644 src/main/java/at/gv/egiz/pdfas/framework/vfilter/VerificationFilterParameters.java (limited to 'src/main/java/at/gv/egiz/pdfas/framework/vfilter') diff --git a/src/main/java/at/gv/egiz/pdfas/framework/vfilter/VerificationFilter.java b/src/main/java/at/gv/egiz/pdfas/framework/vfilter/VerificationFilter.java new file mode 100644 index 0000000..1633b09 --- /dev/null +++ b/src/main/java/at/gv/egiz/pdfas/framework/vfilter/VerificationFilter.java @@ -0,0 +1,52 @@ +/** + * + */ +package at.gv.egiz.pdfas.framework.vfilter; + +import java.util.List; + +import at.gv.egiz.pdfas.exceptions.framework.VerificationFilterException; +import at.gv.egiz.pdfas.framework.input.PdfDataSource; +import at.gv.egiz.pdfas.framework.input.TextDataSource; + +/** + * Extracts all signatures from a given PDF document or text. + * + * @see VerificationFilterParameters + * + * @author wprinz + */ +public interface VerificationFilter +{ + + /** + * Extracts the signatures from the given PDF. + * + * @param pdf + * The PDF. + * @param blocks + * The List of Incremental Update blocks. Usually this comes from a + * preprocessing step. + * @param parameters + * The algorithm parameters. + * @return Returns a List of SignatureHolders containing the signatures. May + * be empty in case no signatures have been found. + * @throws VerificationFilterException + * Thrown if something goes wrong. 
+ */ + public List extractSignatureHolders(PdfDataSource pdf, List blocks, VerificationFilterParameters parameters) throws VerificationFilterException; + + /** + * Extracts the text signatures from the given free-text. + * + * @param text + * The free-text. + * @param parameters + * The algorithm parameters. + * @return Returns a List of SignatureHolders containing the signatures. May + * be empty in case no signatures have been found. + * @throws VerificationFilterException + * Thrown if something goes wrong. + */ + public List extractSignaturHolders(TextDataSource text, VerificationFilterParameters parameters) throws VerificationFilterException; +} diff --git a/src/main/java/at/gv/egiz/pdfas/framework/vfilter/VerificationFilterParameters.java b/src/main/java/at/gv/egiz/pdfas/framework/vfilter/VerificationFilterParameters.java new file mode 100644 index 0000000..c518fef --- /dev/null +++ b/src/main/java/at/gv/egiz/pdfas/framework/vfilter/VerificationFilterParameters.java @@ -0,0 +1,76 @@ +/** + * + */ +package at.gv.egiz.pdfas.framework.vfilter; + +/** + * The parameters of the VerificationFilter algorithm. + * + * @author wprinz + */ +public interface VerificationFilterParameters +{ + + /** + * Tells the VerificationFilter to extract binary signatures only. + * + *

+ * Not scanning for textual signatures allows the algorithm to skip text + * extraction and signature extraction, which are both time and memory + * intensive processes. + *

+ * + * @return Returns true if the VerificationFilter should extract binary + * signatures only. + */ + public boolean extractBinarySignaturesOnly(); + + /** + * Tells the VerificationFilter to assume that there are only signatures (and + * their Incremental Update blocks) younger than the original document. + * + *

+ * This is equivalent to saying that the document was not updated using an + * Incremental Update block other than a signature after being signed. The + * Incremental Update blocks after the original document contain only + * signatures (either text or binary). + *

+ *

+ * This is equivalent to saying that there exists no Incremental Update block + * that would render a text signature before it invalid. + *

+ *

+ * Under this assumption, the process of finding all text signatures + * simplifies to one text extraction of the whole document and one signature + * extraction. This is of course a massive performance gain. + *

+ *

+ * Actually the algorithm performs a text extraction of the whole document not + * including trailing binary signature Incremental Update blocks. This means + * that if the last n Incremental Update blocks of a document are binary, + * there is no use extracting text from them. + *

+ *

+ * Note that if there are Incremental Update blocks with text after a + * signature, and thus this assumption does not hold, the signatures older than this + * block will break. + *

+ * + * @return Returns true if the VerificationFilter should assume that there + * are only signature blocks after the original document. + */ + public boolean assumeOnlySignatureUpdateBlocks(); + + /** + * Tells the VerificationFilter to scan for old signatures in the rest text. + * + *

+ * The rest text is the text of the oldest text signature or the original + * document text if there is no text signature. + *

+ * + * @return Returns true if the VerificationFilter should scan for old text + * signatures in the rest text. + */ + public boolean scanForOldSignatures(); +} -- cgit v1.2.3