aboutsummaryrefslogtreecommitdiff
path: root/src/main/java/at/knowcenter/wag/egov/egiz/pdf/AbsoluteTextSignature.java
diff options
context:
space:
mode:
Diffstat (limited to 'src/main/java/at/knowcenter/wag/egov/egiz/pdf/AbsoluteTextSignature.java')
-rw-r--r--src/main/java/at/knowcenter/wag/egov/egiz/pdf/AbsoluteTextSignature.java656
1 files changed, 656 insertions, 0 deletions
diff --git a/src/main/java/at/knowcenter/wag/egov/egiz/pdf/AbsoluteTextSignature.java b/src/main/java/at/knowcenter/wag/egov/egiz/pdf/AbsoluteTextSignature.java
new file mode 100644
index 0000000..5523041
--- /dev/null
+++ b/src/main/java/at/knowcenter/wag/egov/egiz/pdf/AbsoluteTextSignature.java
@@ -0,0 +1,656 @@
+/**
+ * <copyright> Copyright (c) 2006 by Know-Center, Graz, Austria </copyright>
+ *
+ * This software is the confidential and proprietary information of Know-Center,
+ * Graz, Austria. You shall not disclose such Confidential Information and shall
+ * use it only in accordance with the terms of the license agreement you entered
+ * into with Know-Center.
+ *
+ * KNOW-CENTER MAKES NO REPRESENTATIONS OR WARRANTIES ABOUT THE SUITABILITY OF
+ * THE SOFTWARE, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, OR
+ * NON-INFRINGEMENT. KNOW-CENTER SHALL NOT BE LIABLE FOR ANY DAMAGES SUFFERED BY
+ * LICENSEE AS A RESULT OF USING, MODIFYING OR DISTRIBUTING THIS SOFTWARE OR ITS
+ * DERIVATIVES.
+ *
+ * $Id: AbsoluteTextSignature.java,v 1.1 2006/10/31 08:08:33 wprinz Exp $
+ */package at.knowcenter.wag.egov.egiz.pdf;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.List;
+import java.util.Vector;
+
+import org.apache.log4j.Logger;
+
+import at.knowcenter.wag.egov.egiz.PdfAS;
+import at.knowcenter.wag.egov.egiz.cfg.ConfigLogger;
+import at.knowcenter.wag.egov.egiz.exceptions.SignatureException;
+import at.knowcenter.wag.egov.egiz.exceptions.SignatureTypesException;
+import at.knowcenter.wag.egov.egiz.framework.FoundBlock;
+import at.knowcenter.wag.egov.egiz.framework.FoundKey;
+import at.knowcenter.wag.egov.egiz.sig.SignatureObject;
+import at.knowcenter.wag.egov.egiz.sig.SignatureTypeDefinition;
+import at.knowcenter.wag.egov.egiz.sig.SignatureTypes;
+
+/**
+ * Contains methods and helpers that implement the absolute text signature.
+ * @author wprinz
+ */
+public class AbsoluteTextSignature
+{
+
+ /**
+ * The logger definition.
+ */
+ private static final Logger logger = ConfigLogger.getLogger(AbsoluteTextSignature.class);
+
+
+ /**
+ * Extracts all signature holders from a given text.
+ *
+ * <p>
+ * First the latest signature holder is extracted. Then the latest signature
+ * holder in the rest text, which is the second latest one, is extracted. Then
+ * the third latest signature holder is extracted and so forth until no more
+ * signature holders are found.
+ * </p>
+ *
+ * @param text
+ * The text.
+ * @return Returns the List of extracted signature holders ordered by their
+ * date ascendingly (the lowest, earliest date first, the latest,
+ * newest date last). An empty list is returned if no signature
+ * holders were found.
+ * @throws SignatureException
+ * F.e.
+ * @throws SignatureTypesException
+ * F.e.
+ */
+ public static List extractSignatureHoldersFromText(String text) throws SignatureException, SignatureTypesException
+ {
+ List holders = new ArrayList();
+
+ String current_text = text;
+ for (;;)
+ {
+ SignatureHolder signature_holder = extractLatestBlock(current_text);
+ if (signature_holder == null)
+ {
+ break;
+ }
+
+ holders.add(0, signature_holder);
+
+ current_text = signature_holder.getSignedText();
+ }
+
+ return holders;
+ }
+
+ /**
+ * Extracts the latest signature block from the given text and creates a
+ * SignatureHolder object that can be verified.
+ *
+ * @param text
+ * The text.
+ * @return Returns the SignatureObject extracted from the text, or null, if no
+ * latest block was found.
+ * @throws SignatureException
+ * F.e.
+ * @throws SignatureTypesException
+ * F.e.
+ */
+ public static SignatureHolder extractLatestBlock(String text) throws SignatureException, SignatureTypesException
+ {
+ FoundBlock latest_block = findLatestBlock(text);
+ if (latest_block == null)
+ {
+ return null;
+ }
+
+ String reconstructed_text = cutOutBlock(text, latest_block);
+
+ SignatureObject so = createSignatureObjectFromFoundBlock(text, latest_block);
+ TextualSignatureHolder tsh = new TextualSignatureHolder(reconstructed_text, so);
+
+ return tsh;
+ }
+
+ /**
+ * Finds the latest signature block for a given text.
+ *
+ * <p>
+ * The latest block is the one with the highest, most recent date. Usually
+ * this block will be extracted (cut out) of the text which will result in the
+ * originally signed text of this signature to be verified using the cut out
+ * data.
+ * </p>
+ *
+ * @param text
+ * The text to be analyzed.
+ * @return Returns the latest found block or null, if there was none.
+ * @throws SignatureException
+ * F.e.
+ * @throws SignatureTypesException
+ * F.e.
+ */
+ public static FoundBlock findLatestBlock(String text) throws SignatureException, SignatureTypesException
+ {
+// try
+// {
+// writeTextToFile(text, new File("C:\\wprinz\\text.utf8.txt"));
+// }
+// catch (IOException e)
+// {
+// e.printStackTrace();
+// }
+
+ SignatureTypes sig_types = SignatureTypes.getInstance();
+ List signatureTypes_ = sig_types.getSignatureTypeDefinitions();
+
+ List found_candidates = new ArrayList();
+
+ for (int i = 0; i < signatureTypes_.size(); i++)
+ {
+ SignatureTypeDefinition block_type = (SignatureTypeDefinition) signatureTypes_.get(i);
+ List found_candidates_for_type = findPotentialSignaturesForProfile(text, block_type);
+
+ found_candidates.addAll(found_candidates_for_type);
+ }
+
+ if (found_candidates.isEmpty())
+ {
+ logger.debug("no candidates found at all");
+ return null;
+ }
+
+ logger.debug("checking block integrity");
+ for (int i = 0; i < found_candidates.size(); i++)
+ {
+ FoundBlock found_block = (FoundBlock) found_candidates.get(i);
+
+ String date_value = getDateValue(text, found_block);
+ logger.debug("date_value = " + date_value);
+ EGIZDate date = EGIZDate.parseFromString(date_value);
+
+ logger.debug("found_block = " + date + " - " + found_block);
+
+ checkBlockIntegrity(text, found_block);
+ }
+
+ sortFoundBlocksByDate(text, found_candidates);
+
+ logger.debug("sorted blocks:");
+ for (int i = 0; i < found_candidates.size(); i++)
+ {
+ FoundBlock found_block = (FoundBlock) found_candidates.get(i);
+
+ String date_value = getDateValue(text, found_block);
+ EGIZDate date = EGIZDate.parseFromString(date_value);
+
+ logger.debug(" #" + i + ": " + date + " - " + found_block);
+ }
+
+ List latest_blocks = filterLastDateEqualBlocks(text, found_candidates);
+ logger.debug("latest blocks:");
+ for (int i = 0; i < latest_blocks.size(); i++)
+ {
+ FoundBlock found_block = (FoundBlock) latest_blocks.get(i);
+
+ String date_value = getDateValue(text, found_block);
+ EGIZDate date = EGIZDate.parseFromString(date_value);
+
+ logger.debug(" #" + i + ": " + date + " - " + found_block);
+ }
+
+ boolean semantic_equality = PdfAS.checkForSemanticEquality(latest_blocks);
+ logger.debug("semantic_equality = " + semantic_equality);
+ if (!semantic_equality)
+ {
+ throw new SignatureException(314, "The latest blocks weren't semantically equal.");
+ }
+
+ FoundBlock latest_block = (FoundBlock) latest_blocks.get(0);
+ logger.debug("latest block = " + latest_block);
+ return latest_block;
+ }
+
+ /**
+ * Finds the List of potential blocks within the given text for the given
+ * profile.
+ *
+ * @param text
+ * The text, in which potential block are to be sought.
+ * @param block_type
+ * The profile for which the text is to be sought.
+ * @return Returns the List of potential FoundBlocks or an empty List if none
+ * could be found.
+ */
+ public static List findPotentialSignaturesForProfile(String text,
+ SignatureTypeDefinition block_type)
+ {
+ logger.debug("find potential signatures for " + block_type.getType());
+
+ List found_blocks = new ArrayList();
+
+ final boolean old_style = false;
+
+ Vector keys = block_type.getRevertSortedKeys();
+ Vector captions = block_type.getRevertSortedCaptions();
+
+ String last_key = (String) keys.get(0);
+ logger.debug("last_key = " + last_key);
+ String last_caption = (String) captions.get(0);
+ logger.debug("last_caption = " + last_caption);
+
+ List found_last_captions = findIndices(text, last_caption);
+ if (logger.isDebugEnabled())
+ {
+ logger.debug("found " + found_last_captions.size() + " last captions.");
+ for (int i = 0; i < found_last_captions.size(); i++)
+ {
+ logger.debug(" found last caption at index " + found_last_captions.get(i));
+ }
+ }
+
+ for (int lci = 0; lci < found_last_captions.size(); lci++)
+ {
+ int last_caption_index = ((Integer) found_last_captions.get(lci)).intValue();
+ logger.debug("resolving signature block from last caption index " + last_caption_index);
+
+ int potential_block_end = findEndOfValue(text, last_caption_index);
+ logger.debug("potential_block_end = " + potential_block_end);
+
+ List found_keys = PdfAS.findBlockInText(text.substring(0, potential_block_end), block_type, old_style); // findRestKeys(text,
+ // keys,
+ // captions,
+ // last_caption_index);
+
+ if (found_keys == null)
+ {
+ logger.debug("Not all other captions could be found for the last_caption_index " + last_caption_index + " ==> discarding this index.");
+
+ continue;
+ }
+
+ // sort found keys ascendingly
+ PdfAS.sortFoundKeysAscendingly(found_keys);
+
+ boolean reverse_check_ok = reverseCheckFoundKeys(text, found_keys);
+ if (!reverse_check_ok)
+ {
+ logger.debug("The reverse check ruled this list of found keys out ==> they are discarded.");
+
+ continue;
+ }
+
+ logger.debug("The reverse check proved this list of found keys out ==> adding them as potential candidates.");
+
+ FoundBlock found_block = new FoundBlock();
+ found_block.std = block_type;
+ found_block.found_keys = found_keys;
+ found_block.end_index = findEndOfValue(text, last_caption_index);
+ found_blocks.add(found_block);
+ }
+
+ logger.debug("found " + found_blocks.size() + " potential signatures for " + block_type.getType());
+ return found_blocks;
+ }
+
+ /**
+ * Finds all indices of the given subtext within a given text.
+ *
+ * <p>
+ * This is usually used to find the indices of the last captions.
+ * </p>
+ *
+ * @param text
+ * The text to be searched.
+ * @param subtext
+ * The subtext to be sought.
+ * @return Returns the List of found indices.
+ */
+ public static List findIndices(String text, String subtext)
+ {
+ List found_indices = new ArrayList();
+ int search_from_index = 0;
+ for (;;)
+ {
+ int found_index = text.indexOf(subtext, search_from_index);
+ if (found_index < 0)
+ {
+ break;
+ }
+ found_indices.add(new Integer(found_index));
+ search_from_index = found_index + subtext.length();
+ }
+ return found_indices;
+ }
+
+ /**
+ * Finds the other keys/captions according to their order starting from the
+ * last_caption index upwards.
+ *
+ * @param text
+ * The text.
+ * @param keys
+ * The list of keys.
+ * @param captions
+ * The list of captions.
+ * @param last_caption_index
+ * The index of the last caption.
+ * @return Returns the List of found keys, if all keys could be found, or null
+ * if not all keys could be found.
+ */
+ public static List findRestKeys(String text, List keys, List captions,
+ int last_caption_index)
+ {
+ List found_keys = new ArrayList();
+
+ FoundKey last_caption_found_key = new FoundKey((String) keys.get(0), (String) captions.get(0), last_caption_index);
+ found_keys.add(last_caption_found_key);
+
+ String rest_text = text.substring(0, last_caption_index);
+
+ for (int i = 1; i < captions.size(); i++)
+ {
+ String sought_caption = (String) captions.get(i);
+ int index = rest_text.lastIndexOf(sought_caption);
+
+ if (index < 0)
+ {
+ return null;
+ }
+
+ FoundKey found_key = new FoundKey((String) keys.get(i), (String) captions.get(i), index);
+ found_keys.add(0, found_key);
+
+ rest_text = rest_text.substring(0, index);
+ }
+
+ return found_keys;
+ }
+
+ /**
+ * Performs a reverse (top to bottom) search for the found keys and checks
+ * that these indices are the same as those that were found during the regular
+ * (bottom up) search.
+ * <p>
+ * If a reverse check proves that the found keys are not at the same positions
+ * as during regular search, this list of found keys should be discarded.
+ * </p>
+ *
+ * @param text
+ * The text.
+ * @param found_keys
+ * The found keys to be reversely checked.
+ * @return Returns true, if all (also the non required) captions could be
+ * found at the same indices as during regular search, false
+ * otherwise.
+ */
+ public static boolean reverseCheckFoundKeys(String text, List found_keys)
+ {
+ int search_from_index = ((FoundKey) found_keys.get(0)).start_index;
+ for (int i = 0; i < found_keys.size(); i++)
+ {
+ FoundKey found_key = (FoundKey) found_keys.get(i);
+
+ int reverse_found_index = text.indexOf(found_key.caption, search_from_index);
+ if (reverse_found_index < 0)
+ {
+ throw new RuntimeException("The caption " + found_key.caption + " wasn't found in the text during reverse checking - there is something wrong.");
+ }
+
+ if (reverse_found_index != found_key.start_index)
+ {
+ logger.debug("The index for caption " + found_key.caption + " wasn't proved during reverse checking.");
+ return false;
+ }
+
+ search_from_index = found_key.start_index + found_key.caption.length();
+ }
+
+ return true;
+ }
+
+ /**
+ * Finds the end of the value in the text.
+ *
+ * <p>
+ * This simply scans for a '\n' from a given start index. The line up to and
+ * inclusive the '\n' is considered to be the value.
+ * </p>
+ * <p>
+ * Note that this method does NOT find the accurate value, if the value goes
+ * over multiple lines! This may bear a serious problem. Usually this method
+ * is only used to finding the end of the last value in a found block, because
+ * mid- values are exactly determined by their start index and the start of
+ * the next caption. Nevertheless, if the last value spans over multiple
+ * lines, this method will not retrieve it completely.
+ * </p>
+ *
+ * @param text
+ * The text.
+ * @param start_index
+ * The start index from where the end of the value is sought.
+ * @return Returns the end index of the value, which is the index of the first
+ * character not belonging to the value anymore (the character after
+ * the '\n').
+ */
+ public static int findEndOfValue(String text, int start_index)
+ {
+ int newline_index = text.indexOf('\n', start_index);
+ if (newline_index < 0)
+ {
+ return text.length();
+ }
+ return newline_index + 1;
+ }
+
+ /**
+ * Checks the integrity of a found block.
+ *
+ * <p>
+ * This is an assertive function.
+ * </p>
+ *
+ * @param text
+ * The text.
+ * @param found_block
+ * The found block.
+ */
+ public static void checkBlockIntegrity(String text, FoundBlock found_block)
+ {
+ for (int i = 0; i < found_block.found_keys.size() - 1; i++)
+ {
+ FoundKey this_key = (FoundKey) found_block.found_keys.get(i);
+ FoundKey next_key = (FoundKey) found_block.found_keys.get(i + 1);
+
+ int this_end_index = findEndOfValue(text, this_key.start_index);
+ if (this_end_index != next_key.start_index)
+ {
+ logger.warn("multi line value: " + this_key);
+ // throw new RuntimeException("The end index of found key " + this_key +
+ // " doesn't match the start index of found key " + next_key);
+ }
+ }
+
+ FoundKey last_key = (FoundKey) found_block.found_keys.get(found_block.found_keys.size() - 1);
+ if (findEndOfValue(text, last_key.start_index) != found_block.end_index)
+ {
+ throw new RuntimeException("The end index of last key " + last_key + " doesn't match the end index of the block " + found_block);
+ }
+
+ }
+
+ /**
+ * Cuts out the given found block from the text.
+ *
+ * @param text
+ * The text.
+ * @param block
+ * The found block.
+ * @return Returns the rest text without the block.
+ */
+ public static String cutOutBlock(String text, FoundBlock block)
+ {
+ int block_start_index = ((FoundKey) block.found_keys.get(0)).getStartIndex();
+ int block_end_index = block.end_index;
+
+ if (block_end_index == text.length())
+ {
+ // if the block is at the end of the text, remove the "\n" before the
+ // block as well.
+ String pre = text.substring(0, block_start_index - 1);
+ return pre;
+ }
+
+ String pre = text.substring(0, block_start_index);
+ String post = text.substring(block_end_index);
+
+ String rest_text = pre + post;
+ return rest_text;
+ }
+
+ /**
+ * Returns the value of the date field as String.
+ *
+ * @param text
+ * The text.
+ * @param block
+ * The found block.
+ * @return Returns the date value.
+ */
+ public static String getDateValue(String text, FoundBlock block)
+ {
+ FoundKey date_key = block.getDateFoundKey();
+ int date_value_start_index = date_key.start_index + date_key.caption.length();
+ int date_value_end_index = findEndOfValue(text, date_value_start_index);
+ String date_value = text.substring(date_value_start_index, date_value_end_index).trim();
+
+ return date_value;
+ }
+
+ /**
+ * Creates a SignatureObject from a found block by extracting the
+ * corresponding values.
+ *
+ * @param text
+ * The text.
+ * @param found_block
+ * The found block.
+ * @return Returns the created SignatureObject.
+ * @throws SignatureTypesException
+ * F.e.
+ * @throws SignatureException
+ * F.e.
+ */
+ public static SignatureObject createSignatureObjectFromFoundBlock(
+ String text, FoundBlock found_block) throws SignatureTypesException, SignatureException
+ {
+ SignatureObject signatureObject = new SignatureObject();
+
+ signatureObject.setSigType(found_block.std.getType());
+ signatureObject.initByType();
+
+ int end_index = found_block.end_index;
+ for (int i = found_block.found_keys.size() - 1; i >= 0; i--)
+ {
+ FoundKey cur_key = (FoundKey) found_block.found_keys.get(i);
+ int start_index = cur_key.getStartIndex() + cur_key.caption.length();
+
+ String value = text.substring(start_index, end_index);
+
+ signatureObject.setSigValueCaption(cur_key.getKey(), value, cur_key.caption);
+
+ end_index = cur_key.getStartIndex();
+ }
+
+ return signatureObject;
+
+ }
+
+ /**
+ * Parses the EGIZDate from a found block and the given text.
+ *
+ * @param text
+ * The text.
+ * @param found_block
+ * The found block.
+ * @return Returns the parsed EGIZDate.
+ */
+ public static EGIZDate getDateFromFoundBlock(String text,
+ FoundBlock found_block)
+ {
+ String date_value = getDateValue(text, found_block);
+ EGIZDate date = EGIZDate.parseFromString(date_value);
+ return date;
+ }
+
+ /**
+ * Sorts the List of found blocks by date.
+ *
+ * @param text
+ * The text.
+ * @param found_blocks
+ * The List of found blocks.
+ */
+ public static void sortFoundBlocksByDate(final String text, List found_blocks)
+ {
+ Collections.sort(found_blocks, new Comparator()
+ {
+ public int compare(Object arg0, Object arg1)
+ {
+ FoundBlock fb0 = (FoundBlock) arg0;
+ FoundBlock fb1 = (FoundBlock) arg1;
+
+ EGIZDate date0 = getDateFromFoundBlock(text, fb0);
+ EGIZDate date1 = getDateFromFoundBlock(text, fb1);
+ return date0.compareTo(date1);
+ }
+ });
+ }
+
+ /**
+ * Given a List of FoundBlock objects, this method returns the last blocks of
+ * this list that have the same date.
+ *
+ * <p>
+ * Usually a date sorted list (earliest first, latest last) will be provided
+ * to this method. Then the last date equal blocks are returned, which are the
+ * last blocks.
+ * </p>
+ *
+ * @param text
+ * The text to retrieve the values of the fields from.
+ * @param found_blocks
+ * The List of FoundBlock objects.
+ * @return Returns the List of the last date equal blocks.
+ */
+ public static List filterLastDateEqualBlocks(String text, List found_blocks)
+ {
+ List latest_blocks = new ArrayList();
+
+ latest_blocks.add(found_blocks.get(found_blocks.size() - 1));
+
+ for (int i = found_blocks.size() - 2; i >= 0; i--)
+ {
+ FoundBlock this_block = (FoundBlock) found_blocks.get(i);
+ FoundBlock succ_block = (FoundBlock) found_blocks.get(i + 1);
+
+ EGIZDate this_date = getDateFromFoundBlock(text, this_block);
+ EGIZDate succ_date = getDateFromFoundBlock(text, succ_block);
+
+ if (!this_date.equals(succ_date))
+ {
+ break;
+ }
+ latest_blocks.add(0, this_block);
+ }
+
+ return latest_blocks;
+ }
+
+}