From 6025b6016517c6d898d8957d1d7e03ba71431912 Mon Sep 17 00:00:00 2001 From: tknall Date: Fri, 1 Dec 2006 12:20:24 +0000 Subject: Initial import of release 2.2. git-svn-id: https://joinup.ec.europa.eu/svn/pdf-as/trunk@4 7b5415b0-85f9-ee4d-85bd-d5d0c3b42d1c --- .../wag/egov/egiz/pdf/AbsoluteTextSignature.java | 656 +++++++++++++++++++++ 1 file changed, 656 insertions(+) create mode 100644 src/main/java/at/knowcenter/wag/egov/egiz/pdf/AbsoluteTextSignature.java (limited to 'src/main/java/at/knowcenter/wag/egov/egiz/pdf/AbsoluteTextSignature.java') diff --git a/src/main/java/at/knowcenter/wag/egov/egiz/pdf/AbsoluteTextSignature.java b/src/main/java/at/knowcenter/wag/egov/egiz/pdf/AbsoluteTextSignature.java new file mode 100644 index 0000000..5523041 --- /dev/null +++ b/src/main/java/at/knowcenter/wag/egov/egiz/pdf/AbsoluteTextSignature.java @@ -0,0 +1,656 @@ +/** + * Copyright (c) 2006 by Know-Center, Graz, Austria + * + * This software is the confidential and proprietary information of Know-Center, + * Graz, Austria. You shall not disclose such Confidential Information and shall + * use it only in accordance with the terms of the license agreement you entered + * into with Know-Center. + * + * KNOW-CENTER MAKES NO REPRESENTATIONS OR WARRANTIES ABOUT THE SUITABILITY OF + * THE SOFTWARE, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE + * IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, OR + * NON-INFRINGEMENT. KNOW-CENTER SHALL NOT BE LIABLE FOR ANY DAMAGES SUFFERED BY + * LICENSEE AS A RESULT OF USING, MODIFYING OR DISTRIBUTING THIS SOFTWARE OR ITS + * DERIVATIVES. + * + * $Id: AbsoluteTextSignature.java,v 1.1 2006/10/31 08:08:33 wprinz Exp $ + */package at.knowcenter.wag.egov.egiz.pdf; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.Comparator; +import java.util.List; +import java.util.Vector; + +import org.apache.log4j.Logger; + +import at.knowcenter.wag.egov.egiz.PdfAS; +import at.knowcenter.wag.egov.egiz.cfg.ConfigLogger; +import at.knowcenter.wag.egov.egiz.exceptions.SignatureException; +import at.knowcenter.wag.egov.egiz.exceptions.SignatureTypesException; +import at.knowcenter.wag.egov.egiz.framework.FoundBlock; +import at.knowcenter.wag.egov.egiz.framework.FoundKey; +import at.knowcenter.wag.egov.egiz.sig.SignatureObject; +import at.knowcenter.wag.egov.egiz.sig.SignatureTypeDefinition; +import at.knowcenter.wag.egov.egiz.sig.SignatureTypes; + +/** + * Contains methods and helpers that implement the absolute text signature. + * @author wprinz + */ +public class AbsoluteTextSignature +{ + + /** + * The logger definition. + */ + private static final Logger logger = ConfigLogger.getLogger(AbsoluteTextSignature.class); + + + /** + * Extracts all signature holders from a given text. + * + *

+ * First the latest signature holder is extracted. Then the latest signature + * holder in the rest text, which is the second latest one, is extracted. Then + * the third latest signature holder is extracted and so forth until no more + * signature holders are found. + *

+ * + * @param text + * The text. + * @return Returns the List of extracted signature holders ordered by their + * date ascendingly (the lowest, earliest date first, the latest, + * newest date last). An empty list is returned if no signature + * holders were found. + * @throws SignatureException + * F.e. + * @throws SignatureTypesException + * F.e. + */ + public static List extractSignatureHoldersFromText(String text) throws SignatureException, SignatureTypesException + { + List holders = new ArrayList(); + + String current_text = text; + for (;;) + { + SignatureHolder signature_holder = extractLatestBlock(current_text); + if (signature_holder == null) + { + break; + } + + holders.add(0, signature_holder); + + current_text = signature_holder.getSignedText(); + } + + return holders; + } + + /** + * Extracts the latest signature block from the given text and creates a + * SignatureHolder object that can be verified. + * + * @param text + * The text. + * @return Returns the SignatureObject extracted from the text, or null, if no + * latest block was found. + * @throws SignatureException + * F.e. + * @throws SignatureTypesException + * F.e. + */ + public static SignatureHolder extractLatestBlock(String text) throws SignatureException, SignatureTypesException + { + FoundBlock latest_block = findLatestBlock(text); + if (latest_block == null) + { + return null; + } + + String reconstructed_text = cutOutBlock(text, latest_block); + + SignatureObject so = createSignatureObjectFromFoundBlock(text, latest_block); + TextualSignatureHolder tsh = new TextualSignatureHolder(reconstructed_text, so); + + return tsh; + } + + /** + * Finds the latest signature block for a given text. + * + *

+ * The latest block is the one with the highest, most recent date. Usually + * this block will be extracted (cut out) of the text which will result in the + * originally signed text of this signature to be verified using the cut out + * data. + *

+ * + * @param text + * The text to be analyzed. + * @return Returns the latest found block or null, if there was none. + * @throws SignatureException + * F.e. + * @throws SignatureTypesException + * F.e. + */ + public static FoundBlock findLatestBlock(String text) throws SignatureException, SignatureTypesException + { +// try +// { +// writeTextToFile(text, new File("C:\\wprinz\\text.utf8.txt")); +// } +// catch (IOException e) +// { +// e.printStackTrace(); +// } + + SignatureTypes sig_types = SignatureTypes.getInstance(); + List signatureTypes_ = sig_types.getSignatureTypeDefinitions(); + + List found_candidates = new ArrayList(); + + for (int i = 0; i < signatureTypes_.size(); i++) + { + SignatureTypeDefinition block_type = (SignatureTypeDefinition) signatureTypes_.get(i); + List found_candidates_for_type = findPotentialSignaturesForProfile(text, block_type); + + found_candidates.addAll(found_candidates_for_type); + } + + if (found_candidates.isEmpty()) + { + logger.debug("no candidates found at all"); + return null; + } + + logger.debug("checking block integrity"); + for (int i = 0; i < found_candidates.size(); i++) + { + FoundBlock found_block = (FoundBlock) found_candidates.get(i); + + String date_value = getDateValue(text, found_block); + logger.debug("date_value = " + date_value); + EGIZDate date = EGIZDate.parseFromString(date_value); + + logger.debug("found_block = " + date + " - " + found_block); + + checkBlockIntegrity(text, found_block); + } + + sortFoundBlocksByDate(text, found_candidates); + + logger.debug("sorted blocks:"); + for (int i = 0; i < found_candidates.size(); i++) + { + FoundBlock found_block = (FoundBlock) found_candidates.get(i); + + String date_value = getDateValue(text, found_block); + EGIZDate date = EGIZDate.parseFromString(date_value); + + logger.debug(" #" + i + ": " + date + " - " + found_block); + } + + List latest_blocks = filterLastDateEqualBlocks(text, found_candidates); + logger.debug("latest blocks:"); + for (int i = 0; i < latest_blocks.size(); i++) + { + FoundBlock found_block = (FoundBlock) latest_blocks.get(i); + + String date_value = getDateValue(text, found_block); + EGIZDate date = EGIZDate.parseFromString(date_value); + + logger.debug(" #" + i + ": " + date + " - " + found_block); + } + + boolean semantic_equality = PdfAS.checkForSemanticEquality(latest_blocks); + logger.debug("semantic_equality = " + semantic_equality); + if (!semantic_equality) + { + throw new SignatureException(314, "The latest blocks weren't semantically equal."); + } + + FoundBlock latest_block = (FoundBlock) latest_blocks.get(0); + logger.debug("latest block = " + latest_block); + return latest_block; + } + + /** + * Finds the List of potential blocks within the given text for the given + * profile. + * + * @param text + * The text, in which potential block are to be sought. + * @param block_type + * The profile for which the text is to be sought. + * @return Returns the List of potential FoundBlocks or an empty List if none + * could be found. + */ + public static List findPotentialSignaturesForProfile(String text, + SignatureTypeDefinition block_type) + { + logger.debug("find potential signatures for " + block_type.getType()); + + List found_blocks = new ArrayList(); + + final boolean old_style = false; + + Vector keys = block_type.getRevertSortedKeys(); + Vector captions = block_type.getRevertSortedCaptions(); + + String last_key = (String) keys.get(0); + logger.debug("last_key = " + last_key); + String last_caption = (String) captions.get(0); + logger.debug("last_caption = " + last_caption); + + List found_last_captions = findIndices(text, last_caption); + if (logger.isDebugEnabled()) + { + logger.debug("found " + found_last_captions.size() + " last captions."); + for (int i = 0; i < found_last_captions.size(); i++) + { + logger.debug(" found last caption at index " + found_last_captions.get(i)); + } + } + + for (int lci = 0; lci < found_last_captions.size(); lci++) + { + int last_caption_index = ((Integer) found_last_captions.get(lci)).intValue(); + logger.debug("resolving signature block from last caption index " + last_caption_index); + + int potential_block_end = findEndOfValue(text, last_caption_index); + logger.debug("potential_block_end = " + potential_block_end); + + List found_keys = PdfAS.findBlockInText(text.substring(0, potential_block_end), block_type, old_style); // findRestKeys(text, + // keys, + // captions, + // last_caption_index); + + if (found_keys == null) + { + logger.debug("Not all other captions could be found for the last_caption_index " + last_caption_index + " ==> discarding this index."); + + continue; + } + + // sort found keys ascendingly + PdfAS.sortFoundKeysAscendingly(found_keys); + + boolean reverse_check_ok = reverseCheckFoundKeys(text, found_keys); + if (!reverse_check_ok) + { + logger.debug("The reverse check ruled this list of found keys out ==> they are discarded."); + + continue; + } + + logger.debug("The reverse check proved this list of found keys out ==> adding them as potential candidates."); + + FoundBlock found_block = new FoundBlock(); + found_block.std = block_type; + found_block.found_keys = found_keys; + found_block.end_index = findEndOfValue(text, last_caption_index); + found_blocks.add(found_block); + } + + logger.debug("found " + found_blocks.size() + " potential signatures for " + block_type.getType()); + return found_blocks; + } + + /** + * Finds all indices of the given subtext within a given text. + * + *

+ * This is usually used to find the indices of the last captions. + *

+ * + * @param text + * The text to be searched. + * @param subtext + * The subtext to be sought. + * @return Returns the List of found indices. + */ + public static List findIndices(String text, String subtext) + { + List found_indices = new ArrayList(); + int search_from_index = 0; + for (;;) + { + int found_index = text.indexOf(subtext, search_from_index); + if (found_index < 0) + { + break; + } + found_indices.add(new Integer(found_index)); + search_from_index = found_index + subtext.length(); + } + return found_indices; + } + + /** + * Finds the other keys/captions according to their order starting from the + * last_caption index upwards. + * + * @param text + * The text. + * @param keys + * The list of keys. + * @param captions + * The list of captions. + * @param last_caption_index + * The index of the last caption. + * @return Returns the List of found keys, if all keys could be found, or null + * if not all keys could be found. + */ + public static List findRestKeys(String text, List keys, List captions, + int last_caption_index) + { + List found_keys = new ArrayList(); + + FoundKey last_caption_found_key = new FoundKey((String) keys.get(0), (String) captions.get(0), last_caption_index); + found_keys.add(last_caption_found_key); + + String rest_text = text.substring(0, last_caption_index); + + for (int i = 1; i < captions.size(); i++) + { + String sought_caption = (String) captions.get(i); + int index = rest_text.lastIndexOf(sought_caption); + + if (index < 0) + { + return null; + } + + FoundKey found_key = new FoundKey((String) keys.get(i), (String) captions.get(i), index); + found_keys.add(0, found_key); + + rest_text = rest_text.substring(0, index); + } + + return found_keys; + } + + /** + * Performs a reverse (top to bottom) search for the found keys and checks + * that these indices are the same as those that were found during the regular + * (bottom up) search. + *

+ * If a reverse check proves that the found keys are not at the same positions + * as during regular search, this list of found keys should be discarded. + *

+ * + * @param text + * The text. + * @param found_keys + * The found keys to be reversely checked. + * @return Returns true, if all (also the non required) captions could be + * found at the same indices as during regular search, false + * otherwise. + */ + public static boolean reverseCheckFoundKeys(String text, List found_keys) + { + int search_from_index = ((FoundKey) found_keys.get(0)).start_index; + for (int i = 0; i < found_keys.size(); i++) + { + FoundKey found_key = (FoundKey) found_keys.get(i); + + int reverse_found_index = text.indexOf(found_key.caption, search_from_index); + if (reverse_found_index < 0) + { + throw new RuntimeException("The caption " + found_key.caption + " wasn't found in the text during reverse checking - there is something wrong."); + } + + if (reverse_found_index != found_key.start_index) + { + logger.debug("The index for caption " + found_key.caption + " wasn't proved during reverse checking."); + return false; + } + + search_from_index = found_key.start_index + found_key.caption.length(); + } + + return true; + } + + /** + * Finds the end of the value in the text. + * + *

+ * This simply scans for a '\n' from a given start index. The line up to and + * inclusive the '\n' is considered to be the value. + *

+ *

+ * Note that this method does NOT find the accurate value, if the value goes + * over multiple lines! This may bear a serious problem. Usually this method + * is only used to finding the end of the last value in a found block, because + * mid- values are exactly determined by their start index and the start of + * the next caption. Nevertheless, if the last value spans over multiple + * lines, this method will not retrieve it completely. + *

+ * + * @param text + * The text. + * @param start_index + * The start index from where the end of the value is sought. + * @return Returns the end index of the value, which is the index of the first + * character not belonging to the value anymore (the character after + * the '\n'). + */ + public static int findEndOfValue(String text, int start_index) + { + int newline_index = text.indexOf('\n', start_index); + if (newline_index < 0) + { + return text.length(); + } + return newline_index + 1; + } + + /** + * Checks the integrity of a found block. + * + *

+ * This is an assertive function. + *

+ * + * @param text + * The text. + * @param found_block + * The found block. + */ + public static void checkBlockIntegrity(String text, FoundBlock found_block) + { + for (int i = 0; i < found_block.found_keys.size() - 1; i++) + { + FoundKey this_key = (FoundKey) found_block.found_keys.get(i); + FoundKey next_key = (FoundKey) found_block.found_keys.get(i + 1); + + int this_end_index = findEndOfValue(text, this_key.start_index); + if (this_end_index != next_key.start_index) + { + logger.warn("multi line value: " + this_key); + // throw new RuntimeException("The end index of found key " + this_key + + // " doesn't match the start index of found key " + next_key); + } + } + + FoundKey last_key = (FoundKey) found_block.found_keys.get(found_block.found_keys.size() - 1); + if (findEndOfValue(text, last_key.start_index) != found_block.end_index) + { + throw new RuntimeException("The end index of last key " + last_key + " doesn't match the end index of the block " + found_block); + } + + } + + /** + * Cuts out the given found block from the text. + * + * @param text + * The text. + * @param block + * The found block. + * @return Returns the rest text without the block. + */ + public static String cutOutBlock(String text, FoundBlock block) + { + int block_start_index = ((FoundKey) block.found_keys.get(0)).getStartIndex(); + int block_end_index = block.end_index; + + if (block_end_index == text.length()) + { + // if the block is at the end of the text, remove the "\n" before the + // block as well. + String pre = text.substring(0, block_start_index - 1); + return pre; + } + + String pre = text.substring(0, block_start_index); + String post = text.substring(block_end_index); + + String rest_text = pre + post; + return rest_text; + } + + /** + * Returns the value of the date field as String. + * + * @param text + * The text. + * @param block + * The found block. + * @return Returns the date value. + */ + public static String getDateValue(String text, FoundBlock block) + { + FoundKey date_key = block.getDateFoundKey(); + int date_value_start_index = date_key.start_index + date_key.caption.length(); + int date_value_end_index = findEndOfValue(text, date_value_start_index); + String date_value = text.substring(date_value_start_index, date_value_end_index).trim(); + + return date_value; + } + + /** + * Creates a SignatureObject from a found block by extracting the + * corresponding values. + * + * @param text + * The text. + * @param found_block + * The found block. + * @return Returns the created SignatureObject. + * @throws SignatureTypesException + * F.e. + * @throws SignatureException + * F.e. + */ + public static SignatureObject createSignatureObjectFromFoundBlock( + String text, FoundBlock found_block) throws SignatureTypesException, SignatureException + { + SignatureObject signatureObject = new SignatureObject(); + + signatureObject.setSigType(found_block.std.getType()); + signatureObject.initByType(); + + int end_index = found_block.end_index; + for (int i = found_block.found_keys.size() - 1; i >= 0; i--) + { + FoundKey cur_key = (FoundKey) found_block.found_keys.get(i); + int start_index = cur_key.getStartIndex() + cur_key.caption.length(); + + String value = text.substring(start_index, end_index); + + signatureObject.setSigValueCaption(cur_key.getKey(), value, cur_key.caption); + + end_index = cur_key.getStartIndex(); + } + + return signatureObject; + + } + + /** + * Parses the EGIZDate from a found block and the given text. + * + * @param text + * The text. + * @param found_block + * The found block. + * @return Returns the parsed EGIZDate. + */ + public static EGIZDate getDateFromFoundBlock(String text, + FoundBlock found_block) + { + String date_value = getDateValue(text, found_block); + EGIZDate date = EGIZDate.parseFromString(date_value); + return date; + } + + /** + * Sorts the List of found blocks by date. + * + * @param text + * The text. + * @param found_blocks + * The List of found blocks. + */ + public static void sortFoundBlocksByDate(final String text, List found_blocks) + { + Collections.sort(found_blocks, new Comparator() + { + public int compare(Object arg0, Object arg1) + { + FoundBlock fb0 = (FoundBlock) arg0; + FoundBlock fb1 = (FoundBlock) arg1; + + EGIZDate date0 = getDateFromFoundBlock(text, fb0); + EGIZDate date1 = getDateFromFoundBlock(text, fb1); + return date0.compareTo(date1); + } + }); + } + + /** + * Given a List of FoundBlock objects, this method returns the last blocks of + * this list that have the same date. + * + *

+ * Usually a date sorted list (earliest first, latest last) will be provided + * to this method. Then the last date equal blocks are returned, which are the + * last blocks. + *

+ * + * @param text + * The text to retrieve the values of the fields from. + * @param found_blocks + * The List of FoundBlock objects. + * @return Returns the List of the last date equal blocks. + */ + public static List filterLastDateEqualBlocks(String text, List found_blocks) + { + List latest_blocks = new ArrayList(); + + latest_blocks.add(found_blocks.get(found_blocks.size() - 1)); + + for (int i = found_blocks.size() - 2; i >= 0; i--) + { + FoundBlock this_block = (FoundBlock) found_blocks.get(i); + FoundBlock succ_block = (FoundBlock) found_blocks.get(i + 1); + + EGIZDate this_date = getDateFromFoundBlock(text, this_block); + EGIZDate succ_date = getDateFromFoundBlock(text, succ_block); + + if (!this_date.equals(succ_date)) + { + break; + } + latest_blocks.add(0, this_block); + } + + return latest_blocks; + } + +} -- cgit v1.2.3