From 535a04fa05f739ec16dd81666e3b0f82dfbd442d Mon Sep 17 00:00:00 2001 From: tknall Date: Wed, 9 Jan 2013 15:41:29 +0000 Subject: pdf-as-lib maven project files moved to pdf-as-lib git-svn-id: https://joinup.ec.europa.eu/svn/pdf-as/pdf-as/trunk@926 7b5415b0-85f9-ee4d-85bd-d5d0c3b42d1c --- .../wag/egov/egiz/pdf/AbsoluteTextSignature.java | 956 --------------------- 1 file changed, 956 deletions(-) delete mode 100644 src/main/java/at/knowcenter/wag/egov/egiz/pdf/AbsoluteTextSignature.java (limited to 'src/main/java/at/knowcenter/wag/egov/egiz/pdf/AbsoluteTextSignature.java') diff --git a/src/main/java/at/knowcenter/wag/egov/egiz/pdf/AbsoluteTextSignature.java b/src/main/java/at/knowcenter/wag/egov/egiz/pdf/AbsoluteTextSignature.java deleted file mode 100644 index fd59d34..0000000 --- a/src/main/java/at/knowcenter/wag/egov/egiz/pdf/AbsoluteTextSignature.java +++ /dev/null @@ -1,956 +0,0 @@ -/** - * Copyright 2006 by Know-Center, Graz, Austria - * PDF-AS has been contracted by the E-Government Innovation Center EGIZ, a - * joint initiative of the Federal Chancellery Austria and Graz University of - * Technology. - * - * Licensed under the EUPL, Version 1.1 or - as soon they will be approved by - * the European Commission - subsequent versions of the EUPL (the "Licence"); - * You may not use this work except in compliance with the Licence. - * You may obtain a copy of the Licence at: - * http://www.osor.eu/eupl/ - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the Licence is distributed on an "AS IS" basis, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the Licence for the specific language governing permissions and - * limitations under the Licence. - * - * This product combines work with different licenses. See the "NOTICE" text - * file for details on the various modules and licenses. - * The "NOTICE" text file is part of the distribution. 
Any derivative works - * that you distribute must include a readable copy of the "NOTICE" text file. - * - * $Id: AbsoluteTextSignature.java,v 1.1 2006/10/31 08:08:33 wprinz Exp $ - */ -package at.knowcenter.wag.egov.egiz.pdf; - -import java.util.ArrayList; -import java.util.Collections; -import java.util.Comparator; -import java.util.Iterator; -import java.util.List; -import java.util.Vector; - -import org.apache.log4j.Logger; - -import at.knowcenter.wag.egov.egiz.PdfAS; -import at.knowcenter.wag.egov.egiz.cfg.ConfigLogger; -import at.knowcenter.wag.egov.egiz.exceptions.SignatureException; -import at.knowcenter.wag.egov.egiz.exceptions.SignatureTypesException; -import at.knowcenter.wag.egov.egiz.framework.FoundBlock; -import at.knowcenter.wag.egov.egiz.framework.FoundKey; -import at.knowcenter.wag.egov.egiz.sig.SignatureObject; -import at.knowcenter.wag.egov.egiz.sig.SignatureTypeDefinition; -import at.knowcenter.wag.egov.egiz.sig.SignatureTypes; - -/** - * Contains methods and helpers that implement the absolute text signature. - * - * @author wprinz - */ -public class AbsoluteTextSignature -{ - - /** - * The logger definition. - */ - private static final Logger logger = ConfigLogger.getLogger(AbsoluteTextSignature.class); - - /** - * Returns a List of SignatureTypeDefinitions that can be extracted from text. - * - *

- * These SignatureTypeDefinitions are all text extractable, which means that they define all required fields as visible. - *

- *

- * This method filters out all SignatureTypeDefinitions that are not text extractable. - *

- * - * @return Returns a List of SignatureTypeDefinitions that can be extracted from text. - * @throws SignatureTypesException F.e. - */ - public static List getSignatureTypesForTextAnalysis() throws SignatureTypesException - { - SignatureTypes sig_types = SignatureTypes.getInstance(); - List allSignatureTypes = sig_types.getSignatureTypeDefinitions(); - - List textSignatureTypes = new ArrayList(allSignatureTypes.size()); - Iterator it = allSignatureTypes.iterator(); - while (it.hasNext()) - { - SignatureTypeDefinition std = (SignatureTypeDefinition) it.next(); - if (!std.isTextExtractable()) - { - logger.debug("The profile " + std.getType() + " is not text extractable and is thereby not used for text analysis."); - continue; - } - textSignatureTypes.add(std); - } - - return textSignatureTypes; - } - - /** - * Extracts all signature holders from a given text. - * - *

- * First the latest signature holder is extracted. Then the latest signature - * holder in the rest text, which is the second latest one, is extracted. Then - * the third latest signature holder is extracted and so forth until no more - * signature holders are found. - *

- * - * @param text - * The text. - * @return Returns the List of extracted signature holders ordered by their - * date ascendingly (the lowest, earliest date first, the latest, - * newest date last). An empty list is returned if no signature - * holders were found. - * @throws SignatureException - * F.e. - * @throws SignatureTypesException - * F.e. - */ - public static List extractSignatureHoldersFromText(String text) throws SignatureException, SignatureTypesException - { - List holders = new ArrayList(); - String current_text = text; - for (;;) - { - TextualSignatureHolder signature_holder = extractLatestBlock(current_text); - if (signature_holder == null) - { - break; - } - holders.add(0, signature_holder); - current_text = signature_holder.getSignedText(); - } - return holders; - } - - /** - * Extracts the latest signature block from the given text and creates a - * SignatureHolder object that can be verified. - * - * @param text - * The text. - * @return Returns the SignatureObject extracted from the text, or null, if no - * latest block was found. - * @throws SignatureException - * F.e. - * @throws SignatureTypesException - * F.e. - */ - public static TextualSignatureHolder extractLatestBlock(String text) throws SignatureException, SignatureTypesException - { - FoundBlock latest_block = findLatestBlock(text); - if (latest_block == null) - { - return null; - } - String reconstructed_text = cutOutBlock(text, latest_block); - SignatureObject so = createSignatureObjectFromFoundBlock(text, latest_block); - TextualSignatureHolder tsh = new TextualSignatureHolder(reconstructed_text, so); - return tsh; - } - - /** - * Finds the latest signature block for a given text. - * - *

- * The latest block is the one with the highest, most recent date. Usually - * this block will be extracted (cut out) of the text which will result in the - * originally signed text of this signature to be verified using the cut out - * data. - *

- * - * @param text - * The text to be analyzed. - * @return Returns the latest found block or null, if there was none. - * @throws SignatureException - * F.e. - * @throws SignatureTypesException - * F.e. - */ - public static FoundBlock findLatestBlock(String text) throws SignatureException, SignatureTypesException - { - List signatureTypes_ = getSignatureTypesForTextAnalysis(); - - List found_potential_candidates = new ArrayList(); - - for (int i = 0; i < signatureTypes_.size(); i++) - { - SignatureTypeDefinition block_type = (SignatureTypeDefinition) signatureTypes_.get(i); - List found_candidates_for_type = findPotentialSignaturesForProfile(text, block_type); - - found_potential_candidates.addAll(found_candidates_for_type); - } - - if (found_potential_candidates.isEmpty()) - { - logger.debug("no candidates found at all"); - return null; - } - - List found_candidates = new ArrayList(); - logger.debug("checking block integrity"); - for (int i = 0; i < found_potential_candidates.size(); i++) - { - FoundBlock found_block = (FoundBlock) found_potential_candidates.get(i); - String date_value = getDateValue(text, found_block); - try - { - EGIZDate date = EGIZDate.parseFromString(date_value); - - logger.debug("found_block = " + date + " - " + found_block); - - checkBlockIntegrity(text, found_block); - found_candidates.add(found_block); - } - catch (Exception e) - { - logger.debug("Exception while checking the integrity of the found block " + found_block + ". 
Ignoring this block.", e); - } - } - - sortFoundBlocksByDate(text, found_candidates); - if (logger.isDebugEnabled()) - { - logger.debug("sorted blocks:"); - for (int i = 0; i < found_candidates.size(); i++) - { - FoundBlock found_block = (FoundBlock) found_candidates.get(i); - - String date_value = getDateValue(text, found_block); - EGIZDate date = EGIZDate.parseFromString(date_value); - - logger.debug(" #" + i + ": " + date + " - " + found_block); - } - } - - List latest_blocks = filterLastDateEqualBlocks(text, found_candidates); - if (logger.isDebugEnabled()) - { - logger.debug("latest blocks:"); - for (int i = 0; i < latest_blocks.size(); i++) - { - FoundBlock found_block = (FoundBlock) latest_blocks.get(i); - - String date_value = getDateValue(text, found_block); - EGIZDate date = EGIZDate.parseFromString(date_value); - - logger.debug(" #" + i + ": " + date + " - " + found_block); - } - } - - // The semantic equality check has been outdated by the - // advanced choosing algorithm. - // boolean semantic_equality = - // PdfAS.checkForSemanticEquality(latest_blocks); - // logger.debug("semantic_equality = " + semantic_equality); - // if (!semantic_equality) - // { - // throw new SignatureException(314, "The latest blocks weren't semantically - // equal."); - // } - - FoundBlock latest_block = chooseMostPossibleBlock(latest_blocks); - - logger.debug("latest block = " + latest_block); - return latest_block; - } - - /** - * Finds the List of potential blocks within the given text for the given - * profile. - * - * @param text - * The text, in which potential block are to be sought. - * @param block_type - * The profile for which the text is to be sought. - * @return Returns the List of potential FoundBlocks or an empty List if none - * could be found. 
- */ - public static List findPotentialSignaturesForProfile(String text, - SignatureTypeDefinition block_type) - { - logger.debug("find potential signatures for " + block_type.getType()); - List found_blocks = new ArrayList(); - - final boolean old_style = false; - - Vector keys = block_type.getRevertSortedKeys(); - Vector captions = block_type.getRevertSortedCaptions(); - - String last_key = (String) keys.get(0); - logger.debug("last_key = " + last_key); - String last_caption = (String) captions.get(0); - logger.debug("last_caption = " + last_caption); - String current_last_caption= last_caption; - List found_last_captions = findIndicesWithStartingNL(text, last_caption); - if (last_key.equals(SignatureTypes.SIG_ID)) - { - logger.debug("Last key is SIG_ID, so it may not be present. Searching for the previous to last key."); - String prevlast_key = (String) keys.get(1); - String prevlast_caption = (String) captions.get(1); - current_last_caption = prevlast_caption; - List found_prevlast_captions = findIndicesWithStartingNL(text, prevlast_caption); - if (!found_prevlast_captions.isEmpty()) - { - found_last_captions.addAll(found_prevlast_captions); - } - } - if (logger.isDebugEnabled()) - { - logger.debug("found " + found_last_captions.size() + " last captions."); - for (int i = 0; i < found_last_captions.size(); i++) - { - logger.debug(" found last caption at index " + found_last_captions.get(i)); - } - } - - for (int lci = 0; lci < found_last_captions.size(); lci++) - { - int last_caption_index = ((Integer) found_last_captions.get(lci)).intValue(); - logger.debug("resolving signature block from last caption index " + last_caption_index); - int potential_block_end = findEndOfValue(text, last_caption_index); - if (potential_block_end == (last_caption_index + current_last_caption.length()+1)) - { - potential_block_end = findEndOfValue(text, potential_block_end); - } - - // FIXME: complete HOTFIX - /* - int extendedValueEnd = potential_block_end; - String cv; - do { - 
extendedValueEnd = findEndOfValue(text, extendedValueEnd); - cv = text.substring(last_caption_index + current_last_caption.length()+1, extendedValueEnd); - } while (extendedValueEnd < text.length()); - */ - - logger.debug("potential_block_end = " + potential_block_end); - List found_keys = PdfAS.findBlockInText(text.substring(0, potential_block_end), block_type, old_style); // findRestKeys(text, - // keys, - // captions, - // last_caption_index); - - if (found_keys == null) - { - logger.debug("Not all other captions could be found for the last_caption_index " + last_caption_index + " ==> discarding this index."); - - continue; - } - - // sort found keys ascendingly - PdfAS.sortFoundKeysAscendingly(found_keys); - - boolean reverse_check_ok = reverseCheckFoundKeys(text, found_keys); - if (!reverse_check_ok) - { - logger.debug("The reverse check ruled this list of found keys out ==> they are discarded."); - - continue; - } - - logger.debug("The reverse check proved this list of found keys out ==> adding them as potential candidates."); - - FoundBlock found_block = new FoundBlock(); - found_block.std = block_type; - found_block.found_keys = found_keys; - found_block.end_index = potential_block_end;//findEndOfValue(text, last_caption_index); - found_blocks.add(found_block); - } - - logger.debug("found " + found_blocks.size() + " potential signatures for " + block_type.getType()); - return found_blocks; - } - - /** - * Finds all indices of the given subtext (starting at a new line) within a - * given text. - * - *

- * This is usually used to find the indices of the last captions. - *

- * - * @param text - * The text to be searched. - * @param subtext - * The subtext to be sought. - * @return Returns the List of found indices. - */ - public static List findIndicesWithStartingNL(String text, String subtext) - { - List found_indices = new ArrayList(); - - // // for some reason "^" + subtext doesn't work as a pattern - // String pattern = "\n" + subtext; - // Pattern p = Pattern.compile(pattern); - // Matcher m = p.matcher(text); - // - // while (m.find()) - // { - // int found_index = m.start() + 1; // +1 removes the newline - // found_indices.add(new Integer(found_index)); - // } - - int search_from_index = 0; - for (;;) - { - int found_index = text.indexOf("\n" + subtext, search_from_index); - if (found_index < 0) - { - break; - } - found_index += 1; // The +1 compensates the "\n" - found_indices.add(new Integer(found_index)); - search_from_index = found_index + subtext.length(); - } - return found_indices; - } - - /** - * Finds the other keys/captions according to their order starting from the - * last_caption index upwards. - * - * @param text - * The text. - * @param keys - * The list of keys. - * @param captions - * The list of captions. - * @param last_caption_index - * The index of the last caption. - * @return Returns the List of found keys, if all keys could be found, or null - * if not all keys could be found. 
- */ - public static List findRestKeys(String text, List keys, List captions, - int last_caption_index) - { - List found_keys = new ArrayList(); - - FoundKey last_caption_found_key = new FoundKey((String) keys.get(0), (String) captions.get(0), last_caption_index); - found_keys.add(last_caption_found_key); - - String rest_text = text.substring(0, last_caption_index); - - for (int i = 1; i < captions.size(); i++) - { - String sought_caption = (String) captions.get(i); - int index = rest_text.lastIndexOf(sought_caption); - - if (index < 0) - { - return null; - } - FoundKey found_key = new FoundKey((String) keys.get(i), (String) captions.get(i), index); - found_keys.add(0, found_key); - - rest_text = rest_text.substring(0, index); - } - - return found_keys; - } - - /** - * Performs a reverse (top to bottom) search for the found keys and checks - * that these indices are the same as those that were found during the regular - * (bottom up) search. - *

- * If a reverse check proves that the found keys are not at the same positions - * as during regular search, this list of found keys should be discarded. - *

- * - * @param text - * The text. - * @param found_keys - * The found keys to be reversely checked. - * @return Returns true, if all (also the non required) captions could be - * found at the same indices as during regular search, false - * otherwise. - */ - public static boolean reverseCheckFoundKeys(String text, List found_keys) - { - int search_from_index = ((FoundKey) found_keys.get(0)).start_index; - for (int i = 0; i < found_keys.size(); i++) - { - FoundKey found_key = (FoundKey) found_keys.get(i); - - int reverse_found_index = text.indexOf(found_key.caption, search_from_index); - - // dferbas fix #331 ?? - - if (reverse_found_index < 0) - { - throw new RuntimeException("The caption " + found_key.caption + " wasn't found in the text during reverse checking - there is something wrong."); - } - - if (reverse_found_index != found_key.start_index) - { - logger.debug("The index for caption " + found_key.caption + " wasn't proved during reverse checking."); - return false; - } - search_from_index = found_key.start_index + found_key.caption.length(); - } - return true; - } - - /** - * Finds the end of the value in the text. - * - *

- * This simply scans for a '\n' from a given start index. The line up to and - * including the '\n' is considered to be the value. - *

- *

- * Note that this method does NOT find the accurate value, if the value goes - * over multiple lines! This may bear a serious problem. Usually this method - * is only used to find the end of the last value in a found block, because - * mid-values are exactly determined by their start index and the start of - * the next caption. Nevertheless, if the last value spans over multiple - * lines, this method will not retrieve it completely. - *

- * - * @param text - * The text. - * @param start_index - * The start index from where the end of the value is sought. - * @return Returns the end index of the value, which is the index of the first - * character not belonging to the value anymore (the character after - * the '\n'). - */ - public static int findEndOfValue(String text, int start_index) - { - int newline_index = text.indexOf('\n', start_index); - if (newline_index < 0) - { - return text.length(); - } - return newline_index + 1; - } - - /** - * Checks the integrity of a found block. - * - *

- * This is an assertive function. - *

- * - * @param text - * The text. - * @param found_block - * The found block. - */ - public static void checkBlockIntegrity(String text, FoundBlock found_block) - { - for (int i = 0; i < found_block.found_keys.size() - 1; i++) - { - FoundKey this_key = (FoundKey) found_block.found_keys.get(i); - FoundKey next_key = (FoundKey) found_block.found_keys.get(i + 1); - - int this_end_index = findEndOfValue(text, this_key.start_index); - if (this_end_index != next_key.start_index) - { - logger.debug("multi line value: " + this_key); - // throw new RuntimeException("The end index of found key " + this_key + - // " doesn't match the start index of found key " + next_key); - } - } - - FoundKey last_key = (FoundKey) found_block.found_keys.get(found_block.found_keys.size() - 1); - int end_of_block = findEndOfValue(text, last_key.start_index); - if (end_of_block == (last_key.start_index+last_key.caption.length()+1)) - { - end_of_block = findEndOfValue(text,end_of_block); - } - if (end_of_block != found_block.end_index) - { - throw new RuntimeException("The end index of last key " + last_key + " doesn't match the end index of the block " + found_block); - } - } - - /** - * Cuts out the given found block from the text. - * - * @param text - * The text. - * @param block - * The found block. - * @return Returns the rest text without the block. - */ - public static String cutOutBlock(String text, FoundBlock block) - { - int block_start_index = ((FoundKey) block.found_keys.get(0)).getStartIndex(); - int block_end_index = block.end_index; - - if (block_start_index == 0 && block_end_index == text.length()) - { - // the block is the whole text - the rest text is empty. - // This may happen if a (no-text) empty document contains a binary signature. - // Then the "signed text" of the binary signature is empty. - return ""; - } - - if (block_end_index == text.length()) - { - // if the block is at the end of the text, remove the "\n" before the - // block as well. 
- String pre = text.substring(0, block_start_index - 1); - return pre; - } - - String pre = text.substring(0, block_start_index); - String post = text.substring(block_end_index); - - String rest_text = pre + post; - return rest_text; - } - - /** - * Returns the value of the date field as String. - * - * @param text - * The text. - * @param block - * The found block. - * @return Returns the date value. - */ - public static String getDateValue(String text, FoundBlock block) - { - FoundKey date_key = block.getDateFoundKey(); - int date_value_start_index = date_key.start_index + date_key.caption.length(); - int date_value_end_index = findEndOfValue(text, date_value_start_index); - if (date_value_end_index == (date_value_start_index+1)) - { - date_value_end_index = findEndOfValue(text, date_value_end_index); - } - String date_value = text.substring(date_value_start_index, date_value_end_index).trim(); - logger.debug("DateString="+date_value); - return date_value; - } - - /** - * Creates a SignatureObject from a found block by extracting the - * corresponding values. - * - * @param text - * The text. - * @param found_block - * The found block. - * @return Returns the created SignatureObject. - * @throws SignatureTypesException - * F.e. - * @throws SignatureException - * F.e. 
- */ - public static SignatureObject createSignatureObjectFromFoundBlock( - String text, FoundBlock found_block) throws SignatureTypesException, SignatureException - { - SignatureObject signatureObject = new SignatureObject(); - - signatureObject.setSigType(found_block.std.getType()); - signatureObject.initByType(); - - int end_index = found_block.end_index; - for (int i = found_block.found_keys.size() - 1; i >= 0; i--) - { - FoundKey cur_key = (FoundKey) found_block.found_keys.get(i); - int start_index = cur_key.getStartIndex() + cur_key.caption.length(); - - String value = text.substring(start_index, end_index); - - signatureObject.setSigValueCaption(cur_key.getKey(), value, cur_key.caption); - - end_index = cur_key.getStartIndex(); - } - - return signatureObject; - - } - - /** - * Parses the EGIZDate from a found block and the given text. - * - * @param text - * The text. - * @param found_block - * The found block. - * @return Returns the parsed EGIZDate. - */ - public static EGIZDate getDateFromFoundBlock(String text, - FoundBlock found_block) - { - String date_value = getDateValue(text, found_block); - EGIZDate date = EGIZDate.parseFromString(date_value); - return date; - } - - /** - * Sorts the List of found blocks by date. - * - * @param text - * The text. - * @param found_blocks - * The List of found blocks. - */ - public static void sortFoundBlocksByDate(final String text, List found_blocks) - { - Collections.sort(found_blocks, new Comparator() - { - public int compare(Object arg0, Object arg1) - { - FoundBlock fb0 = (FoundBlock) arg0; - FoundBlock fb1 = (FoundBlock) arg1; - - EGIZDate date0 = getDateFromFoundBlock(text, fb0); - EGIZDate date1 = getDateFromFoundBlock(text, fb1); - return date0.compareTo(date1); - } - }); - } - - /** - * Given a List of FoundBlock objects, this method returns the last blocks of - * this list that have the same date. - * - *

- * Usually a date sorted list (earliest first, latest last) will be provided - * to this method. Then the last date equal blocks are returned, which are the - * last blocks. - *

- * - * @param text - * The text to retrieve the values of the fields from. - * @param found_blocks - * The List of FoundBlock objects. - * @return Returns the List of the last date equal blocks. - */ - public static List filterLastDateEqualBlocks(String text, List found_blocks) - { - List latest_blocks = new ArrayList(); - latest_blocks.add(found_blocks.get(found_blocks.size() - 1)); - for (int i = found_blocks.size() - 2; i >= 0; i--) - { - FoundBlock this_block = (FoundBlock) found_blocks.get(i); - FoundBlock succ_block = (FoundBlock) found_blocks.get(i + 1); - - EGIZDate this_date = getDateFromFoundBlock(text, this_block); - EGIZDate succ_date = getDateFromFoundBlock(text, succ_block); - - if (!this_date.equals(succ_date)) - { - break; - } - latest_blocks.add(0, this_block); - } - - return latest_blocks; - } - /** - * Chooses the most possible (best choice) block of the list of blocks. - * - *

- * The strategy to find the most possible block is to choose the very one - * block with the maximum number of captions. This block has extracted most - * information from the text. - *

- *

- * If there are still multiple blocks with the same number of captions, the - * blocks are compared caption-wise. The block with all captions being longer - * or equal to all other blocks' captions wins. - *

- * - * @param found_blocks - * The List of semantically equal blocks. - * @return Returns the best choice FoundBlock. - * @throws SignatureException - */ - public static FoundBlock chooseMostPossibleBlock(List found_blocks) throws SignatureException - { - // int largest_block_index = 0; - // FoundBlock largest_block = (FoundBlock) found_blocks.get(0); - // - // for (int i = 1; i < found_blocks.size(); i++) - // { - // FoundBlock current_block = (FoundBlock) found_blocks.get(i); - // - // if (current_block.found_keys.size() > largest_block.found_keys.size()) - // { - // largest_block = current_block; - // largest_block_index = i; - // } - // } - - List vertically_largest = filterVerticallyLargestBlocks(found_blocks); - if (logger.isDebugEnabled()) - { - logger.debug("vertically largest blocks:"); - for (int i = 0; i < vertically_largest.size(); i++) - { - FoundBlock found_block = (FoundBlock) vertically_largest.get(i); - logger.debug(" #" + i + ": " + found_block); - } - } - - List horizontally_largest = filterHorizontallyLargestBlocks(vertically_largest); - if (logger.isDebugEnabled()) - { - logger.debug("horizontally largest blocks:"); - for (int i = 0; i < horizontally_largest.size(); i++) - { - FoundBlock found_block = (FoundBlock) horizontally_largest.get(i); - logger.debug(" #" + i + ": " + found_block); - } - } - FoundBlock largest_block = (FoundBlock) horizontally_largest.get(0); - logger.debug("Chose largest block: " + largest_block); - return largest_block; - } - - /** - * Filters out all blocks but the vertically largest ones. - * - *

- * A vertically largest block has the most found keys. - *

- * - * @param found_blocks - * The List of FoundBlock objects to be filtered. - * @return Returns the List of the vertically largest FoundBlock objects. - */ - public static List filterVerticallyLargestBlocks(List found_blocks) - { - // determine the size of the largest block(s) - int largest_size = Integer.MIN_VALUE; - for (int i = 0; i < found_blocks.size(); i++) - { - FoundBlock fb = (FoundBlock) found_blocks.get(i); - final int current_size = fb.found_keys.size(); - if (current_size > largest_size) - { - largest_size = current_size; - } - } - - // keep all blocks that have the largest_size - List largest_blocks = new ArrayList(); - for (int i = 0; i < found_blocks.size(); i++) - { - FoundBlock fb = (FoundBlock) found_blocks.get(i); - if (fb.found_keys.size() < largest_size) - { - continue; - } - largest_blocks.add(fb); - } - return largest_blocks; - } - - /** - * Filters out all blocks but the horizonally largest ones. - * - *

- * A horizontally largest block is one whose captions are each at least as - * long as the corresponding captions of every other block. - *

- * - * @param found_blocks - * The List of FoundBlock objects to be filtered. All of these - * FoundBlock objects must have the same number of found keys. - * @return Returns the List of the horizontally largest FoundBlock objects. - * @throws SignatureException - */ - public static List filterHorizontallyLargestBlocks(List found_blocks) throws SignatureException - { - List horizontally_largest = new ArrayList(); - FoundBlock largest_block = (FoundBlock) found_blocks.get(0); - horizontally_largest.add(largest_block); - - for (int i = 1; i < found_blocks.size(); i++) - { - FoundBlock fb = (FoundBlock) found_blocks.get(i); - - if (isHorizontallyEqual(fb, largest_block)) - { - horizontally_largest.add(fb); - continue; - } - - if (isHorizontallyLarger(fb, largest_block)) - { - horizontally_largest = new ArrayList(); - largest_block = fb; - horizontally_largest.add(largest_block); - } - else - { - if (!isHorizontallyLarger(largest_block, fb)) - { - // The block is neither equal nor larger nor lower. - // We cannot exactly determine which one to use. - throw new SignatureException(315, "The blocks are neither larger nor lower nor equal. Cannot decide which one to pick. fb = " + fb + ", largest_block = " + largest_block); - } - } - - } - - return horizontally_largest; - } - - protected static boolean isHorizontallyEqual(FoundBlock fb0, FoundBlock fb1) - { - final int num_keys = fb0.found_keys.size(); - if (num_keys != fb1.found_keys.size()) - { - throw new IllegalArgumentException("Cannot compare FoundBlock keys: fb0 doesn't have the same number of keys as fb1. " + fb0.found_keys.size() + " vs. 
" + fb1.found_keys.size()); - } - - for (int i = 0; i < num_keys; i++) - { - FoundKey fk0 = (FoundKey) fb0.found_keys.get(i); - FoundKey fk1 = (FoundKey) fb1.found_keys.get(i); - - if (fk0.caption.length() != fk1.caption.length()) - { - return false; - } - } - - return true; - } - - protected static boolean isHorizontallyLarger(FoundBlock fb0, FoundBlock fb1) - { - final int num_keys = fb0.found_keys.size(); - if (num_keys != fb1.found_keys.size()) - { - throw new IllegalArgumentException("Cannot compare FoundBlock keys: fb0 doesn't have the same number of keys as fb1. " + fb0.found_keys.size() + " vs. " + fb1.found_keys.size()); - } - - boolean larger = false; - - for (int i = 0; i < num_keys; i++) - { - FoundKey fk0 = (FoundKey) fb0.found_keys.get(i); - FoundKey fk1 = (FoundKey) fb1.found_keys.get(i); - - if (fk0.caption.length() == fk1.caption.length()) - { - continue; - } - - if (fk0.caption.length() > fk1.caption.length()) - { - larger = true; - continue; - } - - // if (fk0.caption.length() < fk1.caption.length()) - return false; - } - - return larger; - } - -} -- cgit v1.2.3