From ddefaa2f875bb73acdb2064ae13d21877457420c Mon Sep 17 00:00:00 2001 From: knowcenter Date: Tue, 13 Mar 2007 12:46:59 +0000 Subject: fixed Bug with rotated pdf-files git-svn-id: https://joinup.ec.europa.eu/svn/pdf-as/trunk@50 7b5415b0-85f9-ee4d-85bd-d5d0c3b42d1c --- .../wag/egov/egiz/pdf/AbsoluteTextSignature.java | 63 +++++++++------------- 1 file changed, 24 insertions(+), 39 deletions(-) diff --git a/src/main/java/at/knowcenter/wag/egov/egiz/pdf/AbsoluteTextSignature.java b/src/main/java/at/knowcenter/wag/egov/egiz/pdf/AbsoluteTextSignature.java index b9f4773..a5d1be8 100644 --- a/src/main/java/at/knowcenter/wag/egov/egiz/pdf/AbsoluteTextSignature.java +++ b/src/main/java/at/knowcenter/wag/egov/egiz/pdf/AbsoluteTextSignature.java @@ -72,7 +72,6 @@ public class AbsoluteTextSignature public static List extractSignatureHoldersFromText(String text) throws SignatureException, SignatureTypesException { List holders = new ArrayList(); - String current_text = text; for (;;) { @@ -81,12 +80,9 @@ public class AbsoluteTextSignature { break; } - holders.add(0, signature_holder); - current_text = signature_holder.getSignedText(); } - return holders; } @@ -110,12 +106,9 @@ public class AbsoluteTextSignature { return null; } - String reconstructed_text = cutOutBlock(text, latest_block); - SignatureObject so = createSignatureObjectFromFoundBlock(text, latest_block); TextualSignatureHolder tsh = new TextualSignatureHolder(reconstructed_text, so); - return tsh; } @@ -147,12 +140,12 @@ public class AbsoluteTextSignature // { // e.printStackTrace(); // } - + SignatureTypes sig_types = SignatureTypes.getInstance(); List signatureTypes_ = sig_types.getSignatureTypeDefinitions(); List found_potential_candidates = new ArrayList(); - + for (int i = 0; i < signatureTypes_.size(); i++) { SignatureTypeDefinition block_type = (SignatureTypeDefinition) signatureTypes_.get(i); @@ -172,9 +165,7 @@ public class AbsoluteTextSignature for (int i = 0; i < found_potential_candidates.size(); i++) 
{ FoundBlock found_block = (FoundBlock) found_potential_candidates.get(i); - String date_value = getDateValue(text, found_block); - logger.debug("date_value = " + date_value); try { EGIZDate date = EGIZDate.parseFromString(date_value); @@ -182,7 +173,6 @@ public class AbsoluteTextSignature logger.debug("found_block = " + date + " - " + found_block); checkBlockIntegrity(text, found_block); - found_candidates.add(found_block); } catch (Exception e) @@ -253,7 +243,6 @@ public class AbsoluteTextSignature SignatureTypeDefinition block_type) { logger.debug("find potential signatures for " + block_type.getType()); - List found_blocks = new ArrayList(); final boolean old_style = false; @@ -265,15 +254,14 @@ public class AbsoluteTextSignature logger.debug("last_key = " + last_key); String last_caption = (String) captions.get(0); logger.debug("last_caption = " + last_caption); - + String current_last_caption= last_caption; List found_last_captions = findIndicesWithStartingNL(text, last_caption); if (last_key.equals(SignatureTypes.SIG_ID)) { logger.debug("Last key is SIG_ID, so it may not be present. 
Searching for the previous to last key."); String prevlast_key = (String) keys.get(1); - logger.debug("last_key = " + prevlast_key); String prevlast_caption = (String) captions.get(1); - logger.debug("prevlast_caption = " + last_caption); + current_last_caption = prevlast_caption; List found_prevlast_captions = findIndicesWithStartingNL(text, prevlast_caption); if (!found_prevlast_captions.isEmpty()) { @@ -293,10 +281,12 @@ public class AbsoluteTextSignature { int last_caption_index = ((Integer) found_last_captions.get(lci)).intValue(); logger.debug("resolving signature block from last caption index " + last_caption_index); - int potential_block_end = findEndOfValue(text, last_caption_index); + if (potential_block_end == (last_caption_index + current_last_caption.length()+1)) + { + potential_block_end = findEndOfValue(text, potential_block_end); + } logger.debug("potential_block_end = " + potential_block_end); - List found_keys = PdfAS.findBlockInText(text.substring(0, potential_block_end), block_type, old_style); // findRestKeys(text, // keys, // captions, @@ -325,7 +315,7 @@ public class AbsoluteTextSignature FoundBlock found_block = new FoundBlock(); found_block.std = block_type; found_block.found_keys = found_keys; - found_block.end_index = findEndOfValue(text, last_caption_index); + found_block.end_index = potential_block_end;//findEndOfValue(text, last_caption_index); found_blocks.add(found_block); } @@ -411,7 +401,6 @@ public class AbsoluteTextSignature { return null; } - FoundKey found_key = new FoundKey((String) keys.get(i), (String) captions.get(i), index); found_keys.add(0, found_key); @@ -450,16 +439,14 @@ public class AbsoluteTextSignature { throw new RuntimeException("The caption " + found_key.caption + " wasn't found in the text during reverse checking - there is something wrong."); } - + if (reverse_found_index != found_key.start_index) { logger.debug("The index for caption " + found_key.caption + " wasn't proved during reverse checking."); return 
false; } - search_from_index = found_key.start_index + found_key.caption.length(); } - return true; } @@ -489,16 +476,14 @@ public class AbsoluteTextSignature */ public static int findEndOfValue(String text, int start_index) { - // FIXME[tknall]: this method does not work properly for landscape documents because always starts with "\n". Look for errors in PdfAS.java, method findBlockInText(...) to set the start_index accordingly. - // Hint: Captions and values of landscape documents are separated with " \n" and not only with " ". int newline_index = text.indexOf('\n', start_index); if (newline_index < 0) { return text.length(); - } + } return newline_index + 1; } - + /** * Checks the integrity of a found block. * @@ -528,11 +513,15 @@ public class AbsoluteTextSignature } FoundKey last_key = (FoundKey) found_block.found_keys.get(found_block.found_keys.size() - 1); - if (findEndOfValue(text, last_key.start_index) != found_block.end_index) + int end_of_block = findEndOfValue(text, last_key.start_index); + if (end_of_block == (last_key.start_index+last_key.caption.length()+1)) { - throw new RuntimeException("The end index of last key " + last_key + " doesn't match the end index of the block " + found_block); + end_of_block = findEndOfValue(text,end_of_block); + } + if (end_of_block != found_block.end_index) + { + throw new RuntimeException("The end index of last key " + last_key + " doesn't match the end index of the block " + found_block); } - } /** @@ -578,8 +567,12 @@ public class AbsoluteTextSignature FoundKey date_key = block.getDateFoundKey(); int date_value_start_index = date_key.start_index + date_key.caption.length(); int date_value_end_index = findEndOfValue(text, date_value_start_index); + if (date_value_end_index == (date_value_start_index+1)) + { + date_value_end_index = findEndOfValue(text, date_value_end_index); + } String date_value = text.substring(date_value_start_index, date_value_end_index).trim(); - + logger.debug("DateString="+date_value); return 
date_value; } @@ -682,9 +675,7 @@ public class AbsoluteTextSignature public static List filterLastDateEqualBlocks(String text, List found_blocks) { List latest_blocks = new ArrayList(); - latest_blocks.add(found_blocks.get(found_blocks.size() - 1)); - for (int i = found_blocks.size() - 2; i >= 0; i--) { FoundBlock this_block = (FoundBlock) found_blocks.get(i); @@ -702,7 +693,6 @@ public class AbsoluteTextSignature return latest_blocks; } - /** * Chooses the most possible (best choice) block of the list of blocks. * @@ -745,7 +735,6 @@ public class AbsoluteTextSignature for (int i = 0; i < vertically_largest.size(); i++) { FoundBlock found_block = (FoundBlock) vertically_largest.get(i); - logger.debug(" #" + i + ": " + found_block); } } @@ -757,13 +746,10 @@ public class AbsoluteTextSignature for (int i = 0; i < horizontally_largest.size(); i++) { FoundBlock found_block = (FoundBlock) horizontally_largest.get(i); - logger.debug(" #" + i + ": " + found_block); } } - FoundBlock largest_block = (FoundBlock) horizontally_largest.get(0); - logger.debug("Chose largest block: " + largest_block); return largest_block; } @@ -804,7 +790,6 @@ public class AbsoluteTextSignature } largest_blocks.add(fb); } - return largest_blocks; } -- cgit v1.2.3