// Provenance, carried over from the patch header this file was extracted from:
//   commit  535a04fa05f739ec16dd81666e3b0f82dfbd442d
//   author  tknall, Wed, 9 Jan 2013 15:41:29 +0000
//   subject "pdf-as-lib maven project files moved to pdf-as-lib"
//   git-svn-id: https://joinup.ec.europa.eu/svn/pdf-as/pdf-as/trunk@926 7b5415b0-85f9-ee4d-85bd-d5d0c3b42d1c
//   path    pdf-as-lib/src/main/java/at/knowcenter/wag/egov/egiz/framework/FoundBlock.java
/**
 * Copyright 2006 by Know-Center, Graz, Austria
 * PDF-AS has been contracted by the E-Government Innovation Center EGIZ, a
 * joint initiative of the Federal Chancellery Austria and Graz University of
 * Technology.
 *
 * Licensed under the EUPL, Version 1.1 or - as soon they will be approved by
 * the European Commission - subsequent versions of the EUPL (the "Licence");
 * You may not use this work except in compliance with the Licence.
 * You may obtain a copy of the Licence at:
 * http://www.osor.eu/eupl/
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the Licence is distributed on an "AS IS" basis,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the Licence for the specific language governing permissions and
 * limitations under the Licence.
 *
 * This product combines work with different licenses. See the "NOTICE" text
 * file for details on the various modules and licenses.
 * The "NOTICE" text file is part of the distribution. Any derivative works
 * that you distribute must include a readable copy of the "NOTICE" text file.
 *
 * $Id: FoundBlock.java,v 1.2 2006/10/31 08:07:29 wprinz Exp $
 */
package at.knowcenter.wag.egov.egiz.framework;

import java.util.ArrayList;
import java.util.List;

import at.knowcenter.wag.egov.egiz.sig.SignatureTypeDefinition;
import at.knowcenter.wag.egov.egiz.sig.SignatureTypes;

/**
 * Contains all the information about a found Block in text extraction.
 *
 * This is basically the ordered list of found captions.
 *
 * @author wprinz
 */
public class FoundBlock
{
  /**
   * The ordered list of found keys. The elements are cast to {@link FoundKey}
   * wherever they are read in this class.
   */
  public List found_keys = null;

  /**
   * The end index of the block.
   */
  public int end_index = 0;

  /**
   * The type of the block.
   */
  public SignatureTypeDefinition std = null;

  /**
   * Returns the first key of this block.
   *
   * This is the element stored at the LAST index of {@link #found_keys}.
   * NOTE(review): presumably the list is filled in reverse order of
   * appearance in the text - confirm against the code that populates
   * found_keys before changing the index.
   *
   * @return Returns the first key of this block.
   */
  public FoundKey getFirstKey()
  {
    int last_index = this.found_keys.size() - 1;
    return (FoundKey) this.found_keys.get(last_index);
  }

  /**
   * Returns the last key of this block.
   *
   * This is the element stored at index 0 of {@link #found_keys}; see the
   * review note on {@link #getFirstKey()} regarding the list order.
   *
   * @return Returns the last key of this block.
   */
  public FoundKey getLastKey()
  {
    return (FoundKey) this.found_keys.get(0);
  }

  /**
   * Returns the size of this block.
   *
   * Note that this doesn't give the exact size of the block, but rather a
   * value suitable for comparison: the difference between the start index of
   * the last key and the start index of the first key.
   *
   * The list of found keys must be set and must not be empty, otherwise the
   * key lookup fails with a runtime exception.
   *
   * @return Returns the size of this block.
   */
  public int getSize()
  {
    int last_start = getLastKey().start_index;
    int first_start = getFirstKey().start_index;
    return last_start - first_start;
  }

  /**
   * Returns a short textual description of this block: its type, the number
   * of found keys and its size.
   *
   * This method never fails on an incompletely initialized block: a missing
   * type or key list is printed as "null", and the size is printed as "n/a"
   * when there are no keys to compute it from.
   *
   * @see java.lang.Object#toString()
   */
  public String toString()
  {
    // Both fields default to null, so guard them; toString is typically used
    // for logging and debugging and should not throw on a half-built object.
    String type_text = (this.std == null) ? "null" : "" + this.std.getType();
    String count_text = (this.found_keys == null) ? "null" : "" + this.found_keys.size();

    boolean has_keys = this.found_keys != null && !this.found_keys.isEmpty();
    String size_text = has_keys ? "" + getSize() : "n/a";

    return "FoundBlock: std=" + type_text + ", #=" + count_text + ", size = " + size_text;
  }

  /**
   * Tells, if this block is semantically equal to the other block.
   *
   * The decision is delegated to the type definition of this block, which is
   * compared against the type definition of the other block.
   *
   * The documented intent is that two blocks are semantically equal, if all
   * the required fields (except SIG_ID) have the same captions in the same
   * order. NOTE(review): whether the type definition comparison implements
   * exactly this cannot be seen from this class - confirm in
   * SignatureTypeDefinition. An earlier, disabled implementation compared the
   * found keys one by one after applying
   * {@link #filterOutSIG_ID(List)} and
   * {@link #filterOutNonRequiredFoundKeys(List)}.
   *
   * @param other_block
   *          The other block.
   * @return Returns true, if this block is semantically equal to the other
   *         one, false otherwise.
   */
  public boolean isSemanticallyEqual(FoundBlock other_block)
  {
    return this.std.isSemanticallyEqual(other_block.std);
  }

  /**
   * Filters out all non required keys from the List of found keys.
   *
   * The input list is not modified; the relative order of the kept keys is
   * preserved.
   *
   * @param found_keys
   *          The List of found keys.
   * @return Returns the subset List which contains only the required keys.
   */
  protected static List filterOutNonRequiredFoundKeys(List found_keys)
  {
    List required_found_keys = new ArrayList(found_keys.size());
    for (int i = 0; i < found_keys.size(); i++)
    {
      FoundKey candidate = (FoundKey) found_keys.get(i);
      if (SignatureTypes.isRequiredKey(candidate.key))
      {
        required_found_keys.add(candidate);
      }
    }
    return required_found_keys;
  }

  /**
   * Filters out a SIG_ID found key.
   *
   * The input list is not modified; the relative order of the kept keys is
   * preserved.
   *
   * @param found_keys
   *          The List of found keys.
   * @return Returns the subset List which contains all keys but the SIG_ID.
   */
  protected static List filterOutSIG_ID(List found_keys)
  {
    List nonsigid_found_keys = new ArrayList(found_keys.size());
    for (int i = 0; i < found_keys.size(); i++)
    {
      FoundKey candidate = (FoundKey) found_keys.get(i);
      if (!candidate.key.equals(SignatureTypes.SIG_ID))
      {
        nonsigid_found_keys.add(candidate);
      }
    }
    return nonsigid_found_keys;
  }

  /**
   * Tells, if this block is strictly semantically equal to the other block.
   *
   * Two blocks are strictly semantically equal, if they contain the same
   * number of keys and the keys at each position are semantically equal
   * according to FoundKey's own comparison.
   *
   * @param other_block
   *          The other block.
   * @return Returns true, if this block is strictly semantically equal to the
   *         other one, false otherwise.
   */
  public boolean isStrictlySemanticallyEqual(FoundBlock other_block)
  {
    int key_count = this.found_keys.size();
    if (key_count != other_block.found_keys.size())
    {
      return false;
    }

    for (int i = 0; i < key_count; i++)
    {
      FoundKey this_found_key = (FoundKey) this.found_keys.get(i);
      FoundKey other_found_key = (FoundKey) other_block.found_keys.get(i);

      if (!this_found_key.isSemanticallyEqual(other_found_key))
      {
        return false;
      }
    }
    return true;
  }

  /**
   * Returns the found key that carries the signature date.
   *
   * The list of found keys is searched in list order and the first key equal
   * to SignatureTypes.SIG_DATE is returned.
   *
   * @return Returns the SIG_DATE found key of this block.
   * @throws IllegalStateException
   *           Thrown, if the list of found keys contains no SIG_DATE key.
   */
  public FoundKey getDateFoundKey()
  {
    for (int i = 0; i < this.found_keys.size(); i++)
    {
      FoundKey found_key = (FoundKey) this.found_keys.get(i);
      if (found_key.key.equals(SignatureTypes.SIG_DATE))
      {
        return found_key;
      }
    }
    // IllegalStateException is a RuntimeException, so callers that catch
    // RuntimeException keep working; the message is unchanged.
    throw new IllegalStateException("There is no SIG_DATE in the list of found_keys. This must not happen.");
  }
}