diff options
author | wprinz <wprinz@7b5415b0-85f9-ee4d-85bd-d5d0c3b42d1c> | 2008-05-30 09:55:02 +0000 |
---|---|---|
committer | wprinz <wprinz@7b5415b0-85f9-ee4d-85bd-d5d0c3b42d1c> | 2008-05-30 09:55:02 +0000 |
commit | 401225a15dd17795325d94fcfd151eac131edc8f (patch) | |
tree | 6da764d71bab4d56beecdeae94f6b1aa10c643e4 /src | |
parent | 6ff68a25387d1dc03a5614c27dd5b90569d44176 (diff) | |
download | pdf-as-3-401225a15dd17795325d94fcfd151eac131edc8f.tar.gz pdf-as-3-401225a15dd17795325d94fcfd151eac131edc8f.tar.bz2 pdf-as-3-401225a15dd17795325d94fcfd151eac131edc8f.zip |
CR Unsichtbare Felder
git-svn-id: https://joinup.ec.europa.eu/svn/pdf-as/trunk@264 7b5415b0-85f9-ee4d-85bd-d5d0c3b42d1c
Diffstat (limited to 'src')
-rw-r--r-- | src/main/java/at/knowcenter/wag/egov/egiz/pdf/AbsoluteTextSignature.java | 47 |
1 files changed, 36 insertions, 11 deletions
diff --git a/src/main/java/at/knowcenter/wag/egov/egiz/pdf/AbsoluteTextSignature.java b/src/main/java/at/knowcenter/wag/egov/egiz/pdf/AbsoluteTextSignature.java index 4b9a2aa..d67e67e 100644 --- a/src/main/java/at/knowcenter/wag/egov/egiz/pdf/AbsoluteTextSignature.java +++ b/src/main/java/at/knowcenter/wag/egov/egiz/pdf/AbsoluteTextSignature.java @@ -20,6 +20,7 @@ package at.knowcenter.wag.egov.egiz.pdf; import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
+import java.util.Iterator;
import java.util.List;
import java.util.Vector;
@@ -49,6 +50,40 @@ public class AbsoluteTextSignature private static final Logger logger = ConfigLogger.getLogger(AbsoluteTextSignature.class);
/**
+   * Returns the SignatureTypeDefinitions that can be used for text analysis.
+   *
+   * <p>
+   * A SignatureTypeDefinition is text extractable when it defines all required fields as visible.
+   * </p>
+   * <p>
+   * Definitions that are not text extractable are skipped and reported at debug level; the List obtained from SignatureTypes is left unmodified.
+   * </p>
+   *
+   * @return Returns a new List containing only the text extractable SignatureTypeDefinitions.
+   * @throws SignatureTypesException Thrown if the signature type definitions cannot be obtained.
+   */
+  public static List getSignatureTypesForTextAnalysis() throws SignatureTypesException
+  {
+    SignatureTypes sig_types = SignatureTypes.getInstance();
+    List allSignatureTypes = sig_types.getSignatureTypeDefinitions();
+
+    List textSignatureTypes = new ArrayList(allSignatureTypes.size());
+    Iterator it = allSignatureTypes.iterator();
+    while (it.hasNext())
+    {
+      SignatureTypeDefinition std = (SignatureTypeDefinition) it.next();
+      if (!std.isTextExtractable())
+      {
+        logger.debug("The profile " + std.getType() + " is not text extractable and is thereby not used for text analysis.");
+        continue;
+      }
+      textSignatureTypes.add(std);
+    }
+    // Hand back the filtered copy; returning allSignatureTypes would silently undo the filtering above.
+    return textSignatureTypes;
+  }
+
+ /**
* Extracts all signature holders from a given text.
*
* <p>
@@ -132,18 +167,8 @@ public class AbsoluteTextSignature */
public static FoundBlock findLatestBlock(String text) throws SignatureException, SignatureTypesException
{
- // try
- // {
- // writeTextToFile(text, new File("C:\\wprinz\\text.utf8.txt"));
- // }
- // catch (IOException e)
- // {
- // e.printStackTrace();
- // }
+ List signatureTypes_ = getSignatureTypesForTextAnalysis();
- SignatureTypes sig_types = SignatureTypes.getInstance();
- List signatureTypes_ = sig_types.getSignatureTypeDefinitions();
-
List found_potential_candidates = new ArrayList();
for (int i = 0; i < signatureTypes_.size(); i++)
|