aboutsummaryrefslogtreecommitdiff
path: root/src/main/java/at/knowcenter/wag/egov
diff options
context:
space:
mode:
authorwprinz <wprinz@7b5415b0-85f9-ee4d-85bd-d5d0c3b42d1c>2008-05-30 09:55:02 +0000
committerwprinz <wprinz@7b5415b0-85f9-ee4d-85bd-d5d0c3b42d1c>2008-05-30 09:55:02 +0000
commit401225a15dd17795325d94fcfd151eac131edc8f (patch)
tree6da764d71bab4d56beecdeae94f6b1aa10c643e4 /src/main/java/at/knowcenter/wag/egov
parent6ff68a25387d1dc03a5614c27dd5b90569d44176 (diff)
downloadpdf-as-3-401225a15dd17795325d94fcfd151eac131edc8f.tar.gz
pdf-as-3-401225a15dd17795325d94fcfd151eac131edc8f.tar.bz2
pdf-as-3-401225a15dd17795325d94fcfd151eac131edc8f.zip
CR Unsichtbare Felder
git-svn-id: https://joinup.ec.europa.eu/svn/pdf-as/trunk@264 7b5415b0-85f9-ee4d-85bd-d5d0c3b42d1c
Diffstat (limited to 'src/main/java/at/knowcenter/wag/egov')
-rw-r--r--src/main/java/at/knowcenter/wag/egov/egiz/pdf/AbsoluteTextSignature.java47
1 files changed, 36 insertions, 11 deletions
diff --git a/src/main/java/at/knowcenter/wag/egov/egiz/pdf/AbsoluteTextSignature.java b/src/main/java/at/knowcenter/wag/egov/egiz/pdf/AbsoluteTextSignature.java
index 4b9a2aa..d67e67e 100644
--- a/src/main/java/at/knowcenter/wag/egov/egiz/pdf/AbsoluteTextSignature.java
+++ b/src/main/java/at/knowcenter/wag/egov/egiz/pdf/AbsoluteTextSignature.java
@@ -20,6 +20,7 @@ package at.knowcenter.wag.egov.egiz.pdf;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
+import java.util.Iterator;
import java.util.List;
import java.util.Vector;
@@ -49,6 +50,40 @@ public class AbsoluteTextSignature
private static final Logger logger = ConfigLogger.getLogger(AbsoluteTextSignature.class);
/**
+ * Returns a List of SignatureTypeDefinitions that can be extracted from text.
+ *
+ * <p>
+ * These SignatureTypeDefinitions are all text extractable, which means that they define all required fields as visible.
+ * </p>
+ * <p>
+ * This method filters out all SignatureTypeDefinitions that are not text extractable.
+ * </p>
+ *
+ * @return Returns a List of SignatureTypeDefinitions that can be extracted from text.
+ * @throws SignatureTypesException F.e.
+ */
+ public static List getSignatureTypesForTextAnalysis() throws SignatureTypesException
+ {
+   SignatureTypes sig_types = SignatureTypes.getInstance();
+   List allSignatureTypes = sig_types.getSignatureTypeDefinitions();
+
+   // Collect only the definitions that report themselves as text extractable.
+   // The capacity is an upper bound: at most every known definition qualifies.
+   List textSignatureTypes = new ArrayList(allSignatureTypes.size());
+   Iterator it = allSignatureTypes.iterator();
+   while (it.hasNext())
+   {
+     SignatureTypeDefinition std = (SignatureTypeDefinition) it.next();
+     if (!std.isTextExtractable())
+     {
+       logger.debug("The profile " + std.getType() + " is not text extractable and is thereby not used for text analysis.");
+       continue;
+     }
+     textSignatureTypes.add(std);
+   }
+
+   // Return the filtered list. Returning allSignatureTypes here would silently
+   // discard the filtering above and hand non-text-extractable profiles to the
+   // text analysis.
+   return textSignatureTypes;
+ }
+
+ /**
* Extracts all signature holders from a given text.
*
* <p>
@@ -132,18 +167,8 @@ public class AbsoluteTextSignature
*/
public static FoundBlock findLatestBlock(String text) throws SignatureException, SignatureTypesException
{
- // try
- // {
- // writeTextToFile(text, new File("C:\\wprinz\\text.utf8.txt"));
- // }
- // catch (IOException e)
- // {
- // e.printStackTrace();
- // }
+ List signatureTypes_ = getSignatureTypesForTextAnalysis();
- SignatureTypes sig_types = SignatureTypes.getInstance();
- List signatureTypes_ = sig_types.getSignatureTypeDefinitions();
-
List found_potential_candidates = new ArrayList();
for (int i = 0; i < signatureTypes_.size(); i++)