diff options
Diffstat (limited to 'src/main/java/at/knowcenter/wag')
-rw-r--r-- | src/main/java/at/knowcenter/wag/egov/egiz/pdf/ObjectExtractor.java | 33 |
1 files changed, 31 insertions, 2 deletions
diff --git a/src/main/java/at/knowcenter/wag/egov/egiz/pdf/ObjectExtractor.java b/src/main/java/at/knowcenter/wag/egov/egiz/pdf/ObjectExtractor.java index ecee0dd..cd6d449 100644 --- a/src/main/java/at/knowcenter/wag/egov/egiz/pdf/ObjectExtractor.java +++ b/src/main/java/at/knowcenter/wag/egov/egiz/pdf/ObjectExtractor.java @@ -7,6 +7,8 @@ import java.util.List; import java.util.Map;
import org.apache.log4j.Logger;
+import org.apache.pdfbox.cos.COSDictionary;
+import org.apache.pdfbox.cos.COSName;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.PDResources;
@@ -43,10 +45,16 @@ public class ObjectExtractor { try {
PDAnnotation anno = (PDAnnotation) it.next();
log.debug("found annotation: " +anno);
+ if (log.isTraceEnabled()) {
+ log.trace("annotation def: " + dictToString(anno.getDictionary()));
+ }
String ft = anno.getDictionary().getNameAsString("FT");
if (ft != null && ft.equals("Sig")) { // skip signature widgets
- log.debug("found signature widged, skip further extraction");
- break;
+ COSDictionary sigDict = (COSDictionary) anno.getDictionary().getDictionaryObject("V");
+ if (sigDict != null && AdobeSignatureHelper.ADOBE_SIG_FILTER.equals(sigDict.getNameAsString("Filter"))) {
+ log.debug("found PDF-AS signature widged, skip further extraction");
+ continue;
+ }
}
NonTextObjectInfo objInfo = new NonTextObjectInfo();
objInfo.setName(anno.getDictionary().getString( "NM" ));
@@ -68,6 +76,27 @@ public class ObjectExtractor { }
}
}
+
+ /**
+ * Builds a debug string listing every key/item pair of the given COSDictionary, in keyList() order.
+ * @param dict the dictionary to render; a null value ends up in the catch block below
+ * @return "COSDictionary{" + one "(key:item) " per entry + "}", or "no detail available" on any exception
+ */
+ public static String dictToString(COSDictionary dict)
+ {
+ try {
+ String retVal = "COSDictionary{";
+ for (int i = 0; i<dict.size(); i++) // index-based walk over the dictionary entries
+ {
+ COSName key = (COSName)dict.keyList().get(i); // keyList() is queried again on every iteration
+ retVal = retVal + "(" + key + ":" + dict.getItem(key) + ") "; // each entry is followed by one space
+ }
+ retVal = retVal + "}";
+ return retVal;
+ } catch (Exception e) { // best effort: this helper only feeds trace logging, so failures are not propagated
+ return "no detail available";
+ }
+ }
/**
* Find resources (images) in pdf documents
|