aboutsummaryrefslogtreecommitdiff
path: root/pdf-as-lib/src/main/java/demo/ExtractNonTextualObjectsDemo.java
diff options
context:
space:
mode:
Diffstat (limited to 'pdf-as-lib/src/main/java/demo/ExtractNonTextualObjectsDemo.java')
-rw-r--r-- pdf-as-lib/src/main/java/demo/ExtractNonTextualObjectsDemo.java 83
1 files changed, 83 insertions, 0 deletions
diff --git a/pdf-as-lib/src/main/java/demo/ExtractNonTextualObjectsDemo.java b/pdf-as-lib/src/main/java/demo/ExtractNonTextualObjectsDemo.java
new file mode 100644
index 0000000..ee35f26
--- /dev/null
+++ b/pdf-as-lib/src/main/java/demo/ExtractNonTextualObjectsDemo.java
@@ -0,0 +1,83 @@
+/**
+ * <copyright> Copyright 2006 by Know-Center, Graz, Austria </copyright>
+ * PDF-AS has been contracted by the E-Government Innovation Center EGIZ, a
+ * joint initiative of the Federal Chancellery Austria and Graz University of
+ * Technology.
+ *
+ * Licensed under the EUPL, Version 1.1 or - as soon they will be approved by
+ * the European Commission - subsequent versions of the EUPL (the "Licence");
+ * You may not use this work except in compliance with the Licence.
+ * You may obtain a copy of the Licence at:
+ * http://www.osor.eu/eupl/
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the Licence is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the Licence for the specific language governing permissions and
+ * limitations under the Licence.
+ *
+ * This product combines work with different licenses. See the "NOTICE" text
+ * file for details on the various modules and licenses.
+ * The "NOTICE" text file is part of the distribution. Any derivative works
+ * that you distribute must include a readable copy of the "NOTICE" text file.
+ */
+package demo;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.Iterator;
+import java.util.List;
+
+import at.gv.egiz.pdfas.api.analyze.NonTextObjectInfo;
+import at.gv.egiz.pdfas.api.io.DataSource;
+import at.gv.egiz.pdfas.impl.api.commons.PdfDataSourceAdapter;
+import at.gv.egiz.pdfas.io.FileBasedDataSource;
+import at.knowcenter.wag.egov.egiz.PdfAS;
+
+/**
+ * Command-line demo that lists the non-textual objects of one PDF document
+ * using {@link PdfAS#extractNonTextualObjects}. The class is not instantiable.
+ */
+public class ExtractNonTextualObjectsDemo {
+
+  /** Disable instantiation. */
+  private ExtractNonTextualObjectsDemo() {
+  }
+
+  /**
+   * Analyzes the file named by the first argument and prints one line per
+   * non-textual object found to standard output. Exits with status 1 when no
+   * path is given or when an {@link IOException} occurs during the analysis.
+   *
+   * @param args
+   *          {@code args[0]} is the path of the file to be analyzed.
+   */
+  public static void main(String[] args) {
+    if (args == null || args.length == 0) {
+      System.err.println("Please provide path of file to be analyzed.");
+      System.exit(1);
+    }
+    File testFile = new File(args[0]);
+    try {
+      // Wrap the file as a PDF data source and adapt it for the extractor call.
+      DataSource dataSource = new FileBasedDataSource(testFile, "application/pdf");
+      List nonTextualObjects = PdfAS.extractNonTextualObjects(new PdfDataSourceAdapter(dataSource));
+      if (nonTextualObjects != null && !nonTextualObjects.isEmpty()) {
+        System.out.println(nonTextualObjects.size() + " non-textual object(s) found.");
+        Iterator noit = nonTextualObjects.iterator();
+        while (noit.hasNext()) {
+          NonTextObjectInfo info = (NonTextObjectInfo) noit.next();
+          // Concatenation is null-safe; an explicit toString() would throw on a null entry.
+          System.out.println(" -> " + info);
+        }
+      } else {
+        System.out.println("No non-textual objects found.");
+      }
+    } catch (IOException e) {
+      // Name the failing file and signal the failure through the exit status.
+      System.err.println("Unable to analyze '" + testFile + "': " + e.getMessage());
+      e.printStackTrace();
+      System.exit(1);
+    }
+  }
+}