From 535a04fa05f739ec16dd81666e3b0f82dfbd442d Mon Sep 17 00:00:00 2001 From: tknall Date: Wed, 9 Jan 2013 15:41:29 +0000 Subject: pdf-as-lib maven project files moved to pdf-as-lib git-svn-id: https://joinup.ec.europa.eu/svn/pdf-as/pdf-as/trunk@926 7b5415b0-85f9-ee4d-85bd-d5d0c3b42d1c --- .../java/demo/ExtractNonTextualObjectsDemo.java | 83 ++++++++++++++++++++++ 1 file changed, 83 insertions(+) create mode 100644 pdf-as-lib/src/main/java/demo/ExtractNonTextualObjectsDemo.java (limited to 'pdf-as-lib/src/main/java/demo/ExtractNonTextualObjectsDemo.java') diff --git a/pdf-as-lib/src/main/java/demo/ExtractNonTextualObjectsDemo.java b/pdf-as-lib/src/main/java/demo/ExtractNonTextualObjectsDemo.java new file mode 100644 index 0000000..ee35f26 --- /dev/null +++ b/pdf-as-lib/src/main/java/demo/ExtractNonTextualObjectsDemo.java @@ -0,0 +1,83 @@ +/** + * Copyright 2006 by Know-Center, Graz, Austria + * PDF-AS has been contracted by the E-Government Innovation Center EGIZ, a + * joint initiative of the Federal Chancellery Austria and Graz University of + * Technology. + * + * Licensed under the EUPL, Version 1.1 or - as soon they will be approved by + * the European Commission - subsequent versions of the EUPL (the "Licence"); + * You may not use this work except in compliance with the Licence. + * You may obtain a copy of the Licence at: + * http://www.osor.eu/eupl/ + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the Licence is distributed on an "AS IS" basis, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the Licence for the specific language governing permissions and + * limitations under the Licence. + * + * This product combines work with different licenses. See the "NOTICE" text + * file for details on the various modules and licenses. + * The "NOTICE" text file is part of the distribution. 
/* Any derivative works that you distribute must include a readable copy of the "NOTICE" text file. */
package demo;

import java.io.File;
import java.io.IOException;
import java.util.Iterator;
import java.util.List;

import at.gv.egiz.pdfas.api.analyze.NonTextObjectInfo;
import at.gv.egiz.pdfas.api.io.DataSource;
import at.gv.egiz.pdfas.impl.api.commons.PdfDataSourceAdapter;
import at.gv.egiz.pdfas.io.FileBasedDataSource;
import at.knowcenter.wag.egov.egiz.PdfAS;

/**
 * Command-line demo that prints the non-textual objects reported by
 * {@code PdfAS.extractNonTextualObjects} for a single file.
 */
public class ExtractNonTextualObjectsDemo {

   /**
    * Disable instantiation.
    */
   private ExtractNonTextualObjectsDemo() {
   }

   /**
    * Starts a demo that extracts non-textual elements from a document.
    * <p>
    * The number of objects found and one line per object are written to
    * standard output. The process terminates with exit status 1 when no path
    * argument is supplied or when an {@link IOException} occurs while the
    * file is being processed.
    *
    * @param args
    *           The parameter(s). {@code args[0]} is the path of the file to be
    *           analyzed; any further arguments are ignored.
    */
   public static void main(String[] args) {

      if (args == null || args.length == 0) {
         System.err.println("Please provide path of file to be analyzed.");
         System.exit(1);
      }

      File testFile = new File(args[0]);

      try {

         // Wrap the file as a data source; "application/pdf" is handed to the
         // constructor as its second argument (presumably the MIME type).
         DataSource dataSource = new FileBasedDataSource(testFile, "application/pdf");

         // Raw types are kept on purpose: this file does not use generics, so
         // each element is cast explicitly below.
         List nonTextualObjects = PdfAS.extractNonTextualObjects(new PdfDataSourceAdapter(dataSource));

         if (nonTextualObjects == null || nonTextualObjects.isEmpty()) {
            System.out.println("No non-textual objects found.");
         } else {
            System.out.println(nonTextualObjects.size() + " non-textual object(s) found.");
            for (Iterator noit = nonTextualObjects.iterator(); noit.hasNext();) {
               NonTextObjectInfo info = (NonTextObjectInfo) noit.next();
               System.out.println(" -> " + info);
            }
         }

      } catch (IOException e) {
         // Report the failure and signal it through the exit status; without
         // the explicit exit the JVM would terminate with status 0 and a
         // calling script could not tell that the analysis failed.
         System.err.println("Unable to analyze file: " + testFile);
         e.printStackTrace();
         System.exit(1);
      }

   }

}