From 6025b6016517c6d898d8957d1d7e03ba71431912 Mon Sep 17 00:00:00 2001 From: tknall Date: Fri, 1 Dec 2006 12:20:24 +0000 Subject: Initial import of release 2.2. git-svn-id: https://joinup.ec.europa.eu/svn/pdf-as/trunk@4 7b5415b0-85f9-ee4d-85bd-d5d0c3b42d1c --- .../wag/egov/egiz/test/ExtractTextTextual.java | 62 ++++++++++++++++++++++ 1 file changed, 62 insertions(+) create mode 100644 src/main/java/at/knowcenter/wag/egov/egiz/test/ExtractTextTextual.java (limited to 'src/main/java/at/knowcenter/wag/egov/egiz/test/ExtractTextTextual.java') diff --git a/src/main/java/at/knowcenter/wag/egov/egiz/test/ExtractTextTextual.java b/src/main/java/at/knowcenter/wag/egov/egiz/test/ExtractTextTextual.java new file mode 100644 index 0000000..e2433b0 --- /dev/null +++ b/src/main/java/at/knowcenter/wag/egov/egiz/test/ExtractTextTextual.java @@ -0,0 +1,62 @@ +/** + * Copyright (c) 2006 by Know-Center, Graz, Austria + * + * This software is the confidential and proprietary information of Know-Center, + * Graz, Austria. You shall not disclose such Confidential Information and shall + * use it only in accordance with the terms of the license agreement you entered + * into with Know-Center. + * + * KNOW-CENTER MAKES NO REPRESENTATIONS OR WARRANTIES ABOUT THE SUITABILITY OF + * THE SOFTWARE, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE + * IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, OR + * NON-INFRINGEMENT. KNOW-CENTER SHALL NOT BE LIABLE FOR ANY DAMAGES SUFFERED BY + * LICENSEE AS A RESULT OF USING, MODIFYING OR DISTRIBUTING THIS SOFTWARE OR ITS + * DERIVATIVES. + * + * $Id: ExtractTextTextual.java,v 1.1 2006/10/31 08:19:52 wprinz Exp $ + */ +package at.knowcenter.wag.egov.egiz.test; + +import java.io.File; +import java.io.FileInputStream; +import java.io.FileOutputStream; +import java.io.IOException; + +import at.knowcenter.wag.egov.egiz.PdfAS; +import at.knowcenter.wag.egov.egiz.cfg.SettingsReader; +import at.knowcenter.wag.egov.egiz.exceptions.PresentableException; + +/** + * Test. + * + * @author wprinz + */ +public class ExtractTextTextual +{ + + /** + * @param args + * @throws IOException + * @throws PresentableException + */ + public static void main(String[] args) throws IOException, PresentableException + { + SettingsReader.initializeForCommandLine(); + + File in = new File(args[0]); + FileInputStream fis = new FileInputStream(in); + byte[] pdf = new byte[(int) in.length()]; + fis.read(pdf); + fis.close(); + + String text = PdfAS.extractNormalizedTextTextual(pdf, pdf.length); + + File out = new File(args[0] + ".txt"); + FileOutputStream fos = new FileOutputStream(out); + fos.write(text.getBytes("UTF-8")); + fos.close(); + + System.out.println("finished. written to " + out.getAbsolutePath()); + } + +} -- cgit v1.2.3