aboutsummaryrefslogtreecommitdiff
path: root/src/main/java/at/knowcenter/wag/egov/egiz/test/ExtractTextTextual.java
diff options
context:
space:
mode:
Diffstat (limited to 'src/main/java/at/knowcenter/wag/egov/egiz/test/ExtractTextTextual.java')
-rw-r--r-- src/main/java/at/knowcenter/wag/egov/egiz/test/ExtractTextTextual.java 62
1 files changed, 62 insertions, 0 deletions
diff --git a/src/main/java/at/knowcenter/wag/egov/egiz/test/ExtractTextTextual.java b/src/main/java/at/knowcenter/wag/egov/egiz/test/ExtractTextTextual.java
new file mode 100644
index 0000000..e2433b0
--- /dev/null
+++ b/src/main/java/at/knowcenter/wag/egov/egiz/test/ExtractTextTextual.java
@@ -0,0 +1,62 @@
+/**
+ * <copyright> Copyright (c) 2006 by Know-Center, Graz, Austria </copyright>
+ *
+ * This software is the confidential and proprietary information of Know-Center,
+ * Graz, Austria. You shall not disclose such Confidential Information and shall
+ * use it only in accordance with the terms of the license agreement you entered
+ * into with Know-Center.
+ *
+ * KNOW-CENTER MAKES NO REPRESENTATIONS OR WARRANTIES ABOUT THE SUITABILITY OF
+ * THE SOFTWARE, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, OR
+ * NON-INFRINGEMENT. KNOW-CENTER SHALL NOT BE LIABLE FOR ANY DAMAGES SUFFERED BY
+ * LICENSEE AS A RESULT OF USING, MODIFYING OR DISTRIBUTING THIS SOFTWARE OR ITS
+ * DERIVATIVES.
+ *
+ * $Id: ExtractTextTextual.java,v 1.1 2006/10/31 08:19:52 wprinz Exp $
+ */
+package at.knowcenter.wag.egov.egiz.test;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.IOException;
+
+import at.knowcenter.wag.egov.egiz.PdfAS;
+import at.knowcenter.wag.egov.egiz.cfg.SettingsReader;
+import at.knowcenter.wag.egov.egiz.exceptions.PresentableException;
+
+/**
+ * Test.
+ *
+ * @author wprinz
+ */
+public class ExtractTextTextual
+{
+
+ /**
+ * @param args
+ * @throws IOException
+ * @throws PresentableException
+ */
+ public static void main(String[] args) throws IOException, PresentableException
+ {
+ SettingsReader.initializeForCommandLine();
+
+ File in = new File(args[0]);
+ FileInputStream fis = new FileInputStream(in);
+ byte[] pdf = new byte[(int) in.length()];
+ fis.read(pdf);
+ fis.close();
+
+ String text = PdfAS.extractNormalizedTextTextual(pdf, pdf.length);
+
+ File out = new File(args[0] + ".txt");
+ FileOutputStream fos = new FileOutputStream(out);
+ fos.write(text.getBytes("UTF-8"));
+ fos.close();
+
+ System.out.println("finished. written to " + out.getAbsolutePath());
+ }
+
+}