/**
 * Copyright (c) 2006 by Know-Center, Graz, Austria
 *
 * This software is the confidential and proprietary information of Know-Center,
 * Graz, Austria. You shall not disclose such Confidential Information and shall
 * use it only in accordance with the terms of the license agreement you entered
 * into with Know-Center.
 *
 * KNOW-CENTER MAKES NO REPRESENTATIONS OR WARRANTIES ABOUT THE SUITABILITY OF
 * THE SOFTWARE, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, OR
 * NON-INFRINGEMENT. KNOW-CENTER SHALL NOT BE LIABLE FOR ANY DAMAGES SUFFERED BY
 * LICENSEE AS A RESULT OF USING, MODIFYING OR DISTRIBUTING THIS SOFTWARE OR ITS
 * DERIVATIVES.
 *
 * $Id: ExtractTextTextual.java,v 1.1 2006/10/31 08:19:52 wprinz Exp $
 */
package at.knowcenter.wag.egov.egiz.test;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;

import at.knowcenter.wag.egov.egiz.PdfAS;
import at.knowcenter.wag.egov.egiz.cfg.SettingsReader;
import at.knowcenter.wag.egov.egiz.exceptions.PresentableException;

/**
 * Command-line test driver: reads the file named by the first argument into
 * memory, passes the bytes to {@link PdfAS#extractNormalizedTextTextual}, and
 * writes the returned string, UTF-8 encoded, to a sibling file whose name is
 * the input path with ".txt" appended.
 *
 * @author wprinz
 */
public class ExtractTextTextual
{

  /**
   * Runs the extraction for a single input file.
   *
   * @param args
   *          command-line arguments; args[0] is the path of the input file
   * @throws IOException
   *           if the input cannot be read completely or the output cannot be
   *           written
   * @throws PresentableException
   *           declared by the project calls made here (presumably settings
   *           initialization and/or text extraction -- confirm against PdfAS)
   * @throws IllegalArgumentException
   *           if no input file path was supplied
   */
  public static void main(String[] args) throws IOException, PresentableException
  {
    // Fail with a usable message instead of an ArrayIndexOutOfBoundsException.
    if (args == null || args.length < 1)
    {
      throw new IllegalArgumentException("usage: ExtractTextTextual <input-file>");
    }

    SettingsReader.initializeForCommandLine();

    File in = new File(args[0]);
    byte[] pdf = readFully(in);

    String text = PdfAS.extractNormalizedTextTextual(pdf, pdf.length);

    File out = new File(args[0] + ".txt");
    writeFile(out, text.getBytes("UTF-8"));

    System.out.println("finished. written to " + out.getAbsolutePath());
  }

  /**
   * Reads the complete content of the given file into a byte array.
   *
   * @param file
   *          the file to read
   * @return the file content
   * @throws IOException
   *           if the file is larger than a byte array can hold, ends before
   *           the reported length was read, or cannot be read
   */
  private static byte[] readFully(File file) throws IOException
  {
    long length = file.length();
    // A plain (int) cast would silently truncate the length for huge files.
    if (length > Integer.MAX_VALUE)
    {
      throw new IOException("file too large to load into memory: " + file.getAbsolutePath());
    }

    byte[] data = new byte[(int) length];
    FileInputStream fis = new FileInputStream(file);
    try
    {
      // A single read() may return fewer bytes than requested, so keep
      // reading until the buffer is full.
      int offset = 0;
      while (offset < data.length)
      {
        int count = fis.read(data, offset, data.length - offset);
        if (count < 0)
        {
          throw new IOException("unexpected end of file after " + offset + " of " + data.length + " bytes: " + file.getAbsolutePath());
        }
        offset += count;
      }
    }
    finally
    {
      // Release the file handle even when reading fails.
      fis.close();
    }
    return data;
  }

  /**
   * Writes the given bytes to the given file, replacing any existing content.
   *
   * @param file
   *          the file to write
   * @param data
   *          the bytes to write
   * @throws IOException
   *           if the file cannot be opened or written
   */
  private static void writeFile(File file, byte[] data) throws IOException
  {
    FileOutputStream fos = new FileOutputStream(file);
    try
    {
      fos.write(data);
    }
    finally
    {
      // Release the file handle even when writing fails.
      fos.close();
    }
  }
}