/** * Copyright (c) 2003-2004, www.pdfbox.org * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * 3. Neither the name of pdfbox; nor the names of its * contributors may be used to endorse or promote products derived from this * software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* * http://www.pdfbox.org */
package test.pdfbox.util;

import java.io.File;
import java.io.FilenameFilter;
import java.io.FileOutputStream;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.Writer;

import junit.framework.Test;
import junit.framework.TestCase;
import junit.framework.TestSuite;

import org.apache.log4j.Logger;

import org.pdfbox.pdmodel.PDDocument;
import org.pdfbox.util.PDFTextStripper;

/**
 * Test the performance of the PDF text stripper utility.
 *
 * The test runs {@link PDFTextStripper} over every <code>*.pdf</code> file found in
 * <code>test/input</code> and writes the extracted text to a sibling <code>output</code>
 * directory. No timing is measured and the extracted text is not compared against
 * anything by this class itself.
 *
 * @author Ben Litchfield (ben@csh.rit.edu)
 * @version $Revision: 1.2 $
 */
public class TestTextStripperPerformance extends TestCase
{
    private static final Logger log = Logger.getLogger(TestTextStripperPerformance.class);

    /**
     * Test class constructor.
     *
     * @param name The name of the test class.
     */
    public TestTextStripperPerformance( String name )
    {
        super( name );
    }

    /**
     * Test suite setup. Nothing needs to be prepared for this test.
     */
    public void setUp()
    {
    }

    /**
     * Run text extraction on a single file and write the result to
     * <code>&lt;grandparent of file&gt;/output/&lt;file name&gt;.txt</code>.
     *
     * The writer, the underlying stream and the document are each closed even when
     * closing one of the others fails.
     *
     * @param file The PDF file to extract text from.
     * @param bLogResult Whether to log the extracted text; currently not used by this method.
     *
     * @throws Exception when loading, extracting, writing or closing fails.
     */
    public void doTestFile( File file, boolean bLogResult ) throws Exception
    {
        PDFTextStripper stripper = new PDFTextStripper();
        OutputStream os = null;
        Writer writer = null;
        PDDocument document = null;
        try
        {
            document = PDDocument.load( file );

            File outFile = new File( file.getParentFile().getParentFile(), "output/" + file.getName() + ".txt" );
            File outDir = outFile.getParentFile();
            if( outDir != null )
            {
                // Return value deliberately ignored: if the directory could not be
                // created, opening the stream below fails with a descriptive error.
                outDir.mkdirs();
            }

            os = new FileOutputStream( outFile );
            // Platform default charset, as before.
            writer = new OutputStreamWriter( os );

            stripper.writeText( document, writer );
        }
        finally
        {
            // Nested so that a failure while closing one resource cannot
            // prevent the remaining resources from being released.
            try
            {
                if( writer != null )
                {
                    writer.close();
                }
            }
            finally
            {
                try
                {
                    if( os != null )
                    {
                        os.close();
                    }
                }
                finally
                {
                    if( document != null )
                    {
                        document.close();
                    }
                }
            }
        }
    }

    /**
     * Run text extraction over the set of PDF files in <code>test/input</code>.
     *
     * When the system property <code>test.pdfbox.util.TextStripper.file</code> is set to a
     * non-empty value, no file is processed; the single-file run is disabled in this test
     * and only a log message is emitted.
     *
     * @throws Exception when there is an exception
     */
    public void testExtract() throws Exception
    {
        String filename = System.getProperty( "test.pdfbox.util.TextStripper.file" );
        File testDir = new File( "test/input" );

        if( (filename == null) || (filename.length() == 0) )
        {
            File[] testFiles = testDir.listFiles( new FilenameFilter()
            {
                public boolean accept( File dir, String name )
                {
                    return name.endsWith( ".pdf" );
                }
            } );

            // listFiles returns null when the path is not a directory or cannot be read;
            // report that clearly instead of failing with a NullPointerException.
            assertNotNull(
                "Test input directory is missing or unreadable: " + testDir.getAbsolutePath(),
                testFiles );

            for( int n = 0; n < testFiles.length; n++ )
            {
                doTestFile( testFiles[n], false );
            }
        }
        else
        {
            // NOTE(review): the single-file run was already disabled (commented out) in
            // this test; it is kept as a no-op, but made visible in the log.
            log.info( "Single-file mode requested (" + filename
                + "); the performance test does not run in this mode, skipping." );
        }
    }

    /**
     * Set the tests in the suite for this test class.
     *
     * @return the Suite.
     */
    public static Test suite()
    {
        return new TestSuite( TestTextStripperPerformance.class );
    }

    /**
     * Command line execution.
     *
     * @param args Command line arguments; they are ignored, the runner is always
     *             started with this class name only.
     */
    public static void main( String[] args )
    {
        String[] arg = { TestTextStripperPerformance.class.getName() };
        junit.textui.TestRunner.main( arg );
    }
}