From 6025b6016517c6d898d8957d1d7e03ba71431912 Mon Sep 17 00:00:00 2001 From: tknall Date: Fri, 1 Dec 2006 12:20:24 +0000 Subject: Initial import of release 2.2. git-svn-id: https://joinup.ec.europa.eu/svn/pdf-as/trunk@4 7b5415b0-85f9-ee4d-85bd-d5d0c3b42d1c --- .../pdfbox/util/TestTextStripperPerformance.java | 173 +++++++++++++++++++++ 1 file changed, 173 insertions(+) create mode 100644 src/main/java/test/pdfbox/util/TestTextStripperPerformance.java (limited to 'src/main/java/test/pdfbox/util/TestTextStripperPerformance.java') diff --git a/src/main/java/test/pdfbox/util/TestTextStripperPerformance.java b/src/main/java/test/pdfbox/util/TestTextStripperPerformance.java new file mode 100644 index 0000000..0aadb30 --- /dev/null +++ b/src/main/java/test/pdfbox/util/TestTextStripperPerformance.java @@ -0,0 +1,173 @@ +/** + * Copyright (c) 2003-2004, www.pdfbox.org + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * 3. Neither the name of pdfbox; nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
 * ANY THEORY OF LIABILIT, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * http://www.pdfbox.org
 */
package test.pdfbox.util;

import java.io.File;
import java.io.FilenameFilter;
import java.io.FileOutputStream;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.Writer;

import junit.framework.Test;
import junit.framework.TestCase;
import junit.framework.TestSuite;

import org.apache.log4j.Logger;

import org.pdfbox.pdmodel.PDDocument;

import org.pdfbox.util.PDFTextStripper;

/**
 * Test the performance of the PDF text stripper utility.
 *
 * <p>Every <code>.pdf</code> file found in <code>test/input</code> is run through
 * {@link PDFTextStripper} and the extracted text is written to
 * <code>output/&lt;name&gt;.txt</code> next to the input directory. The test makes
 * no assertions about the extracted text and records no timings itself; it
 * succeeds when extraction of every file completes without an exception.</p>
 *
 * @author Ben Litchfield (ben@csh.rit.edu)
 * @version $Revision: 1.2 $
 */
public class TestTextStripperPerformance extends TestCase
{
    private static final Logger log = Logger.getLogger(TestTextStripperPerformance.class);

    /**
     * Test class constructor.
     *
     * @param name The name of the test class.
     */
    public TestTextStripperPerformance( String name )
    {
        super( name );
    }

    /**
     * Test suite setup. Nothing needs to be prepared for this test.
     */
    public void setUp()
    {
    }


    /**
     * Extract the text of a single PDF file and write it to
     * <code>output/&lt;file name&gt;.txt</code>, resolved against the parent of the
     * directory that contains <code>file</code>.
     *
     * @param file The PDF file to extract text from.
     * @param bLogResult Whether to log the extracted text. Currently ignored:
     *                   the extracted text is only written to the output file.
     * @throws Exception when loading the document, extracting the text or
     *                   writing the output file fails.
     */
    public void doTestFile(File file, boolean bLogResult)
        throws Exception
    {
        PDFTextStripper stripper = new PDFTextStripper();
        PDDocument document = PDDocument.load(file);
        // Nested try/finally blocks guarantee that every resource opened so far is
        // closed even when closing one of the others throws.
        try
        {
            File outFile = new File(file.getParentFile().getParentFile(), "output/" + file.getName() + ".txt");
            OutputStream os = new FileOutputStream(outFile);
            try
            {
                Writer writer = new OutputStreamWriter(os);
                try
                {
                    stripper.writeText(document, writer);
                }
                finally
                {
                    writer.close();
                }
            }
            finally
            {
                os.close();
            }
        }
        finally
        {
            document.close();
        }
    }

    /**
     * Run text extraction over every PDF file in <code>test/input</code>.
     *
     * <p>When the system property <code>test.pdfbox.util.TextStripper.file</code>
     * is set to a non-empty value, no file is processed at all because the
     * single-file mode is disabled.</p>
     *
     * @throws Exception when extraction of any file fails.
     */
    public void testExtract()
        throws Exception
    {
        String filename = System.getProperty("test.pdfbox.util.TextStripper.file");
        File testDir = new File("test/input");

        if ((filename == null) || (filename.length() == 0))
        {
            File[] testFiles = testDir.listFiles(new FilenameFilter()
            {
                public boolean accept(File dir, String name)
                {
                    return (name.endsWith(".pdf"));
                }
            });

            // listFiles() returns null when testDir is not a directory or cannot
            // be read; report that explicitly instead of a NullPointerException.
            if (testFiles == null)
            {
                fail("Test input directory is missing or unreadable: " + testDir.getAbsolutePath());
            }

            for (int n = 0; n < testFiles.length; n++)
            {
                doTestFile(testFiles[n], false);
            }
        }
        else
        {
            // NOTE(review): single-file mode was already disabled in the original
            // source; it is kept disabled here. Confirm whether it should be restored.
            //doTestFile(new File(testDir, filename), true);
        }
    }

    /**
     * Set the tests in the suite for this test class.
     *
     * @return the Suite.
     */
    public static Test suite()
    {
        return new TestSuite( TestTextStripperPerformance.class );
    }

    /**
     * Command line execution. Runs this test class with the JUnit text runner;
     * the supplied arguments are not used.
     *
     * @param args Command line arguments.
     */
    public static void main( String[] args )
    {
        String[] arg = {TestTextStripperPerformance.class.getName() };
        junit.textui.TestRunner.main( arg );
    }
}