aboutsummaryrefslogtreecommitdiff
path: root/src/main/java/test/pdfbox/util/TestTextStripperPerformance.java
diff options
context:
space:
mode:
Diffstat (limited to 'src/main/java/test/pdfbox/util/TestTextStripperPerformance.java')
-rw-r--r--src/main/java/test/pdfbox/util/TestTextStripperPerformance.java173
1 files changed, 173 insertions, 0 deletions
diff --git a/src/main/java/test/pdfbox/util/TestTextStripperPerformance.java b/src/main/java/test/pdfbox/util/TestTextStripperPerformance.java
new file mode 100644
index 0000000..0aadb30
--- /dev/null
+++ b/src/main/java/test/pdfbox/util/TestTextStripperPerformance.java
@@ -0,0 +1,173 @@
+/**
+ * Copyright (c) 2003-2004, www.pdfbox.org
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ * 3. Neither the name of pdfbox; nor the names of its
+ * contributors may be used to endorse or promote products derived from this
+ * software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * http://www.pdfbox.org
+ */
+package test.pdfbox.util;
+
+import java.io.File;
+import java.io.FilenameFilter;
+import java.io.FileOutputStream;
+import java.io.OutputStream;
+import java.io.OutputStreamWriter;
+import java.io.Writer;
+
+import junit.framework.Test;
+import junit.framework.TestCase;
+import junit.framework.TestSuite;
+
+import org.apache.log4j.Logger;
+
+import org.pdfbox.pdmodel.PDDocument;
+
+import org.pdfbox.util.PDFTextStripper;
+
+/**
+ * Test the performance of the PDF text stripper utility.
+ *
+ * Each PDF found in the <code>test/input</code> directory is loaded and run
+ * through a {@link PDFTextStripper}; the extracted text is written to an
+ * <code>output</code> directory that sits next to the input directory.  The
+ * class itself records no timings and compares no results: it only drives the
+ * extraction so that the run time of the test can be observed externally.
+ *
+ * @author Ben Litchfield (ben@csh.rit.edu)
+ * @version $Revision: 1.2 $
+ */
+public class TestTextStripperPerformance extends TestCase
+{
+    private static final Logger log = Logger.getLogger(TestTextStripperPerformance.class);
+
+    /**
+     * Test class constructor.
+     *
+     * @param name The name of the test class.
+     */
+    public TestTextStripperPerformance( String name )
+    {
+        super( name );
+    }
+
+    /**
+     * Test suite setup.  Nothing needs to be prepared for this test.
+     */
+    public void setUp()
+    {
+    }
+
+
+    /**
+     * Extract the text of a single PDF file and write it to
+     * <code>&lt;input dir&gt;/../output/&lt;file name&gt;.txt</code>.
+     *
+     * The document, the output stream and the writer are always released, even
+     * when closing one of them fails.
+     *
+     * @param file The PDF file to extract text from.  It must have a parent
+     *             directory which itself has a parent, because the output
+     *             location is resolved relative to the grandparent directory.
+     * @param bLogResult Whether to log the extracted text.  This flag is
+     *                   currently not read by the method; it is kept so that
+     *                   existing callers continue to compile.
+     * @throws Exception when the document cannot be loaded, the output file
+     *                   cannot be opened or written, or a resource fails to close.
+     */
+    public void doTestFile(File file, boolean bLogResult)
+        throws Exception
+    {
+        PDFTextStripper stripper = new PDFTextStripper();
+        OutputStream os = null;
+        Writer writer = null;
+        PDDocument document = null;
+        try
+        {
+            document = PDDocument.load(file);
+
+            File outFile = new File(file.getParentFile().getParentFile(), "output/" + file.getName() + ".txt");
+            os = new FileOutputStream(outFile);
+            writer = new OutputStreamWriter(os);
+
+            stripper.writeText(document, writer);
+        }
+        finally
+        {
+            // The closes are nested in try/finally so that an exception thrown
+            // while closing one resource cannot prevent the remaining resources
+            // (most importantly the PDDocument) from being closed.
+            try
+            {
+                if( writer != null )
+                {
+                    writer.close();
+                }
+            }
+            finally
+            {
+                try
+                {
+                    // Still required when the writer was never created.
+                    if( os != null )
+                    {
+                        os.close();
+                    }
+                }
+                finally
+                {
+                    if( document != null )
+                    {
+                        document.close();
+                    }
+                }
+            }
+        }
+    }
+
+    /**
+     * Run text extraction over every <code>*.pdf</code> file in
+     * <code>test/input</code>.
+     *
+     * When the system property <code>test.pdfbox.util.TextStripper.file</code>
+     * is set to a non-empty value, no file is processed at all.
+     *
+     * @throws Exception when extraction of any file fails.
+     */
+    public void testExtract()
+        throws Exception
+    {
+        String filename = System.getProperty("test.pdfbox.util.TextStripper.file");
+        File testDir = new File("test/input");
+
+        if ((filename == null) || (filename.length() == 0))
+        {
+            File[] testFiles = testDir.listFiles(new FilenameFilter()
+            {
+                public boolean accept(File dir, String name)
+                {
+                    return (name.endsWith(".pdf"));
+                }
+            });
+
+            // listFiles() returns null (not an empty array) when the path is
+            // not a directory or cannot be read; report that explicitly rather
+            // than failing with a NullPointerException below.
+            if (testFiles == null)
+            {
+                fail("Unable to list test input directory: " + testDir.getAbsolutePath());
+            }
+
+            for (int n = 0; n < testFiles.length; n++)
+            {
+                doTestFile(testFiles[n], false);
+            }
+        }
+        else
+        {
+            // A single file was requested through the system property.  The
+            // single-file run, doTestFile(new File(testDir, filename), true),
+            // is disabled for this performance test, so nothing is executed.
+            // NOTE(review): confirm that skipping the run here is intentional.
+        }
+    }
+
+    /**
+     * Set the tests in the suite for this test class.
+     *
+     * @return the Suite.
+     */
+    public static Test suite()
+    {
+        return new TestSuite( TestTextStripperPerformance.class );
+    }
+
+    /**
+     * Command line execution.  Runs this test class with the JUnit text runner.
+     *
+     * @param args Command line arguments (ignored).
+     */
+    public static void main( String[] args )
+    {
+        String[] arg = {TestTextStripperPerformance.class.getName() };
+        junit.textui.TestRunner.main( arg );
+    }
+} \ No newline at end of file