From 6025b6016517c6d898d8957d1d7e03ba71431912 Mon Sep 17 00:00:00 2001 From: tknall Date: Fri, 1 Dec 2006 12:20:24 +0000 Subject: Initial import of release 2.2. git-svn-id: https://joinup.ec.europa.eu/svn/pdf-as/trunk@4 7b5415b0-85f9-ee4d-85bd-d5d0c3b42d1c --- src/main/java/test/pdfbox/TestAll.java | 88 +++++ src/main/java/test/pdfbox/cos/TestCOSString.java | 92 +++++ src/main/java/test/pdfbox/cos/package.html | 9 + .../test/pdfbox/encryption/TestEncryption.java | 179 ++++++++++ src/main/java/test/pdfbox/encryption/package.html | 9 + src/main/java/test/pdfbox/filter/TestFilters.java | 182 ++++++++++ src/main/java/test/pdfbox/filter/package.html | 9 + src/main/java/test/pdfbox/package.html | 9 + .../java/test/pdfbox/pdfparser/TestPDFParser.java | 126 +++++++ src/main/java/test/pdfbox/pdfparser/package.html | 9 + src/main/java/test/pdfbox/pdmodel/TestFDF.java | 292 ++++++++++++++++ .../pdmodel/interactive/form/TestFields.java | 144 ++++++++ .../pdfbox/pdmodel/interactive/form/package.html | 9 + src/main/java/test/pdfbox/pdmodel/package.html | 9 + .../java/test/pdfbox/util/TestTextStripper.java | 371 +++++++++++++++++++++ .../pdfbox/util/TestTextStripperPerformance.java | 173 ++++++++++ src/main/java/test/pdfbox/util/package.html | 9 + 17 files changed, 1719 insertions(+) create mode 100644 src/main/java/test/pdfbox/TestAll.java create mode 100644 src/main/java/test/pdfbox/cos/TestCOSString.java create mode 100644 src/main/java/test/pdfbox/cos/package.html create mode 100644 src/main/java/test/pdfbox/encryption/TestEncryption.java create mode 100644 src/main/java/test/pdfbox/encryption/package.html create mode 100644 src/main/java/test/pdfbox/filter/TestFilters.java create mode 100644 src/main/java/test/pdfbox/filter/package.html create mode 100644 src/main/java/test/pdfbox/package.html create mode 100644 src/main/java/test/pdfbox/pdfparser/TestPDFParser.java create mode 100644 src/main/java/test/pdfbox/pdfparser/package.html create mode 100644 
src/main/java/test/pdfbox/pdmodel/TestFDF.java create mode 100644 src/main/java/test/pdfbox/pdmodel/interactive/form/TestFields.java create mode 100644 src/main/java/test/pdfbox/pdmodel/interactive/form/package.html create mode 100644 src/main/java/test/pdfbox/pdmodel/package.html create mode 100644 src/main/java/test/pdfbox/util/TestTextStripper.java create mode 100644 src/main/java/test/pdfbox/util/TestTextStripperPerformance.java create mode 100644 src/main/java/test/pdfbox/util/package.html (limited to 'src/main/java/test') diff --git a/src/main/java/test/pdfbox/TestAll.java b/src/main/java/test/pdfbox/TestAll.java new file mode 100644 index 0000000..c1882fc --- /dev/null +++ b/src/main/java/test/pdfbox/TestAll.java @@ -0,0 +1,88 @@ +/** + * Copyright (c) 2003-2004, www.pdfbox.org + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * 3. Neither the name of pdfbox; nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * http://www.pdfbox.org + * + */ +package test.pdfbox; + +import test.pdfbox.cos.TestCOSString; +import test.pdfbox.encryption.TestEncryption; +import test.pdfbox.filter.TestFilters; +import test.pdfbox.pdmodel.TestFDF; +import test.pdfbox.pdmodel.interactive.form.TestFields; +import junit.framework.Test; +import junit.framework.TestCase; +import junit.framework.TestSuite; + +/** + * This is a holder for all test cases in the pdfbox system. + * + * @author Ben Litchfield (ben@csh.rit.edu) + * @version $Revision: 1.6 $ + */ +public class TestAll extends TestCase +{ + + /** + * Constructor. + * + * @param name The name of the test to run. + */ + public TestAll( String name ) + { + super( name ); + } + + /** + * The main method to run tests. + * + * @param args The command line arguments. + */ + public static void main( String[] args ) + { + String[] arg = {TestAll.class.getName() }; + junit.textui.TestRunner.main( arg ); + } + + /** + * This will get the suite of test that this class holds. + * + * @return All of the tests that this class holds. 
+ */ + public static Test suite() + { + TestSuite suite = new TestSuite(); + suite.addTest( TestFilters.suite() ); + suite.addTest( TestFDF.suite() ); + suite.addTest( TestFields.suite() ); + suite.addTest( TestEncryption.suite() ); + suite.addTest( TestFDF.suite() ); + suite.addTest( TestCOSString.suite() ); + return suite; + } +} \ No newline at end of file diff --git a/src/main/java/test/pdfbox/cos/TestCOSString.java b/src/main/java/test/pdfbox/cos/TestCOSString.java new file mode 100644 index 0000000..19b3ca0 --- /dev/null +++ b/src/main/java/test/pdfbox/cos/TestCOSString.java @@ -0,0 +1,92 @@ +/** + * Copyright (c) 2003-2004, www.pdfbox.org + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * 3. Neither the name of pdfbox; nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * http://www.pdfbox.org + * + */ +package test.pdfbox.cos; + +import java.io.IOException; + +import junit.framework.Test; +import junit.framework.TestCase; +import junit.framework.TestSuite; + +import org.pdfbox.cos.COSString; + +/** + * This will test the COSString class in PDFBox. + * + * @author Ben Litchfield (ben@csh.rit.edu) + * @version $Revision$ + */ +public class TestCOSString extends TestCase +{ + + /** + * Constructor. + * + * @param name The name of the test to run. + */ + public TestCOSString( String name ) + { + super( name ); + } + + /** + * This will get the suite of test that this class holds. + * + * @return All of the tests that this class holds. + */ + public static Test suite() + { + return new TestSuite( TestCOSString.class ); + } + + /** + * infamous main method. + * + * @param args The command line arguments. + */ + public static void main( String[] args ) + { + String[] arg = {TestCOSString.class.getName() }; + junit.textui.TestRunner.main( arg ); + } + + /** + * This will test that a unicode string is returned unchanged by a COSString. + * + * @throws IOException If there is an exception while encoding. 
+ */ + public void testUnicode() throws IOException + { + String theString = "\u4e16"; + COSString string = new COSString( theString ); + assertTrue( string.getString().equals( theString ) ); + } +} \ No newline at end of file diff --git a/src/main/java/test/pdfbox/cos/package.html b/src/main/java/test/pdfbox/cos/package.html new file mode 100644 index 0000000..c64f742 --- /dev/null +++ b/src/main/java/test/pdfbox/cos/package.html @@ -0,0 +1,9 @@ + + + + + + +These classes will be used to test the various COS objects that make up the core of PDFBox. + + diff --git a/src/main/java/test/pdfbox/encryption/TestEncryption.java b/src/main/java/test/pdfbox/encryption/TestEncryption.java new file mode 100644 index 0000000..e13de2a --- /dev/null +++ b/src/main/java/test/pdfbox/encryption/TestEncryption.java @@ -0,0 +1,179 @@ +/** + * Copyright (c) 2004, www.pdfbox.org + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * 3. Neither the name of pdfbox; nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * http://www.pdfbox.org + * + */ +package test.pdfbox.encryption; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; + +import junit.framework.Test; +import junit.framework.TestCase; +import junit.framework.TestSuite; + +import org.apache.log4j.Logger; + +import org.pdfbox.encryption.PDFEncryption; + +/** + * This will test the encryption algorithms in PDFBox. + * + * @author Ben Litchfield (ben@csh.rit.edu) + * @version $Revision: 1.3 $ + */ +public class TestEncryption extends TestCase +{ + private static Logger log = Logger.getLogger(TestEncryption.class); + + /** + * Constructor. + * + * @param name The name of the test to run. + */ + public TestEncryption( String name ) + { + super( name ); + } + + /** + * This will get the suite of test that this class holds. + * + * @return All of the tests that this class holds. + */ + public static Test suite() + { + return new TestSuite( TestEncryption.class ); + } + + /** + * infamous main method. + * + * @param args The command line arguments. + */ + public static void main( String[] args ) + { + String[] arg = {TestEncryption.class.getName() }; + junit.textui.TestRunner.main( arg ); + } + + /** + * This will test some simple encryption. + * + * @throws Exception If there is an exception while encrypting. 
+ */ + public void testEncryption() throws Exception + { + byte[] key={0x65, 0x3d, 0x4f, 0x70, 0x0c }; + byte[] data={0x31, 0x20, 0x30, 0x20, 0x30, 0x20, 0x72, 0x67, 0x20, 0x30, + 0x20, 0x30, 0x20, 0x33, 0x30, 0x38, 0x2e, 0x34, 0x37, 0x34, + 0x37, 0x20, 0x35, 0x37, 0x2e, 0x36, 0x32, 0x37, 0x31, 0x20, + 0x72, 0x65, 0x20, 0x66, 0x20, 0x30, 0x20, 0x47, 0x20, 0x31, + 0x20, 0x77, 0x20, 0x30, 0x2e, 0x35, 0x20, 0x30, 0x2e, 0x35, + 0x20, 0x33, 0x30, 0x37, 0x2e, 0x34, 0x37, 0x34, 0x37, 0x20, + 0x35, 0x36, 0x2e, 0x36, 0x32, 0x37, 0x31, 0x20, 0x72, 0x65, + 0x20, 0x73, 0x20, 0x2f, 0x54, 0x78, 0x20, 0x42, 0x4d, 0x43, + 0x20, 0x71, 0x20, 0x31, 0x20, 0x31, 0x20, 0x33, 0x30, 0x36, + 0x2e, 0x34, 0x37, 0x34, 0x37, 0x20, 0x35, 0x35, 0x2e, 0x36, + 0x32, 0x37, 0x31, 0x20, 0x72, 0x65, 0x20, 0x57, 0x20, 0x6e, + 0x20, 0x30, 0x20, 0x67, 0x20, 0x42, 0x54, 0x0a, 0x2f, 0x48, + 0x65, 0x6c, 0x76, 0x20, 0x31, 0x30, 0x20, 0x54, 0x66, 0x0a, + 0x32, 0x20, 0x32, 0x35, 0x2e, 0x31, 0x30, 0x33, 0x35, 0x20, + 0x54, 0x64, 0x0a, 0x31, 0x31, 0x2e, 0x35, 0x35, 0x39, 0x39, + 0x20, 0x54, 0x4c, 0x0a, 0x28, 0x2d, 0x2d, 0x5c, 0x30, 0x34, + 0x30, 0x44, 0x65, 0x66, 0x61, 0x75, 0x6c, 0x74, 0x5c, 0x30, + 0x34, 0x30, 0x66, 0x69, 0x65, 0x6c, 0x64, 0x5c, 0x30, 0x34, + 0x30, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x5c, 0x30, 0x34, 0x30, + 0x2d, 0x2d, 0x29, 0x20, 0x54, 0x6a, 0x0a, 0x45, 0x54, 0x0a, + 0x20, 0x51, 0x20, 0x45, 0x4d, 0x43, (byte)0x8a, 0x0d, 0x0a + }; + PDFEncryption enc = new PDFEncryption(); + ByteArrayOutputStream output = new ByteArrayOutputStream(); + enc.encryptData( 43, 0, key, new ByteArrayInputStream( data ), output ); + + byte[] encrypted = output.toByteArray(); + printHexString( encrypted ); + + ByteArrayOutputStream sameAsInput = new ByteArrayOutputStream(); + enc.encryptData( 43, 0, key, new ByteArrayInputStream( encrypted ), sameAsInput ); + byte[] dataAgain = sameAsInput.toByteArray(); + cmpArray( data, dataAgain ); + } + + /** + * This will compare a couple of arrays and fail if they do not match. 
+ * + * @param firstArray The first array. + * @param secondArray The second array. + */ + private void cmpArray( byte[] firstArray, byte[] secondArray ) + { + if( firstArray.length != secondArray.length ) + { + fail( "The array lengths do not match for " + + ", firstArray length was: " + firstArray.length + + ", secondArray length was: " + secondArray.length); + } + + for( int i=0; i + + + + + +These classes will be used to test the various encryption algorithms that are used in PDFBox. + + diff --git a/src/main/java/test/pdfbox/filter/TestFilters.java b/src/main/java/test/pdfbox/filter/TestFilters.java new file mode 100644 index 0000000..db124eb --- /dev/null +++ b/src/main/java/test/pdfbox/filter/TestFilters.java @@ -0,0 +1,182 @@ +/** + * Copyright (c) 2003-2004, www.pdfbox.org + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * 3. Neither the name of pdfbox; nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * http://www.pdfbox.org + * + */ +package test.pdfbox.filter; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.File; +import java.io.FileInputStream; + +import java.util.Collection; +import java.util.Iterator; + +import junit.framework.Test; +import junit.framework.TestCase; +import junit.framework.TestSuite; + +import org.apache.log4j.Logger; + +import org.pdfbox.cos.COSDictionary; + +import org.pdfbox.filter.DCTFilter; +import org.pdfbox.filter.CCITTFaxDecodeFilter; +import org.pdfbox.filter.Filter; +import org.pdfbox.filter.FilterManager; +import org.pdfbox.filter.RunLengthDecodeFilter; + +/** + * This will test all of the filters in the PDFBox system. + * + * @author Ben Litchfield (ben@csh.rit.edu) + * @version $Revision: 1.5 $ + */ +public class TestFilters extends TestCase +{ + private static Logger log = Logger.getLogger(TestFilters.class); + + private static final int BUFFER_SIZE = 2048; + private static final COSDictionary EMPTY_DICTIONARY = new COSDictionary(); + + /** + * Constructor. + * + * @param name The name of the test to run. + */ + public TestFilters( String name ) + { + super( name ); + } + + /** + * This will get the suite of test that this class holds. + * + * @return All of the tests that this class holds. 
+ */ + public static Test suite() + { + return new TestSuite( TestFilters.class ); + } + + /** + * This will test all of the filters in the system. + * + * @throws IOException If there is an exception while encoding. + */ + public void testFilters() throws IOException + { + FilterManager manager = new FilterManager(); + Collection filters = manager.getFilters(); + + Iterator filterIter = filters.iterator(); + while( filterIter.hasNext() ) + { + long start = System.currentTimeMillis(); + Filter filter = (Filter)filterIter.next(); + if( !(filter instanceof DCTFilter || + filter instanceof CCITTFaxDecodeFilter || + filter instanceof RunLengthDecodeFilter)) + { + checkFilter( new File( "classes" ), filter ); + long stop = System.currentTimeMillis(); + System.out.println( "Time for filter " + filter.getClass().getName() + "=" + (stop-start) ); + } + } + } + + /** + * This will check the filter. + * + * @param file The file or directory to test. + * @param filter The filter to check. + * + * @throws IOException If there is an exception while encoding. + */ + private void checkFilter( File file, Filter filter ) throws IOException + { + if( file.isDirectory() ) + { + File[] subFiles = file.listFiles(); + for( int i=0; i + + + + + +These classes will be used to test the various filters that are available with PDFBox. + + diff --git a/src/main/java/test/pdfbox/package.html b/src/main/java/test/pdfbox/package.html new file mode 100644 index 0000000..2d5d590 --- /dev/null +++ b/src/main/java/test/pdfbox/package.html @@ -0,0 +1,9 @@ + + + + + + +This holds classes that will be used to test all of PDFBox. + + diff --git a/src/main/java/test/pdfbox/pdfparser/TestPDFParser.java b/src/main/java/test/pdfbox/pdfparser/TestPDFParser.java new file mode 100644 index 0000000..e935af3 --- /dev/null +++ b/src/main/java/test/pdfbox/pdfparser/TestPDFParser.java @@ -0,0 +1,126 @@ +/** + * Copyright (c) 2005, www.pdfbox.org + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * 3. Neither the name of pdfbox; nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * http://www.pdfbox.org + * + */ +package test.pdfbox.pdfparser; + +import java.io.ByteArrayInputStream; +import java.io.IOException; +import java.io.InputStream; + +import junit.framework.Test; +import junit.framework.TestCase; +import junit.framework.TestSuite; + +import org.pdfbox.cos.COSName; + +import org.pdfbox.pdfparser.BaseParser; + + +/** + * This will test the PDF parsing in PDFBox. 
+ * + * @author Ben Litchfield (ben@benlitchfield.com) + * @version $Revision: 1.1 $ + */ +public class TestPDFParser extends TestCase +{ + //private static Logger log = Logger.getLogger(TestFDF.class); + + /** + * Constructor. + * + * @param name The name of the test to run. + */ + public TestPDFParser( String name ) + { + super( name ); + } + + /** + * This will get the suite of test that this class holds. + * + * @return All of the tests that this class holds. + */ + public static Test suite() + { + return new TestSuite( TestPDFParser.class ); + } + + /** + * infamous main method. + * + * @param args The command line arguments. + */ + public static void main( String[] args ) + { + String[] arg = {TestPDFParser.class.getName() }; + junit.textui.TestRunner.main( arg ); + } + + /** + * This will test some cos name parsing. + * + * @throws Exception If there is an exception while parsing. + */ + public void testCOSName() throws Exception + { + TestParser parser = new TestParser(new ByteArrayInputStream( "/PANTONE#20116#20CV".getBytes() ) ); + COSName name = parser.parseCOSName(); + assertTrue("Failed to parse COSName",name.getName().equals( "PANTONE 116 CV" )); + + } + + /** + * A simple class used to test parsing of the cos name. + */ + private class TestParser extends BaseParser + { + /** + * Constructor. + * @param input The input stream. + * @throws IOException If there is an error during parsing. + */ + public TestParser( InputStream input) throws IOException + { + super( input ); + } + + /** + * Expose the parseCOSName as public. + * + * @return The parsed cos name. + * @throws IOException If there is an error parsing the COSName. 
+ */ + public COSName parseCOSName() throws IOException + { + return super.parseCOSName(); + } + } +} \ No newline at end of file diff --git a/src/main/java/test/pdfbox/pdfparser/package.html b/src/main/java/test/pdfbox/pdfparser/package.html new file mode 100644 index 0000000..d723694 --- /dev/null +++ b/src/main/java/test/pdfbox/pdfparser/package.html @@ -0,0 +1,9 @@ + + + + + + +This holds classes that will be used to test the pdf parsing classes. + + diff --git a/src/main/java/test/pdfbox/pdmodel/TestFDF.java b/src/main/java/test/pdfbox/pdmodel/TestFDF.java new file mode 100644 index 0000000..d17ed11 --- /dev/null +++ b/src/main/java/test/pdfbox/pdmodel/TestFDF.java @@ -0,0 +1,292 @@ +/** + * Copyright (c) 2005, www.pdfbox.org + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * 3. Neither the name of pdfbox; nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * http://www.pdfbox.org + * + */ +package test.pdfbox.pdmodel; + +import java.io.ByteArrayInputStream; +import java.io.IOException; +import java.util.List; +import java.util.Map; + +import junit.framework.Test; +import junit.framework.TestCase; +import junit.framework.TestSuite; + +import org.pdfbox.cos.COSStream; +import org.pdfbox.cos.COSString; +import org.pdfbox.pdfparser.PDFStreamParser; +import org.pdfbox.pdmodel.PDDocument; +import org.pdfbox.pdmodel.fdf.FDFDocument; +import org.pdfbox.pdmodel.interactive.annotation.PDAppearanceStream; +import org.pdfbox.pdmodel.interactive.annotation.PDAnnotationWidget; +import org.pdfbox.pdmodel.interactive.form.PDAcroForm; +import org.pdfbox.pdmodel.interactive.form.PDField; +import org.pdfbox.pdmodel.interactive.form.PDRadioCollection; +import org.pdfbox.pdmodel.interactive.form.PDTextbox; + +/** + * This will test the FDF algorithms in PDFBox. + * + * @author Ben Litchfield (ben@csh.rit.edu) + * @version $Revision: 1.5 $ + */ +public class TestFDF extends TestCase +{ + //private static Logger log = Logger.getLogger(TestFDF.class); + + /** + * Constructor. + * + * @param name The name of the test to run. + */ + public TestFDF( String name ) + { + super( name ); + } + + /** + * This will get the suite of test that this class holds. + * + * @return All of the tests that this class holds. 
+ */ + public static Test suite() + { + return new TestSuite( TestFDF.class ); + } + + /** + * infamous main method. + * + * @param args The command line arguments. + */ + public static void main( String[] args ) + { + String[] arg = {TestFDF.class.getName() }; + junit.textui.TestRunner.main( arg ); + } + + /** + * This will test some simple field setting. + * + * @throws Exception If there is an exception while encrypting. + */ + public void testFDFfdeb() throws Exception + { + PDDocument fdeb = null; + try + { + fdeb = PDDocument.load( "test/input/fdeb.pdf" ); + PDAcroForm form = fdeb.getDocumentCatalog().getAcroForm(); + PDTextbox field = (PDTextbox)form.getField( "f67_1" ); + field.setValue( "2" ); + + String expected = + "/Tx BMC " + + "BT " + + "/Helv 9 Tf " + + " 0 g " + + " 2 1.985585 Td " + + "2.07698 0 Td " + + "(2) Tj " + + "ET " + + "EMC"; + + testContentStreams( fdeb, field, expected ); + } + finally + { + fdeb.close(); + } + + } + + /** + * This will test a pdf with lots of fields. + * + * @throws Exception If there is an exception while encrypting. 
+ */ + public void testFDFPDFWithLotsOfFields() throws Exception + { + PDDocument fdeb = null; + try + { + fdeb = PDDocument.load( "test/input/pdf_with_lots_of_fields.pdf" ); + PDAcroForm form = fdeb.getDocumentCatalog().getAcroForm(); + PDTextbox feld2 = (PDTextbox)form.getField( "Feld.2" ); + feld2.setValue( "Benjamin" ); + + String expected = + "1 1 0.8000000119 rg " + + " 0 0 127.5 19.8299999237 re " + + " f " + + " 0 0 0 RG " + + " 1 w " + + " 0.5 0.5 126.5 18.8299999237 re " + + " S " + + " 0.5 g " + + " 1 1 m " + + " 1 18.8299999237 l " + + " 126.5 18.8299999237 l " + + " 125.5 17.8299999237 l " + + " 2 17.8299999237 l " + + " 2 2 l " + + " 1 1 l " + + " f " + + " 0.75 g " + + " 1 1 m " + + " 126.5 1 l " + + " 126.5 18.8299999237 l " + + " 125.5 17.8299999237 l " + + " 125.5 2 l " + + " 2 2 l " + + " 1 1 l " + + " f " + + " /Tx BMC " + + "BT " + + "/Helv 14 Tf " + + " 0 0 0 rg " + + " 4 4.721 Td " + + "(Benjamin) Tj " + + "ET " + + "EMC"; + + testContentStreams( fdeb, feld2, expected ); + + PDRadioCollection feld3 = (PDRadioCollection)form.getField( "Feld.3" ); + feld3.setValue("RB1"); + assertEquals( "RB1", feld3.getValue() ); + //assertEquals( ((PDCheckbox)feld3.getKids().get( 0 )).getValue(), "RB1" ); + + } + finally + { + fdeb.close(); + } + } + + /** + * This will test the Freedom pdf. + * + * @throws Exception If there is an error while testing. 
+ */ + public void testFDFFreedomExpressions() throws Exception + { + PDDocument freedom = null; + FDFDocument fdf = null; + try + { + freedom = PDDocument.load( "test/input/FreedomExpressions.pdf" ); + fdf = FDFDocument.load( "test/input/FreedomExpressions.fdf" ); + PDAcroForm form = freedom.getDocumentCatalog().getAcroForm(); + form.importFDF( fdf ); + PDTextbox feld2 = (PDTextbox)form.getField( "eeFirstName" ); + List kids = feld2.getKids(); + PDField firstKid = (PDField)kids.get( 0 ); + PDField secondKid = (PDField)kids.get( 1 ); + testContentStreamContains( freedom, firstKid, "Steve" ); + testContentStreamContains( freedom, secondKid, "Steve" ); + + //the appearance stream is supposed to be null because there + //is an F action in the AA dictionary that populates that field. + PDField totalAmt = form.getField( "eeSuppTotalAmt" ); + assertTrue( totalAmt.getDictionary().getDictionaryObject( "AP" ) == null ); + + } + finally + { + if( freedom != null ) + { + freedom.close(); + } + if( fdf != null ) + { + fdf.close(); + } + } + } + + private void testContentStreamContains( PDDocument doc, PDField field, String expected ) throws Exception + { + PDAnnotationWidget widget = field.getWidget(); + Map normalAppearance = widget.getAppearance().getNormalAppearance(); + PDAppearanceStream appearanceStream = (PDAppearanceStream)normalAppearance.get( "default" ); + COSStream actual = appearanceStream.getStream(); + + List actualTokens = getStreamTokens( doc, actual ); + assertTrue( actualTokens.contains( new COSString( expected ) ) ); + } + + private void testContentStreams( PDDocument doc, PDField field, String expected ) throws Exception + { + PDAnnotationWidget widget = field.getWidget(); + Map normalAppearance = widget.getAppearance().getNormalAppearance(); + PDAppearanceStream appearanceStream = (PDAppearanceStream)normalAppearance.get( "default" ); + COSStream actual = appearanceStream.getStream(); + + List actualTokens = getStreamTokens( doc, actual ); + List 
expectedTokens = getStreamTokens( doc, expected ); + assertEquals( actualTokens.size(), expectedTokens.size() ); + for( int i=0; i + + + + + +This holds classes that will be used to test pdmodel form classes. + + diff --git a/src/main/java/test/pdfbox/pdmodel/package.html b/src/main/java/test/pdfbox/pdmodel/package.html new file mode 100644 index 0000000..7cb4cde --- /dev/null +++ b/src/main/java/test/pdfbox/pdmodel/package.html @@ -0,0 +1,9 @@ + + + + + + +This holds classes that will be used to test pdmodel classes. + + diff --git a/src/main/java/test/pdfbox/util/TestTextStripper.java b/src/main/java/test/pdfbox/util/TestTextStripper.java new file mode 100644 index 0000000..c425f38 --- /dev/null +++ b/src/main/java/test/pdfbox/util/TestTextStripper.java @@ -0,0 +1,371 @@ +/** + * Copyright (c) 2003-2005, www.pdfbox.org + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * 3. Neither the name of pdfbox; nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + * ANY THEORY OF LIABILIT, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * http://www.pdfbox.org + */ +package test.pdfbox.util; + +import java.io.File; +import java.io.FileInputStream; +import java.io.FilenameFilter; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.InputStreamReader; +import java.io.LineNumberReader; +import java.io.OutputStream; +import java.io.OutputStreamWriter; +import java.io.Writer; + +import junit.framework.Test; +import junit.framework.TestCase; +import junit.framework.TestSuite; + +import org.apache.log4j.Logger; +import org.apache.log4j.Level; + +import org.pdfbox.pdmodel.PDDocument; + +import org.pdfbox.util.PDFTextStripper; + +/** + * Test suite for PDFTextStripper. + * + * FILE SET VALIDATION + * + * This test suite is designed to test PDFTextStripper using a set of PDF + * files and known good output for each. The default mode of testAll() + * is to process each *.pdf file in "test/input". An output file is + * created in "test/output" with the same name as the PDF file, plus an + * additional ".txt" suffix. + * + * The output file is then tested against a known good result file from + * the input directory (again, with the same name as the tested PDF file, + * but with the additional ".txt" suffix). + * + * So for the file "test/input/hello.pdf", an output file will be generated + * named "test/output/hello.pdf.txt". Then that file will be compared to + * the known good file "test/input/hello.pdf.txt", if it exists. 
+ * + * Any errors are logged, and at the end of processing all *.pdf files, if + * there were any errors, the test fails. The logging is at INFO, as the + * general goal is overall validation, and on failure, the indication of + * which file or files failed. + * + * When processing new PDF files, you may use testAll() to generate output, + * verify the output manually, then move the output file to the test input + * directory to use as the basis for future validations. + * + * SINGLE FILE VALIDATION + * + * To further research individual failures, the test.pdfbox.util.TextStripper.file + * system property may be set with the name of a single file in the "test/input" + * directory. In this mode, testAll() will evaluate only that file, and will + * do so with DEBUG level logging. You can set this property from ant by + * defining "file", as in: + * + * ant testextract -Dfile=hello.pdf + * + * @author Robert Dickinson (bob@brutesquadlabs.com) + * @author Ben Litchfield (ben@benlitchfield.com) + * @version $Revision: 1.14 $ + */ +public class TestTextStripper extends TestCase +{ + private static Logger log = Logger.getLogger(TestTextStripper.class); + + private boolean bFail = false; + private PDFTextStripper stripper = null; + + /** + * Test class constructor. + * + * @param name The name of the test class. + * + * @throws IOException If there is an error creating the test. + */ + public TestTextStripper( String name ) throws IOException + { + super( name ); + stripper = new PDFTextStripper(); + stripper.setLineSeparator("\n"); + } + + /** + * Test suite setup. + */ + public void setUp() + { + // If you want to test a single file using DEBUG logging, from an IDE, + // you can do something like this: + // + // System.setProperty("test.pdfbox.util.TextStripper.file", "FVS318Ref.pdf"); + } + + /** + * Determine whether two strings are equal, where two null strings are + * considered equal. 
+ * + * @param expected Excpected string + * @param actual Actual String + * @return true is the strings are both null, + * or if their contents are the same, otherwise false. + */ + private boolean stringsEqual(String expected, String actual) + { + boolean equals = true; + if( (expected == null) && (actual == null) ) + { + return true; + } + else if( expected != null && actual != null ) + { + expected = expected.trim(); + actual = actual.trim(); + char[] expectedArray = expected.toCharArray(); + char[] actualArray = actual.toCharArray(); + int expectedIndex = 0; + int actualIndex = 0; + while( expectedIndex 256 ) + { + while( index < array.length && (array[index] == ' ' || array[index] > 256)) + { + index++; + } + index--; + } + return index; + } + + /** + * Validate text extraction on a single file. + * + * @param file The file to validate + * @param bLogResult Whether to log the extracted text + * @throws Exception when there is an exception + */ + public void doTestFile(File file, boolean bLogResult) + throws Exception + { + log.info("Preparing to parse " + file.getName()); + + + OutputStream os = null; + Writer writer = null; + PDDocument document = null; + try + { + document = PDDocument.load(file); + + File outFile = new File(file.getParentFile().getParentFile(), "output/" + file.getName() + ".txt"); + os = new FileOutputStream(outFile); + os.write( 0xFF ); + os.write( 0xFE ); + writer = new OutputStreamWriter(os,"UTF-16LE"); + + stripper.writeText(document, writer); + + + + if (bLogResult) + { + log.info("Text for " + file.getName() + ":\r\n" + stripper.getText(document)); + } + + File expectedFile = new File(file.getParentFile().getParentFile(), "input/" + file.getName() + ".txt"); + File actualFile = new File(file.getParentFile().getParentFile(), "output/" + file.getName() + ".txt"); + + if (!expectedFile.exists()) + { + this.bFail = true; + log.error("FAILURE: Input verification file: " + expectedFile.getAbsolutePath() + " did not exist"); + return; + } 
+ + LineNumberReader expectedReader = + new LineNumberReader(new InputStreamReader(new FileInputStream(expectedFile),"UTF-16")); + LineNumberReader actualReader = + new LineNumberReader(new InputStreamReader(new FileInputStream(actualFile), "UTF-16")); + + while (true) + { + String expectedLine = expectedReader.readLine(); + while( expectedLine != null && expectedLine.trim().length() == 0 ) + { + expectedLine = expectedReader.readLine(); + } + String actualLine = actualReader.readLine(); + while( actualLine != null && actualLine.trim().length() == 0 ) + { + actualLine = actualReader.readLine(); + } + if (!stringsEqual(expectedLine, actualLine)) + { + this.bFail = true; + log.error("FAILURE: Line mismatch for file " + file.getName() + + " at expected line: " + expectedReader.getLineNumber() + + " at actual line: " + actualReader.getLineNumber() + + "\r\n expected line was: \"" + expectedLine + "\"" + + "\r\n actual line was: \"" + actualLine + "\""); + //lets report all lines, even though this might produce some verbose logging + //break; + } + + if( expectedLine == null || actualLine==null) + { + break; + } + } + } + finally + { + if( writer != null ) + { + writer.close(); + } + if( os != null ) + { + os.close(); + } + if( document != null ) + { + document.close(); + } + } + } + + /** + * Test to validate text extraction of file set. 
+ * + * @throws Exception when there is an exception + */ + public void testExtract() + throws Exception + { + String filename = System.getProperty("test.pdfbox.util.TextStripper.file"); + File testDir = new File("test/input"); + + if ((filename == null) || (filename.length() == 0)) + { + Logger.getRootLogger().setLevel( Level.INFO ); + + File[] testFiles = testDir.listFiles(new FilenameFilter() + { + public boolean accept(File dir, String name) + { + return (name.endsWith(".pdf")); + } + }); + + for (int n = 0; n < testFiles.length; n++) + { + doTestFile(testFiles[n], false); + } + } + else + { + doTestFile(new File(testDir, filename), true); + } + + if (this.bFail) + { + fail("One or more failures, see test log for details"); + } + } + + /** + * Set the tests in the suite for this test class. + * + * @return the Suite. + */ + public static Test suite() + { + return new TestSuite( TestTextStripper.class ); + } + + /** + * Command line execution. + * + * @param args Command line arguments. + */ + public static void main( String[] args ) + { + String[] arg = {TestTextStripper.class.getName() }; + junit.textui.TestRunner.main( arg ); + } +} \ No newline at end of file diff --git a/src/main/java/test/pdfbox/util/TestTextStripperPerformance.java b/src/main/java/test/pdfbox/util/TestTextStripperPerformance.java new file mode 100644 index 0000000..0aadb30 --- /dev/null +++ b/src/main/java/test/pdfbox/util/TestTextStripperPerformance.java @@ -0,0 +1,173 @@ +/** + * Copyright (c) 2003-2004, www.pdfbox.org + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * 3. Neither the name of pdfbox; nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + * ANY THEORY OF LIABILIT, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * http://www.pdfbox.org + */ +package test.pdfbox.util; + +import java.io.File; +import java.io.FilenameFilter; +import java.io.FileOutputStream; +import java.io.OutputStream; +import java.io.OutputStreamWriter; +import java.io.Writer; + +import junit.framework.Test; +import junit.framework.TestCase; +import junit.framework.TestSuite; + +import org.apache.log4j.Logger; + +import org.pdfbox.pdmodel.PDDocument; + +import org.pdfbox.util.PDFTextStripper; + +/** + * Test the performance of the PDF text stripper utility. 
+ * + * @author Ben Litchfield (ben@csh.rit.edu) + * @version $Revision: 1.2 $ + */ +public class TestTextStripperPerformance extends TestCase +{ + private static Logger log = Logger.getLogger(TestTextStripperPerformance.class); + + private boolean bFail = false; + + /** + * Test class constructor. + * + * @param name The name of the test class. + */ + public TestTextStripperPerformance( String name ) + { + super( name ); + } + + /** + * Test suite setup. + */ + public void setUp() + { + } + + + /** + * Validate text extraction on a single file. + * + * @param file The file to validate + * @param bLogResult Whether to log the extracted text + * @throws Exception when there is an exception + */ + public void doTestFile(File file, boolean bLogResult) + throws Exception + { + + PDFTextStripper stripper = new PDFTextStripper(); + OutputStream os = null; + Writer writer = null; + PDDocument document = null; + try + { + document = PDDocument.load(file); + + File outFile = new File(file.getParentFile().getParentFile(), "output/" + file.getName() + ".txt"); + os = new FileOutputStream(outFile); + writer = new OutputStreamWriter(os); + + stripper.writeText(document, writer); + } + finally + { + if( writer != null ) + { + writer.close(); + } + if( os != null ) + { + os.close(); + } + if( document != null ) + { + document.close(); + } + } + } + + /** + * Test to validate text extraction of file set. 
+ * + * @throws Exception when there is an exception + */ + public void testExtract() + throws Exception + { + String filename = System.getProperty("test.pdfbox.util.TextStripper.file"); + File testDir = new File("test/input"); + + if ((filename == null) || (filename.length() == 0)) + { + File[] testFiles = testDir.listFiles(new FilenameFilter() + { + public boolean accept(File dir, String name) + { + return (name.endsWith(".pdf")); + } + }); + + for (int n = 0; n < testFiles.length; n++) + { + doTestFile(testFiles[n], false); + } + } + else + { + //doTestFile(new File(testDir, filename), true); + } + } + + /** + * Set the tests in the suite for this test class. + * + * @return the Suite. + */ + public static Test suite() + { + return new TestSuite( TestTextStripperPerformance.class ); + } + + /** + * Command line execution. + * + * @param args Command line arguments. + */ + public static void main( String[] args ) + { + String[] arg = {TestTextStripperPerformance.class.getName() }; + junit.textui.TestRunner.main( arg ); + } +} \ No newline at end of file diff --git a/src/main/java/test/pdfbox/util/package.html b/src/main/java/test/pdfbox/util/package.html new file mode 100644 index 0000000..8d98577 --- /dev/null +++ b/src/main/java/test/pdfbox/util/package.html @@ -0,0 +1,9 @@ + + + + + + +These classes will be used to test the text extraction capabilities that are available with PDFBox. + + -- cgit v1.2.3