From c68ad0ec056b37c82debebcecfcde1866d61b4d9 Mon Sep 17 00:00:00 2001 From: tknall Date: Tue, 25 Nov 2008 12:03:13 +0000 Subject: Removing pdfbox from source. git-svn-id: https://joinup.ec.europa.eu/svn/pdf-as/trunk@301 7b5415b0-85f9-ee4d-85bd-d5d0c3b42d1c --- .../java/test/pdfbox/util/TestTextStripper.java | 371 --------------------- 1 file changed, 371 deletions(-) delete mode 100644 src/main/java/test/pdfbox/util/TestTextStripper.java (limited to 'src/main/java/test/pdfbox/util/TestTextStripper.java') diff --git a/src/main/java/test/pdfbox/util/TestTextStripper.java b/src/main/java/test/pdfbox/util/TestTextStripper.java deleted file mode 100644 index c425f38..0000000 --- a/src/main/java/test/pdfbox/util/TestTextStripper.java +++ /dev/null @@ -1,371 +0,0 @@ -/** - * Copyright (c) 2003-2005, www.pdfbox.org - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials provided with the distribution. - * 3. Neither the name of pdfbox; nor the names of its - * contributors may be used to endorse or promote products derived from this - * software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON - * ANY THEORY OF LIABILIT, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * http://www.pdfbox.org - */ -package test.pdfbox.util; - -import java.io.File; -import java.io.FileInputStream; -import java.io.FilenameFilter; -import java.io.FileOutputStream; -import java.io.IOException; -import java.io.InputStreamReader; -import java.io.LineNumberReader; -import java.io.OutputStream; -import java.io.OutputStreamWriter; -import java.io.Writer; - -import junit.framework.Test; -import junit.framework.TestCase; -import junit.framework.TestSuite; - -import org.apache.log4j.Logger; -import org.apache.log4j.Level; - -import org.pdfbox.pdmodel.PDDocument; - -import org.pdfbox.util.PDFTextStripper; - -/** - * Test suite for PDFTextStripper. - * - * FILE SET VALIDATION - * - * This test suite is designed to test PDFTextStripper using a set of PDF - * files and known good output for each. The default mode of testAll() - * is to process each *.pdf file in "test/input". An output file is - * created in "test/output" with the same name as the PDF file, plus an - * additional ".txt" suffix. - * - * The output file is then tested against a known good result file from - * the input directory (again, with the same name as the tested PDF file, - * but with the additional ".txt" suffix). - * - * So for the file "test/input/hello.pdf", an output file will be generated - * named "test/output/hello.pdf.txt". Then that file will be compared to - * the known good file "test/input/hello.pdf.txt", if it exists. - * - * Any errors are logged, and at the end of processing all *.pdf files, if - * there were any errors, the test fails. The logging is at INFO, as the - * general goal is overall validation, and on failure, the indication of - * which file or files failed. - * - * When processing new PDF files, you may use testAll() to generate output, - * verify the output manually, then move the output file to the test input - * directory to use as the basis for future validations. - * - * SINGLE FILE VALIDATION - * - * To further research individual failures, the test.pdfbox.util.TextStripper.file - * system property may be set with the name of a single file in the "test/input" - * directory. In this mode, testAll() will evaluate only that file, and will - * do so with DEBUG level logging. You can set this property from ant by - * defining "file", as in: - * - * ant testextract -Dfile=hello.pdf - * - * @author Robert Dickinson (bob@brutesquadlabs.com) - * @author Ben Litchfield (ben@benlitchfield.com) - * @version $Revision: 1.14 $ - */ -public class TestTextStripper extends TestCase -{ - private static Logger log = Logger.getLogger(TestTextStripper.class); - - private boolean bFail = false; - private PDFTextStripper stripper = null; - - /** - * Test class constructor. - * - * @param name The name of the test class. - * - * @throws IOException If there is an error creating the test. - */ - public TestTextStripper( String name ) throws IOException - { - super( name ); - stripper = new PDFTextStripper(); - stripper.setLineSeparator("\n"); - } - - /** - * Test suite setup. - */ - public void setUp() - { - // If you want to test a single file using DEBUG logging, from an IDE, - // you can do something like this: - // - // System.setProperty("test.pdfbox.util.TextStripper.file", "FVS318Ref.pdf"); - } - - /** - * Determine whether two strings are equal, where two null strings are - * considered equal. - * - * @param expected Excpected string - * @param actual Actual String - * @return true is the strings are both null, - * or if their contents are the same, otherwise false. - */ - private boolean stringsEqual(String expected, String actual) - { - boolean equals = true; - if( (expected == null) && (actual == null) ) - { - return true; - } - else if( expected != null && actual != null ) - { - expected = expected.trim(); - actual = actual.trim(); - char[] expectedArray = expected.toCharArray(); - char[] actualArray = actual.toCharArray(); - int expectedIndex = 0; - int actualIndex = 0; - while( expectedIndex 256 ) - { - while( index < array.length && (array[index] == ' ' || array[index] > 256)) - { - index++; - } - index--; - } - return index; - } - - /** - * Validate text extraction on a single file. - * - * @param file The file to validate - * @param bLogResult Whether to log the extracted text - * @throws Exception when there is an exception - */ - public void doTestFile(File file, boolean bLogResult) - throws Exception - { - log.info("Preparing to parse " + file.getName()); - - - OutputStream os = null; - Writer writer = null; - PDDocument document = null; - try - { - document = PDDocument.load(file); - - File outFile = new File(file.getParentFile().getParentFile(), "output/" + file.getName() + ".txt"); - os = new FileOutputStream(outFile); - os.write( 0xFF ); - os.write( 0xFE ); - writer = new OutputStreamWriter(os,"UTF-16LE"); - - stripper.writeText(document, writer); - - - - if (bLogResult) - { - log.info("Text for " + file.getName() + ":\r\n" + stripper.getText(document)); - } - - File expectedFile = new File(file.getParentFile().getParentFile(), "input/" + file.getName() + ".txt"); - File actualFile = new File(file.getParentFile().getParentFile(), "output/" + file.getName() + ".txt"); - - if (!expectedFile.exists()) - { - this.bFail = true; - log.error("FAILURE: Input verification file: " + expectedFile.getAbsolutePath() + " did not exist"); - return; - } - - LineNumberReader expectedReader = - new LineNumberReader(new InputStreamReader(new FileInputStream(expectedFile),"UTF-16")); - LineNumberReader actualReader = - new LineNumberReader(new InputStreamReader(new FileInputStream(actualFile), "UTF-16")); - - while (true) - { - String expectedLine = expectedReader.readLine(); - while( expectedLine != null && expectedLine.trim().length() == 0 ) - { - expectedLine = expectedReader.readLine(); - } - String actualLine = actualReader.readLine(); - while( actualLine != null && actualLine.trim().length() == 0 ) - { - actualLine = actualReader.readLine(); - } - if (!stringsEqual(expectedLine, actualLine)) - { - this.bFail = true; - log.error("FAILURE: Line mismatch for file " + file.getName() + - " at expected line: " + expectedReader.getLineNumber() + - " at actual line: " + actualReader.getLineNumber() + - "\r\n expected line was: \"" + expectedLine + "\"" + - "\r\n actual line was: \"" + actualLine + "\""); - //lets report all lines, even though this might produce some verbose logging - //break; - } - - if( expectedLine == null || actualLine==null) - { - break; - } - } - } - finally - { - if( writer != null ) - { - writer.close(); - } - if( os != null ) - { - os.close(); - } - if( document != null ) - { - document.close(); - } - } - } - - /** - * Test to validate text extraction of file set. - * - * @throws Exception when there is an exception - */ - public void testExtract() - throws Exception - { - String filename = System.getProperty("test.pdfbox.util.TextStripper.file"); - File testDir = new File("test/input"); - - if ((filename == null) || (filename.length() == 0)) - { - Logger.getRootLogger().setLevel( Level.INFO ); - - File[] testFiles = testDir.listFiles(new FilenameFilter() - { - public boolean accept(File dir, String name) - { - return (name.endsWith(".pdf")); - } - }); - - for (int n = 0; n < testFiles.length; n++) - { - doTestFile(testFiles[n], false); - } - } - else - { - doTestFile(new File(testDir, filename), true); - } - - if (this.bFail) - { - fail("One or more failures, see test log for details"); - } - } - - /** - * Set the tests in the suite for this test class. - * - * @return the Suite. - */ - public static Test suite() - { - return new TestSuite( TestTextStripper.class ); - } - - /** - * Command line execution. - * - * @param args Command line arguments. - */ - public static void main( String[] args ) - { - String[] arg = {TestTextStripper.class.getName() }; - junit.textui.TestRunner.main( arg ); - } -} \ No newline at end of file -- cgit v1.2.3