Removing pdfbox from source.

git-svn-id: https://joinup.ec.europa.eu/svn/pdf-as/trunk@301 7b5415b0-85f9-ee4d-85bd-d5d0c3b42d1c
author: tknall <tknall@7b5415b0-85f9-ee4d-85bd-d5d0c3b42d1c> 2008-11-25 12:03:13 +0000
committer: tknall <tknall@7b5415b0-85f9-ee4d-85bd-d5d0c3b42d1c> 2008-11-25 12:03:13 +0000
commit: c68ad0ec056b37c82debebcecfcde1866d61b4d9 (patch)
tree: 9e3ec28a87a5c29d1087fd03cc4c77899e2c758c /src/main/java/org/pdfbox/ExtractText.java
parent: 2eae233bf806c076acb8afea41eb16552687bcea (diff)
download: pdf-as-3-c68ad0ec056b37c82debebcecfcde1866d61b4d9.tar.gz
pdf-as-3-c68ad0ec056b37c82debebcecfcde1866d61b4d9.tar.bz2
pdf-as-3-c68ad0ec056b37c82debebcecfcde1866d61b4d9.zip
1 files changed, 0 insertions, 270 deletions
diff --git a/src/main/java/org/pdfbox/ExtractText.java b/src/main/java/org/pdfbox/ExtractText.java
deleted file mode 100644
index 5f5a328..0000000
--- a/src/main/java/org/pdfbox/ExtractText.java
+++ /dev/null
@@ -1,270 +0,0 @@
-/**
- * Copyright (c) 2003-2004, www.pdfbox.org
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * 1. Redistributions of source code must retain the above copyright notice,
- *    this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright notice,
- *    this list of conditions and the following disclaimer in the documentation
- *    and/or other materials provided with the distribution.
- * 3. Neither the name of pdfbox; nor the names of its
- *    contributors may be used to endorse or promote products derived from this
- *    software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- * DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
- * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
- * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
- * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * http://www.pdfbox.org
- *
- */
-package org.pdfbox;
-
-import java.io.FileOutputStream;
-import java.io.OutputStreamWriter;
-import java.io.Writer;
-
-import org.pdfbox.exceptions.InvalidPasswordException;
-
-import org.pdfbox.pdmodel.PDDocument;
-
-import org.pdfbox.util.PDFText2HTML;
-import org.pdfbox.util.PDFTextStripper;
-
-import org.apache.log4j.Logger;
-
-/**
- * This is the main program that simply parses the pdf document and transforms it
- * into text.
- *
- * @author Ben Litchfield (ben@csh.rit.edu)
- * @version $Revision: 1.9 $
- */
-public class ExtractText
-{
-    private static final Logger LOG = Logger.getLogger( ExtractText.class );
-
-    /**
-     * This is the default encoding of the text to be output.
-     */
-    public static final String DEFAULT_ENCODING =
-        null;
-        //"ISO-8859-1";
-        //"ISO-8859-6"; //arabic
-        //"US-ASCII";
-        //"UTF-8";
-        //"UTF-16";
-        //"UTF-16BE";
-        //"UTF-16LE";
-
-
-    private static final String PASSWORD = "-password";
-    private static final String ENCODING = "-encoding";
-    private static final String CONSOLE = "-console";
-    private static final String START_PAGE = "-startPage";
-    private static final String END_PAGE = "-endPage";
-    private static final String SORT = "-sort";
-    private static final String HTML = "-html";  // jjb - added simple HTML output
-
-    /**
-     * private constructor.
-    */
-    private ExtractText()
-    {
-        //static class
-    }
-
-    /**
-     * Infamous main method.
-     *
-     * @param args Command line arguments, should be one and a reference to a file.
-     *
-     * @throws Exception If there is an error parsing the document.
-     */
-    public static void main( String[] args ) throws Exception
-    {
-        boolean toConsole = false;
-        boolean toHTML = false;
-        boolean sort = false;
-        String password = "";
-        String encoding = DEFAULT_ENCODING;
-        String pdfFile = null;
-        String textFile = null;
-        int startPage = 1;
-        int endPage = Integer.MAX_VALUE;
-        for( int i=0; i<args.length; i++ )
-        {
-            if( args[i].equals( PASSWORD ) )
-            {
-                i++;
-                if( i >= args.length )
-                {
-                    usage();
-                }
-                password = args[i];
-            }
-            else if( args[i].equals( ENCODING ) )
-            {
-                i++;
-                if( i >= args.length )
-                {
-                    usage();
-                }
-                encoding = args[i];
-            }
-            else if( args[i].equals( START_PAGE ) )
-            {
-                i++;
-                if( i >= args.length )
-                {
-                    usage();
-                }
-                startPage = Integer.parseInt( args[i] );
-            }
-            else if( args[i].equals( HTML ) )
-            {
-                toHTML = true;
-            }
-            else if( args[i].equals( SORT ) )
-            {
-                sort = true;
-            }
-            else if( args[i].equals( END_PAGE ) )
-            {
-                i++;
-                if( i >= args.length )
-                {
-                    usage();
-                }
-                endPage = Integer.parseInt( args[i] );
-            }
-            else if( args[i].equals( CONSOLE ) )
-            {
-                toConsole = true;
-            }
-            else
-            {
-                if( pdfFile == null )
-                {
-                    pdfFile = args[i];
-                }
-                else
-                {
-                    textFile = args[i];
-                }
-            }
-        }
-
-        if( pdfFile == null )
-        {
-            usage();
-        }
-
-        if( textFile == null && pdfFile.length() >4 )
-        {
-            textFile = pdfFile.substring( 0, pdfFile.length() -4 ) + ".txt";
-        }
-
-        Writer output = null;
-        PDDocument document = null;
-        try
-        {
-            document = PDDocument.load( pdfFile );
-
-            //document.print();
-            if( document.isEncrypted() )
-            {
-                try
-                {
-                    document.decrypt( password );
-                }
-                catch( InvalidPasswordException e )
-                {
-                    if( args.length == 4 )//they supplied the wrong password
-                    {
-                        System.err.println( "Error: The supplied password is incorrect." );
-                        System.exit( 2 );
-                    }
-                    else
-                    {
-                        //they didn't suppply a password and the default of "" was wrong.
-                        System.err.println( "Error: The document is encrypted." );
-                        usage();
-                    }
-                }
-            }
-            if( toConsole )
-            {
-                output = new OutputStreamWriter( System.out );
-            }
-            else
-            {
-                if( encoding != null )
-                {
-                    output = new OutputStreamWriter(
-                        new FileOutputStream( textFile ), encoding );
-                }
-                else
-                {
-                    //use default encoding
-                    output = new OutputStreamWriter(
-                        new FileOutputStream( textFile ) );
-                }
-            }
-
-            PDFTextStripper stripper = null;
-            if(toHTML) 
-            {
-               stripper = new PDFText2HTML();
-            } 
-            else 
-            {
-               stripper = new PDFTextStripper();
-            }
-            stripper.setSortByPosition( sort );
-            stripper.setStartPage( startPage );
-            stripper.setEndPage( endPage );
-            stripper.writeText( document, output );
-        }
-        finally
-        {
-            if( output != null )
-            {
-                output.close();
-            }
-            if( document != null )
-            {
-                document.close();
-            }
-        }
-    }
-
-    /**
-     * This will print the usage requirements and exit.
-     */
-    private static void usage()
-    {
-        System.err.println( "Usage: java org.pdfbox.ExtractText [OPTIONS] <PDF file> [Text File]\n" +
-            "  -password  <password>        Password to decrypt document\n" +
-            "  -encoding  <output encoding> (ISO-8859-1,UTF-16BE,UTF-16LE,...)\n" +
-            "  -console                     Send text to console instead of file\n" +
-            "  -html                        Output in HTML format instead of raw text\n" +
-            "  -sort                        Sort the text before writing\n" +
-            "  -startPage <number>          The first page to start extraction(1 based)\n" +
-            "  -endPage <number>            The last page to extract(inclusive)\n" +
-            "  <PDF file>                   The PDF document to use\n" +
-            "  [Text File]                  The file to write the text to\n"
-            );
-        System.exit( 1 );
-    }
-}
-\ No newline at end of file
author	tknall <tknall@7b5415b0-85f9-ee4d-85bd-d5d0c3b42d1c>	2008-11-25 12:03:13 +0000
committer	tknall <tknall@7b5415b0-85f9-ee4d-85bd-d5d0c3b42d1c>	2008-11-25 12:03:13 +0000
commit	c68ad0ec056b37c82debebcecfcde1866d61b4d9 (patch)
tree	9e3ec28a87a5c29d1087fd03cc4c77899e2c758c /src/main/java/org/pdfbox/ExtractText.java
parent	2eae233bf806c076acb8afea41eb16552687bcea (diff)
download	pdf-as-3-c68ad0ec056b37c82debebcecfcde1866d61b4d9.tar.gz pdf-as-3-c68ad0ec056b37c82debebcecfcde1866d61b4d9.tar.bz2 pdf-as-3-c68ad0ec056b37c82debebcecfcde1866d61b4d9.zip