diff options
| author | tknall <tknall@7b5415b0-85f9-ee4d-85bd-d5d0c3b42d1c> | 2006-12-01 12:20:24 +0000 | 
|---|---|---|
| committer | tknall <tknall@7b5415b0-85f9-ee4d-85bd-d5d0c3b42d1c> | 2006-12-01 12:20:24 +0000 | 
| commit | 6025b6016517c6d898d8957d1d7e03ba71431912 (patch) | |
| tree | b15bd6fa5ffe9588a9bca3f2b8a7e358f83b6eba /src/main/java/org/pdfbox/ant | |
| parent | d2c77e820ab4aba8235d71275755021347b3ad10 (diff) | |
| download | pdf-as-3-6025b6016517c6d898d8957d1d7e03ba71431912.tar.gz pdf-as-3-6025b6016517c6d898d8957d1d7e03ba71431912.tar.bz2 pdf-as-3-6025b6016517c6d898d8957d1d7e03ba71431912.zip | |
Initial import of release 2.2.REL-2.2@923
git-svn-id: https://joinup.ec.europa.eu/svn/pdf-as/trunk@4 7b5415b0-85f9-ee4d-85bd-d5d0c3b42d1c
Diffstat (limited to 'src/main/java/org/pdfbox/ant')
| -rw-r--r-- | src/main/java/org/pdfbox/ant/PDFToTextTask.java | 100 | ||||
| -rw-r--r-- | src/main/java/org/pdfbox/ant/package.html | 18 | 
2 files changed, 118 insertions, 0 deletions
| diff --git a/src/main/java/org/pdfbox/ant/PDFToTextTask.java b/src/main/java/org/pdfbox/ant/PDFToTextTask.java new file mode 100644 index 0000000..56ef42b --- /dev/null +++ b/src/main/java/org/pdfbox/ant/PDFToTextTask.java @@ -0,0 +1,100 @@ +/**
 + * Copyright (c) 2003, www.pdfbox.org
 + * All rights reserved.
 + *
 + * Redistribution and use in source and binary forms, with or without
 + * modification, are permitted provided that the following conditions are met:
 + *
 + * 1. Redistributions of source code must retain the above copyright notice,
 + *    this list of conditions and the following disclaimer.
 + * 2. Redistributions in binary form must reproduce the above copyright notice,
 + *    this list of conditions and the following disclaimer in the documentation
 + *    and/or other materials provided with the distribution.
 + * 3. Neither the name of pdfbox; nor the names of its
 + *    contributors may be used to endorse or promote products derived from this
 + *    software without specific prior written permission.
 + *
 + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 + * DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
 + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
 + * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 + *
 + * http://www.pdfbox.org
 + *
 + */
 +package org.pdfbox.ant;
 +
 +import java.io.File;
 +
 +import java.util.ArrayList;
 +import java.util.Iterator;
 +import java.util.List;
 +
 +import org.apache.tools.ant.DirectoryScanner;
 +import org.apache.tools.ant.Task;
 +
 +import org.apache.tools.ant.types.FileSet;
 +
 +/**
 + * This is an Ant task that runs org.pdfbox.ExtractText on every PDF document
 + * matched by its nested filesets.
 + *
 + * @author Ben Litchfield (ben@csh.rit.edu)
 + * @version $Revision: 1.7 $
 + */
 +public class PDFToTextTask extends Task
 +{
 +    private List fileSets = new ArrayList();
 +
 +    /**
 +     * Adds a set of files (nested fileset attribute).
 +     *
 +     * @param set Another fileset to add.
 +     */
 +    public void addFileset( FileSet set )
 +    {
 +        fileSets.add( set );
 +    }
 +
 +    /**
 +     * This will perform the execution: each included file whose name ends in
 +     * ".pdf" (any case) is handed to ExtractText; failures are only logged.
 +     */
 +    public void execute()
 +    {
 +        log( "PDFToTextTask executing" );
 +        Iterator fileSetIter = fileSets.iterator();
 +        while( fileSetIter.hasNext() )
 +        {
 +            FileSet next = (FileSet)fileSetIter.next();
 +            DirectoryScanner dirScanner = next.getDirectoryScanner( getProject() );
 +            dirScanner.scan();
 +            String[] files = dirScanner.getIncludedFiles();
 +            for( int i=0; i<files.length; i++ )
 +            {
 +                String pdfFile = new File( dirScanner.getBasedir(), files[i] ).getAbsolutePath();
 +                log( "processing: " + pdfFile );
 +                if( pdfFile.toUpperCase().endsWith( ".PDF" ) )
 +                {
 +                    // Swap the three-letter extension so "x.pdf" becomes "x.txt".
 +                    String textFile = pdfFile.substring( 0, pdfFile.length() -3 ) + "txt";
 +                    try
 +                    {
 +                        org.pdfbox.ExtractText.main( new String[] { pdfFile, textFile } );
 +                    }
 +                    catch( Exception e )
 +                    {
 +                        // Log the exception itself so its type shows even without a message.
 +                        log( "Error processing " + pdfFile + ": " + e );
 +                    }
 +                }
 +            }
 +        }
 +    }
 +}
\ No newline at end of file diff --git a/src/main/java/org/pdfbox/ant/package.html b/src/main/java/org/pdfbox/ant/package.html new file mode 100644 index 0000000..675014e --- /dev/null +++ b/src/main/java/org/pdfbox/ant/package.html @@ -0,0 +1,18 @@ +<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">
 +<html>
 +<head>
 +
 +</head>
 +<body>
 +<A href="http://jakarta.apache.org/ant/index.html">ANT</a> tasks that utilize PDFBox features can be found in this package.
 +This is an example of using the PDF2Text task:<br/> <br/>
 +
 +&lt;taskdef name="pdf2text" classname="org.pdfbox.ant.PDFToTextTask" classpathref="build.classpath" /&gt;<br/>
 +
 +&lt;pdf2text&gt;<br/>
 +      &lt;fileset dir="test"&gt;<br/>
 +            &lt;include name="**/*.pdf" /&gt;<br/>
 +      &lt;/fileset&gt;<br/>
 +&lt;/pdf2text&gt;<br/>
 +</body>
 +</html>
 | 
