/** * Copyright (c) 2005, www.pdfbox.org * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * 3. Neither the name of pdfbox; nor the names of its * contributors may be used to endorse or promote products derived from this * software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * http://www.pdfbox.org * */ package org.pdfbox; import java.awt.image.BufferedImage; import java.io.File; import java.util.Iterator; import java.util.List; import javax.imageio.IIOException; import javax.imageio.IIOImage; import javax.imageio.ImageIO; import javax.imageio.ImageWriteParam; import javax.imageio.ImageWriter; import javax.imageio.stream.ImageOutputStream; import org.pdfbox.exceptions.InvalidPasswordException; import org.pdfbox.pdmodel.PDDocument; import org.pdfbox.pdmodel.PDPage; /** * Convert a PDF document to an image. * * @author Ben Litchfield (ben@csh.rit.edu) * @version $Revision: 1.3 $ */ public class PDFToImage { private static final String PASSWORD = "-password"; private static final String START_PAGE = "-startPage"; private static final String END_PAGE = "-endPage"; private static final String IMAGE_TYPE = "-imageType"; private static final String OUTPUT_PREFIX = "-outputPrefix"; /** * private constructor. */ private PDFToImage() { //static class } /** * Infamous main method. * * @param args Command line arguments, should be one and a reference to a file. * * @throws Exception If there is an error parsing the document. */ public static void main( String[] args ) throws Exception { String password = ""; String pdfFile = null; String outputPrefix = null; String imageType = "jpg"; int startPage = 1; int endPage = Integer.MAX_VALUE; for( int i=0; i<args.length; i++ ) { if( args[i].equals( PASSWORD ) ) { i++; if( i >= args.length ) { usage(); } password = args[i]; } else if( args[i].equals( START_PAGE ) ) { i++; if( i >= args.length ) { usage(); } startPage = Integer.parseInt( args[i] ); } else if( args[i].equals( END_PAGE ) ) { i++; if( i >= args.length ) { usage(); } endPage = Integer.parseInt( args[i] ); } else if( args[i].equals( IMAGE_TYPE ) ) { i++; imageType = args[i]; } else if( args[i].equals( OUTPUT_PREFIX ) ) { i++; outputPrefix = args[i]; } else { if( pdfFile == null ) { pdfFile = args[i]; } } } if( pdfFile == null ) { usage(); } if(outputPrefix == null) { outputPrefix = pdfFile.substring( 0, pdfFile.lastIndexOf( '.' )); } PDDocument document = null; try { document = PDDocument.load( pdfFile ); //document.print(); if( document.isEncrypted() ) { try { document.decrypt( password ); } catch( InvalidPasswordException e ) { if( args.length == 4 )//they supplied the wrong password { System.err.println( "Error: The supplied password is incorrect." ); System.exit( 2 ); } else { //they didn't suppply a password and the default of "" was wrong. System.err.println( "Error: The document is encrypted." ); usage(); } } } List pages = document.getDocumentCatalog().getAllPages(); for( int i=startPage-1; i<endPage && i<pages.size(); i++ ) { ImageOutputStream output = null; ImageWriter imageWriter = null; try { PDPage page = (PDPage)pages.get( i ); BufferedImage image = page.convertToImage(); String fileName = outputPrefix + (i+1) + "." + imageType; System.out.println( "Writing:" + fileName ); output = ImageIO.createImageOutputStream( new File( fileName ) ); boolean foundWriter = false; Iterator writerIter = ImageIO.getImageWritersByFormatName( imageType ); while( writerIter.hasNext() && !foundWriter ) { try { imageWriter = (ImageWriter)writerIter.next(); ImageWriteParam writerParams = imageWriter.getDefaultWriteParam(); if(writerParams.canWriteCompressed() ) { writerParams.setCompressionMode(ImageWriteParam.MODE_EXPLICIT); writerParams.setCompressionQuality(1.0f); } imageWriter.setOutput( output ); imageWriter.write( null, new IIOImage( image, null, null), writerParams ); foundWriter = true; } catch( IIOException io ) { //ignore exception } finally { imageWriter.dispose(); } } if( !foundWriter ) { throw new RuntimeException( "Error: no writer found for image type '" + imageType + "'" ); } } finally { if( output != null ) { output.flush(); output.close(); } } } } finally { if( document != null ) { document.close(); } } } /** * This will print the usage requirements and exit. */ private static void usage() { System.err.println( "Usage: java org.pdfbox.ExtractText [OPTIONS] <PDF file> [Text File]\n" + " -password <password> Password to decrypt document\n" + " -imageType <image type> (" + getImageFormats() + ")\n" + " -outputPrefix <output prefix> Filename prefix for image files\n" + " -startPage <number> The first page to start extraction(1 based)\n" + " -endPage <number> The last page to extract(inclusive)\n" + " <PDF file> The PDF document to use\n" ); System.exit( 1 ); } private static String getImageFormats() { StringBuffer retval = new StringBuffer(); String[] formats = ImageIO.getReaderFormatNames(); for( int i=0; i<formats.length; i++ ) { retval.append( formats[i] ); if( i+1<formats.length ) { retval.append( "," ); } } return retval.toString(); } }