From 6025b6016517c6d898d8957d1d7e03ba71431912 Mon Sep 17 00:00:00 2001 From: tknall Date: Fri, 1 Dec 2006 12:20:24 +0000 Subject: Initial import of release 2.2. git-svn-id: https://joinup.ec.europa.eu/svn/pdf-as/trunk@4 7b5415b0-85f9-ee4d-85bd-d5d0c3b42d1c --- src/main/java/org/pdfbox/ExtractImages.java | 210 ++++++++++++++++++++++++++++ 1 file changed, 210 insertions(+) create mode 100644 src/main/java/org/pdfbox/ExtractImages.java (limited to 'src/main/java/org/pdfbox/ExtractImages.java') diff --git a/src/main/java/org/pdfbox/ExtractImages.java b/src/main/java/org/pdfbox/ExtractImages.java new file mode 100644 index 0000000..a8e46ee --- /dev/null +++ b/src/main/java/org/pdfbox/ExtractImages.java @@ -0,0 +1,210 @@ +/** + * Copyright (c) 2003-2004, www.pdfbox.org + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * 3. Neither the name of pdfbox; nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * http://www.pdfbox.org + * + */ +package org.pdfbox; + +import java.io.File; +import java.io.IOException; + +import java.util.Iterator; +import java.util.List; +import java.util.Map; + +import org.pdfbox.pdmodel.PDDocument; +import org.pdfbox.pdmodel.PDPage; +import org.pdfbox.pdmodel.PDResources; + +import org.pdfbox.pdmodel.encryption.PDEncryptionDictionary; +import org.pdfbox.pdmodel.encryption.PDStandardEncryption; +import org.pdfbox.pdmodel.graphics.xobject.PDXObjectImage; + +/** + * This will read a PDF and extract images.

+ * + * usage: java org.pdfbox.ExtractImages <pdffile> <password> [imageprefix] + * + * @author Ben Litchfield (ben@csh.rit.edu) + * @version $Revision: 1.3 $ + */ +public class ExtractImages +{ + private int imageCounter = 1; + + private static final String PASSWORD = "-password"; + private static final String PREFIX = "-prefix"; + + /** + * This is the entry point for the application. + * + * @param args The command-line arguments. + * + * @throws Exception If there is an error decrypting the document. + */ + public static void main( String[] args ) throws Exception + { + ExtractImages extractor = new ExtractImages(); + extractor.extractImages( args ); + } + + private void extractImages( String[] args ) throws Exception + { + if( args.length < 1 || args.length > 3 ) + { + usage(); + } + else + { + String pdfFile = null; + String password = ""; + String prefix = null; + for( int i=0; i= args.length ) + { + usage(); + } + password = args[i]; + } + else if( args[i].equals( PREFIX ) ) + { + i++; + if( i >= args.length ) + { + usage(); + } + prefix = args[i]; + } + else + { + if( pdfFile == null ) + { + pdfFile = args[i]; + } + } + } + if( prefix == null && pdfFile.length() >4 ) + { + prefix = pdfFile.substring( 0, pdfFile.length() -4 ); + } + + PDDocument document = null; + + try + { + document = PDDocument.load( pdfFile ); + + if( document.isEncrypted() ) + { + if( document.isOwnerPassword( password ) ) + { + document.decrypt( password ); + } + else + { + throw new IOException( + "Error: You are only allowed to extract images with the owner password." 
); + } + } + + PDEncryptionDictionary encDictionary = document.getEncryptionDictionary(); + + //only care about standard encryption and if it was decrypted with the + //user password + if( encDictionary instanceof PDStandardEncryption && + !document.wasDecryptedWithOwnerPassword() ) + { + PDStandardEncryption stdEncryption = (PDStandardEncryption)encDictionary; + if( !stdEncryption.canExtractContent() ) + { + throw new IOException( "You do not have permission to extract images." ); + } + } + + List pages = document.getDocumentCatalog().getAllPages(); + Iterator iter = pages.iterator(); + while( iter.hasNext() ) + { + PDPage page = (PDPage)iter.next(); + PDResources resources = page.getResources(); + Map images = resources.getImages(); + if( images != null ) + { + Iterator imageIter = images.keySet().iterator(); + while( imageIter.hasNext() ) + { + String key = (String)imageIter.next(); + PDXObjectImage image = (PDXObjectImage)images.get( key ); + String name = getUniqueFileName( key, image.getSuffix() ); + System.out.println( "Writing image:" + name ); + image.write2file( name ); + } + } + } + } + finally + { + if( document != null ) + { + document.close(); + } + } + } + } + + private String getUniqueFileName( String prefix, String suffix ) + { + String uniqueName = null; + File f = null; + while( f == null || f.exists() ) + { + uniqueName = prefix + "-" + imageCounter; + f = new File( uniqueName + "." + suffix ); + imageCounter++; + } + return uniqueName; + } + + /** + * This will print the usage requirements and exit. + */ + private static void usage() + { + System.err.println( "Usage: java org.pdfbox.ExtractImages [OPTIONS] \n" + + " -password Password to decrypt document\n" + + " -suffix Image suffix(default to pdf name)\n" + + " The PDF document to use\n" + ); + System.exit( 1 ); + } + +} \ No newline at end of file -- cgit v1.2.3