Initial import of release 2.2.REL-2.2@923

git-svn-id: https://joinup.ec.europa.eu/svn/pdf-as/trunk@4 7b5415b0-85f9-ee4d-85bd-d5d0c3b42d1c
author: tknall <tknall@7b5415b0-85f9-ee4d-85bd-d5d0c3b42d1c> 2006-12-01 12:20:24 +0000
committer: tknall <tknall@7b5415b0-85f9-ee4d-85bd-d5d0c3b42d1c> 2006-12-01 12:20:24 +0000
commit: 6025b6016517c6d898d8957d1d7e03ba71431912 (patch)
tree: b15bd6fa5ffe9588a9bca3f2b8a7e358f83b6eba /src/main/java/org/pdfbox/ExtractImages.java
parent: d2c77e820ab4aba8235d71275755021347b3ad10 (diff)
download: pdf-as-3-6025b6016517c6d898d8957d1d7e03ba71431912.tar.gz
pdf-as-3-6025b6016517c6d898d8957d1d7e03ba71431912.tar.bz2
pdf-as-3-6025b6016517c6d898d8957d1d7e03ba71431912.zip
1 files changed, 210 insertions, 0 deletions
diff --git a/src/main/java/org/pdfbox/ExtractImages.java b/src/main/java/org/pdfbox/ExtractImages.java
new file mode 100644
index 0000000..a8e46ee
--- /dev/null
+++ b/src/main/java/org/pdfbox/ExtractImages.java
@@ -0,0 +1,210 @@
+/**
+ * Copyright (c) 2003-2004, www.pdfbox.org
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ * 3. Neither the name of pdfbox; nor the names of its
+ *    contributors may be used to endorse or promote products derived from this
+ *    software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * http://www.pdfbox.org
+ *
+ */
+package org.pdfbox;
+
+import java.io.File;
+import java.io.IOException;
+
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+
+import org.pdfbox.pdmodel.PDDocument;
+import org.pdfbox.pdmodel.PDPage;
+import org.pdfbox.pdmodel.PDResources;
+
+import org.pdfbox.pdmodel.encryption.PDEncryptionDictionary;
+import org.pdfbox.pdmodel.encryption.PDStandardEncryption;
+import org.pdfbox.pdmodel.graphics.xobject.PDXObjectImage;
+
+/**
+ * Command-line tool that loads a PDF document and writes every image found in
+ * the page resources to its own file. <br/><br/>
+ *
+ * usage: java org.pdfbox.ExtractImages [-password &lt;password&gt;] [-prefix &lt;imageprefix&gt;] &lt;pdffile&gt;
+ *
+ * @author  Ben Litchfield (ben@csh.rit.edu)
+ * @version $Revision: 1.3 $
+ */
+public class ExtractImages
+{
+    // Running number appended to generated file names; it is never reset, so
+    // it keeps growing across all images written by one extractor instance.
+    private int imageCounter = 1;
+
+    private static final String PASSWORD = "-password";
+    private static final String PREFIX = "-prefix";
+
+    /**
+     * This is the entry point for the application.
+     *
+     * @param args The command-line arguments.
+     *
+     * @throws Exception If there is an error decrypting the document.
+     */
+    public static void main( String[] args ) throws Exception
+    {
+        ExtractImages extractor = new ExtractImages();
+        extractor.extractImages( args );
+    }
+
+    /**
+     * Parses the command line, opens the document, verifies that extraction is
+     * permitted and writes all images to disk.
+     *
+     * @param args The command-line arguments: optional "-password" and
+     *             "-prefix" options (each followed by a value) and the PDF file.
+     *
+     * @throws Exception If the document cannot be loaded or decrypted, or if
+     *                   image extraction is not permitted.
+     */
+    private void extractImages( String[] args ) throws Exception
+    {
+        String pdfFile = null;
+        String password = "";
+        String prefix = null;
+        for( int i=0; i<args.length; i++ )
+        {
+            if( args[i].equals( PASSWORD ) )
+            {
+                i++;
+                if( i >= args.length )
+                {
+                    usage();
+                    return;
+                }
+                password = args[i];
+            }
+            else if( args[i].equals( PREFIX ) )
+            {
+                i++;
+                if( i >= args.length )
+                {
+                    usage();
+                    return;
+                }
+                prefix = args[i];
+            }
+            else
+            {
+                //only the first non-option argument is taken as the PDF file,
+                //any further ones are ignored
+                if( pdfFile == null )
+                {
+                    pdfFile = args[i];
+                }
+            }
+        }
+        //covers both "no arguments at all" and "options but no PDF file";
+        //no upper bound on the argument count is enforced because a complete
+        //command line (-password X -prefix Y file) already has five entries
+        if( pdfFile == null )
+        {
+            usage();
+            return;
+        }
+        if( prefix == null && pdfFile.length() >4 )
+        {
+            //default prefix: the file name minus its last four characters,
+            //i.e. assuming an extension such as ".pdf"
+            prefix = pdfFile.substring( 0, pdfFile.length() -4 );
+        }
+
+        PDDocument document = null;
+
+        try
+        {
+            document = PDDocument.load( pdfFile );
+            checkExtractionAllowed( document, password );
+            writeImages( document, prefix );
+        }
+        finally
+        {
+            if( document != null )
+            {
+                document.close();
+            }
+        }
+    }
+
+    /**
+     * Decrypts the document if necessary and makes sure the caller is allowed
+     * to extract content from it.
+     *
+     * @param document The loaded document.
+     * @param password The password given on the command line (may be empty).
+     *
+     * @throws Exception If the password is not the owner password of an
+     *                   encrypted document, if decryption fails, or if the
+     *                   document permissions forbid content extraction.
+     */
+    private void checkExtractionAllowed( PDDocument document, String password ) throws Exception
+    {
+        if( document.isEncrypted() )
+        {
+            if( document.isOwnerPassword( password ) )
+            {
+                document.decrypt( password );
+            }
+            else
+            {
+                throw new IOException(
+                    "Error: You are only allowed to extract images with the owner password." );
+            }
+        }
+
+        PDEncryptionDictionary encDictionary = document.getEncryptionDictionary();
+
+        //only care about standard encryption and if it was decrypted with the
+        //user password
+        if( encDictionary instanceof PDStandardEncryption &&
+            !document.wasDecryptedWithOwnerPassword() )
+        {
+            PDStandardEncryption stdEncryption = (PDStandardEncryption)encDictionary;
+            if( !stdEncryption.canExtractContent() )
+            {
+                throw new IOException( "You do not have permission to extract images." );
+            }
+        }
+    }
+
+    /**
+     * Walks over all pages of the document and writes each image listed in the
+     * page resources to a file.
+     *
+     * @param document The (decrypted) document to read the images from.
+     * @param prefix The file name prefix to use; if <code>null</code> the
+     *               resource key of the image is used instead.
+     *
+     * @throws IOException If a page cannot be read or an image cannot be written.
+     */
+    private void writeImages( PDDocument document, String prefix ) throws IOException
+    {
+        List pages = document.getDocumentCatalog().getAllPages();
+        Iterator iter = pages.iterator();
+        while( iter.hasNext() )
+        {
+            PDPage page = (PDPage)iter.next();
+            PDResources resources = page.getResources();
+            if( resources == null )
+            {
+                //nothing to extract from a page without resources
+                continue;
+            }
+            Map images = resources.getImages();
+            if( images != null )
+            {
+                Iterator imageIter = images.keySet().iterator();
+                while( imageIter.hasNext() )
+                {
+                    String key = (String)imageIter.next();
+                    PDXObjectImage image = (PDXObjectImage)images.get( key );
+                    //honour the -prefix option / PDF-name default; fall back to
+                    //the resource key when no prefix could be determined
+                    String base = prefix != null ? prefix : key;
+                    String name = getUniqueFileName( base, image.getSuffix() );
+                    System.out.println( "Writing image:" + name );
+                    //the name carries no extension here; presumably write2file
+                    //appends the image suffix itself - TODO confirm
+                    image.write2file( name );
+                }
+            }
+        }
+    }
+
+    /**
+     * Builds a file name of the form <code>prefix-N</code> for which no file
+     * <code>prefix-N.suffix</code> exists yet, advancing the image counter
+     * until a free name is found.
+     *
+     * @param prefix The leading part of the file name.
+     * @param suffix The file extension (without dot) used for the existence check.
+     *
+     * @return The unique name, without the extension.
+     */
+    private String getUniqueFileName( String prefix, String suffix )
+    {
+        String uniqueName = null;
+        File f = null;
+        while( f == null || f.exists() )
+        {
+            uniqueName = prefix + "-" + imageCounter;
+            f = new File( uniqueName + "." + suffix );
+            imageCounter++;
+        }
+        return uniqueName;
+    }
+
+    /**
+     * This will print the usage requirements and exit.
+     */
+    private static void usage()
+    {
+        System.err.println( "Usage: java org.pdfbox.ExtractImages [OPTIONS] <PDF file>\n" +
+            "  -password  <password>        Password to decrypt document\n" +
+            "  -prefix  <image-prefix>      Image prefix(default to pdf name)\n" +
+            "  <PDF file>                   The PDF document to use\n"
+            );
+        System.exit( 1 );
+    }
+
+}
+\ No newline at end of file
author	tknall <tknall@7b5415b0-85f9-ee4d-85bd-d5d0c3b42d1c>	2006-12-01 12:20:24 +0000
committer	tknall <tknall@7b5415b0-85f9-ee4d-85bd-d5d0c3b42d1c>	2006-12-01 12:20:24 +0000
commit	6025b6016517c6d898d8957d1d7e03ba71431912 (patch)
tree	b15bd6fa5ffe9588a9bca3f2b8a7e358f83b6eba /src/main/java/org/pdfbox/ExtractImages.java
parent	d2c77e820ab4aba8235d71275755021347b3ad10 (diff)
download	pdf-as-3-6025b6016517c6d898d8957d1d7e03ba71431912.tar.gz pdf-as-3-6025b6016517c6d898d8957d1d7e03ba71431912.tar.bz2 pdf-as-3-6025b6016517c6d898d8957d1d7e03ba71431912.zip