1 files changed, 725 insertions, 0 deletions
diff --git a/src/main/java/org/pdfbox/pdmodel/PDDocument.java b/src/main/java/org/pdfbox/pdmodel/PDDocument.java
new file mode 100644
index 0000000..94150fb
--- /dev/null
+++ b/src/main/java/org/pdfbox/pdmodel/PDDocument.java
@@ -0,0 +1,725 @@
+/**
+ * Copyright (c) 2003-2005, www.pdfbox.org
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ * 3. Neither the name of pdfbox; nor the names of its
+ *    contributors may be used to endorse or promote products derived from this
+ *    software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * http://www.pdfbox.org
+ *
+ */
+package org.pdfbox.pdmodel;
+
+import java.awt.print.PageFormat;
+import java.awt.print.Pageable;
+import java.awt.print.Paper;
+import java.awt.print.Printable;
+import java.awt.print.PrinterException;
+import java.awt.print.PrinterIOException;
+import java.awt.print.PrinterJob;
+import java.io.BufferedInputStream;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.InputStream;
+import java.io.IOException;
+import java.io.OutputStream;
+import java.util.List;
+
+import org.pdfbox.cos.COSArray;
+import org.pdfbox.cos.COSDictionary;
+import org.pdfbox.cos.COSDocument;
+import org.pdfbox.cos.COSInteger;
+import org.pdfbox.cos.COSName;
+import org.pdfbox.cos.COSStream;
+import org.pdfbox.cos.COSString;
+
+import org.pdfbox.encryption.PDFEncryption;
+import org.pdfbox.encryption.DocumentEncryption;
+
+import org.pdfbox.exceptions.COSVisitorException;
+import org.pdfbox.exceptions.CryptographyException;
+import org.pdfbox.exceptions.InvalidPasswordException;
+
+import org.pdfbox.pdfparser.PDFParser;
+
+import org.pdfbox.pdfwriter.COSWriter;
+
+import org.pdfbox.pdmodel.common.PDRectangle;
+import org.pdfbox.pdmodel.common.PDStream;
+
+import org.pdfbox.pdmodel.encryption.PDEncryptionDictionary;
+import org.pdfbox.pdmodel.encryption.PDEncryptionManager;
+import org.pdfbox.pdmodel.encryption.PDStandardEncryption;
+
+/**
+ * This is the in-memory representation of the PDF document.  You need to call
+ * close() on this object when you are done using it!!
+ *
+ * @author Ben Litchfield (ben@benlitchfield.com)
+ * @version $Revision: 1.35 $
+ */
+public class PDDocument implements Pageable
+{
+    private COSDocument document;
+    private boolean encryptOnSave = false;
+    private String encryptUserPassword = null;
+    private String encryptOwnerPassword = null;
+
+    //cached values
+    private PDDocumentInformation documentInformation;
+    private PDDocumentCatalog documentCatalog;
+
+    //The encParameters will be cached here.  When the document is decrypted then
+    //the COSDocument will not have an "Encrypt" dictionary anymore and this object
+    //must be used.
+    private PDEncryptionDictionary encParameters = null;
+    /**
+     * This will tell if the document was decrypted with the master password.
+     */
+    private boolean decryptedWithOwnerPassword = false;
+
+    /**
+     * Constructor, creates a new PDF Document with no pages.  You need to add
+     * at least one page for the document to be valid.
+     *
+     * @throws IOException If there is an error creating this document.
+     */
+    public PDDocument() throws IOException
+    {
+        document = new COSDocument();
+
+        //First we need a trailer
+        COSDictionary trailer = new COSDictionary();
+        document.setTrailer( trailer );
+
+        //Next we need the root dictionary.
+        COSDictionary rootDictionary = new COSDictionary();
+        trailer.setItem( COSName.ROOT, rootDictionary );
+        rootDictionary.setItem( COSName.TYPE, COSName.CATALOG );
+        rootDictionary.setItem( COSName.VERSION, COSName.getPDFName( "1.4" ) );
+
+        //next we need the pages tree structure
+        COSDictionary pages = new COSDictionary();
+        rootDictionary.setItem( COSName.PAGES, pages );
+        pages.setItem( COSName.TYPE, COSName.PAGES );
+        COSArray kidsArray = new COSArray();
+        pages.setItem( COSName.KIDS, kidsArray );
+        pages.setItem( COSName.COUNT, new COSInteger( 0 ) );
+    }
+
+    /**
+     * This will add a page to the document.  This is a convenience method, that
+     * will add the page to the root of the hierarchy and set the parent of the
+     * page to the root.
+     *
+     * @param page The page to add to the document.
+     */
+    public void addPage( PDPage page )
+    {
+        PDPageNode rootPages = getDocumentCatalog().getPages();
+        rootPages.getKids().add( page );
+        page.setParent( rootPages );
+        rootPages.updateCount();
+    }
+    
+    /**
+     * Remove the page from the document.
+     *
+     * @param page The page to remove from the document.
+     * 
+     * @return true if the page was found false otherwise.
+     */
+    public boolean removePage( PDPage page )
+    {
+        PDPageNode parent = page.getParent();
+        boolean retval = parent.getKids().remove( page );
+        if( retval )
+        {
+            //do a recursive updateCount starting at the root
+            //of the document
+            getDocumentCatalog().getPages().updateCount();
+        }
+        return retval;
+    }
+    
+    /**
+     * Remove the page from the document.
+     * 
+     * @param pageNumber 0 based index to page number.
+     * @return true if the page was found false otherwise.
+     */
+    public boolean removePage( int pageNumber )
+    {
+        boolean removed = false;
+        List allPages = getDocumentCatalog().getAllPages();
+        if( allPages.size() > pageNumber)
+        {
+            PDPage page = (PDPage)allPages.get( pageNumber );
+            removed = removePage( page );
+        }
+        return removed;
+    }
+
+    /**
+     * This will import and copy the contents from another location.  Currently
+     * the content stream is stored in a scratch file.  The scratch file is
+     * associated with the document.  If you are adding a page to this document
+     * from another document and want to copy the contents to this document's
+     * scratch file then use this method otherwise just use the addPage method.
+     *
+     * @param page The page to import.
+     * @return The page that was imported.
+     *
+     * @throws IOException If there is an error copying the page.
+     */
+    public PDPage importPage( PDPage page ) throws IOException
+    {
+        PDPage importedPage = new PDPage( new COSDictionary( page.getCOSDictionary() ) );
+        InputStream is = null;
+        OutputStream os = null;
+        try
+        {
+            PDStream src = page.getContents();
+            PDStream dest = new PDStream( new COSStream( src.getStream(), document.getScratchFile() ) );
+            importedPage.setContents( dest );
+            os = dest.createOutputStream();
+
+            byte[] buf = new byte[10240];
+            int amountRead = 0;
+            is = src.createInputStream();
+            while((amountRead = is.read(buf,0,10240)) > -1)
+            {
+                os.write(buf, 0, amountRead);
+            }
+            addPage( importedPage );
+        }
+        finally
+        {
+            if( is != null )
+            {
+                is.close();
+            }
+            if( os != null )
+            {
+                os.close();
+            }
+        }
+        return importedPage;
+
+    }
+
+    /**
+     * Constructor that uses an existing document.  The COSDocument that
+     * is passed in must be valid.
+     *
+     * @param doc The COSDocument that this document wraps.
+     */
+    public PDDocument( COSDocument doc )
+    {
+        document = doc;
+    }
+
+    /**
+     * This will get the low level document.
+     *
+     * @return The document that this layer sits on top of.
+     */
+    public COSDocument getDocument()
+    {
+        return document;
+    }
+
+    /**
+     * This will get the document info dictionary.  This is guaranteed to not return null.
+     *
+     * @return The documents /Info dictionary
+     */
+    public PDDocumentInformation getDocumentInformation()
+    {
+        if( documentInformation == null )
+        {
+            COSDictionary trailer = document.getTrailer();
+            COSDictionary infoDic = (COSDictionary)trailer.getDictionaryObject( COSName.INFO );
+            if( infoDic == null )
+            {
+                infoDic = new COSDictionary();
+                trailer.setItem( COSName.INFO, infoDic );
+            }
+            documentInformation = new PDDocumentInformation( infoDic );
+        }
+        return documentInformation;
+    }
+
+    /**
+     * This will set the document information for this document.
+     *
+     * @param info The updated document information.
+     */
+    public void setDocumentInformation( PDDocumentInformation info )
+    {
+        documentInformation = info;
+        document.getTrailer().setItem( COSName.INFO, info.getDictionary() );
+    }
+
+    /**
+     * This will get the document CATALOG.  This is guaranteed to not return null.
+     *
+     * @return The documents /Root dictionary
+     */
+    public PDDocumentCatalog getDocumentCatalog()
+    {
+        if( documentCatalog == null )
+        {
+            COSDictionary trailer = document.getTrailer();
+            COSDictionary infoDic = (COSDictionary)trailer.getDictionaryObject( COSName.ROOT );
+            if( infoDic == null )
+            {
+                documentCatalog = new PDDocumentCatalog( this );
+            }
+            else
+            {
+                documentCatalog = new PDDocumentCatalog( this, infoDic );
+            }
+
+        }
+        return documentCatalog;
+    }
+
+    /**
+     * This will tell if this document is encrypted or not.
+     *
+     * @return true If this document is encrypted.
+     */
+    public boolean isEncrypted()
+    {
+        return document.isEncrypted();
+    }
+
+    /**
+     * This will get the encryption dictionary for this document.  This will still
+     * return the parameters if the document was decrypted.  If the document was
+     * never encrypted then this will return null.  As the encryption architecture
+     * in PDF documents is plugable this returns an abstract class, but the only
+     * supported subclass at this time is a PDStandardEncryption object.
+     *
+     * @return The encryption dictionary(most likely a PDStandardEncryption object)
+     *
+     * @throws IOException If there is an error determining which security handler to use.
+     */
+    public PDEncryptionDictionary getEncryptionDictionary() throws IOException
+    {
+        if( encParameters == null )
+        {
+            encParameters = PDEncryptionManager.getEncryptionDictionary( document.getEncryptionDictionary() );
+        }
+        return encParameters;
+    }
+
+    /**
+     * This will set the encryption dictionary for this document.
+     *
+     * @param encDictionary The encryption dictionary(most likely a PDStandardEncryption object)
+     *
+     * @throws IOException If there is an error determining which security handler to use.
+     */
+    public void setEncryptionDictionary( PDEncryptionDictionary encDictionary ) throws IOException
+    {
+        encParameters = encDictionary;
+    }
+
+    /**
+     * This will determine if this is the user password.  This only applies when
+     * the document is encrypted and uses standard encryption.
+     *
+     * @param password The plain text user password.
+     *
+     * @return true If the password passed in matches the user password used to encrypt the document.
+     *
+     * @throws IOException If there is an error determining if it is the user password.
+     * @throws CryptographyException If there is an error in the encryption algorithms.
+     */
+    public boolean isUserPassword( String password ) throws IOException, CryptographyException
+    {
+        boolean retval = false;
+        if( password == null )
+        {
+            password = "";
+        }
+        PDFEncryption encryptor = new PDFEncryption();
+        PDEncryptionDictionary encryptionDictionary = getEncryptionDictionary();
+        if( encryptionDictionary == null )
+        {
+            throw new IOException( "Error: Document is not encrypted" );
+        }
+        else
+        {
+            if( encryptionDictionary instanceof PDStandardEncryption )
+            {
+                COSString documentID = (COSString)document.getDocumentID().get(0);
+                PDStandardEncryption standard = (PDStandardEncryption)encryptionDictionary;
+                retval = encryptor.isUserPassword(
+                    password.getBytes(),
+                    standard.getUserKey(),
+                    standard.getOwnerKey(),
+                    standard.getPermissions(),
+                    documentID.getBytes(),
+                    standard.getRevision(),
+                    standard.getLength()/8 );
+            }
+            else
+            {
+                throw new IOException( "Error: Encyption dictionary is not 'Standard'" +
+                    encryptionDictionary.getClass().getName() );
+            }
+        }
+        return retval;
+    }
+
+    /**
+     * This will determine if this is the owner password.  This only applies when
+     * the document is encrypted and uses standard encryption.
+     *
+     * @param password The plain text owner password.
+     *
+     * @return true If the password passed in matches the owner password used to encrypt the document.
+     *
+     * @throws IOException If there is an error determining if it is the user password.
+     * @throws CryptographyException If there is an error in the encryption algorithms.
+     */
+    public boolean isOwnerPassword( String password ) throws IOException, CryptographyException
+    {
+        boolean retval = false;
+        if( password == null )
+        {
+            password = "";
+        }
+        PDFEncryption encryptor = new PDFEncryption();
+        PDEncryptionDictionary encryptionDictionary = getEncryptionDictionary();
+        if( encryptionDictionary == null )
+        {
+            throw new IOException( "Error: Document is not encrypted" );
+        }
+        else
+        {
+            if( encryptionDictionary instanceof PDStandardEncryption )
+            {
+                COSString documentID = (COSString)document.getDocumentID().get( 0 );
+                PDStandardEncryption standard = (PDStandardEncryption)encryptionDictionary;
+                retval = encryptor.isOwnerPassword(
+                    password.getBytes(),
+                    standard.getUserKey(),
+                    standard.getOwnerKey(),
+                    standard.getPermissions(),
+                    documentID.getBytes(),
+                    standard.getRevision(),
+                    standard.getLength()/8 );
+            }
+            else
+            {
+                throw new IOException( "Error: Encyption dictionary is not 'Standard'" +
+                    encryptionDictionary.getClass().getName() );
+            }
+        }
+        return retval;
+    }
+
+    /**
+     * This will decrypt a document.
+     *
+     * @param password Either the user or owner password.
+     *
+     * @throws CryptographyException If there is an error decrypting the document.
+     * @throws IOException If there is an error getting the stream data.
+     * @throws InvalidPasswordException If the password is not a user or owner password.
+     */
+    public void decrypt( String password ) throws CryptographyException, IOException, InvalidPasswordException
+    {
+        decryptedWithOwnerPassword = isOwnerPassword( password );
+        DocumentEncryption decryptor = new DocumentEncryption( this );
+        decryptor.decryptDocument( password );
+        document.dereferenceObjectStreams();
+    }
+    
+    /**
+     * This will tell if the document was decrypted with the master password.  This
+     * entry is invalid if the PDF was not decrypted.
+     * 
+     * @return true if the pdf was decrypted with the master password.
+     */
+    public boolean wasDecryptedWithOwnerPassword()
+    {
+        return decryptedWithOwnerPassword;
+    }
+
+    /**
+     * This will <b>mark</b> a document to be encrypted.  The actual encryption
+     * will occur when the document is saved.
+     *
+     * @param ownerPassword The owner password to encrypt the document.
+     * @param userPassword The user password to encrypt the document.
+     *
+     * @throws CryptographyException If an error occurs during encryption.
+     * @throws IOException If there is an error accessing the data.
+     */
+    public void encrypt( String ownerPassword, String userPassword )
+        throws CryptographyException, IOException
+    {
+        encryptOnSave = true;
+        encryptOwnerPassword = ownerPassword;
+        encryptUserPassword = userPassword;
+    }
+    
+    
+    /**
+     * The owner password that was passed into the encrypt method.  You should
+     * never use this method.  This will not longer be valid once encryption
+     * has occured.
+     * 
+     * @return The owner password passed to the encrypt method.
+     */
+    public String getOwnerPasswordForEncryption()
+    {
+        return encryptOwnerPassword;
+    }
+    
+    /**
+     * The user password that was passed into the encrypt method.  You should
+     * never use this method.  This will not longer be valid once encryption
+     * has occured.
+     * 
+     * @return The user password passed to the encrypt method.
+     */
+    public String getUserPasswordForEncryption()
+    {
+        return encryptUserPassword;
+    }
+    
+    /**
+     * Internal method do determine if the document will be encrypted when it is saved.
+     * 
+     * @return True if encrypt has been called and the document 
+     *              has not been saved yet.
+     */
+    public boolean willEncryptWhenSaving()
+    {
+        return encryptOnSave;
+    }
+    
+    /**
+     * This shoule only be called by the COSWriter after encryption has completed.
+     *
+     */
+    public void clearWillEncryptWhenSaving()
+    {
+        encryptOnSave = false;
+    }
+
+    /**
+     * This will load a document from a file.
+     *
+     * @param filename The name of the file to load.
+     *
+     * @return The document that was loaded.
+     *
+     * @throws IOException If there is an error reading from the stream.
+     */
+    public static PDDocument load( String filename ) throws IOException
+    {
+        return load( new BufferedInputStream( new FileInputStream( filename ) ) );
+    }
+
+    /**
+     * This will load a document from a file.
+     *
+     * @param file The name of the file to load.
+     *
+     * @return The document that was loaded.
+     *
+     * @throws IOException If there is an error reading from the stream.
+     */
+    public static PDDocument load( File file ) throws IOException
+    {
+        return load( new BufferedInputStream( new FileInputStream( file ) ) );
+    }
+
+    /**
+     * This will load a document from an input stream.
+     *
+     * @param input The stream that contains the document.
+     *
+     * @return The document that was loaded.
+     *
+     * @throws IOException If there is an error reading from the stream.
+     */
+    public static PDDocument load( InputStream input ) throws IOException
+    {
+        PDFParser parser = new PDFParser( input );
+        parser.parse();
+        return parser.getPDDocument();
+    }
+
+    /**
+     * This will save this document to the filesystem.
+     *
+     * @param fileName The file to save as.
+     *
+     * @throws IOException If there is an error saving the document.
+     * @throws COSVisitorException If an error occurs while generating the data.
+     */
+    public void save( String fileName ) throws IOException, COSVisitorException
+    {
+        save( new FileOutputStream( fileName ) );
+    }
+
+    /**
+     * This will save the document to an output stream.
+     *
+     * @param output The stream to write to.
+     *
+     * @throws IOException If there is an error writing the document.
+     * @throws COSVisitorException If an error occurs while generating the data.
+     */
+    public void save( OutputStream output ) throws IOException, COSVisitorException
+    {
+        //update the count in case any pages have been added behind the scenes.
+        getDocumentCatalog().getPages().updateCount();
+        COSWriter writer = null;
+        try
+        {
+            writer = new COSWriter( output );
+            writer.write( this );
+            writer.close();
+        }
+        finally
+        {
+            if( writer != null )
+            {
+                writer.close();
+            }
+        }
+
+    }
+
+    /**
+     * This will return the total page count of the PDF document.  Note: This method
+     * is deprecated in favor of the getNumberOfPages method.  The getNumberOfPages is
+     * a required interface method of the Pageable interface.  This method will
+     * be removed in a future version of PDFBox!!
+     *
+     * @return The total number of pages in the PDF document.
+     * @deprecated Use the getNumberOfPages method instead!
+     */
+    public int getPageCount()
+    {
+        return getNumberOfPages();
+    }
+    
+    /**
+     * @see Pageable#getNumberOfPages()
+     */
+    public int getNumberOfPages()
+    {
+        PDDocumentCatalog cat = getDocumentCatalog();
+        return (int)cat.getPages().getCount();
+    }
+    
+    /**
+     * @see Pageable#getPageFormat(int)
+     */
+    public PageFormat getPageFormat(int pageIndex)
+    {
+        PDPage page = (PDPage)getDocumentCatalog().getAllPages().get( pageIndex );
+        PDRectangle mediaBox = page.findMediaBox();
+        PageFormat format = new PageFormat();
+        Paper paper = new Paper();
+        //hmm the imageable area might need to be the CropBox instead
+        //of the media box???
+        paper.setImageableArea( 0,0,mediaBox.getWidth(),mediaBox.getHeight());
+        paper.setSize( mediaBox.getWidth(), mediaBox.getHeight() );
+        format.setPaper( paper );
+        return format;
+    }
+    
+    /**
+     * @see Pageable#getPrintable(int)
+     */
+    public Printable getPrintable(int pageIndex)
+    {
+        return (Printable)getDocumentCatalog().getAllPages().get( pageIndex );
+    }
+    
+    /**
+     * This will send the PDF document to a printer.  The printing functionality
+     * depends on the org.pdfbox.pdfviewer.PageDrawer functionality.  The PageDrawer
+     * is a work in progress and some PDFs will print correctly and some will
+     * not.  This is a convenience method to create the java.awt.print.PrinterJob.  
+     * The PDDocument implements the java.awt.print.Pageable interface and 
+     * PDPage implementes the java.awt.print.Printable interface, so advanced printing
+     * capabilities can be done by using those interfaces instead of this method. 
+     * 
+     * @throws PrinterException If there is an error while sending the PDF to
+     * the printer, or you do not have permissions to print this document.
+     */
+    public void print() throws PrinterException
+    {
+        PDEncryptionDictionary encDictionary = null;
+        try
+        {
+            encDictionary = getEncryptionDictionary();
+        }
+        catch( IOException io )
+        {
+            throw new PrinterIOException( io );
+        }
+
+        //only care about standard encryption and if it was decrypted with the
+        //user password
+        if( encDictionary instanceof PDStandardEncryption && 
+            !wasDecryptedWithOwnerPassword() )
+        {
+            PDStandardEncryption stdEncryption = (PDStandardEncryption)encDictionary;
+            if( !stdEncryption.canPrint() )
+            {
+                throw new PrinterException( "You do not have permission to print this document." );
+            }
+        }
+        
+        PrinterJob printJob = PrinterJob.getPrinterJob();
+        printJob.setPageable(this);
+        if( printJob.printDialog() )
+        {
+            printJob.print();
+        }
+    }
+    
+    /**
+     * This will close the underlying COSDocument object.
+     *
+     * @throws IOException If there is an error releasing resources.
+     */
+    public void close() throws IOException
+    {
+        document.close();
+    }
+}
+\ No newline at end of file