aboutsummaryrefslogtreecommitdiff
path: root/src/main/java/org/pdfbox/pdmodel/PDDocument.java
diff options
context:
space:
mode:
Diffstat (limited to 'src/main/java/org/pdfbox/pdmodel/PDDocument.java')
-rw-r--r--src/main/java/org/pdfbox/pdmodel/PDDocument.java725
1 files changed, 725 insertions, 0 deletions
diff --git a/src/main/java/org/pdfbox/pdmodel/PDDocument.java b/src/main/java/org/pdfbox/pdmodel/PDDocument.java
new file mode 100644
index 0000000..94150fb
--- /dev/null
+++ b/src/main/java/org/pdfbox/pdmodel/PDDocument.java
@@ -0,0 +1,725 @@
+/**
+ * Copyright (c) 2003-2005, www.pdfbox.org
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ * 3. Neither the name of pdfbox; nor the names of its
+ * contributors may be used to endorse or promote products derived from this
+ * software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * http://www.pdfbox.org
+ *
+ */
+package org.pdfbox.pdmodel;
+
+import java.awt.print.PageFormat;
+import java.awt.print.Pageable;
+import java.awt.print.Paper;
+import java.awt.print.Printable;
+import java.awt.print.PrinterException;
+import java.awt.print.PrinterIOException;
+import java.awt.print.PrinterJob;
+import java.io.BufferedInputStream;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.InputStream;
+import java.io.IOException;
+import java.io.OutputStream;
+import java.util.List;
+
+import org.pdfbox.cos.COSArray;
+import org.pdfbox.cos.COSDictionary;
+import org.pdfbox.cos.COSDocument;
+import org.pdfbox.cos.COSInteger;
+import org.pdfbox.cos.COSName;
+import org.pdfbox.cos.COSStream;
+import org.pdfbox.cos.COSString;
+
+import org.pdfbox.encryption.PDFEncryption;
+import org.pdfbox.encryption.DocumentEncryption;
+
+import org.pdfbox.exceptions.COSVisitorException;
+import org.pdfbox.exceptions.CryptographyException;
+import org.pdfbox.exceptions.InvalidPasswordException;
+
+import org.pdfbox.pdfparser.PDFParser;
+
+import org.pdfbox.pdfwriter.COSWriter;
+
+import org.pdfbox.pdmodel.common.PDRectangle;
+import org.pdfbox.pdmodel.common.PDStream;
+
+import org.pdfbox.pdmodel.encryption.PDEncryptionDictionary;
+import org.pdfbox.pdmodel.encryption.PDEncryptionManager;
+import org.pdfbox.pdmodel.encryption.PDStandardEncryption;
+
+/**
+ * This is the in-memory representation of the PDF document. You need to call
+ * close() on this object when you are done using it!!
+ *
+ * @author Ben Litchfield (ben@benlitchfield.com)
+ * @version $Revision: 1.35 $
+ */
+public class PDDocument implements Pageable
+{
+ private COSDocument document;
+ private boolean encryptOnSave = false;
+ private String encryptUserPassword = null;
+ private String encryptOwnerPassword = null;
+
+ //cached values
+ private PDDocumentInformation documentInformation;
+ private PDDocumentCatalog documentCatalog;
+
+ //The encParameters will be cached here. When the document is decrypted then
+ //the COSDocument will not have an "Encrypt" dictionary anymore and this object
+ //must be used.
+ private PDEncryptionDictionary encParameters = null;
+ /**
+ * This will tell if the document was decrypted with the master password.
+ */
+ private boolean decryptedWithOwnerPassword = false;
+
+ /**
+ * Constructor, creates a new PDF Document with no pages. You need to add
+ * at least one page for the document to be valid.
+ *
+ * @throws IOException If there is an error creating this document.
+ */
+ public PDDocument() throws IOException
+ {
+ document = new COSDocument();
+
+ //First we need a trailer
+ COSDictionary trailer = new COSDictionary();
+ document.setTrailer( trailer );
+
+ //Next we need the root dictionary.
+ COSDictionary rootDictionary = new COSDictionary();
+ trailer.setItem( COSName.ROOT, rootDictionary );
+ rootDictionary.setItem( COSName.TYPE, COSName.CATALOG );
+ rootDictionary.setItem( COSName.VERSION, COSName.getPDFName( "1.4" ) );
+
+ //next we need the pages tree structure
+ COSDictionary pages = new COSDictionary();
+ rootDictionary.setItem( COSName.PAGES, pages );
+ pages.setItem( COSName.TYPE, COSName.PAGES );
+ COSArray kidsArray = new COSArray();
+ pages.setItem( COSName.KIDS, kidsArray );
+ pages.setItem( COSName.COUNT, new COSInteger( 0 ) );
+ }
+
+ /**
+ * This will add a page to the document. This is a convenience method, that
+ * will add the page to the root of the hierarchy and set the parent of the
+ * page to the root.
+ *
+ * @param page The page to add to the document.
+ */
+ public void addPage( PDPage page )
+ {
+ PDPageNode rootPages = getDocumentCatalog().getPages();
+ rootPages.getKids().add( page );
+ page.setParent( rootPages );
+ rootPages.updateCount();
+ }
+
+ /**
+ * Remove the page from the document.
+ *
+ * @param page The page to remove from the document.
+ *
+ * @return true if the page was found and removed, false otherwise (including when it has no parent node).
+ */
+ public boolean removePage( PDPage page )
+ {
+ PDPageNode parent = page.getParent();
+ //getParent() may return null for a page that was never added to a page tree; treat that as not found
+ boolean retval = parent != null && parent.getKids().remove( page );
+ if( retval )
+ {
+ //do a recursive updateCount starting at the root of the document
+ getDocumentCatalog().getPages().updateCount();
+ }
+ return retval;
+ }
+
+ /**
+ * Remove the page at the given index from the document.
+ *
+ * @param pageNumber 0 based index of the page; negative or too large values remove nothing.
+ * @return true if the page was found and removed, false otherwise.
+ */
+ public boolean removePage( int pageNumber )
+ {
+ boolean removed = false;
+ List allPages = getDocumentCatalog().getAllPages();
+ if( pageNumber >= 0 && pageNumber < allPages.size() )
+ {
+ PDPage page = (PDPage)allPages.get( pageNumber );
+ removed = removePage( page );
+ }
+ return removed;
+ }
+
+ /**
+ * This will import and copy the contents from another location. Currently
+ * the content stream is stored in a scratch file. The scratch file is
+ * associated with the document. If you are adding a page to this document
+ * from another document and want to copy the contents to this document's
+ * scratch file then use this method otherwise just use the addPage method.
+ *
+ * @param page The page to import.
+ * @return The page that was imported.
+ *
+ * @throws IOException If there is an error copying the page.
+ */
+ public PDPage importPage( PDPage page ) throws IOException
+ {
+ PDPage importedPage = new PDPage( new COSDictionary( page.getCOSDictionary() ) );
+ InputStream is = null;
+ OutputStream os = null;
+ try
+ {
+ PDStream src = page.getContents();
+ PDStream dest = new PDStream( new COSStream( src.getStream(), document.getScratchFile() ) );
+ importedPage.setContents( dest );
+ os = dest.createOutputStream();
+
+ byte[] buf = new byte[10240];
+ int amountRead = 0;
+ is = src.createInputStream();
+ while((amountRead = is.read(buf,0,buf.length)) > -1)
+ {
+ os.write(buf, 0, amountRead);
+ }
+ addPage( importedPage );
+ }
+ finally
+ {
+ //nested finally so that a failure while closing the input cannot skip closing the output
+ try
+ {
+ if( is != null ) { is.close(); }
+ }
+ finally
+ {
+ if( os != null ) { os.close(); }
+ }
+ }
+ return importedPage;
+ }
+
+ /**
+ * Constructor that uses an existing document. The COSDocument that
+ * is passed in must be valid.
+ *
+ * @param doc The COSDocument that this document wraps.
+ */
+ public PDDocument( COSDocument doc )
+ {
+ document = doc;
+ }
+
+ /**
+ * This will get the low level document.
+ *
+ * @return The document that this layer sits on top of.
+ */
+ public COSDocument getDocument()
+ {
+ return document;
+ }
+
+ /**
+ * This will get the document info dictionary. This is guaranteed to not return null.
+ *
+ * @return The documents /Info dictionary
+ */
+ public PDDocumentInformation getDocumentInformation()
+ {
+ if( documentInformation == null )
+ {
+ COSDictionary trailer = document.getTrailer();
+ COSDictionary infoDic = (COSDictionary)trailer.getDictionaryObject( COSName.INFO );
+ if( infoDic == null )
+ {
+ infoDic = new COSDictionary();
+ trailer.setItem( COSName.INFO, infoDic );
+ }
+ documentInformation = new PDDocumentInformation( infoDic );
+ }
+ return documentInformation;
+ }
+
+ /**
+ * This will set the document information for this document.
+ *
+ * @param info The updated document information.
+ */
+ public void setDocumentInformation( PDDocumentInformation info )
+ {
+ documentInformation = info;
+ document.getTrailer().setItem( COSName.INFO, info.getDictionary() );
+ }
+
+ /**
+ * This will get the document catalog, creating and caching it on first use. This is guaranteed to not return null.
+ *
+ * @return The catalog wrapping the document's /Root dictionary
+ */
+ public PDDocumentCatalog getDocumentCatalog()
+ {
+ if( documentCatalog == null )
+ {
+ COSDictionary trailer = document.getTrailer();
+ COSDictionary rootDictionary = (COSDictionary)trailer.getDictionaryObject( COSName.ROOT );
+ if( rootDictionary == null )
+ {
+ documentCatalog = new PDDocumentCatalog( this );
+ }
+ else
+ {
+ documentCatalog = new PDDocumentCatalog( this, rootDictionary );
+ }
+
+ }
+ return documentCatalog;
+ }
+
+ /**
+ * This will tell if this document is encrypted or not.
+ *
+ * @return true If this document is encrypted.
+ */
+ public boolean isEncrypted()
+ {
+ return document.isEncrypted();
+ }
+
+ /**
+ * This will get the encryption dictionary for this document. This will still
+ * return the parameters if the document was decrypted. If the document was
+ * never encrypted then this will return null. As the encryption architecture
+ * in PDF documents is pluggable this returns an abstract class, but the only
+ * supported subclass at this time is a PDStandardEncryption object.
+ *
+ * @return The encryption dictionary(most likely a PDStandardEncryption object)
+ *
+ * @throws IOException If there is an error determining which security handler to use.
+ */
+ public PDEncryptionDictionary getEncryptionDictionary() throws IOException
+ {
+ if( encParameters == null )
+ {
+ encParameters = PDEncryptionManager.getEncryptionDictionary( document.getEncryptionDictionary() );
+ }
+ return encParameters;
+ }
+
+ /**
+ * This will set the encryption dictionary for this document.
+ *
+ * @param encDictionary The encryption dictionary(most likely a PDStandardEncryption object)
+ *
+ * @throws IOException If there is an error determining which security handler to use.
+ */
+ public void setEncryptionDictionary( PDEncryptionDictionary encDictionary ) throws IOException
+ {
+ encParameters = encDictionary;
+ }
+
+ /**
+ * This will determine if this is the user password. This only applies when
+ * the document is encrypted and uses standard encryption.
+ *
+ * @param password The plain text user password.
+ *
+ * @return true If the password passed in matches the user password used to encrypt the document.
+ *
+ * @throws IOException If there is an error determining if it is the user password.
+ * @throws CryptographyException If there is an error in the encryption algorithms.
+ */
+ public boolean isUserPassword( String password ) throws IOException, CryptographyException
+ {
+ boolean retval = false;
+ if( password == null )
+ {
+ password = "";
+ }
+ PDFEncryption encryptor = new PDFEncryption();
+ PDEncryptionDictionary encryptionDictionary = getEncryptionDictionary();
+ if( encryptionDictionary == null )
+ {
+ throw new IOException( "Error: Document is not encrypted" );
+ }
+ else
+ {
+ if( encryptionDictionary instanceof PDStandardEncryption )
+ {
+ COSString documentID = (COSString)document.getDocumentID().get(0);
+ PDStandardEncryption standard = (PDStandardEncryption)encryptionDictionary;
+ retval = encryptor.isUserPassword(
+ password.getBytes(),
+ standard.getUserKey(),
+ standard.getOwnerKey(),
+ standard.getPermissions(),
+ documentID.getBytes(),
+ standard.getRevision(),
+ standard.getLength()/8 );
+ }
+ else
+ {
+ throw new IOException( "Error: Encyption dictionary is not 'Standard'" +
+ encryptionDictionary.getClass().getName() );
+ }
+ }
+ return retval;
+ }
+
+ /**
+ * This will determine if this is the owner password. This only applies when
+ * the document is encrypted and uses standard encryption.
+ *
+ * @param password The plain text owner password.
+ *
+ * @return true If the password passed in matches the owner password used to encrypt the document.
+ *
+ * @throws IOException If there is an error determining if it is the owner password.
+ * @throws CryptographyException If there is an error in the encryption algorithms.
+ */
+ public boolean isOwnerPassword( String password ) throws IOException, CryptographyException
+ {
+ boolean retval = false;
+ if( password == null )
+ {
+ password = "";
+ }
+ PDFEncryption encryptor = new PDFEncryption();
+ PDEncryptionDictionary encryptionDictionary = getEncryptionDictionary();
+ if( encryptionDictionary == null )
+ {
+ throw new IOException( "Error: Document is not encrypted" );
+ }
+ else
+ {
+ if( encryptionDictionary instanceof PDStandardEncryption )
+ {
+ COSString documentID = (COSString)document.getDocumentID().get( 0 );
+ PDStandardEncryption standard = (PDStandardEncryption)encryptionDictionary;
+ retval = encryptor.isOwnerPassword(
+ password.getBytes(),
+ standard.getUserKey(),
+ standard.getOwnerKey(),
+ standard.getPermissions(),
+ documentID.getBytes(),
+ standard.getRevision(),
+ standard.getLength()/8 );
+ }
+ else
+ {
+ throw new IOException( "Error: Encyption dictionary is not 'Standard'" +
+ encryptionDictionary.getClass().getName() );
+ }
+ }
+ return retval;
+ }
+
+ /**
+ * This will decrypt a document.
+ *
+ * @param password Either the user or owner password.
+ *
+ * @throws CryptographyException If there is an error decrypting the document.
+ * @throws IOException If there is an error getting the stream data.
+ * @throws InvalidPasswordException If the password is not a user or owner password.
+ */
+ public void decrypt( String password ) throws CryptographyException, IOException, InvalidPasswordException
+ {
+ decryptedWithOwnerPassword = isOwnerPassword( password );
+ DocumentEncryption decryptor = new DocumentEncryption( this );
+ decryptor.decryptDocument( password );
+ document.dereferenceObjectStreams();
+ }
+
+ /**
+ * This will tell if the document was decrypted with the master password. This
+ * entry is invalid if the PDF was not decrypted.
+ *
+ * @return true if the pdf was decrypted with the master password.
+ */
+ public boolean wasDecryptedWithOwnerPassword()
+ {
+ return decryptedWithOwnerPassword;
+ }
+
+ /**
+ * This will <b>mark</b> a document to be encrypted. The actual encryption
+ * will occur when the document is saved.
+ *
+ * @param ownerPassword The owner password to encrypt the document.
+ * @param userPassword The user password to encrypt the document.
+ *
+ * @throws CryptographyException If an error occurs during encryption.
+ * @throws IOException If there is an error accessing the data.
+ */
+ public void encrypt( String ownerPassword, String userPassword )
+ throws CryptographyException, IOException
+ {
+ encryptOnSave = true;
+ encryptOwnerPassword = ownerPassword;
+ encryptUserPassword = userPassword;
+ }
+
+
+ /**
+ * The owner password that was passed into the encrypt method. You should
+ * never use this method. This will no longer be valid once encryption
+ * has occurred.
+ *
+ * @return The owner password passed to the encrypt method.
+ */
+ public String getOwnerPasswordForEncryption()
+ {
+ return encryptOwnerPassword;
+ }
+
+ /**
+ * The user password that was passed into the encrypt method. You should
+ * never use this method. This will no longer be valid once encryption
+ * has occurred.
+ *
+ * @return The user password passed to the encrypt method.
+ */
+ public String getUserPasswordForEncryption()
+ {
+ return encryptUserPassword;
+ }
+
+ /**
+ * Internal method to determine if the document will be encrypted when it is saved.
+ *
+ * @return True if encrypt has been called and the document
+ * has not been saved yet.
+ */
+ public boolean willEncryptWhenSaving()
+ {
+ return encryptOnSave;
+ }
+
+ /**
+ * This should only be called by the COSWriter after encryption has completed.
+ *
+ */
+ public void clearWillEncryptWhenSaving()
+ {
+ encryptOnSave = false;
+ }
+
+ /**
+ * This will load a document from a file.
+ *
+ * @param filename The name of the file to load.
+ *
+ * @return The document that was loaded.
+ *
+ * @throws IOException If there is an error reading from the stream.
+ */
+ public static PDDocument load( String filename ) throws IOException
+ {
+ return load( new BufferedInputStream( new FileInputStream( filename ) ) );
+ }
+
+ /**
+ * This will load a document from a file.
+ *
+ * @param file The file to load.
+ *
+ * @return The document that was loaded.
+ *
+ * @throws IOException If there is an error reading from the stream.
+ */
+ public static PDDocument load( File file ) throws IOException
+ {
+ return load( new BufferedInputStream( new FileInputStream( file ) ) );
+ }
+
+ /**
+ * This will load a document from an input stream.
+ *
+ * @param input The stream that contains the document.
+ *
+ * @return The document that was loaded.
+ *
+ * @throws IOException If there is an error reading from the stream.
+ */
+ public static PDDocument load( InputStream input ) throws IOException
+ {
+ PDFParser parser = new PDFParser( input );
+ parser.parse();
+ return parser.getPDDocument();
+ }
+
+ /**
+ * This will save this document to the filesystem.
+ *
+ * @param fileName The file to save as.
+ *
+ * @throws IOException If there is an error saving the document.
+ * @throws COSVisitorException If an error occurs while generating the data.
+ */
+ public void save( String fileName ) throws IOException, COSVisitorException
+ {
+ save( new FileOutputStream( fileName ) );
+ }
+
+ /**
+ * This will save the document to an output stream. The writer wrapping the stream is always closed afterwards.
+ *
+ * @param output The stream to write to.
+ *
+ * @throws IOException If there is an error writing the document.
+ * @throws COSVisitorException If an error occurs while generating the data.
+ */
+ public void save( OutputStream output ) throws IOException, COSVisitorException
+ {
+ //update the count in case any pages have been added behind the scenes.
+ getDocumentCatalog().getPages().updateCount();
+ COSWriter writer = null;
+ try
+ {
+ writer = new COSWriter( output );
+ writer.write( this );
+ //no close here: the finally block closes the writer exactly once, on success and on failure
+ }
+ finally
+ {
+ if( writer != null )
+ {
+ writer.close();
+ }
+ }
+
+ }
+
+ /**
+ * This will return the total page count of the PDF document. Note: This method
+ * is deprecated in favor of the getNumberOfPages method. The getNumberOfPages is
+ * a required interface method of the Pageable interface. This method will
+ * be removed in a future version of PDFBox!!
+ *
+ * @return The total number of pages in the PDF document.
+ * @deprecated Use the getNumberOfPages method instead!
+ */
+ public int getPageCount()
+ {
+ return getNumberOfPages();
+ }
+
+ /**
+ * @see Pageable#getNumberOfPages()
+ */
+ public int getNumberOfPages()
+ {
+ PDDocumentCatalog cat = getDocumentCatalog();
+ return (int)cat.getPages().getCount();
+ }
+
+ /**
+ * @see Pageable#getPageFormat(int)
+ */
+ public PageFormat getPageFormat(int pageIndex)
+ {
+ PDPage page = (PDPage)getDocumentCatalog().getAllPages().get( pageIndex );
+ PDRectangle mediaBox = page.findMediaBox();
+ PageFormat format = new PageFormat();
+ Paper paper = new Paper();
+ //hmm the imageable area might need to be the CropBox instead
+ //of the media box???
+ paper.setImageableArea( 0,0,mediaBox.getWidth(),mediaBox.getHeight());
+ paper.setSize( mediaBox.getWidth(), mediaBox.getHeight() );
+ format.setPaper( paper );
+ return format;
+ }
+
+ /**
+ * @see Pageable#getPrintable(int)
+ */
+ public Printable getPrintable(int pageIndex)
+ {
+ return (Printable)getDocumentCatalog().getAllPages().get( pageIndex );
+ }
+
+ /**
+ * This will send the PDF document to a printer. The printing functionality
+ * depends on the org.pdfbox.pdfviewer.PageDrawer functionality. The PageDrawer
+ * is a work in progress and some PDFs will print correctly and some will
+ * not. This is a convenience method to create the java.awt.print.PrinterJob.
+ * The PDDocument implements the java.awt.print.Pageable interface and
+ * PDPage implements the java.awt.print.Printable interface, so advanced printing
+ * capabilities can be done by using those interfaces instead of this method.
+ *
+ * @throws PrinterException If there is an error while sending the PDF to
+ * the printer, or you do not have permissions to print this document.
+ */
+ public void print() throws PrinterException
+ {
+ PDEncryptionDictionary encDictionary = null;
+ try
+ {
+ encDictionary = getEncryptionDictionary();
+ }
+ catch( IOException io )
+ {
+ throw new PrinterIOException( io );
+ }
+
+ //only care about standard encryption and if it was decrypted with the
+ //user password
+ if( encDictionary instanceof PDStandardEncryption &&
+ !wasDecryptedWithOwnerPassword() )
+ {
+ PDStandardEncryption stdEncryption = (PDStandardEncryption)encDictionary;
+ if( !stdEncryption.canPrint() )
+ {
+ throw new PrinterException( "You do not have permission to print this document." );
+ }
+ }
+
+ PrinterJob printJob = PrinterJob.getPrinterJob();
+ printJob.setPageable(this);
+ if( printJob.printDialog() )
+ {
+ printJob.print();
+ }
+ }
+
+ /**
+ * This will close the underlying COSDocument object.
+ *
+ * @throws IOException If there is an error releasing resources.
+ */
+ public void close() throws IOException
+ {
+ document.close();
+ }
+} \ No newline at end of file