From 6025b6016517c6d898d8957d1d7e03ba71431912 Mon Sep 17 00:00:00 2001 From: tknall Date: Fri, 1 Dec 2006 12:20:24 +0000 Subject: Initial import of release 2.2. git-svn-id: https://joinup.ec.europa.eu/svn/pdf-as/trunk@4 7b5415b0-85f9-ee4d-85bd-d5d0c3b42d1c --- src/main/java/org/pdfbox/pdmodel/PDDocument.java | 725 +++++++++++++++++++++++ 1 file changed, 725 insertions(+) create mode 100644 src/main/java/org/pdfbox/pdmodel/PDDocument.java (limited to 'src/main/java/org/pdfbox/pdmodel/PDDocument.java') diff --git a/src/main/java/org/pdfbox/pdmodel/PDDocument.java b/src/main/java/org/pdfbox/pdmodel/PDDocument.java new file mode 100644 index 0000000..94150fb --- /dev/null +++ b/src/main/java/org/pdfbox/pdmodel/PDDocument.java @@ -0,0 +1,725 @@ +/** + * Copyright (c) 2003-2005, www.pdfbox.org + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * 3. Neither the name of pdfbox; nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * http://www.pdfbox.org + * + */ +package org.pdfbox.pdmodel; + +import java.awt.print.PageFormat; +import java.awt.print.Pageable; +import java.awt.print.Paper; +import java.awt.print.Printable; +import java.awt.print.PrinterException; +import java.awt.print.PrinterIOException; +import java.awt.print.PrinterJob; +import java.io.BufferedInputStream; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileOutputStream; +import java.io.InputStream; +import java.io.IOException; +import java.io.OutputStream; +import java.util.List; + +import org.pdfbox.cos.COSArray; +import org.pdfbox.cos.COSDictionary; +import org.pdfbox.cos.COSDocument; +import org.pdfbox.cos.COSInteger; +import org.pdfbox.cos.COSName; +import org.pdfbox.cos.COSStream; +import org.pdfbox.cos.COSString; + +import org.pdfbox.encryption.PDFEncryption; +import org.pdfbox.encryption.DocumentEncryption; + +import org.pdfbox.exceptions.COSVisitorException; +import org.pdfbox.exceptions.CryptographyException; +import org.pdfbox.exceptions.InvalidPasswordException; + +import org.pdfbox.pdfparser.PDFParser; + +import org.pdfbox.pdfwriter.COSWriter; + +import org.pdfbox.pdmodel.common.PDRectangle; +import org.pdfbox.pdmodel.common.PDStream; + +import org.pdfbox.pdmodel.encryption.PDEncryptionDictionary; +import org.pdfbox.pdmodel.encryption.PDEncryptionManager; +import org.pdfbox.pdmodel.encryption.PDStandardEncryption; + +/** + * This is the in-memory representation of the PDF document. You need to call + * close() on this object when you are done using it!! + * + * @author Ben Litchfield (ben@benlitchfield.com) + * @version $Revision: 1.35 $ + */ +public class PDDocument implements Pageable +{ + private COSDocument document; + private boolean encryptOnSave = false; + private String encryptUserPassword = null; + private String encryptOwnerPassword = null; + + //cached values + private PDDocumentInformation documentInformation; + private PDDocumentCatalog documentCatalog; + + //The encParameters will be cached here. When the document is decrypted then + //the COSDocument will not have an "Encrypt" dictionary anymore and this object + //must be used. + private PDEncryptionDictionary encParameters = null; + /** + * This will tell if the document was decrypted with the master password. + */ + private boolean decryptedWithOwnerPassword = false; + + /** + * Constructor, creates a new PDF Document with no pages. You need to add + * at least one page for the document to be valid. + * + * @throws IOException If there is an error creating this document. + */ + public PDDocument() throws IOException + { + document = new COSDocument(); + + //First we need a trailer + COSDictionary trailer = new COSDictionary(); + document.setTrailer( trailer ); + + //Next we need the root dictionary. + COSDictionary rootDictionary = new COSDictionary(); + trailer.setItem( COSName.ROOT, rootDictionary ); + rootDictionary.setItem( COSName.TYPE, COSName.CATALOG ); + rootDictionary.setItem( COSName.VERSION, COSName.getPDFName( "1.4" ) ); + + //next we need the pages tree structure + COSDictionary pages = new COSDictionary(); + rootDictionary.setItem( COSName.PAGES, pages ); + pages.setItem( COSName.TYPE, COSName.PAGES ); + COSArray kidsArray = new COSArray(); + pages.setItem( COSName.KIDS, kidsArray ); + pages.setItem( COSName.COUNT, new COSInteger( 0 ) ); + } + + /** + * This will add a page to the document. This is a convenience method, that + * will add the page to the root of the hierarchy and set the parent of the + * page to the root. + * + * @param page The page to add to the document. + */ + public void addPage( PDPage page ) + { + PDPageNode rootPages = getDocumentCatalog().getPages(); + rootPages.getKids().add( page ); + page.setParent( rootPages ); + rootPages.updateCount(); + } + + /** + * Remove the page from the document. + * + * @param page The page to remove from the document. + * + * @return true if the page was found false otherwise. + */ + public boolean removePage( PDPage page ) + { + PDPageNode parent = page.getParent(); + boolean retval = parent.getKids().remove( page ); + if( retval ) + { + //do a recursive updateCount starting at the root + //of the document + getDocumentCatalog().getPages().updateCount(); + } + return retval; + } + + /** + * Remove the page from the document. + * + * @param pageNumber 0 based index to page number. + * @return true if the page was found false otherwise. + */ + public boolean removePage( int pageNumber ) + { + boolean removed = false; + List allPages = getDocumentCatalog().getAllPages(); + if( allPages.size() > pageNumber) + { + PDPage page = (PDPage)allPages.get( pageNumber ); + removed = removePage( page ); + } + return removed; + } + + /** + * This will import and copy the contents from another location. Currently + * the content stream is stored in a scratch file. The scratch file is + * associated with the document. If you are adding a page to this document + * from another document and want to copy the contents to this document's + * scratch file then use this method otherwise just use the addPage method. + * + * @param page The page to import. + * @return The page that was imported. + * + * @throws IOException If there is an error copying the page. + */ + public PDPage importPage( PDPage page ) throws IOException + { + PDPage importedPage = new PDPage( new COSDictionary( page.getCOSDictionary() ) ); + InputStream is = null; + OutputStream os = null; + try + { + PDStream src = page.getContents(); + PDStream dest = new PDStream( new COSStream( src.getStream(), document.getScratchFile() ) ); + importedPage.setContents( dest ); + os = dest.createOutputStream(); + + byte[] buf = new byte[10240]; + int amountRead = 0; + is = src.createInputStream(); + while((amountRead = is.read(buf,0,10240)) > -1) + { + os.write(buf, 0, amountRead); + } + addPage( importedPage ); + } + finally + { + if( is != null ) + { + is.close(); + } + if( os != null ) + { + os.close(); + } + } + return importedPage; + + } + + /** + * Constructor that uses an existing document. The COSDocument that + * is passed in must be valid. + * + * @param doc The COSDocument that this document wraps. + */ + public PDDocument( COSDocument doc ) + { + document = doc; + } + + /** + * This will get the low level document. + * + * @return The document that this layer sits on top of. + */ + public COSDocument getDocument() + { + return document; + } + + /** + * This will get the document info dictionary. This is guaranteed to not return null. + * + * @return The documents /Info dictionary + */ + public PDDocumentInformation getDocumentInformation() + { + if( documentInformation == null ) + { + COSDictionary trailer = document.getTrailer(); + COSDictionary infoDic = (COSDictionary)trailer.getDictionaryObject( COSName.INFO ); + if( infoDic == null ) + { + infoDic = new COSDictionary(); + trailer.setItem( COSName.INFO, infoDic ); + } + documentInformation = new PDDocumentInformation( infoDic ); + } + return documentInformation; + } + + /** + * This will set the document information for this document. + * + * @param info The updated document information. + */ + public void setDocumentInformation( PDDocumentInformation info ) + { + documentInformation = info; + document.getTrailer().setItem( COSName.INFO, info.getDictionary() ); + } + + /** + * This will get the document CATALOG. This is guaranteed to not return null. + * + * @return The documents /Root dictionary + */ + public PDDocumentCatalog getDocumentCatalog() + { + if( documentCatalog == null ) + { + COSDictionary trailer = document.getTrailer(); + COSDictionary infoDic = (COSDictionary)trailer.getDictionaryObject( COSName.ROOT ); + if( infoDic == null ) + { + documentCatalog = new PDDocumentCatalog( this ); + } + else + { + documentCatalog = new PDDocumentCatalog( this, infoDic ); + } + + } + return documentCatalog; + } + + /** + * This will tell if this document is encrypted or not. + * + * @return true If this document is encrypted. + */ + public boolean isEncrypted() + { + return document.isEncrypted(); + } + + /** + * This will get the encryption dictionary for this document. This will still + * return the parameters if the document was decrypted. If the document was + * never encrypted then this will return null. As the encryption architecture + * in PDF documents is plugable this returns an abstract class, but the only + * supported subclass at this time is a PDStandardEncryption object. + * + * @return The encryption dictionary(most likely a PDStandardEncryption object) + * + * @throws IOException If there is an error determining which security handler to use. + */ + public PDEncryptionDictionary getEncryptionDictionary() throws IOException + { + if( encParameters == null ) + { + encParameters = PDEncryptionManager.getEncryptionDictionary( document.getEncryptionDictionary() ); + } + return encParameters; + } + + /** + * This will set the encryption dictionary for this document. + * + * @param encDictionary The encryption dictionary(most likely a PDStandardEncryption object) + * + * @throws IOException If there is an error determining which security handler to use. + */ + public void setEncryptionDictionary( PDEncryptionDictionary encDictionary ) throws IOException + { + encParameters = encDictionary; + } + + /** + * This will determine if this is the user password. This only applies when + * the document is encrypted and uses standard encryption. + * + * @param password The plain text user password. + * + * @return true If the password passed in matches the user password used to encrypt the document. + * + * @throws IOException If there is an error determining if it is the user password. + * @throws CryptographyException If there is an error in the encryption algorithms. + */ + public boolean isUserPassword( String password ) throws IOException, CryptographyException + { + boolean retval = false; + if( password == null ) + { + password = ""; + } + PDFEncryption encryptor = new PDFEncryption(); + PDEncryptionDictionary encryptionDictionary = getEncryptionDictionary(); + if( encryptionDictionary == null ) + { + throw new IOException( "Error: Document is not encrypted" ); + } + else + { + if( encryptionDictionary instanceof PDStandardEncryption ) + { + COSString documentID = (COSString)document.getDocumentID().get(0); + PDStandardEncryption standard = (PDStandardEncryption)encryptionDictionary; + retval = encryptor.isUserPassword( + password.getBytes(), + standard.getUserKey(), + standard.getOwnerKey(), + standard.getPermissions(), + documentID.getBytes(), + standard.getRevision(), + standard.getLength()/8 ); + } + else + { + throw new IOException( "Error: Encyption dictionary is not 'Standard'" + + encryptionDictionary.getClass().getName() ); + } + } + return retval; + } + + /** + * This will determine if this is the owner password. This only applies when + * the document is encrypted and uses standard encryption. + * + * @param password The plain text owner password. + * + * @return true If the password passed in matches the owner password used to encrypt the document. + * + * @throws IOException If there is an error determining if it is the user password. + * @throws CryptographyException If there is an error in the encryption algorithms. + */ + public boolean isOwnerPassword( String password ) throws IOException, CryptographyException + { + boolean retval = false; + if( password == null ) + { + password = ""; + } + PDFEncryption encryptor = new PDFEncryption(); + PDEncryptionDictionary encryptionDictionary = getEncryptionDictionary(); + if( encryptionDictionary == null ) + { + throw new IOException( "Error: Document is not encrypted" ); + } + else + { + if( encryptionDictionary instanceof PDStandardEncryption ) + { + COSString documentID = (COSString)document.getDocumentID().get( 0 ); + PDStandardEncryption standard = (PDStandardEncryption)encryptionDictionary; + retval = encryptor.isOwnerPassword( + password.getBytes(), + standard.getUserKey(), + standard.getOwnerKey(), + standard.getPermissions(), + documentID.getBytes(), + standard.getRevision(), + standard.getLength()/8 ); + } + else + { + throw new IOException( "Error: Encyption dictionary is not 'Standard'" + + encryptionDictionary.getClass().getName() ); + } + } + return retval; + } + + /** + * This will decrypt a document. + * + * @param password Either the user or owner password. + * + * @throws CryptographyException If there is an error decrypting the document. + * @throws IOException If there is an error getting the stream data. + * @throws InvalidPasswordException If the password is not a user or owner password. + */ + public void decrypt( String password ) throws CryptographyException, IOException, InvalidPasswordException + { + decryptedWithOwnerPassword = isOwnerPassword( password ); + DocumentEncryption decryptor = new DocumentEncryption( this ); + decryptor.decryptDocument( password ); + document.dereferenceObjectStreams(); + } + + /** + * This will tell if the document was decrypted with the master password. This + * entry is invalid if the PDF was not decrypted. + * + * @return true if the pdf was decrypted with the master password. + */ + public boolean wasDecryptedWithOwnerPassword() + { + return decryptedWithOwnerPassword; + } + + /** + * This will mark a document to be encrypted. The actual encryption + * will occur when the document is saved. + * + * @param ownerPassword The owner password to encrypt the document. + * @param userPassword The user password to encrypt the document. + * + * @throws CryptographyException If an error occurs during encryption. + * @throws IOException If there is an error accessing the data. + */ + public void encrypt( String ownerPassword, String userPassword ) + throws CryptographyException, IOException + { + encryptOnSave = true; + encryptOwnerPassword = ownerPassword; + encryptUserPassword = userPassword; + } + + + /** + * The owner password that was passed into the encrypt method. You should + * never use this method. This will not longer be valid once encryption + * has occured. + * + * @return The owner password passed to the encrypt method. + */ + public String getOwnerPasswordForEncryption() + { + return encryptOwnerPassword; + } + + /** + * The user password that was passed into the encrypt method. You should + * never use this method. This will not longer be valid once encryption + * has occured. + * + * @return The user password passed to the encrypt method. + */ + public String getUserPasswordForEncryption() + { + return encryptUserPassword; + } + + /** + * Internal method do determine if the document will be encrypted when it is saved. + * + * @return True if encrypt has been called and the document + * has not been saved yet. + */ + public boolean willEncryptWhenSaving() + { + return encryptOnSave; + } + + /** + * This shoule only be called by the COSWriter after encryption has completed. + * + */ + public void clearWillEncryptWhenSaving() + { + encryptOnSave = false; + } + + /** + * This will load a document from a file. + * + * @param filename The name of the file to load. + * + * @return The document that was loaded. + * + * @throws IOException If there is an error reading from the stream. + */ + public static PDDocument load( String filename ) throws IOException + { + return load( new BufferedInputStream( new FileInputStream( filename ) ) ); + } + + /** + * This will load a document from a file. + * + * @param file The name of the file to load. + * + * @return The document that was loaded. + * + * @throws IOException If there is an error reading from the stream. + */ + public static PDDocument load( File file ) throws IOException + { + return load( new BufferedInputStream( new FileInputStream( file ) ) ); + } + + /** + * This will load a document from an input stream. + * + * @param input The stream that contains the document. + * + * @return The document that was loaded. + * + * @throws IOException If there is an error reading from the stream. + */ + public static PDDocument load( InputStream input ) throws IOException + { + PDFParser parser = new PDFParser( input ); + parser.parse(); + return parser.getPDDocument(); + } + + /** + * This will save this document to the filesystem. + * + * @param fileName The file to save as. + * + * @throws IOException If there is an error saving the document. + * @throws COSVisitorException If an error occurs while generating the data. + */ + public void save( String fileName ) throws IOException, COSVisitorException + { + save( new FileOutputStream( fileName ) ); + } + + /** + * This will save the document to an output stream. + * + * @param output The stream to write to. + * + * @throws IOException If there is an error writing the document. + * @throws COSVisitorException If an error occurs while generating the data. + */ + public void save( OutputStream output ) throws IOException, COSVisitorException + { + //update the count in case any pages have been added behind the scenes. + getDocumentCatalog().getPages().updateCount(); + COSWriter writer = null; + try + { + writer = new COSWriter( output ); + writer.write( this ); + writer.close(); + } + finally + { + if( writer != null ) + { + writer.close(); + } + } + + } + + /** + * This will return the total page count of the PDF document. Note: This method + * is deprecated in favor of the getNumberOfPages method. The getNumberOfPages is + * a required interface method of the Pageable interface. This method will + * be removed in a future version of PDFBox!! + * + * @return The total number of pages in the PDF document. + * @deprecated Use the getNumberOfPages method instead! + */ + public int getPageCount() + { + return getNumberOfPages(); + } + + /** + * @see Pageable#getNumberOfPages() + */ + public int getNumberOfPages() + { + PDDocumentCatalog cat = getDocumentCatalog(); + return (int)cat.getPages().getCount(); + } + + /** + * @see Pageable#getPageFormat(int) + */ + public PageFormat getPageFormat(int pageIndex) + { + PDPage page = (PDPage)getDocumentCatalog().getAllPages().get( pageIndex ); + PDRectangle mediaBox = page.findMediaBox(); + PageFormat format = new PageFormat(); + Paper paper = new Paper(); + //hmm the imageable area might need to be the CropBox instead + //of the media box??? + paper.setImageableArea( 0,0,mediaBox.getWidth(),mediaBox.getHeight()); + paper.setSize( mediaBox.getWidth(), mediaBox.getHeight() ); + format.setPaper( paper ); + return format; + } + + /** + * @see Pageable#getPrintable(int) + */ + public Printable getPrintable(int pageIndex) + { + return (Printable)getDocumentCatalog().getAllPages().get( pageIndex ); + } + + /** + * This will send the PDF document to a printer. The printing functionality + * depends on the org.pdfbox.pdfviewer.PageDrawer functionality. The PageDrawer + * is a work in progress and some PDFs will print correctly and some will + * not. This is a convenience method to create the java.awt.print.PrinterJob. + * The PDDocument implements the java.awt.print.Pageable interface and + * PDPage implementes the java.awt.print.Printable interface, so advanced printing + * capabilities can be done by using those interfaces instead of this method. + * + * @throws PrinterException If there is an error while sending the PDF to + * the printer, or you do not have permissions to print this document. + */ + public void print() throws PrinterException + { + PDEncryptionDictionary encDictionary = null; + try + { + encDictionary = getEncryptionDictionary(); + } + catch( IOException io ) + { + throw new PrinterIOException( io ); + } + + //only care about standard encryption and if it was decrypted with the + //user password + if( encDictionary instanceof PDStandardEncryption && + !wasDecryptedWithOwnerPassword() ) + { + PDStandardEncryption stdEncryption = (PDStandardEncryption)encDictionary; + if( !stdEncryption.canPrint() ) + { + throw new PrinterException( "You do not have permission to print this document." ); + } + } + + PrinterJob printJob = PrinterJob.getPrinterJob(); + printJob.setPageable(this); + if( printJob.printDialog() ) + { + printJob.print(); + } + } + + /** + * This will close the underlying COSDocument object. + * + * @throws IOException If there is an error releasing resources. + */ + public void close() throws IOException + { + document.close(); + } +} \ No newline at end of file -- cgit v1.2.3