/** * Copyright (c) 2003-2004, www.pdfbox.org * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * 3. Neither the name of pdfbox; nor the names of its * contributors may be used to endorse or promote products derived from this * software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * http://www.pdfbox.org * */ package org.pdfbox.cos; import java.io.File; import java.io.IOException; import java.io.RandomAccessFile; import java.util.ArrayList; import java.util.HashMap; import java.util.Iterator; import java.util.List; import java.util.Map; import org.pdfbox.exceptions.COSVisitorException; import org.pdfbox.pdfparser.PDFObjectStreamParser; import org.pdfbox.persistence.util.COSObjectKey; /** * This is the in-memory representation of the PDF document. You need to call * close() on this object when you are done using it!! * * @author Ben Litchfield (ben@csh.rit.edu) * @version $Revision: 1.26 $ */ public class COSDocument extends COSBase { private float version; /** * added objects (actually preserving original sequence). */ private List objects = new ArrayList(); /** * a pool of objects read/referenced so far * used to resolve indirect object references. */ private Map objectPool = new HashMap(); /** * Document trailer dictionary. */ private COSDictionary trailer; /** * This file will store the streams in order to conserve memory. */ private RandomAccessFile scratchFile = null; private File tmpFile = null; private String headerString = "%PDF-1.4"; /** * Constructor. Uses the java.io.tmpdir value to create a file * to store the streams. * * @throws IOException If there is an error creating the tmp file. */ public COSDocument() throws IOException { this( new File( System.getProperty( "java.io.tmpdir" ) ) ); } /** * Constructor that will create a create a scratch file in the * following directory. * * @param scratchDir The directory to store a scratch file. * * @throws IOException If there is an error creating the tmp file. */ public COSDocument( File scratchDir ) throws IOException { tmpFile = File.createTempFile( "pdfbox", "tmp", scratchDir ); scratchFile = new RandomAccessFile( tmpFile, "rw" ); } /** * Constructor that will use the following random access file for storage * of the PDF streams. The client of this method is responsible for deleting * the storage if necessary that this file will write to. The close method * will close the file though. * * @param file The random access file to use for storage. */ public COSDocument( RandomAccessFile file ) { scratchFile = file; } /** * This will get the scratch file for this document. * * @return The scratch file. */ public RandomAccessFile getScratchFile() { return scratchFile; } /** * This will get the first dictionary object by type. * * @param type The type of the object. * * @return This will return an object with the specified type. */ public COSObject getObjectByType( String type ) { return getObjectByType( COSName.getPDFName( type ) ); } /** * This will get the first dictionary object by type. * * @param type The type of the object. * * @return This will return an object with the specified type. */ public COSObject getObjectByType( COSName type ) { COSObject retval = null; Iterator iter = objects.iterator(); while( iter.hasNext() && retval == null) { COSObject object = (COSObject)iter.next(); COSBase realObject = object.getObject(); if( realObject instanceof COSDictionary ) { COSDictionary dic = (COSDictionary)realObject; COSName objectType = (COSName)dic.getItem( COSName.TYPE ); if( objectType != null && objectType.equals( type ) ) { retval = object; } } } return retval; } /** * This will get all dictionary objects by type. * * @param type The type of the object. * * @return This will return an object with the specified type. */ public List getObjectsByType( String type ) { return getObjectsByType( COSName.getPDFName( type ) ); } /** * This will get a dictionary object by type. * * @param type The type of the object. * * @return This will return an object with the specified type. */ public List getObjectsByType( COSName type ) { List retval = new ArrayList(); Iterator iter = objects.iterator(); while( iter.hasNext() ) { COSObject object = (COSObject)iter.next(); COSBase realObject = object.getObject(); if( realObject instanceof COSStream ) { realObject = (COSStream)realObject; } if( realObject instanceof COSDictionary ) { COSDictionary dic = (COSDictionary)realObject; COSName objectType = (COSName)dic.getItem( COSName.TYPE ); if( objectType != null && objectType.equals( type ) ) { retval.add( object ); } } } return retval; } /** * This will print contents to stdout. */ public void print() { Iterator iter = objects.iterator(); while( iter.hasNext() ) { COSObject object = (COSObject)iter.next(); System.out.println( object); } } /** * This will set the version of this PDF document. * * @param versionValue The version of the PDF document. */ public void setVersion( float versionValue ) { version = versionValue; } /** * This will get the version of this PDF document. * * @return This documents version. */ public float getVersion() { return version; } /** * This will tell if this is an encrypted document. * * @return true If this document is encrypted. */ public boolean isEncrypted() { boolean encrypted = false; if( trailer != null ) { encrypted = trailer.getDictionaryObject( "Encrypt" ) != null; } return encrypted; } /** * This will get the encryption dictionary if the document is encrypted or null * if the document is not encrypted. * * @return The encryption dictionary. */ public COSDictionary getEncryptionDictionary() { return (COSDictionary)trailer.getDictionaryObject( COSName.getPDFName( "Encrypt" ) ); } /** * This will set the encryption dictionary, this should only be called when * encypting the document. * * @param encDictionary The encryption dictionary. */ public void setEncryptionDictionary( COSDictionary encDictionary ) { trailer.setItem( COSName.getPDFName( "Encrypt" ), encDictionary ); } /** * This will get the document ID. * * @return The document id. */ public COSArray getDocumentID() { return (COSArray) getTrailer().getItem(COSName.getPDFName("ID")); } /** * This will set the document ID. * * @param id The document id. */ public void setDocumentID( COSArray id ) { getTrailer().setItem(COSName.getPDFName("ID"), id); } /** * This will create an object for this document. * * Create an indirect object out of the direct type and include in the document * for later lookup via document a map from direct object to indirect object * is maintained. this provides better support for manual PDF construction. * * @param base the base object to wrap in an indirect object. * * @return The pdf object that wraps the base, or creates a new one. */ /** public COSObject createObject( COSBase base ) { COSObject obj = (COSObject)objectMap.get(base); if (obj == null) { obj = new COSObject( base ); obj.addTo(this); } return obj; }**/ /** * This will get the document catalog. * * Maybe this should move to an object at PDFEdit level * * @return catalog is the root of all document activities * * @throws IOException If no catalog can be found. */ public COSObject getCatalog() throws IOException { COSObject catalog = (COSObject)getObjectByType( COSName.CATALOG ); if( catalog == null ) { throw new IOException( "Catalog cannot be found" ); } return catalog; } /** * This will get a list of all available objects. * * @return A list of all objects. */ public List getObjects() { return new ArrayList(objects); } /** * This will get the document trailer. * * @return the document trailer dict */ public COSDictionary getTrailer() { return trailer; } /** * // MIT added, maybe this should not be supported as trailer is a persistence construct. * This will set the document trailer. * * @param newTrailer the document trailer dictionary */ public void setTrailer(COSDictionary newTrailer) { trailer = newTrailer; } /** * visitor pattern double dispatch method. * * @param visitor The object to notify when visiting this object. * @return any object, depending on the visitor implementation, or null * @throws COSVisitorException If an error occurs while visiting this object. */ public Object accept(ICOSVisitor visitor) throws COSVisitorException { return visitor.visitFromDocument( this ); } /** * This will close all storage and delete the tmp files. * * @throws IOException If there is an error close resources. */ public void close() throws IOException { if( scratchFile != null ) { scratchFile.close(); scratchFile = null; } if( tmpFile != null ) { tmpFile.delete(); tmpFile = null; } } /** * The sole purpose of this is to inform a client of PDFBox that they * did not close the document. */ protected void finalize() { if( tmpFile != null || scratchFile != null ) { Throwable t = new Throwable( "Warning: You did not close the PDF Document" ); t.printStackTrace(); } } /** * @return Returns the headerString. */ public String getHeaderString() { return headerString; } /** * @param header The headerString to set. */ public void setHeaderString(String header) { headerString = header; } /** * This method will search the list of objects for types of ObjStm. If it finds * them then it will parse out all of the objects from the stream that is contains. * * @throws IOException If there is an error parsing the stream. */ public void dereferenceObjectStreams() throws IOException { Iterator objStm = getObjectsByType( "ObjStm" ).iterator(); while( objStm.hasNext() ) { COSObject objStream = (COSObject)objStm.next(); COSStream stream = (COSStream)objStream.getObject(); PDFObjectStreamParser parser = new PDFObjectStreamParser( stream, this ); parser.parse(); Iterator compressedObjects = parser.getObjects().iterator(); while( compressedObjects.hasNext() ) { COSObject next = (COSObject)compressedObjects.next(); COSObjectKey key = new COSObjectKey( next ); COSObject obj = getObjectFromPool( key ); obj.setObject( next.getObject() ); } } } /** * This will add an object to this document. * the method checks if obj is already present as there may be cyclic dependencies * * @param obj The object to add to the document. * @return The object that was actually added to this document, if an object reference already * existed then that will be returned. * * @throws IOException If there is an error adding the object. */ public COSObject addObject(COSObject obj) throws IOException { COSObjectKey key = null; if( obj.getObjectNumber() != null ) { key = new COSObjectKey( obj ); } COSObject fromPool = getObjectFromPool( key ); fromPool.setObject( obj.getObject() ); return fromPool; } /** * This will get an object from the pool. * * @param key The object key. * * @return The object in the pool or a new one if it has not been parsed yet. * * @throws IOException If there is an error getting the proxy object. */ public COSObject getObjectFromPool(COSObjectKey key) throws IOException { COSObject obj = null; if( key != null ) { obj = (COSObject) objectPool.get(key); } if (obj == null) { // this was a forward reference, make "proxy" object obj = new COSObject(null); if( key != null ) { obj.setObjectNumber( new COSInteger( key.getNumber() ) ); obj.setGenerationNumber( new COSInteger( key.getGeneration() ) ); objectPool.put(key, obj); } objects.add( obj ); } return obj; } }