From 6025b6016517c6d898d8957d1d7e03ba71431912 Mon Sep 17 00:00:00 2001 From: tknall Date: Fri, 1 Dec 2006 12:20:24 +0000 Subject: Initial import of release 2.2. git-svn-id: https://joinup.ec.europa.eu/svn/pdf-as/trunk@4 7b5415b0-85f9-ee4d-85bd-d5d0c3b42d1c --- src/main/java/org/pdfbox/util/PDFStreamEngine.java | 622 +++++++++++++++++++++ 1 file changed, 622 insertions(+) create mode 100644 src/main/java/org/pdfbox/util/PDFStreamEngine.java (limited to 'src/main/java/org/pdfbox/util/PDFStreamEngine.java') diff --git a/src/main/java/org/pdfbox/util/PDFStreamEngine.java b/src/main/java/org/pdfbox/util/PDFStreamEngine.java new file mode 100644 index 0000000..1e05f8a --- /dev/null +++ b/src/main/java/org/pdfbox/util/PDFStreamEngine.java @@ -0,0 +1,622 @@ +/** + * Copyright (c) 2003-2005, www.pdfbox.org + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * 3. Neither the name of pdfbox; nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * http://www.pdfbox.org + * + */ +package org.pdfbox.util; + +import java.io.IOException; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Properties; +import java.util.Stack; + +import org.pdfbox.cos.COSObject; +import org.pdfbox.cos.COSStream; +import org.pdfbox.exceptions.WrappedIOException; + +import org.pdfbox.pdmodel.PDPage; +import org.pdfbox.pdmodel.PDResources; + +import org.pdfbox.pdmodel.font.PDFont; + +import org.pdfbox.pdmodel.graphics.PDGraphicsState; + +import org.pdfbox.util.operator.OperatorProcessor; + +import org.apache.log4j.Logger; + +/** + * This class will run through a PDF content stream and execute certain operations + * and provide a callback interface for clients that want to do things with the stream. + * See the PDFTextStripper class for an example of how to use this class. + * + * @author Ben Litchfield (ben@benlitchfield.com) + * @version $Revision: 1.29 $ + */ +public class PDFStreamEngine +{ + private static Logger log = Logger.getLogger(PDFStreamEngine.class); + + static protected final byte[] SPACE_BYTES = { (byte)32 }; + + private PDGraphicsState graphicsState = null; + + protected Matrix textMatrix = null; + protected Matrix textLineMatrix = null; + protected Stack graphicsStack = new Stack(); + //private PDResources resources = null; + + protected Map operators = new HashMap(); + + protected Map fontToAverageWidths = new HashMap(); + + protected Stack streamResourcesStack = new Stack(); + + protected PDPage page; + + /** + * This is a simple internal class used by the Stream engine to handle the + * resources stack. + */ + protected static class StreamResources + { + protected Map fonts; + protected Map colorSpaces; + protected Map xobjects; + protected Map graphicsStates; + protected PDResources resources; + } + + /** + * Constructor. + */ + public PDFStreamEngine() + { + //default constructor + } + + /** + * Constructor with engine properties. The property keys are all + * PDF operators, the values are class names used to execute those + * operators. + * + * @param properties The engine properties. + * + * @throws IOException If there is an error setting the engine properties. + */ + public PDFStreamEngine( Properties properties ) throws IOException + { + try + { + Iterator keys = properties.keySet().iterator(); + while( keys.hasNext() ) + { + String operator = (String)keys.next(); + String operatorClass = properties.getProperty( operator ); + if( log.isDebugEnabled() ) + { + log.debug( "Operator Class: " + operator + "=" + operatorClass ); + } + OperatorProcessor op = (OperatorProcessor)Class.forName( operatorClass ).newInstance(); + op.setContext( this ); + operators.put( operator, op ); + } + } + catch( Exception e ) + { + throw new WrappedIOException( e ); + } + } + + /** + * This will process the contents of the stream. + * + * @param aPage The page. + * @param resources The location to retrieve resources. + * @param cosStream the Stream to execute. + * + * + * @throws IOException if there is an error accessing the stream. + */ + public void processStream( PDPage aPage, PDResources resources, COSStream cosStream ) throws IOException + { + graphicsState = new PDGraphicsState(); + textMatrix = null; + textLineMatrix = null; + graphicsStack.clear(); + streamResourcesStack.clear(); + fontToAverageWidths.clear(); + + processSubStream( aPage, resources, cosStream ); + } + + /** + * Process a sub stream of the current stream. + * + * @param aPage The page used for drawing. + * @param resources The resources used when processing the stream. + * @param cosStream The stream to process. + * + * @throws IOException If there is an exception while processing the stream. + */ + public void processSubStream( PDPage aPage, PDResources resources, COSStream cosStream ) throws IOException + { + page = aPage; + if( resources != null ) + { + StreamResources sr = new StreamResources(); + sr.fonts = resources.getFonts(); + sr.colorSpaces = resources.getColorSpaces(); + sr.xobjects = resources.getXObjects(); + sr.graphicsStates = resources.getGraphicsStates(); + sr.resources = resources; + streamResourcesStack.push(sr); + } + try + { + List arguments = new ArrayList(); + long startTokens = System.currentTimeMillis(); + List tokens = cosStream.getStreamTokens(); + long stopTokens = System.currentTimeMillis(); + if( log.isDebugEnabled() ) + { + log.debug( "Getting tokens time=" + (stopTokens-startTokens) ); + } + if( tokens != null ) + { + Iterator iter = tokens.iterator(); + while( iter.hasNext() ) + { + Object next = iter.next(); + if( next instanceof COSObject ) + { + arguments.add( ((COSObject)next).getObject() ); + } + else if( next instanceof PDFOperator ) + { + processOperator( (PDFOperator)next, arguments ); + arguments = new ArrayList(); + } + else + { + arguments.add( next ); + } + } + } + } + finally + { + if( resources != null ) + { + streamResourcesStack.pop(); + } + } + + } + + /** + * A method provided as an event interface to allow a subclass to perform + * some specific functionality when a character needs to be displayed. + * + * @param text The character to be displayed. + */ + protected void showCharacter( TextPosition text ) + { + //subclasses can override to provide specific functionality. + } + + /** + * You should override this method if you want to perform an action when a + * string is being shown. + * + * @param string The string to display. + * + * @throws IOException If there is an error showing the string + */ + public void showString( byte[] string ) throws IOException + { + float spaceWidth = 0; + float spacing = 0; + StringBuffer stringResult = new StringBuffer(string.length); + + float characterDisplacement = 0; + float spaceDisplacement = 0; + float fontSize = graphicsState.getTextState().getFontSize(); + float horizontalScaling = graphicsState.getTextState().getHorizontalScalingPercent()/100f; + float rise = graphicsState.getTextState().getRise(); + final float wordSpacing = graphicsState.getTextState().getWordSpacing(); + final float characterSpacing = graphicsState.getTextState().getCharacterSpacing(); + float wordSpacingDisplacement = 0; + + PDFont font = graphicsState.getTextState().getFont(); + + //This will typically be 1000 but in the case of a type3 font + //this might be a different number + float glyphSpaceToTextSpaceFactor = 1f/font.getFontMatrix().getValue( 0, 0 ); + Float averageWidth = (Float)fontToAverageWidths.get( font ); + if( averageWidth == null ) + { + averageWidth = new Float( font.getAverageFontWidth() ); + fontToAverageWidths.put( font, averageWidth ); + } + + Matrix initialMatrix = new Matrix(); + initialMatrix.setValue(0,0,1); + initialMatrix.setValue(0,1,0); + initialMatrix.setValue(0,2,0); + initialMatrix.setValue(1,0,0); + initialMatrix.setValue(1,1,1); + initialMatrix.setValue(1,2,0); + initialMatrix.setValue(2,0,0); + initialMatrix.setValue(2,1,rise); + initialMatrix.setValue(2,2,1); + + + //this + int codeLength = 1; + Matrix ctm = graphicsState.getCurrentTransformationMatrix(); + + //lets see what the space displacement should be + spaceDisplacement = (font.getFontWidth( SPACE_BYTES, 0, 1 )/glyphSpaceToTextSpaceFactor); + if( spaceDisplacement == 0 ) + { + spaceDisplacement = (averageWidth.floatValue()/glyphSpaceToTextSpaceFactor); + //The average space width appears to be higher than necessary + //so lets make it a little bit smaller. + spaceDisplacement *= .80f; + if( log.isDebugEnabled() ) + { + log.debug( "Font: Space From Average=" + spaceDisplacement ); + } + } + int pageRotation = page.findRotation(); + Matrix trm = initialMatrix.multiply( textMatrix ).multiply( ctm ); + float x = trm.getValue(2,0); + float y = trm.getValue(2,1); + if( pageRotation == 0 ) + { + trm.setValue( 2,1, -y + page.findMediaBox().getHeight() ); + } + else if( pageRotation == 90 ) + { + trm.setValue( 2,0, y ); + trm.setValue( 2,1, x ); + } + else if( pageRotation == 270 ) + { + trm.setValue( 2,0, -y + page.findMediaBox().getHeight() ); + trm.setValue( 2,1, x ); + } + for( int i=0; i