From 6025b6016517c6d898d8957d1d7e03ba71431912 Mon Sep 17 00:00:00 2001 From: tknall Date: Fri, 1 Dec 2006 12:20:24 +0000 Subject: Initial import of release 2.2. git-svn-id: https://joinup.ec.europa.eu/svn/pdf-as/trunk@4 7b5415b0-85f9-ee4d-85bd-d5d0c3b42d1c --- src/main/java/org/pdfbox/util/BitFlagHelper.java | 85 ++ src/main/java/org/pdfbox/util/BoundingBox.java | 188 ++++ src/main/java/org/pdfbox/util/DateConverter.java | 281 ++++++ .../java/org/pdfbox/util/DefaultFileFilter.java | 285 ++++++ src/main/java/org/pdfbox/util/ErrorLogger.java | 72 ++ src/main/java/org/pdfbox/util/ImageParameters.java | 234 +++++ src/main/java/org/pdfbox/util/Matrix.java | 350 +++++++ src/main/java/org/pdfbox/util/PDFHighlighter.java | 213 ++++ src/main/java/org/pdfbox/util/PDFOperator.java | 153 +++ src/main/java/org/pdfbox/util/PDFStreamEngine.java | 622 ++++++++++++ src/main/java/org/pdfbox/util/PDFText2HTML.java | 271 +++++ src/main/java/org/pdfbox/util/PDFTextStripper.java | 1033 ++++++++++++++++++++ .../org/pdfbox/util/PDFTextStripperByArea.java | 165 ++++ src/main/java/org/pdfbox/util/ResourceLoader.java | 169 ++++ .../java/org/pdfbox/util/SimpleConfigurator.java | 68 ++ src/main/java/org/pdfbox/util/Splitter.java | 201 ++++ src/main/java/org/pdfbox/util/TextPosition.java | 203 ++++ .../org/pdfbox/util/TextPositionComparator.java | 126 +++ src/main/java/org/pdfbox/util/XMLUtil.java | 103 ++ .../java/org/pdfbox/util/operator/BeginText.java | 68 ++ .../java/org/pdfbox/util/operator/Concatenate.java | 106 ++ .../java/org/pdfbox/util/operator/EndText.java | 67 ++ .../java/org/pdfbox/util/operator/GRestore.java | 67 ++ src/main/java/org/pdfbox/util/operator/GSave.java | 66 ++ src/main/java/org/pdfbox/util/operator/Invoke.java | 113 +++ .../java/org/pdfbox/util/operator/MoveAndShow.java | 75 ++ .../java/org/pdfbox/util/operator/MoveText.java | 76 ++ .../pdfbox/util/operator/MoveTextSetLeading.java | 80 ++ .../java/org/pdfbox/util/operator/NextLine.java | 82 ++ 
.../pdfbox/util/operator/OperatorProcessor.java | 93 ++ .../org/pdfbox/util/operator/SetCharSpacing.java | 79 ++ .../util/operator/SetGraphicsStateParameters.java | 72 ++ .../util/operator/SetHorizontalTextScaling.java | 64 ++ .../org/pdfbox/util/operator/SetLineWidth.java | 64 ++ .../java/org/pdfbox/util/operator/SetMatrix.java | 90 ++ .../org/pdfbox/util/operator/SetMoveAndShow.java | 80 ++ .../util/operator/SetNonStrokingCMYKColor.java | 69 ++ .../util/operator/SetNonStrokingColorSpace.java | 86 ++ .../util/operator/SetNonStrokingRGBColor.java | 69 ++ .../pdfbox/util/operator/SetStrokingCMYKColor.java | 71 ++ .../util/operator/SetStrokingColorSpace.java | 89 ++ .../pdfbox/util/operator/SetStrokingRGBColor.java | 71 ++ .../java/org/pdfbox/util/operator/SetTextFont.java | 96 ++ .../org/pdfbox/util/operator/SetTextLeading.java | 69 ++ .../pdfbox/util/operator/SetTextRenderingMode.java | 64 ++ .../java/org/pdfbox/util/operator/SetTextRise.java | 64 ++ .../org/pdfbox/util/operator/SetWordSpacing.java | 68 ++ .../java/org/pdfbox/util/operator/ShowText.java | 73 ++ .../org/pdfbox/util/operator/ShowTextGlyph.java | 99 ++ .../java/org/pdfbox/util/operator/package.html | 9 + .../operator/pagedrawer/AppendRectangleToPath.java | 77 ++ .../util/operator/pagedrawer/BeginInlineImage.java | 114 +++ .../pdfbox/util/operator/pagedrawer/ClosePath.java | 59 ++ .../pdfbox/util/operator/pagedrawer/CurveTo.java | 73 ++ .../pagedrawer/CurveToReplicateFinalPoint.java | 69 ++ .../pagedrawer/CurveToReplicateInitialPoint.java | 76 ++ .../util/operator/pagedrawer/FillEvenOddRule.java | 71 ++ .../util/operator/pagedrawer/FillNonZeroRule.java | 71 ++ .../pdfbox/util/operator/pagedrawer/Invoke.java | 180 ++++ .../pdfbox/util/operator/pagedrawer/LineTo.java | 65 ++ .../pdfbox/util/operator/pagedrawer/MoveTo.java | 68 ++ .../util/operator/pagedrawer/SetLineWidth.java | 65 ++ .../pagedrawer/SetNonStrokingCMYKColor.java | 64 ++ .../pagedrawer/SetNonStrokingColorSpace.java | 71 ++ 
.../pagedrawer/SetNonStrokingRGBColor.java | 65 ++ .../operator/pagedrawer/SetStrokingCMYKColor.java | 64 ++ .../operator/pagedrawer/SetStrokingColorSpace.java | 70 ++ .../operator/pagedrawer/SetStrokingRGBColor.java | 65 ++ .../util/operator/pagedrawer/StrokePath.java | 73 ++ .../pdfbox/util/operator/pagedrawer/package.html | 9 + src/main/java/org/pdfbox/util/package.html | 9 + 71 files changed, 8639 insertions(+) create mode 100644 src/main/java/org/pdfbox/util/BitFlagHelper.java create mode 100644 src/main/java/org/pdfbox/util/BoundingBox.java create mode 100644 src/main/java/org/pdfbox/util/DateConverter.java create mode 100644 src/main/java/org/pdfbox/util/DefaultFileFilter.java create mode 100644 src/main/java/org/pdfbox/util/ErrorLogger.java create mode 100644 src/main/java/org/pdfbox/util/ImageParameters.java create mode 100644 src/main/java/org/pdfbox/util/Matrix.java create mode 100644 src/main/java/org/pdfbox/util/PDFHighlighter.java create mode 100644 src/main/java/org/pdfbox/util/PDFOperator.java create mode 100644 src/main/java/org/pdfbox/util/PDFStreamEngine.java create mode 100644 src/main/java/org/pdfbox/util/PDFText2HTML.java create mode 100644 src/main/java/org/pdfbox/util/PDFTextStripper.java create mode 100644 src/main/java/org/pdfbox/util/PDFTextStripperByArea.java create mode 100644 src/main/java/org/pdfbox/util/ResourceLoader.java create mode 100644 src/main/java/org/pdfbox/util/SimpleConfigurator.java create mode 100644 src/main/java/org/pdfbox/util/Splitter.java create mode 100644 src/main/java/org/pdfbox/util/TextPosition.java create mode 100644 src/main/java/org/pdfbox/util/TextPositionComparator.java create mode 100644 src/main/java/org/pdfbox/util/XMLUtil.java create mode 100644 src/main/java/org/pdfbox/util/operator/BeginText.java create mode 100644 src/main/java/org/pdfbox/util/operator/Concatenate.java create mode 100644 src/main/java/org/pdfbox/util/operator/EndText.java create mode 100644 
src/main/java/org/pdfbox/util/operator/GRestore.java create mode 100644 src/main/java/org/pdfbox/util/operator/GSave.java create mode 100644 src/main/java/org/pdfbox/util/operator/Invoke.java create mode 100644 src/main/java/org/pdfbox/util/operator/MoveAndShow.java create mode 100644 src/main/java/org/pdfbox/util/operator/MoveText.java create mode 100644 src/main/java/org/pdfbox/util/operator/MoveTextSetLeading.java create mode 100644 src/main/java/org/pdfbox/util/operator/NextLine.java create mode 100644 src/main/java/org/pdfbox/util/operator/OperatorProcessor.java create mode 100644 src/main/java/org/pdfbox/util/operator/SetCharSpacing.java create mode 100644 src/main/java/org/pdfbox/util/operator/SetGraphicsStateParameters.java create mode 100644 src/main/java/org/pdfbox/util/operator/SetHorizontalTextScaling.java create mode 100644 src/main/java/org/pdfbox/util/operator/SetLineWidth.java create mode 100644 src/main/java/org/pdfbox/util/operator/SetMatrix.java create mode 100644 src/main/java/org/pdfbox/util/operator/SetMoveAndShow.java create mode 100644 src/main/java/org/pdfbox/util/operator/SetNonStrokingCMYKColor.java create mode 100644 src/main/java/org/pdfbox/util/operator/SetNonStrokingColorSpace.java create mode 100644 src/main/java/org/pdfbox/util/operator/SetNonStrokingRGBColor.java create mode 100644 src/main/java/org/pdfbox/util/operator/SetStrokingCMYKColor.java create mode 100644 src/main/java/org/pdfbox/util/operator/SetStrokingColorSpace.java create mode 100644 src/main/java/org/pdfbox/util/operator/SetStrokingRGBColor.java create mode 100644 src/main/java/org/pdfbox/util/operator/SetTextFont.java create mode 100644 src/main/java/org/pdfbox/util/operator/SetTextLeading.java create mode 100644 src/main/java/org/pdfbox/util/operator/SetTextRenderingMode.java create mode 100644 src/main/java/org/pdfbox/util/operator/SetTextRise.java create mode 100644 src/main/java/org/pdfbox/util/operator/SetWordSpacing.java create mode 100644 
src/main/java/org/pdfbox/util/operator/ShowText.java create mode 100644 src/main/java/org/pdfbox/util/operator/ShowTextGlyph.java create mode 100644 src/main/java/org/pdfbox/util/operator/package.html create mode 100644 src/main/java/org/pdfbox/util/operator/pagedrawer/AppendRectangleToPath.java create mode 100644 src/main/java/org/pdfbox/util/operator/pagedrawer/BeginInlineImage.java create mode 100644 src/main/java/org/pdfbox/util/operator/pagedrawer/ClosePath.java create mode 100644 src/main/java/org/pdfbox/util/operator/pagedrawer/CurveTo.java create mode 100644 src/main/java/org/pdfbox/util/operator/pagedrawer/CurveToReplicateFinalPoint.java create mode 100644 src/main/java/org/pdfbox/util/operator/pagedrawer/CurveToReplicateInitialPoint.java create mode 100644 src/main/java/org/pdfbox/util/operator/pagedrawer/FillEvenOddRule.java create mode 100644 src/main/java/org/pdfbox/util/operator/pagedrawer/FillNonZeroRule.java create mode 100644 src/main/java/org/pdfbox/util/operator/pagedrawer/Invoke.java create mode 100644 src/main/java/org/pdfbox/util/operator/pagedrawer/LineTo.java create mode 100644 src/main/java/org/pdfbox/util/operator/pagedrawer/MoveTo.java create mode 100644 src/main/java/org/pdfbox/util/operator/pagedrawer/SetLineWidth.java create mode 100644 src/main/java/org/pdfbox/util/operator/pagedrawer/SetNonStrokingCMYKColor.java create mode 100644 src/main/java/org/pdfbox/util/operator/pagedrawer/SetNonStrokingColorSpace.java create mode 100644 src/main/java/org/pdfbox/util/operator/pagedrawer/SetNonStrokingRGBColor.java create mode 100644 src/main/java/org/pdfbox/util/operator/pagedrawer/SetStrokingCMYKColor.java create mode 100644 src/main/java/org/pdfbox/util/operator/pagedrawer/SetStrokingColorSpace.java create mode 100644 src/main/java/org/pdfbox/util/operator/pagedrawer/SetStrokingRGBColor.java create mode 100644 src/main/java/org/pdfbox/util/operator/pagedrawer/StrokePath.java create mode 100644 
src/main/java/org/pdfbox/util/operator/pagedrawer/package.html create mode 100644 src/main/java/org/pdfbox/util/package.html (limited to 'src/main/java/org/pdfbox/util') diff --git a/src/main/java/org/pdfbox/util/BitFlagHelper.java b/src/main/java/org/pdfbox/util/BitFlagHelper.java new file mode 100644 index 0000000..058f733 --- /dev/null +++ b/src/main/java/org/pdfbox/util/BitFlagHelper.java @@ -0,0 +1,85 @@ +/** + * Copyright (c) 2005, www.pdfbox.org + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * 3. Neither the name of pdfbox; nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * http://www.pdfbox.org + * + */ +package org.pdfbox.util; + +import org.pdfbox.cos.COSDictionary; + +/** + * This class will be used for bit flag operations. + * + * @author Ben Litchfield (ben@benlitchfield.com) + * @version $Revision: 1.2 $ + */ +public class BitFlagHelper +{ + private BitFlagHelper() + { + //helper class should not be constructed + } + + /** + * Sets the given boolean value at bitPos in the flags. + * + * @param dic The dictionary to set the value into. + * @param field The name of the field to set the value into. + * @param bitFlag the bit position to set the value in. + * @param value the value the bit position should have. + */ + public static void setFlag( COSDictionary dic, String field, int bitFlag, boolean value ) + { + int currentFlags = dic.getInt( field, 0 ); + if( value ) + { + currentFlags = currentFlags | bitFlag; + } + else + { + currentFlags = currentFlags &= ~bitFlag; + } + dic.setInt( field, currentFlags ); + } + + /** + * Gets the boolean value from the flags at the given bit + * position. + * + * @param dic The dictionary to get the field from. + * @param field The name of the field to get the flag from. + * @param bitFlag the bitPosition to get the value from. + * + * @return true if the number at bitPos is '1' + */ + public static boolean getFlag(COSDictionary dic, String field, int bitFlag) + { + int ff = dic.getInt( field, 0 ); + return (ff & bitFlag) == bitFlag; + } +} \ No newline at end of file diff --git a/src/main/java/org/pdfbox/util/BoundingBox.java b/src/main/java/org/pdfbox/util/BoundingBox.java new file mode 100644 index 0000000..aaea354 --- /dev/null +++ b/src/main/java/org/pdfbox/util/BoundingBox.java @@ -0,0 +1,188 @@ +/** + * Copyright (c) 2003, www.pdfbox.org + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. 
Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * 3. Neither the name of pdfbox; nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * http://www.pdfbox.org + * + */ +package org.pdfbox.util; + +import java.awt.Point; + +/** + * This is an implementation of a bounding box. This was originally written for the + * AMF parser. + * + * @author Ben Litchfield (ben@csh.rit.edu) + * @version $Revision: 1.6 $ + */ +public class BoundingBox +{ + private float lowerLeftX; + private float lowerLeftY; + private float upperRightX; + private float upperRightY; + + /** + * Getter for property lowerLeftX. + * + * @return Value of property lowerLeftX. + */ + public float getLowerLeftX() + { + return lowerLeftX; + } + + /** + * Setter for property lowerLeftX. 
+ * + * @param lowerLeftXValue New value of property lowerLeftX. + */ + public void setLowerLeftX(float lowerLeftXValue) + { + this.lowerLeftX = lowerLeftXValue; + } + + /** + * Getter for property lowerLeftY. + * + * @return Value of property lowerLeftY. + */ + public float getLowerLeftY() + { + return lowerLeftY; + } + + /** + * Setter for property lowerLeftY. + * + * @param lowerLeftYValue New value of property lowerLeftY. + */ + public void setLowerLeftY(float lowerLeftYValue) + { + this.lowerLeftY = lowerLeftYValue; + } + + /** + * Getter for property upperRightX. + * + * @return Value of property upperRightX. + */ + public float getUpperRightX() + { + return upperRightX; + } + + /** + * Setter for property upperRightX. + * + * @param upperRightXValue New value of property upperRightX. + */ + public void setUpperRightX(float upperRightXValue) + { + this.upperRightX = upperRightXValue; + } + + /** + * Getter for property upperRightY. + * + * @return Value of property upperRightY. + */ + public float getUpperRightY() + { + return upperRightY; + } + + /** + * Setter for property upperRightY. + * + * @param upperRightYValue New value of property upperRightY. + */ + public void setUpperRightY(float upperRightYValue) + { + this.upperRightY = upperRightYValue; + } + + /** + * This will get the width of this rectangle as calculated by + * upperRightX - lowerLeftX. + * + * @return The width of this rectangle. + */ + public float getWidth() + { + return getUpperRightX() - getLowerLeftX(); + } + + /** + * This will get the height of this rectangle as calculated by + * upperRightY - lowerLeftY. + * + * @return The height of this rectangle. + */ + public float getHeight() + { + return getUpperRightY() - getLowerLeftY(); + } + + /** + * Checks if a point is inside this rectangle. + * + * @param x The x coordinate. + * @param y The y coordinate. + * + * @return true If the point is on the edge or inside the rectangle bounds. 
+ */ + public boolean contains( float x, float y ) + { + return x >= lowerLeftX && x <= upperRightX && + y >= lowerLeftY && y <= upperRightY; + } + + /** + * Checks if a point is inside this rectangle. + * + * @param point The point to check + * + * @return true If the point is on the edge or inside the rectangle bounds. + */ + public boolean contains( Point point ) + { + return contains( (float)point.getX(), (float)point.getY() ); + } + + /** + * This will return a string representation of this rectangle. + * + * @return This object as a string. + */ + public String toString() + { + return "[" + getLowerLeftX() + "," + getLowerLeftY() + "," + + getUpperRightX() + "," + getUpperRightY() +"]"; + } + +} \ No newline at end of file diff --git a/src/main/java/org/pdfbox/util/DateConverter.java b/src/main/java/org/pdfbox/util/DateConverter.java new file mode 100644 index 0000000..43d3566 --- /dev/null +++ b/src/main/java/org/pdfbox/util/DateConverter.java @@ -0,0 +1,281 @@ +/** + * Copyright (c) 2003-2005, www.pdfbox.org + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * 3. Neither the name of pdfbox; nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * http://www.pdfbox.org + * + */ +package org.pdfbox.util; + +import java.text.ParseException; +import java.text.SimpleDateFormat; + +import java.io.IOException; + +import java.util.Calendar; +import java.util.Date; +import java.util.GregorianCalendar; +import java.util.SimpleTimeZone; +import java.util.TimeZone; + +import org.pdfbox.cos.COSString; + +/** + * This class is used to convert dates to strings and back using the PDF + * date standards. Date are described in PDFReference1.4 section 3.8.2 + * + * @author Ben Litchfield (ben@benlitchfield.com) + * @version $Revision: 1.9 $ + */ +public class DateConverter +{ + private static final SimpleDateFormat PDF_DATE_FORMAT = new SimpleDateFormat( "yyyyMMddHHmmss" ); + + //The Date format is supposed to be the PDF_DATE_FORMAT, but not all PDF documents + //will use that date, so I have added a couple other potential formats + //to try if the original one does not work. 
+ private static final SimpleDateFormat[] POTENTIAL_FORMATS = new SimpleDateFormat[] { + new SimpleDateFormat("EEEE, dd MMM yyyy hh:mm:ss a"), + new SimpleDateFormat("EEEE, MMM dd, yyyy hh:mm:ss a")}; + + private static final SimpleDateFormat ISO_8601_DATE_FORMAT = new SimpleDateFormat( "yyyy-MM-dd'T'HH:mm:ss" ); + + private DateConverter() + { + //utility class should not be constructed. + } + + /** + * This will convert the calendar to a string. + * + * @param date The date to convert to a string. + * + * @return The date as a String to be used in a PDF document. + */ + public static String toString( Calendar date ) + { + String retval = null; + if( date != null ) + { + StringBuffer buffer = new StringBuffer(); + TimeZone zone = date.getTimeZone(); + long offsetInMinutes = zone.getOffset( date.getTimeInMillis() )/1000/60; + long hours = Math.abs( offsetInMinutes/60 ); + long minutes = Math.abs( offsetInMinutes%60 ); + buffer.append( "D:" ); + buffer.append( PDF_DATE_FORMAT.format( date.getTime() ) ); + if( offsetInMinutes == 0 ) + { + buffer.append( "Z" ); + } + else if( offsetInMinutes < 0 ) + { + buffer.append( "-" ); + } + else + { + buffer.append( "+" ); + } + if( hours < 10 ) + { + buffer.append( "0" ); + } + buffer.append( hours ); + buffer.append( "'" ); + if( minutes < 10 ) + { + buffer.append( "0" ); + } + buffer.append( minutes ); + buffer.append( "'" ); + retval = buffer.toString(); + + } + return retval; + } + + /** + * This will convert a string to a calendar. + * + * @param date The string representation of the calendar. + * + * @return The calendar that this string represents. + * + * @throws IOException If the date string is not in the correct format. + */ + public static Calendar toCalendar( COSString date ) throws IOException + { + Calendar retval = null; + if( date != null ) + { + retval = toCalendar( date.getString() ); + } + + return retval; + } + + /** + * This will convert a string to a calendar. 
+ * + * @param date The string representation of the calendar. + * + * @return The calendar that this string represents. + * + * @throws IOException If the date string is not in the correct format. + */ + public static Calendar toCalendar( String date ) throws IOException + { + Calendar retval = null; + if( date != null ) + { + //these are the default values + int year = 0; + int month = 1; + int day = 1; + int hour = 0; + int minute = 0; + int second = 0; + //first string off the prefix if it exists + try + { + SimpleTimeZone zone = null; + if( date.startsWith( "D:" ) ) + { + date = date.substring( 2, date.length() ); + } + if( date.length() < 4 ) + { + throw new IOException( "Error: Invalid date format '" + date + "'" ); + } + year = Integer.parseInt( date.substring( 0, 4 ) ); + if( date.length() >= 6 ) + { + month = Integer.parseInt( date.substring( 4, 6 ) ); + } + if( date.length() >= 8 ) + { + day = Integer.parseInt( date.substring( 6, 8 ) ); + } + if( date.length() >= 10 ) + { + hour = Integer.parseInt( date.substring( 8, 10 ) ); + } + if( date.length() >= 12 ) + { + minute = Integer.parseInt( date.substring( 10, 12 ) ); + } + if( date.length() >= 14 ) + { + second = Integer.parseInt( date.substring( 12, 14 ) ); + } + retval = new GregorianCalendar( year, month-1, day, hour, minute, second ); + if( date.length() >= 15 ) + { + char sign = date.charAt( 14 ); + if( sign == 'Z' ) + { + zone = new SimpleTimeZone(0,"Unknown"); + } + else + { + int hours = 0; + int minutes = 0; + if( date.length() >= 17 ) + { + hours = Integer.parseInt( date.substring( 15, 17 ) ); + } + if( date.length() > 20 ) + { + minutes = Integer.parseInt( date.substring( 18, 20 ) ); + } + zone = new SimpleTimeZone( hours*60*60*1000 + minutes*60*1000, "Unknown" ); + } + retval.setTimeZone( zone ); + } + } + catch( NumberFormatException e ) + { + for( int i=0; retval == null && i0 && i + * Adobe Highlight File Format + */ +public class PDFHighlighter extends PDFTextStripper +{ + private Writer 
highlighterOutput = null; + //private Color highlightColor = Color.YELLOW; + + private String[] searchedWords; + private ByteArrayOutputStream textOS = null; + private Writer textWriter = null; + + /** + * Default constructor. + * + * @throws IOException If there is an error constructing this class. + */ + public PDFHighlighter() throws IOException + { + super(); + super.setLineSeparator( "" ); + super.setPageSeparator( "" ); + super.setWordSeparator( "" ); + super.setShouldSeparateByBeads( false ); + super.setSuppressDuplicateOverlappingText( false ); + } + + /** + * Generate an XML highlight string based on the PDF. + * + * @param pdDocument The PDF to find words in. + * @param highlightWord The word to search for. + * @param xmlOutput The resulting output xml file. + * + * @throws IOException If there is an error reading from the PDF, or writing to the XML. + */ + public void generateXMLHighlight(PDDocument pdDocument, String highlightWord, Writer xmlOutput ) throws IOException + { + generateXMLHighlight( pdDocument, new String[] { highlightWord }, xmlOutput ); + } + + /** + * Generate an XML highlight string based on the PDF. + * + * @param pdDocument The PDF to find words in. + * @param sWords The words to search for. + * @param xmlOutput The resulting output xml file. + * + * @throws IOException If there is an error reading from the PDF, or writing to the XML. 
+ */ + public void generateXMLHighlight(PDDocument pdDocument, String[] sWords, Writer xmlOutput ) throws IOException + { + highlighterOutput = xmlOutput; + searchedWords = sWords; + highlighterOutput.write("\n\n\n"); + textOS = new ByteArrayOutputStream(); + textWriter = new OutputStreamWriter( textOS, "UTF-16" ); + writeText(pdDocument, textWriter); + highlighterOutput.write("\n\n"); + highlighterOutput.flush(); + } + + /** + * @see PDFTextStripper#endPage( PDPage ) + */ + protected void endPage( PDPage pdPage ) throws IOException + { + textWriter.flush(); + + String page = new String( textOS.toByteArray(), "UTF-16" ); + textOS.reset(); + //page = page.replaceAll( "\n", "" ); + //page = page.replaceAll( "\r", "" ); + //page = CCRStringUtil.stripChar(page, '\n'); + //page = CCRStringUtil.stripChar(page, '\r'); + + // Traitement des listes à puces (caractères spéciaux) + if (page.indexOf("a") != -1) + { + page = page.replaceAll("a[0-9]{1,3}", "."); + } + + for (int i = 0; i < searchedWords.length; i++) + { + Pattern pattern = Pattern.compile(searchedWords[i], Pattern.CASE_INSENSITIVE); + Matcher matcher = pattern.matcher(page); + while( matcher.find() ) + { + int begin = matcher.start(); + int end = matcher.end(); + highlighterOutput.write(" \n"); + } + } + } + + /** + * Command line application. + * + * @param args The command line arguments to the application. + * + * @throws IOException If there is an error generating the highlight file. 
+ */ + public static void main(String[] args) throws IOException + { + PDFHighlighter xmlExtractor = new PDFHighlighter(); + PDDocument doc = null; + try + { + if( args.length < 2 ) + { + usage(); + } + String[] highlightStrings = new String[ args.length - 1]; + System.arraycopy( args, 1, highlightStrings, 0, highlightStrings.length ); + doc = PDDocument.load( args[0] ); + + xmlExtractor.generateXMLHighlight( + doc, + highlightStrings, + new OutputStreamWriter( System.out ) ); + } + finally + { + if( doc != null ) + { + doc.close(); + } + } + } + + private static void usage() + { + System.err.println( "usage: java " + PDFHighlighter.class.getName() + " word1 word2 word3 ..." ); + System.exit( 1 ); + } + + + /** + * Get the color to highlight the strings with. Default is Color.YELLOW. + * + * @return The color to highlight strings with. + */ + /*public Color getHighlightColor() + { + return highlightColor; + }**/ + + /** + * Get the color to highlight the strings with. Default is Color.YELLOW. + * + * @param color The color to highlight strings with. + */ + /*public void setHighlightColor(Color color) + { + this.highlightColor = color; + }**/ + + /** + * Set the highlight color using HTML like rgb string. The string must be 6 characters long. + * + * @param color The color to use for highlighting. Should be in the format of "FF0000". + */ + /*public void setHighlightColor( String color ) + { + highlightColor = Color.decode( color ); + }**/ + + /** + * Get the highlight color as an HTML like string. This will return a string of six characters. + * + * @return The current highlight color. For example FF0000 + */ + /*public String getHighlightColorAsString() + { + //BJL: kudos to anyone that has a cleaner way of doing this! + String red = Integer.toHexString( highlightColor.getRed() ); + String green = Integer.toHexString( highlightColor.getGreen() ); + String blue = Integer.toHexString( highlightColor.getBlue() ); + + return (red.length() < 2 ? 
"0" + red : red) + + (green.length() < 2 ? "0" + green : green) + + (blue.length() < 2 ? "0" + blue : blue); + }**/ +} \ No newline at end of file diff --git a/src/main/java/org/pdfbox/util/PDFOperator.java b/src/main/java/org/pdfbox/util/PDFOperator.java new file mode 100644 index 0000000..76d9887 --- /dev/null +++ b/src/main/java/org/pdfbox/util/PDFOperator.java @@ -0,0 +1,153 @@ +/** + * Copyright (c) 2003-2004, www.pdfbox.org + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * 3. Neither the name of pdfbox; nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * http://www.pdfbox.org + * + */ +package org.pdfbox.util; + +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; + +/** + * This class represents an Operator in the content stream. + * + * @author Ben Litchfield (ben@csh.rit.edu) + * @version $Revision: 1.13 $ + */ +public class PDFOperator +{ + private String theOperator; + private byte[] imageData; + private ImageParameters imageParameters; + + private static Map operators = Collections.synchronizedMap( new HashMap() ); + + /** + * Constructor. + * + * @param aOperator The operator that this object will represent. + */ + private PDFOperator( String aOperator ) + { + theOperator = aOperator; + if( aOperator.startsWith( "/" ) ) + { + throw new RuntimeException( "Operators are not allowed to start with / '" + aOperator + "'" ); + } + } + + /** + * This is used to create/cache operators in the system. + * + * @param operator The operator for the system. + * + * @return The operator that matches the operator keyword. + */ + public static PDFOperator getOperator( String operator ) + { + PDFOperator operation = null; + if( operator.equals( "ID" ) || operator.equals( "BI" ) ) + { + //we can't cache the ID operators. + operation = new PDFOperator( operator ); + } + else + { + operation = (PDFOperator)operators.get( operator ); + if( operation == null ) + { + operation = new PDFOperator( operator ); + operators.put( operator, operation ); + } + } + + return operation; + } + + /** + * This will get the operation that this operator represents. + * + * @return The string representation of the operation. + */ + public String getOperation() + { + return theOperator; + } + + /** + * This will print a string rep of this class. + * + * @return A string rep of this class. + */ + public String toString() + { + return "PDFOperator{" + theOperator + "}"; + } + + /** + * This is the special case for the ID operator where there are just random + * bytes inlined the stream. 
+ * + * @return Value of property imageData. + */ + public byte[] getImageData() + { + return this.imageData; + } + + /** + * This will set the image data, this is only used for the ID operator. + * + * @param imageDataArray New value of property imageData. + */ + public void setImageData(byte[] imageDataArray) + { + imageData = imageDataArray; + } + + /** + * This will get the image parameters, this is only valid for BI operators. + * + * @return The image parameters. + */ + public ImageParameters getImageParameters() + { + return imageParameters; + } + + /** + * This will set the image parameters, this is only valid for BI operators. + * + * @param params The image parameters. + */ + public void setImageParameters( ImageParameters params) + { + imageParameters = params; + } +} \ No newline at end of file diff --git a/src/main/java/org/pdfbox/util/PDFStreamEngine.java b/src/main/java/org/pdfbox/util/PDFStreamEngine.java new file mode 100644 index 0000000..1e05f8a --- /dev/null +++ b/src/main/java/org/pdfbox/util/PDFStreamEngine.java @@ -0,0 +1,622 @@ +/** + * Copyright (c) 2003-2005, www.pdfbox.org + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * 3. Neither the name of pdfbox; nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * http://www.pdfbox.org + * + */ +package org.pdfbox.util; + +import java.io.IOException; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Properties; +import java.util.Stack; + +import org.pdfbox.cos.COSObject; +import org.pdfbox.cos.COSStream; +import org.pdfbox.exceptions.WrappedIOException; + +import org.pdfbox.pdmodel.PDPage; +import org.pdfbox.pdmodel.PDResources; + +import org.pdfbox.pdmodel.font.PDFont; + +import org.pdfbox.pdmodel.graphics.PDGraphicsState; + +import org.pdfbox.util.operator.OperatorProcessor; + +import org.apache.log4j.Logger; + +/** + * This class will run through a PDF content stream and execute certain operations + * and provide a callback interface for clients that want to do things with the stream. + * See the PDFTextStripper class for an example of how to use this class. 
+ * + * @author Ben Litchfield (ben@benlitchfield.com) + * @version $Revision: 1.29 $ + */ +public class PDFStreamEngine +{ + private static Logger log = Logger.getLogger(PDFStreamEngine.class); + + static protected final byte[] SPACE_BYTES = { (byte)32 }; + + private PDGraphicsState graphicsState = null; + + protected Matrix textMatrix = null; + protected Matrix textLineMatrix = null; + protected Stack graphicsStack = new Stack(); + //private PDResources resources = null; + + protected Map operators = new HashMap(); + + protected Map fontToAverageWidths = new HashMap(); + + protected Stack streamResourcesStack = new Stack(); + + protected PDPage page; + + /** + * This is a simple internal class used by the Stream engine to handle the + * resources stack. + */ + protected static class StreamResources + { + protected Map fonts; + protected Map colorSpaces; + protected Map xobjects; + protected Map graphicsStates; + protected PDResources resources; + } + + /** + * Constructor. + */ + public PDFStreamEngine() + { + //default constructor + } + + /** + * Constructor with engine properties. The property keys are all + * PDF operators, the values are class names used to execute those + * operators. + * + * @param properties The engine properties. + * + * @throws IOException If there is an error setting the engine properties. 
+ */ + public PDFStreamEngine( Properties properties ) throws IOException + { + try + { + Iterator keys = properties.keySet().iterator(); + while( keys.hasNext() ) + { + String operator = (String)keys.next(); + String operatorClass = properties.getProperty( operator ); + if( log.isDebugEnabled() ) + { + log.debug( "Operator Class: " + operator + "=" + operatorClass ); + } + OperatorProcessor op = (OperatorProcessor)Class.forName( operatorClass ).newInstance(); + op.setContext( this ); + operators.put( operator, op ); + } + } + catch( Exception e ) + { + throw new WrappedIOException( e ); + } + } + + /** + * This will process the contents of the stream. + * + * @param aPage The page. + * @param resources The location to retrieve resources. + * @param cosStream the Stream to execute. + * + * + * @throws IOException if there is an error accessing the stream. + */ + public void processStream( PDPage aPage, PDResources resources, COSStream cosStream ) throws IOException + { + graphicsState = new PDGraphicsState(); + textMatrix = null; + textLineMatrix = null; + graphicsStack.clear(); + streamResourcesStack.clear(); + fontToAverageWidths.clear(); + + processSubStream( aPage, resources, cosStream ); + } + + /** + * Process a sub stream of the current stream. + * + * @param aPage The page used for drawing. + * @param resources The resources used when processing the stream. + * @param cosStream The stream to process. + * + * @throws IOException If there is an exception while processing the stream. 
+ */ + public void processSubStream( PDPage aPage, PDResources resources, COSStream cosStream ) throws IOException + { + page = aPage; + if( resources != null ) + { + StreamResources sr = new StreamResources(); + sr.fonts = resources.getFonts(); + sr.colorSpaces = resources.getColorSpaces(); + sr.xobjects = resources.getXObjects(); + sr.graphicsStates = resources.getGraphicsStates(); + sr.resources = resources; + streamResourcesStack.push(sr); + } + try + { + List arguments = new ArrayList(); + long startTokens = System.currentTimeMillis(); + List tokens = cosStream.getStreamTokens(); + long stopTokens = System.currentTimeMillis(); + if( log.isDebugEnabled() ) + { + log.debug( "Getting tokens time=" + (stopTokens-startTokens) ); + } + if( tokens != null ) + { + Iterator iter = tokens.iterator(); + while( iter.hasNext() ) + { + Object next = iter.next(); + if( next instanceof COSObject ) + { + arguments.add( ((COSObject)next).getObject() ); + } + else if( next instanceof PDFOperator ) + { + processOperator( (PDFOperator)next, arguments ); + arguments = new ArrayList(); + } + else + { + arguments.add( next ); + } + } + } + } + finally + { + if( resources != null ) + { + streamResourcesStack.pop(); + } + } + + } + + /** + * A method provided as an event interface to allow a subclass to perform + * some specific functionality when a character needs to be displayed. + * + * @param text The character to be displayed. + */ + protected void showCharacter( TextPosition text ) + { + //subclasses can override to provide specific functionality. + } + + /** + * You should override this method if you want to perform an action when a + * string is being shown. + * + * @param string The string to display. 
+ * + * @throws IOException If there is an error showing the string + */ + public void showString( byte[] string ) throws IOException + { + float spaceWidth = 0; + float spacing = 0; + StringBuffer stringResult = new StringBuffer(string.length); + + float characterDisplacement = 0; + float spaceDisplacement = 0; + float fontSize = graphicsState.getTextState().getFontSize(); + float horizontalScaling = graphicsState.getTextState().getHorizontalScalingPercent()/100f; + float rise = graphicsState.getTextState().getRise(); + final float wordSpacing = graphicsState.getTextState().getWordSpacing(); + final float characterSpacing = graphicsState.getTextState().getCharacterSpacing(); + float wordSpacingDisplacement = 0; + + PDFont font = graphicsState.getTextState().getFont(); + + //This will typically be 1000 but in the case of a type3 font + //this might be a different number + float glyphSpaceToTextSpaceFactor = 1f/font.getFontMatrix().getValue( 0, 0 ); + Float averageWidth = (Float)fontToAverageWidths.get( font ); + if( averageWidth == null ) + { + averageWidth = new Float( font.getAverageFontWidth() ); + fontToAverageWidths.put( font, averageWidth ); + } + + Matrix initialMatrix = new Matrix(); + initialMatrix.setValue(0,0,1); + initialMatrix.setValue(0,1,0); + initialMatrix.setValue(0,2,0); + initialMatrix.setValue(1,0,0); + initialMatrix.setValue(1,1,1); + initialMatrix.setValue(1,2,0); + initialMatrix.setValue(2,0,0); + initialMatrix.setValue(2,1,rise); + initialMatrix.setValue(2,2,1); + + + //this + int codeLength = 1; + Matrix ctm = graphicsState.getCurrentTransformationMatrix(); + + //lets see what the space displacement should be + spaceDisplacement = (font.getFontWidth( SPACE_BYTES, 0, 1 )/glyphSpaceToTextSpaceFactor); + if( spaceDisplacement == 0 ) + { + spaceDisplacement = (averageWidth.floatValue()/glyphSpaceToTextSpaceFactor); + //The average space width appears to be higher than necessary + //so lets make it a little bit smaller. 
+ spaceDisplacement *= .80f; + if( log.isDebugEnabled() ) + { + log.debug( "Font: Space From Average=" + spaceDisplacement ); + } + } + int pageRotation = page.findRotation(); + Matrix trm = initialMatrix.multiply( textMatrix ).multiply( ctm ); + float x = trm.getValue(2,0); + float y = trm.getValue(2,1); + if( pageRotation == 0 ) + { + trm.setValue( 2,1, -y + page.findMediaBox().getHeight() ); + } + else if( pageRotation == 90 ) + { + trm.setValue( 2,0, y ); + trm.setValue( 2,1, x ); + } + else if( pageRotation == 270 ) + { + trm.setValue( 2,0, -y + page.findMediaBox().getHeight() ); + trm.setValue( 2,1, x ); + } + for( int i=0; i"); + buf.append(""); + buf.append(getTitleGuess()); + buf.append(""); + buf.append(""); + buf.append("\n"); + getOutput().write(buf.toString()); + } + + /** + * The guess to the document title. + * + * @return A string that is the title of this document. + */ + protected String getTitleGuess() + { + return titleGuess; + } + + /** + * @see PDFTextStripper#flushText + */ + protected void flushText() throws IOException + { + Iterator textIter = getCharactersByArticle().iterator(); + + if (onFirstPage) + { + guessTitle(textIter); + writeHeader(); + onFirstPage = false; + } + super.flushText(); + } + + /** + * @see PDFTextStripper#endDocument( PDDocument ) + */ + public void endDocument(PDDocument pdf) throws IOException + { + output.write(""); + } + + /** + * This method will attempt to guess the title of the document. + * + * @param textIter The characters on the first page. + * @return The text position that is guessed to be the title. 
+ */ + protected TextPosition guessTitle(Iterator textIter) + { + float lastFontSize = -1.0f; + int stringsInFont = 0; + StringBuffer titleText = new StringBuffer(); + while (textIter.hasNext()) + { + Iterator textByArticle = ((List)textIter.next()).iterator(); + while( textByArticle.hasNext() ) + { + TextPosition position = (TextPosition) textByArticle.next(); + float currentFontSize = position.getFontSize(); + if (currentFontSize != lastFontSize) + { + if (beginTitle != null) + { // font change in candidate title. + if (stringsInFont == 0) + { + beginTitle = null; // false alarm + titleText.setLength(0); + } + else + { + // had a significant font with some words: call it a title + titleGuess = titleText.toString(); + log.debug("Title candidate =" + titleGuess); + afterEndTitle = position; + return beginTitle; + } + } + else + { // font change and begin == null + if (currentFontSize > 13.0f) + { // most body text is 12pt max I guess + beginTitle = position; + } + } + + lastFontSize = currentFontSize; + stringsInFont = 0; + } + stringsInFont++; + if (beginTitle != null) + { + titleText.append(position.getCharacter()+" "); + } + } + } + return beginTitle; // null + } + + /** + * Write out the paragraph separator. + * + * @throws IOException If there is an error writing to the stream. + */ + protected void startParagraph() throws IOException + { + if (! suppressParagraphs) + { + getOutput().write("

"); + } + } + /** + * Write out the paragraph separator. + * + * @throws IOException If there is an error writing to the stream. + */ + protected void endParagraph() throws IOException + { + if (! suppressParagraphs) + { + getOutput().write("

"); + } + } + + /** + * @see PDFTextStripper#writeCharacters( TextPosition ) + */ + protected void writeCharacters(TextPosition position ) throws IOException + { + if (position == beginTitle) + { + output.write("

"); + suppressParagraphs = true; + } + if (position == afterEndTitle) + { + output.write("

"); // end title and start first paragraph + suppressParagraphs = false; + } + + String chars = position.getCharacter(); + + for (int i = 0; i < chars.length(); i++) + { + char c = chars.charAt(i); + if ((c < 32) || (c > 126)) + { + int charAsInt = c; + output.write("&#" + charAsInt + ";"); + } + else + { + switch (c) + { + case 34: + output.write("""); + break; + case 38: + output.write("&"); + break; + case 60: + output.write("<"); + break; + case 62: + output.write(">"); + break; + default: + output.write(c); + } + } + } + } + + /** + * @return Returns the suppressParagraphs. + */ + public boolean isSuppressParagraphs() + { + return suppressParagraphs; + } + /** + * @param shouldSuppressParagraphs The suppressParagraphs to set. + */ + public void setSuppressParagraphs(boolean shouldSuppressParagraphs) + { + this.suppressParagraphs = shouldSuppressParagraphs; + } +} \ No newline at end of file diff --git a/src/main/java/org/pdfbox/util/PDFTextStripper.java b/src/main/java/org/pdfbox/util/PDFTextStripper.java new file mode 100644 index 0000000..56e80cc --- /dev/null +++ b/src/main/java/org/pdfbox/util/PDFTextStripper.java @@ -0,0 +1,1033 @@ +/** + * Copyright (c) 2003-2005, www.pdfbox.org + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * 3. Neither the name of pdfbox; nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * http://www.pdfbox.org + * + */ +package org.pdfbox.util; + +import java.io.IOException; +import java.io.StringWriter; +import java.io.Writer; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Vector; + +import org.pdfbox.cos.COSDocument; +import org.pdfbox.cos.COSStream; + +import org.pdfbox.pdmodel.PDDocument; +import org.pdfbox.pdmodel.PDPage; + +import org.pdfbox.pdmodel.common.PDRectangle; +import org.pdfbox.pdmodel.common.PDStream; + +import org.pdfbox.pdmodel.encryption.PDEncryptionDictionary; +import org.pdfbox.pdmodel.encryption.PDStandardEncryption; +import org.pdfbox.pdmodel.interactive.documentnavigation.outline.PDOutlineItem; +import org.pdfbox.pdmodel.interactive.pagenavigation.PDThreadBead; + +import org.pdfbox.exceptions.CryptographyException; +import org.pdfbox.exceptions.InvalidPasswordException; + +import org.apache.log4j.Logger; + + +/** + * This class will take a pdf document and strip out all of the text and ignore the + * formatting and such. 
+ * + * @author Ben Litchfield (ben@benlitchfield.com) + * @version $Revision: 1.62 $ + */ +public class PDFTextStripper extends PDFStreamEngine +{ + private static Logger log = Logger.getLogger(PDFTextStripper.class); + + private int currentPageNo = 0; + private int startPage = 1; + private int endPage = Integer.MAX_VALUE; + private PDOutlineItem startBookmark = null; + private int startBookmarkPageNumber = -1; + private PDOutlineItem endBookmark = null; + private int endBookmarkPageNumber = -1; + private PDDocument document; + private boolean suppressDuplicateOverlappingText = true; + private boolean shouldSeparateByBeads = true; + private boolean sortByPosition = false; + + private List pageArticles = null; + /** + * The charactersByArticle is used to extract text by article divisions. For example + * a PDF that has two columns like a newspaper, we want to extract the first column and + * then the second column. In this example the PDF would have 2 beads(or articles), one for + * each column. The size of the charactersByArticle would be 5, because not all text on the + * screen will fall into one of the articles. The five divisions are shown below + * + * Text before first article + * first article text + * text between first article and second article + * second article text + * text after second article + * + * Most PDFs won't have any beads, so charactersByArticle will contain a single entry. + */ + protected Vector charactersByArticle = new Vector(); + + private Map characterListMapping = new HashMap(); + + private String lineSeparator = System.getProperty("line.separator"); + private String pageSeparator = System.getProperty("line.separator"); + private String wordSeparator = " "; + + /** + * The stream to write the output to. + */ + protected Writer output; + + /** + * Instantiate a new PDFTextStripper object. This object will load properties from + * Resources/PDFTextStripper.properties. + * @throws IOException If there is an error loading the properties. 
+ */ + public PDFTextStripper() throws IOException + { + super( ResourceLoader.loadProperties( "Resources/PDFTextStripper.properties" ) ); + } + + /** + * This will return the text of a document. See writeText.
+ * NOTE: The document must not be encrypted when coming into this method. + * + * @param doc The document to get the text from. + * + * @return The text of the PDF document. + * + * @throws IOException if the doc state is invalid or it is encrypted. + */ + public String getText( PDDocument doc ) throws IOException + { + StringWriter outputStream = new StringWriter(); + writeText( doc, outputStream ); + return outputStream.toString(); + } + + /** + * @deprecated + * @see PDFTextStripper#getText( PDDocument ) + * @param doc The document to extract the text from. + * @return The document text. + * @throws IOException If there is an error extracting the text. + */ + public String getText( COSDocument doc ) throws IOException + { + return getText( new PDDocument( doc ) ); + } + + /** + * @deprecated + * @see PDFTextStripper#writeText( PDDocument, Writer ) + * @param doc The document to extract the text. + * @param outputStream The stream to write the text to. + * @throws IOException If there is an error extracting the text. + */ + public void writeText( COSDocument doc, Writer outputStream ) throws IOException + { + writeText( new PDDocument( doc ), outputStream ); + } + + /** + * This will take a PDDocument and write the text of that document to the print writer. + * + * @param doc The document to get the data from. + * @param outputStream The location to put the text. + * + * @throws IOException If the doc is in an invalid state. 
+ */ + public void writeText( PDDocument doc, Writer outputStream ) throws IOException + { + + PDEncryptionDictionary encDictionary = doc.getEncryptionDictionary(); + + //only care about standard encryption and if it was decrypted with the + //user password + if( encDictionary instanceof PDStandardEncryption && + !doc.wasDecryptedWithOwnerPassword() ) + { + PDStandardEncryption stdEncryption = (PDStandardEncryption)encDictionary; + if( !stdEncryption.canExtractContent() ) + { + throw new IOException( "You do not have permission to extract text" ); + } + } + currentPageNo = 0; + document = doc; + output = outputStream; + startDocument(document); + + if( document.isEncrypted() ) + { + // We are expecting non-encrypted documents here, but it is common + // for users to pass in a document that is encrypted with an empty + // password (such a document appears to not be encrypted by + // someone viewing the document, thus the confusion). We will + // attempt to decrypt with the empty password to handle this case. + // + log.debug("Document is encrypted, decrypting with empty password"); + try + { + document.decrypt(""); + } + catch (CryptographyException e) + { + throw new IOException("Error decrypting document, details: " + e.getMessage()); + } + catch (InvalidPasswordException e) + { + throw new IOException("Error: document is encrypted"); + } + } + + processPages( document.getDocumentCatalog().getAllPages() ); + endDocument(document); + } + + /** + * This will process all of the pages and the text that is in them. + * + * @param pages The pages object in the document. + * + * @throws IOException If there is an error parsing the text. 
+ */ + protected void processPages( List pages ) throws IOException + { + if( log.isDebugEnabled() ) + { + log.debug( "processPages( " + pages + " )" ); + } + + if( startBookmark != null ) + { + startBookmarkPageNumber = getPageNumber( startBookmark, pages ); + } + + if( endBookmark != null ) + { + endBookmarkPageNumber = getPageNumber( endBookmark, pages ); + } + + if( startBookmarkPageNumber == -1 && startBookmark != null && + endBookmarkPageNumber == -1 && endBookmark != null && + startBookmark.getCOSObject() == endBookmark.getCOSObject() ) + { + //this is a special case where both the start and end bookmark + //are the same but point to nothing. In this case + //we will not extract any text. + startBookmarkPageNumber = 0; + endBookmarkPageNumber = 0; + } + + + Iterator pageIter = pages.iterator(); + while( pageIter.hasNext() ) + { + PDPage nextPage = (PDPage)pageIter.next(); + PDStream contentStream = nextPage.getContents(); + if( contentStream != null ) + { + COSStream contents = contentStream.getStream(); + processPage( nextPage, contents ); + } + } + if( log.isDebugEnabled() ) + { + log.debug( "processPages() end" ); + } + } + + private int getPageNumber( PDOutlineItem bookmark, List allPages ) throws IOException + { + int pageNumber = -1; + PDPage page = bookmark.findDestinationPage( document ); + if( page != null ) + { + pageNumber = allPages.indexOf( page )+1;//use one based indexing + } + return pageNumber; + } + + /** + * This method is available for subclasses of this class. It will be called before processing + * of the document start. + * + * @param pdf The PDF document that is being processed. + * @throws IOException If an IO error occurs. + */ + protected void startDocument(PDDocument pdf) throws IOException + { + // no default implementation, but available for subclasses + } + + /** + * This method is available for subclasses of this class. It will be called after processing + * of the document finishes. 
+ * + * @param pdf The PDF document that is being processed. + * @throws IOException If an IO error occurs. + */ + protected void endDocument(PDDocument pdf ) throws IOException + { + // no default implementation, but available for subclasses + } + + /** + * This will process the contents of a page. + * + * @param page The page to process. + * @param content The contents of the page. + * + * @throws IOException If there is an error processing the page. + */ + protected void processPage( PDPage page, COSStream content ) throws IOException + { + long start = System.currentTimeMillis(); + if( log.isDebugEnabled() ) + { + log.debug( "processPage( " + page + ", " + content + " )" ); + } + currentPageNo++; + if( currentPageNo >= startPage && currentPageNo <= endPage && + (startBookmarkPageNumber == -1 || currentPageNo >= startBookmarkPageNumber ) && + (endBookmarkPageNumber == -1 || currentPageNo <= endBookmarkPageNumber )) + { + startPage( page ); + pageArticles = page.getThreadBeads(); + int numberOfArticleSections = 1 + pageArticles.size() * 2; + if( !shouldSeparateByBeads ) + { + numberOfArticleSections = 1; + } + int originalSize = charactersByArticle.size(); + charactersByArticle.setSize( numberOfArticleSections ); + for( int i=0; i"); + } + float endOfLastTextX = -1; + float startOfNextWordX = -1; + float lastWordSpacing = -1; + TextPosition lastProcessedCharacter = null; + + for( int i=0; i (currentY + (position.getFontSize() * 0.9f * verticalScaling))))) + { + if (log.isDebugEnabled()) + { + log.debug(""); + } + output.write(lineSeparator); + endOfLastTextX = -1; + startOfNextWordX = -1; + currentY = -1; + lastBaselineFontSize = -1; + } + + if (startOfNextWordX != -1 && startOfNextWordX < position.getX() && + lastProcessedCharacter != null && + //only bother adding a space if the last character was not a space + lastProcessedCharacter.getCharacter() != null && + !lastProcessedCharacter.getCharacter().endsWith( " " ) ) + { + if (log.isDebugEnabled()) + { + 
log.debug(""); + } + output.write( wordSeparator ); + } + + + if (log.isDebugEnabled()) + { + log.debug("flushText" + + " x=" + position.getX() + + " y=" + position.getY() + + " xScale=" + position.getXScale() + + " yScale=" + position.getYScale() + + " width=" + position.getWidth() + + " currentY=" + currentY + + " endOfLastTextX=" + endOfLastTextX + + " startOfNextWordX=" + startOfNextWordX + + " fontSize=" + position.getFontSize() + + " wordSpacing=" + wordSpacing + + " string=\"" + characterValue + "\""); + } + + if (currentY == -1) + { + currentY = position.getY(); + } + + if (currentY == position.getY()) + { + lastBaselineFontSize = position.getFontSize(); + } + + // RDD - endX is what PDF considers to be the x coordinate of the + // end position of the text. We use it in computing our metrics below. + // + endOfLastTextX = position.getX() + position.getWidth(); + + + if (characterValue != null) + { + output.write(characterValue); + } + else + { + log.debug( "Position.getString() is null so not writing anything" ); + } + lastProcessedCharacter = position; + } + endParagraph(); + } + + + // RDD - newline at end of flush - required for end of page (so that the top + // of the next page starts on its own line. + // + if( log.isDebugEnabled() ) + { + log.debug(""); + } + output.write(pageSeparator); + + output.flush(); + } + + /** + * Write the string to the output stream. + * + * @param text The text to write to the stream. + * @throws IOException If there is an error when writing the text. + */ + protected void writeCharacters( TextPosition text ) throws IOException + { + output.write( text.getCharacter() ); + } + + /** + * This will determine of two floating point numbers are within a specified variance. + * + * @param first The first number to compare to. + * @param second The second number to compare to. + * @param variance The allowed variance. 
+ */ + private boolean within( float first, float second, float variance ) + { + return second > first - variance && second < first + variance; + } + + /** + * This will show add a character to the list of characters to be printed to + * the text file. + * + * @param text The description of the character to display. + */ + protected void showCharacter( TextPosition text ) + { + boolean showCharacter = true; + if( suppressDuplicateOverlappingText ) + { + showCharacter = false; + String textCharacter = text.getCharacter(); + float textX = text.getX(); + float textY = text.getY(); + List sameTextCharacters = (List)characterListMapping.get( textCharacter ); + if( sameTextCharacters == null ) + { + sameTextCharacters = new ArrayList(); + characterListMapping.put( textCharacter, sameTextCharacters ); + } + + // RDD - Here we compute the value that represents the end of the rendered + // text. This value is used to determine whether subsequent text rendered + // on the same line overwrites the current text. + // + // We subtract any positive padding to handle cases where extreme amounts + // of padding are applied, then backed off (not sure why this is done, but there + // are cases where the padding is on the order of 10x the character width, and + // the TJ just backs up to compensate after each character). Also, we subtract + // an amount to allow for kerning (a percentage of the width of the last + // character). + // + boolean suppressCharacter = false; + float tolerance = (text.getWidth()/textCharacter.length())/3.0f; + for( int i=0; i + * The default is to not sort by position.
+ *
+ * A PDF writer could choose to write each character in a different order. By + * default PDFBox does not sort the text tokens before processing them due to + * performance reasons. + * + * @param newSortByPosition Tell PDFBox to sort the text positions. + */ + public void setSortByPosition(boolean newSortByPosition) + { + sortByPosition = newSortByPosition; + } +} \ No newline at end of file diff --git a/src/main/java/org/pdfbox/util/PDFTextStripperByArea.java b/src/main/java/org/pdfbox/util/PDFTextStripperByArea.java new file mode 100644 index 0000000..91c76a5 --- /dev/null +++ b/src/main/java/org/pdfbox/util/PDFTextStripperByArea.java @@ -0,0 +1,165 @@ +/** + * Copyright (c) 2005, www.pdfbox.org + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * 3. Neither the name of pdfbox; nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * http://www.pdfbox.org + * + */ +package org.pdfbox.util; + +import java.awt.Rectangle; +import java.io.IOException; +import java.io.StringWriter; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Vector; + +import org.pdfbox.cos.COSStream; +import org.pdfbox.pdmodel.PDPage; +import org.pdfbox.pdmodel.common.PDStream; + +/** + * This will extract text from a specified region in the PDF. + * + * @author Ben Litchfield (ben@benlitchfield.com) + * @version $Revision: 1.3 $ + */ +public class PDFTextStripperByArea extends PDFTextStripper +{ + private List regions = new ArrayList(); + private Map regionArea = new HashMap(); + private Map regionCharacterList = new HashMap(); + private Map regionText = new HashMap(); + + /** + * Constructor. + * @throws IOException If there is an error loading properties. + */ + public PDFTextStripperByArea() throws IOException + { + super(); + } + + /** + * Add a new region to group text by. + * + * @param regionName The name of the region. + * @param rect The rectangle area to retrieve the text from. + */ + public void addRegion( String regionName, Rectangle rect ) + { + regions.add( regionName ); + regionArea.put( regionName, rect ); + } + + /** + * Get the list of regions that have been setup. + * + * @return A list of java.lang.String objects to identify the region names. 
+ */ + public List getRegions() + { + return regions; + } + + /** + * Get the text for the region, this should be called after extractRegions(). + * + * @param regionName The name of the region to get the text from. + * @return The text that was identified in that region. + */ + public String getTextForRegion( String regionName ) + { + StringWriter text = (StringWriter)regionText.get( regionName ); + return text.toString(); + } + + /** + * Process the page to extract the region text. + * + * @param page The page to extract the regions from. + * @throws IOException If there is an error while extracting text. + */ + public void extractRegions( PDPage page ) throws IOException + { + Iterator regionIter = regions.iterator(); + while( regionIter.hasNext() ) + { + //reset the stored text for the region so this class + //can be reused. + String regionName = (String)regionIter.next(); + Vector regionCharactersByArticle = new Vector(); + regionCharactersByArticle.add( new ArrayList() ); + regionCharacterList.put( regionName, regionCharactersByArticle ); + regionText.put( regionName, new StringWriter() ); + } + + PDStream contentStream = page.getContents(); + if( contentStream != null ) + { + COSStream contents = contentStream.getStream(); + processPage( page, contents ); + } + } + + /** + * @see PDFTextStripper#showCharacter(TextPosition) + */ + protected void showCharacter( TextPosition text ) + { + Iterator regionIter = regionArea.keySet().iterator(); + while( regionIter.hasNext() ) + { + String region = (String)regionIter.next(); + Rectangle rect = (Rectangle)regionArea.get( region ); + if( rect.contains( text.getX(), text.getY() ) ) + { + charactersByArticle = (Vector)regionCharacterList.get( region ); + super.showCharacter( text ); + } + } + } + + /** + * This will print the text to the output stream. + * + * @throws IOException If there is an error writing the text. 
+ */ + protected void flushText() throws IOException + { + Iterator regionIter = regionArea.keySet().iterator(); + while( regionIter.hasNext() ) + { + String region = (String)regionIter.next(); + charactersByArticle = (Vector)regionCharacterList.get( region ); + output = (StringWriter)regionText.get( region ); + super.flushText(); + } + } +} diff --git a/src/main/java/org/pdfbox/util/ResourceLoader.java b/src/main/java/org/pdfbox/util/ResourceLoader.java new file mode 100644 index 0000000..0f80d09 --- /dev/null +++ b/src/main/java/org/pdfbox/util/ResourceLoader.java @@ -0,0 +1,169 @@ +/** + * Copyright (c) 2003, www.pdfbox.org + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * 3. Neither the name of pdfbox; nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * http://www.pdfbox.org + */ +package org.pdfbox.util; + +import java.io.File; +import java.io.FileInputStream; +import java.io.InputStream; +import java.io.IOException; + +import java.util.Properties; + +import org.apache.log4j.Logger; + +/** + * This class will handle loading resource files(AFM/CMAP). + * + * @author Ben Litchfield (ben@csh.rit.edu) + * @version $Revision: 1.6 $ + */ +public class ResourceLoader +{ + private static Logger log = Logger.getLogger( ResourceLoader.class ); + + /** + * private constructor for utility class. + */ + private ResourceLoader() + { + //private utility class + } + + /** + * This will attempt to load the resource given the resource name. + * + * @param resourceName The resource to try and load. + * + * @return The resource as a stream or null if it could not be found. + * + * @throws IOException If there is an error while attempting to load the resource. 
+ */ + public static InputStream loadResource( String resourceName ) throws IOException + { + if( log.isDebugEnabled() ) + { + log.debug( "loadResource( " + resourceName + ")" ); + } + + ClassLoader loader = ResourceLoader.class.getClassLoader(); + + InputStream is = null; + + if( loader != null ) + { + is = loader.getResourceAsStream( resourceName ); + } + + //see sourceforge bug 863053, this is a fix for a user that + //needed to have PDFBox loaded by the bootstrap classloader + if( is == null ) + { + loader = ClassLoader.getSystemClassLoader(); + if( loader != null ) + { + is = loader.getResourceAsStream( resourceName ); + } + } + + if( is == null ) + { + File f = new File( resourceName ); + if( f.exists() ) + { + is = new FileInputStream( f ); + } + } + + return is; + } + + /** + * This will attempt to load the resource given the resource name. + * + * @param resourceName The resource to try and load. + * + * @return The resource as a stream or null if it could not be found. + * + * @throws IOException If there is an error loading the properties. + */ + public static Properties loadProperties( String resourceName ) throws IOException + { + Properties properties = null; + InputStream is = null; + try + { + is = loadResource( resourceName ); + if( is != null ) + { + properties = new Properties(); + properties.load( is ); + } + } + finally + { + if( is != null ) + { + is.close(); + } + } + return properties; + } + + /** + * This will attempt to load the resource given the resource name. + * + * @param resourceName The resource to try and load. + * @param defaults A stream of default properties. + * + * @return The resource as a stream or null if it could not be found. + * + * @throws IOException If there is an error loading the properties. 
+     */
+    public static Properties loadProperties( String resourceName, Properties defaults ) throws IOException
+    {
+        InputStream in = loadResource( resourceName );
+        if( in != null )
+        {
+            try
+            {
+                defaults.load( in );
+            }
+            finally
+            {
+                in.close();
+            }
+        }
+        return defaults;
+    }
+}
\ No newline at end of file
diff --git a/src/main/java/org/pdfbox/util/SimpleConfigurator.java b/src/main/java/org/pdfbox/util/SimpleConfigurator.java
new file mode 100644
index 0000000..df94029
--- /dev/null
+++ b/src/main/java/org/pdfbox/util/SimpleConfigurator.java
@@ -0,0 +1,68 @@
+/**
+ * Copyright (c) 2003, www.pdfbox.org
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ * 3. Neither the name of pdfbox; nor the names of its
+ * contributors may be used to endorse or promote products derived from this
+ * software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED.
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * http://www.pdfbox.org
+ */
+package org.pdfbox.util;
+
+import java.net.URL;
+
+import org.apache.log4j.BasicConfigurator;
+import org.apache.log4j.Logger;
+import org.apache.log4j.Level;
+
+import org.apache.log4j.spi.Configurator;
+import org.apache.log4j.spi.LoggerRepository;
+
+/**
+ * Log4J configurator that applies the basic configuration and switches the
+ * root logger to DEBUG.
+ *
+ * @author Robert Dickinson (bob@brutesquadlabs.com)
+ * @version $Revision: 1.2 $
+ */
+public class SimpleConfigurator implements Configurator
+{
+    /**
+     * Constructor.
+     */
+    public SimpleConfigurator()
+    {
+    }
+
+    /**
+     * Set up log4J with BasicConfigurator and set the root logger level to DEBUG.
+     * Neither parameter is read by this implementation: the URL is not parsed
+     * and the configuration is not applied relative to the given hierarchy.
+     *
+     * @param url The URL of the configuration resource (ignored)
+     * @param repository The hierarchy to operate upon (ignored)
+     */
+    public void doConfigure(URL url, LoggerRepository repository)
+    {
+        BasicConfigurator.configure();
+        Logger.getRootLogger().setLevel(Level.DEBUG);
+    }
+}
diff --git a/src/main/java/org/pdfbox/util/Splitter.java b/src/main/java/org/pdfbox/util/Splitter.java
new file mode 100644
index 0000000..c03a989
--- /dev/null
+++ b/src/main/java/org/pdfbox/util/Splitter.java
@@ -0,0 +1,201 @@
+/**
+ * Copyright (c) 2004-2005, www.pdfbox.org
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1.
Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * 3. Neither the name of pdfbox; nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * http://www.pdfbox.org + * + */ +package org.pdfbox.util; + +import org.pdfbox.pdmodel.PDDocument; +import org.pdfbox.pdmodel.PDPage; + +import java.io.IOException; + +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; + +/** + * Split a document into several other documents. + * + * @author Mario Ivankovits (mario@ops.co.at) + * @author Ben Litchfield (ben@benlitchfield.com) + * @version $Revision: 1.6 $ + */ +public class Splitter +{ + + /** + * The source PDF document. + */ + protected PDDocument pdfDocument; + + /** + * The current PDF document that contains the splitted page. 
+ */ + protected PDDocument currentDocument = null; + + private int splitAtPage=1; + private List newDocuments = null; + + /** + * The current page number that we are processing, zero based. + */ + protected int pageNumber = 0; + + /** + * This will take a document and split into several other documents. + * + * @param document The document to split. + * + * @return A list of all the split documents. + * + * @throws IOException If there is an IOError + */ + public List split( PDDocument document ) throws IOException + { + newDocuments = new ArrayList(); + pdfDocument = document; + + List pages = pdfDocument.getDocumentCatalog().getAllPages(); + processPages(pages); + return newDocuments; + } + + /** + * This will tell the splitting algorithm where to split the pages. The default + * is 1, so every page will become a new document. If it was to then each document would + * contain 2 pages. So it the source document had 5 pages it would split into + * 3 new documents, 2 documents containing 2 pages and 1 document containing one + * page. + * + * @param split The number of pages each split document should contain. + */ + public void setSplitAtPage( int split ) + { + if( split <= 0 ) + { + throw new RuntimeException( "Error split must be at least one page." ); + } + splitAtPage = split; + } + + /** + * This will return how many pages each split document will contain. + * + * @return The split parameter. + */ + public int getSplitAtPage() + { + return splitAtPage; + } + + /** + * Interface method to handle the start of the page processing. + * + * @param pages The list of pages from the source document. + * + * @throws IOException If an IO error occurs. + */ + protected void processPages(List pages) throws IOException + { + Iterator iter = pages.iterator(); + while( iter.hasNext() ) + { + PDPage page = (PDPage)iter.next(); + processNextPage( page ); + } + } + + /** + * Interface method, you can control where a document gets split by implementing + * this method. 
By default a split occurs at every page. If you wanted to split
+     * based on some complex logic then you could override this method. For example:
+     * <pre>
+     * protected void createNewDocumentIfNecessary()
+     * {
+     *     if( isPrime( pageNumber ) )
+     *     {
+     *         super.createNewDocumentIfNecessary();
+     *     }
+     * }
+     * </pre>
+     *
+     * @throws IOException If there is an error creating the new document.
+     */
+    protected void createNewDocumentIfNecessary() throws IOException
+    {
+        if (isNewDocNecessary())
+        {
+            createNewDocument();
+        }
+    }
+
+    /**
+     * Check if it is necessary to create a new document. That is the case when
+     * the current page number is a multiple of the split size or when no
+     * target document exists yet.
+     *
+     * @return true If a new document should be created.
+     */
+    protected boolean isNewDocNecessary()
+    {
+        return pageNumber % splitAtPage == 0 || currentDocument == null;
+    }
+
+    /**
+     * Create a new document to write the split contents to. The document
+     * information and the viewer preferences of the source document are set
+     * on the new document, which is then added to the list of results.
+     *
+     * @throws IOException If there is a problem creating the new document.
+     */
+    protected void createNewDocument() throws IOException
+    {
+        currentDocument = new PDDocument();
+        currentDocument.setDocumentInformation(pdfDocument.getDocumentInformation());
+        currentDocument.getDocumentCatalog().setViewerPreferences(
+            pdfDocument.getDocumentCatalog().getViewerPreferences());
+        newDocuments.add(currentDocument);
+    }
+
+
+
+    /**
+     * Interface to start processing a new page.
+     *
+     * @param page The page that is about to get processed.
+     *
+     * @throws IOException If there is an error creating the new document.
+ */ + protected void processNextPage( PDPage page ) throws IOException + { + createNewDocumentIfNecessary(); + PDPage imported = currentDocument.importPage( page ); + imported.setCropBox( page.findCropBox() ); + imported.setMediaBox( page.findMediaBox() ); + imported.setResources( page.findResources() ); + imported.setRotation( page.findRotation() ); + pageNumber++; + } +} \ No newline at end of file diff --git a/src/main/java/org/pdfbox/util/TextPosition.java b/src/main/java/org/pdfbox/util/TextPosition.java new file mode 100644 index 0000000..f44ca56 --- /dev/null +++ b/src/main/java/org/pdfbox/util/TextPosition.java @@ -0,0 +1,203 @@ +/** + * Copyright (c) 2003-2005, www.pdfbox.org + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * 3. Neither the name of pdfbox; nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * http://www.pdfbox.org + * + */ +package org.pdfbox.util; + +import org.pdfbox.pdmodel.font.PDFont; + +/** + * This represents a character and a position on the screen of those characters. + * + * @author Ben Litchfield (ben@csh.rit.edu) + * @version $Revision: 1.9 $ + */ +public class TextPosition +{ + private float x; + private float y; + private float xScale; + private float yScale; + private float width; + private float widthOfSpace; + private String c; + private PDFont font; + private float fontSize; + private float wordSpacing; + + /** + * Constructor. + * + * @param xPos The x coordinate of the character. + * @param yPos The y coordinate of the character. + * @param xScl The x scaling of the character. + * @param yScl The y scaling of the character. + * @param widthValue The width of the character. + * @param spaceWidth The width of the space character. + * @param string The character to be displayed. + * @param currentFont The current for for this text position. + * @param fontSizeValue The new font size. 
+     * @param ws The word spacing parameter
+     */
+    public TextPosition(
+        float xPos,
+        float yPos,
+        float xScl,
+        float yScl,
+        float widthValue,
+        float spaceWidth,
+        String string,
+        PDFont currentFont,
+        float fontSizeValue,
+        float ws
+        )
+    {
+        this.x = xPos;
+        this.y = yPos;
+        this.xScale = xScl;
+        this.yScale = yScl;
+        this.width = widthValue;
+        this.widthOfSpace = spaceWidth;
+        this.c = string;
+        this.font = currentFont;
+        this.fontSize = fontSizeValue;
+        this.wordSpacing = ws;
+    }
+
+    /**
+     * This will get the character that will be displayed on the screen.
+     *
+     * @return The character on the screen.
+     */
+    public String getCharacter()
+    {
+        return c;
+    }
+
+    /**
+     * This will get the x position of the character.
+     *
+     * @return The x coordinate of the character.
+     */
+    public float getX()
+    {
+        return x;
+    }
+
+    /**
+     * This will get the y position of the character.
+     *
+     * @return The y coordinate of the character.
+     */
+    public float getY()
+    {
+        return y;
+    }
+
+    /**
+     * This will get the width of this character.
+     *
+     * @return The width of this character.
+     */
+    public float getWidth()
+    {
+        return width;
+    }
+
+    /**
+     * This will get the font size that this object is
+     * supposed to be drawn at.
+     *
+     * @return The font size.
+     */
+    public float getFontSize()
+    {
+        return fontSize;
+    }
+
+    /**
+     * This will get the font for the text being drawn.
+     *
+     * @return The font.
+     */
+    public PDFont getFont()
+    {
+        return font;
+    }
+
+    /**
+     * This will get the current word spacing.
+     *
+     * @return The current word spacing.
+     */
+    public float getWordSpacing()
+    {
+        return wordSpacing;
+    }
+
+    /**
+     * This will get the width of a space character. This is useful for some
+     * algorithms such as the text stripper, that need to know the width of a
+     * space character.
+     *
+     * @return The width of a space character.
+     */
+    public float getWidthOfSpace()
+    {
+        return widthOfSpace;
+    }
+    /**
+     * @return Returns the xScale.
+ */ + public float getXScale() + { + return xScale; + } + /** + * @param scale The xScale to set. + */ + public void setXScale(float scale) + { + xScale = scale; + } + /** + * @return Returns the yScale. + */ + public float getYScale() + { + return yScale; + } + /** + * @param scale The yScale to set. + */ + public void setYScale(float scale) + { + yScale = scale; + } +} \ No newline at end of file diff --git a/src/main/java/org/pdfbox/util/TextPositionComparator.java b/src/main/java/org/pdfbox/util/TextPositionComparator.java new file mode 100644 index 0000000..fab6a6e --- /dev/null +++ b/src/main/java/org/pdfbox/util/TextPositionComparator.java @@ -0,0 +1,126 @@ +/** + * Copyright (c) 2005, www.pdfbox.org + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * 3. Neither the name of pdfbox; nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * http://www.pdfbox.org + * + */ +package org.pdfbox.util; + +import java.util.Comparator; + +import org.pdfbox.pdmodel.PDPage; + +/** + * This class is a comparator for TextPosition operators. + * + * @author Ben Litchfield (ben@benlitchfield.com) + * @version $Revision: 1.2 $ + */ +public class TextPositionComparator implements Comparator +{ + private PDPage thePage = null; + + /** + * Constuctor, comparison of TextPosition depends on the rotation + * of the page. + * @param page The page that the text position is on. 
+     */
+    public TextPositionComparator( PDPage page )
+    {
+        thePage = page;
+    }
+
+    /**
+     * Order two TextPosition objects by their y value first and by their x
+     * value second, after mapping the coordinates according to the rotation
+     * of the page (0, 90, 180 or 270). For any other rotation value all
+     * positions compare as equal.
+     *
+     * @param o1 The first TextPosition.
+     * @param o2 The second TextPosition.
+     * @return -1, 0 or 1 as the first position sorts before, equal to or after the second.
+     *
+     * @see Comparator#compare(java.lang.Object, java.lang.Object)
+     */
+    public int compare(Object o1, Object o2)
+    {
+        int retval = 0;
+        TextPosition pos1 = (TextPosition)o1;
+        TextPosition pos2 = (TextPosition)o2;
+        int rotation = thePage.findRotation();
+        float x1 = 0;
+        float x2 = 0;
+        float y1 = 0;
+        float y2 = 0;
+        if( rotation == 0 )
+        {
+            x1 = pos1.getX();
+            x2 = pos2.getX();
+            y1 = pos1.getY();
+            y2 = pos2.getY();
+        }
+        else if( rotation == 90 )
+        {
+            //both positions must be swapped the same way, otherwise the
+            //ordering is not symmetric and an element can differ from itself.
+            x1 = pos1.getY();
+            x2 = pos2.getY();
+            y1 = pos1.getX();
+            y2 = pos2.getX();
+        }
+        else if( rotation == 180 )
+        {
+            x1 = -pos1.getX();
+            x2 = -pos2.getX();
+            y1 = -pos1.getY();
+            y2 = -pos2.getY();
+        }
+        else if( rotation == 270 )
+        {
+            x1 = -pos1.getY();
+            x2 = -pos2.getY();
+            y1 = -pos1.getX();
+            y2 = -pos2.getX();
+        }
+
+        if( y1 < y2 )
+        {
+            retval = -1;
+        }
+        else if( y1 > y2 )
+        {
+            retval = 1;
+        }
+        else
+        {
+            if( x1 < x2 )
+            {
+                retval = -1;
+            }
+            else if( x1 > x2 )
+            {
+                retval = 1;
+            }
+            else
+            {
+                retval = 0;
+            }
+        }
+
+        return retval;
+    }
+
+}
\ No newline at end of file
diff --git a/src/main/java/org/pdfbox/util/XMLUtil.java b/src/main/java/org/pdfbox/util/XMLUtil.java
new file mode 100644
index 0000000..1798aa2
--- /dev/null
+++ b/src/main/java/org/pdfbox/util/XMLUtil.java
@@ -0,0 +1,103 @@
+/**
+ * Copyright (c) 2004, www.pdfbox.org
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ * 3.
Neither the name of pdfbox; nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * http://www.pdfbox.org + * + */ +package org.pdfbox.util; + +import java.io.InputStream; +import java.io.IOException; + +import javax.xml.parsers.DocumentBuilder; +import javax.xml.parsers.DocumentBuilderFactory; + +import org.w3c.dom.Document; +import org.w3c.dom.Element; +import org.w3c.dom.Node; +import org.w3c.dom.NodeList; +import org.w3c.dom.Text; + +/** + * This class with handle some simple XML operations. + * + * @author blitchfield + * @version $Revision: 1.2 $ + */ +public class XMLUtil +{ + /** + * Utility class, should not be instantiated. + * + */ + private XMLUtil() + { + } + + /** + * This will parse an XML stream and create a DOM document. + * + * @param is The stream to get the XML from. + * @return The DOM document. + * @throws IOException It there is an error creating the dom. 
+     */
+    public static Document parse( InputStream is ) throws IOException
+    {
+        try
+        {
+            DocumentBuilderFactory builderFactory = DocumentBuilderFactory.newInstance();
+            DocumentBuilder builder = builderFactory.newDocumentBuilder();
+            return builder.parse( is );
+        }
+        catch( Exception e )
+        {
+            //keep the original failure attached so its type and stack trace
+            //are not lost when it is reported as an IOException.
+            IOException thrown = new IOException( e.getMessage() );
+            thrown.initCause( e );
+            throw thrown;
+        }
+    }
+
+    /**
+     * This will get the text value of an element.
+     *
+     * @param node The node to get the text value for.
+     * @return The text of the node.
+     */
+    public static String getNodeValue( Element node )
+    {
+        String retval = "";
+        NodeList children = node.getChildNodes();
+        for( int i=0; iTitre : PDFEngine Modification.

+ *

Description : Structural modification of the PDFEngine class : +* the long sequence of conditions in processOperator is replaced +* by this strategy pattern

+ *

Copyright : Copyright (c) 2004

+ *

Société : DBGS

+ * @author Huault : huault@free.fr + * @version $Revision: 1.3 $ + */ +public class BeginText extends OperatorProcessor +{ + + private static final Logger LOG = Logger.getLogger(BeginText.class); + + /** + * process : BT : Begin text object. + * @param operator The operator that is being executed. + * @param arguments List + */ + public void process(PDFOperator operator, List arguments) + { + if( LOG.isDebugEnabled() ) + { + LOG.debug(" " + this.toString()+ " from " + context.toString()); + } + context.setTextMatrix( new Matrix()); + context.setTextLineMatrix( new Matrix() ); + } +} diff --git a/src/main/java/org/pdfbox/util/operator/Concatenate.java b/src/main/java/org/pdfbox/util/operator/Concatenate.java new file mode 100644 index 0000000..7fc986c --- /dev/null +++ b/src/main/java/org/pdfbox/util/operator/Concatenate.java @@ -0,0 +1,106 @@ +/** + * Copyright (c) 2004, www.pdfbox.org + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * 3. Neither the name of pdfbox; nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * http://www.pdfbox.org + * + */ +package org.pdfbox.util.operator; + +import java.util.List; +import org.pdfbox.cos.COSNumber; +import org.apache.log4j.Logger; +import org.pdfbox.util.Matrix; +import org.pdfbox.util.PDFOperator; + +/** + * + *

Titre : PDFEngine Modification.

+ *

Description : Structural modification of the PDFEngine class : +* the long sequence of conditions in processOperator is replaced by +* this strategy pattern

+ *

Copyright : Copyright (c) 2004

+ *

Société : DBGS

+ * @author Huault : huault@free.fr
+ * @version $Revision: 1.3 $
+ */
+
+public class Concatenate extends OperatorProcessor
+{
+
+    private static final Logger LOG = Logger.getLogger(Concatenate.class);
+
+    /**
+     * process : cm : Concatenate matrix to current transformation matrix.
+     * The six operands a, b, c, d, e and f are stored at positions (0,0),
+     * (0,1), (1,0), (1,1), (2,0) and (2,1) of a new matrix, which is then
+     * multiplied with the current transformation matrix (new matrix on the
+     * left) and stored as the new current transformation matrix.
+     *
+     * @param operator The operator that is being executed.
+     * @param arguments The operand list; elements 0 to 5 are cast to COSNumber.
+     */
+    public void process(PDFOperator operator, List arguments)
+    {
+
+        //concatenate matrix to current transformation matrix
+        COSNumber a = (COSNumber) arguments.get(0);
+        COSNumber b = (COSNumber) arguments.get(1);
+        COSNumber c = (COSNumber) arguments.get(2);
+        COSNumber d = (COSNumber) arguments.get(3);
+        COSNumber e = (COSNumber) arguments.get(4);
+        COSNumber f = (COSNumber) arguments.get(5);
+
+        if( LOG.isDebugEnabled() )
+        {
+            LOG.debug(" "
+                    + this.toString() + " from " + context.toString());
+        }
+
+        Matrix newMatrix = new Matrix();
+        newMatrix.setValue(0, 0, a.floatValue());
+        newMatrix.setValue(0, 1, b.floatValue());
+        newMatrix.setValue(1, 0, c.floatValue());
+        newMatrix.setValue(1, 1, d.floatValue());
+        newMatrix.setValue(2, 0, e.floatValue());
+        newMatrix.setValue(2, 1, f.floatValue());
+
+        // wprinz: BUG Fix:
+        // In PDF, matrices have to be multiplied from left to right
+        // (the new matrix is left, the old one(s) are right)
+
+        Matrix old_ctm = context.getGraphicsState().getCurrentTransformationMatrix();
+        Matrix matrix_to_concat = newMatrix;
+        Matrix new_ctm = matrix_to_concat.multiply(old_ctm);
+        context.getGraphicsState().setCurrentTransformationMatrix(
+                new_ctm );
+
+        // :wprinz
+
+    }
+
+}
diff --git a/src/main/java/org/pdfbox/util/operator/EndText.java b/src/main/java/org/pdfbox/util/operator/EndText.java
new file mode 100644
index 0000000..2794ccf
--- /dev/null
+++ b/src/main/java/org/pdfbox/util/operator/EndText.java
@@ -0,0 +1,67 @@
+/**
+ * Copyright (c) 2004, www.pdfbox.org
+ * All rights reserved.
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * 3. Neither the name of pdfbox; nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * http://www.pdfbox.org + * + */ +package org.pdfbox.util.operator; + +import java.util.List; +import org.apache.log4j.Logger; +import org.pdfbox.util.PDFOperator; +/** + * + *

Titre : PDFEngine Modification.

+ *

Description : Structural modification of the PDFEngine class : the +* long sequence of conditions in processOperator is replaced by +* this strategy pattern

+ *

Copyright : Copyright (c) 2004

+ *

Société : DBGS

+ * @author Huault : huault@free.fr + * @version $Revision: 1.3 $ + */ +public class EndText extends OperatorProcessor +{ + + private static final Logger LOG = Logger.getLogger(EndText.class); + + /** + * process : ET : End text object. + * @param operator The operator that is being executed. + * @param arguments List + */ + public void process(PDFOperator operator, List arguments) + { + if( LOG.isDebugEnabled() ) + { + LOG.debug(" "+ this.toString()); + } + context.setTextMatrix( null); + context.setTextLineMatrix( null); + } + +} diff --git a/src/main/java/org/pdfbox/util/operator/GRestore.java b/src/main/java/org/pdfbox/util/operator/GRestore.java new file mode 100644 index 0000000..c07b8d8 --- /dev/null +++ b/src/main/java/org/pdfbox/util/operator/GRestore.java @@ -0,0 +1,67 @@ +/** + * Copyright (c) 2004, www.pdfbox.org + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * 3. Neither the name of pdfbox; nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * http://www.pdfbox.org + * + */ +package org.pdfbox.util.operator; + +import org.apache.log4j.Logger; +import java.util.List; + +import org.pdfbox.pdmodel.graphics.PDGraphicsState; +import org.pdfbox.util.PDFOperator; + +/** + * + *

Titre : PDFEngine Modification.

+ *

Description : Structural modification of the PDFEngine class : the long sequence of + * conditions in processOperator is replaced by this strategy pattern

+ *

Copyright : Copyright (c) 2004

+ *

Société : DBGS

+ * @author Huault : huault@free.fr + * @version $Revision: 1.3 $ + */ +public class GRestore extends OperatorProcessor +{ + private static final Logger LOG = Logger.getLogger(GRestore.class); + + + /** + * process : Q : Restore graphics state. + * @param operator The operator that is being executed. + * @param arguments List + */ + public void process(PDFOperator operator, List arguments) + { + if( LOG.isDebugEnabled() ) + { + LOG.debug(" - restore state" + this.toString()); + } + context.setGraphicsState( (PDGraphicsState)context.getGraphicsStack().pop() ); + } +} diff --git a/src/main/java/org/pdfbox/util/operator/GSave.java b/src/main/java/org/pdfbox/util/operator/GSave.java new file mode 100644 index 0000000..6a754b6 --- /dev/null +++ b/src/main/java/org/pdfbox/util/operator/GSave.java @@ -0,0 +1,66 @@ +/** + * Copyright (c) 2004, www.pdfbox.org + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * 3. Neither the name of pdfbox; nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * http://www.pdfbox.org + * + */ +package org.pdfbox.util.operator; + +import java.util.List; +import org.apache.log4j.Logger; +import org.pdfbox.util.PDFOperator; + +/** + * + *

Titre : PDFEngine Modification.

+ *

Description : Structural modification of the PDFEngine class : the long sequence of + * conditions in processOperator is replaced by this strategy pattern

+ *

Copyright : Copyright (c) 2004

+ *

Société : DBGS

+ * @author Huault : huault@free.fr + * @version $Revision: 1.3 $ + */ + +public class GSave extends OperatorProcessor +{ + private static final Logger LOG = Logger.getLogger(GSave.class); + + /** + * process : q : Save graphics state. + * @param operator The operator that is being executed. + * @param arguments List + */ + public void process(PDFOperator operator, List arguments) + { + if( LOG.isDebugEnabled() ) + { + LOG.debug(" - save state " + this.toString()); + } + context.getGraphicsStack().push( context.getGraphicsState().clone() ); + } + +} diff --git a/src/main/java/org/pdfbox/util/operator/Invoke.java b/src/main/java/org/pdfbox/util/operator/Invoke.java new file mode 100644 index 0000000..2f8f789 --- /dev/null +++ b/src/main/java/org/pdfbox/util/operator/Invoke.java @@ -0,0 +1,113 @@ +/** + * Copyright (c) 2005, www.pdfbox.org + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * 3. Neither the name of pdfbox; nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * http://www.pdfbox.org + * + */ +package org.pdfbox.util.operator; + +import org.apache.log4j.Logger; +import org.pdfbox.cos.COSName; +import org.pdfbox.cos.COSStream; +import org.pdfbox.pdmodel.PDPage; +import org.pdfbox.pdmodel.PDResources; +import org.pdfbox.pdmodel.graphics.xobject.PDXObject; +import org.pdfbox.pdmodel.graphics.xobject.PDXObjectForm; +import org.pdfbox.util.PDFOperator; + +import java.io.IOException; +import java.util.List; +import java.util.Set; +import java.util.TreeSet; +import java.util.Map; + +/** + * Invoke named XObject. + * + * @author Ben Litchfield (ben@benlitchfield.com) + * @author Mario Ivankovits + * + * @version $Revision: 1.6 $ + */ +public class Invoke extends OperatorProcessor +{ + private static final Logger LOG = Logger.getLogger(Invoke.class); + + private Set inProcess = new TreeSet(); + + /** + * process : Do - Invoke a named xobject. + * @param operator The operator that is being executed. + * @param arguments List + * + * @throws IOException If there is an error processing this operator. 
+ */ + public void process(PDFOperator operator, List arguments) throws IOException + { + COSName name = (COSName) arguments.get( 0 ); + if (LOG.isDebugEnabled()) + { + LOG.debug(""); + } + + // wprinz: allow recursive processing so that nested xobject forms are correctly dealt with +// if (inProcess.contains(name)) +// { +// // avoid recursive loop +// return; +// } + // /wprinz + + inProcess.add(name); + + try + { + //PDResources res = context.getResources(); + + Map xobjects = context.getXObjects(); + PDXObject xobject = (PDXObject) xobjects.get(name.getName()); + + if(xobject instanceof PDXObjectForm) + { + PDXObjectForm form = (PDXObjectForm)xobject; + COSStream invoke = (COSStream)form.getCOSObject(); + PDResources pdResources = form.getResources(); + PDPage page = context.getCurrentPage(); + if(pdResources == null) + { + pdResources = page.findResources(); + } + + getContext().processSubStream( page, pdResources, invoke ); + } + } + finally + { + inProcess.remove(name); + } + } +} diff --git a/src/main/java/org/pdfbox/util/operator/MoveAndShow.java b/src/main/java/org/pdfbox/util/operator/MoveAndShow.java new file mode 100644 index 0000000..ca3a3f9 --- /dev/null +++ b/src/main/java/org/pdfbox/util/operator/MoveAndShow.java @@ -0,0 +1,75 @@ +/** + * Copyright (c) 2004, www.pdfbox.org + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * 3. 
Neither the name of pdfbox; nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * http://www.pdfbox.org + * + */ +package org.pdfbox.util.operator; + +import java.util.List; +import org.apache.log4j.Logger; +import org.pdfbox.cos.COSString; +import org.pdfbox.util.PDFOperator; + +import java.io.IOException; + +/** + *

Titre : PDFEngine Modification.

+ *

Description : Structural modification of the PDFEngine class : +* the long sequence of conditions in processOperator is replaced by +* this strategy pattern

+ *

Copyright : Copyright (c) 2004

+ *

Société : DBGS

+ * @author Huault : huault@free.fr + * @version $Revision: 1.4 $ + */ +public class MoveAndShow extends OperatorProcessor +{ + + private static final Logger LOG = Logger.getLogger(MoveAndShow.class); + + /** + * ' Move to next line and show text. + * @param arguments List + * @param operator The operator that is being executed. + * @throws IOException If there is an error processing the operator. + */ + public void process(PDFOperator operator, List arguments) throws IOException + { + // Move to start of next text line, and show text + // + if( LOG.isDebugEnabled()) + { + COSString string = (COSString)arguments.get( 0 ); + LOG.debug("<' string=\"" + string.getString() + "\">"); + } + + context.processOperator("T*", null); + context.processOperator("Tj", arguments); + } + +} diff --git a/src/main/java/org/pdfbox/util/operator/MoveText.java b/src/main/java/org/pdfbox/util/operator/MoveText.java new file mode 100644 index 0000000..10360bb --- /dev/null +++ b/src/main/java/org/pdfbox/util/operator/MoveText.java @@ -0,0 +1,76 @@ +/** + * Copyright (c) 2004, www.pdfbox.org + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * 3. Neither the name of pdfbox; nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * http://www.pdfbox.org + * + */ +package org.pdfbox.util.operator; + +import java.util.List; +import org.apache.log4j.Logger; +import org.pdfbox.cos.COSNumber; +import org.pdfbox.util.Matrix; +import org.pdfbox.util.PDFOperator; + +/** + * + *

Titre : PDFEngine Modification.

+ *

Description : Structural modification of the PDFEngine class : + * the long sequence of conditions in processOperator is replaced by + * this strategy pattern

+ *

Copyright : Copyright (c) 2004

+ *

Société : DBGS

+ * @author Huault : huault@free.fr + * @version $Revision: 1.3 $ + */ +public class MoveText extends OperatorProcessor +{ + private static final Logger LOG = Logger.getLogger(MoveText.class); + + + /** + * process : Td : Move text position. + * @param operator The operator that is being executed. + * @param arguments List + */ + public void process(PDFOperator operator, List arguments) + { + COSNumber x = (COSNumber)arguments.get( 0 ); + COSNumber y = (COSNumber)arguments.get( 1 ); + if (LOG.isDebugEnabled()) + { + LOG.debug(""); + } + Matrix td = new Matrix(); + td.setValue( 2, 0, x.floatValue() );//.* textMatrix.getValue(0,0) ); + td.setValue( 2, 1, y.floatValue() );//* textMatrix.getValue(1,1) ); + //log.debug( "textLineMatrix before " + textLineMatrix ); + context.setTextLineMatrix( td.multiply( context.getTextLineMatrix() ) ); //textLineMatrix.multiply( td ); + //log.debug( "textLineMatrix after " + textLineMatrix ); + context.setTextMatrix( context.getTextLineMatrix().copy() ); + } +} diff --git a/src/main/java/org/pdfbox/util/operator/MoveTextSetLeading.java b/src/main/java/org/pdfbox/util/operator/MoveTextSetLeading.java new file mode 100644 index 0000000..e68a1d1 --- /dev/null +++ b/src/main/java/org/pdfbox/util/operator/MoveTextSetLeading.java @@ -0,0 +1,80 @@ +/** + * Copyright (c) 2004, www.pdfbox.org + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * 3. 
Neither the name of pdfbox; nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * http://www.pdfbox.org + * + */ +package org.pdfbox.util.operator; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import org.apache.log4j.Logger; +import org.pdfbox.cos.COSFloat; +import org.pdfbox.cos.COSNumber; +import org.pdfbox.util.PDFOperator; + +/** + * + *

Titre : PDFEngine Modification.

+ *

Description : Structural modification of the PDFEngine class : + * the long sequence of conditions in processOperator is replaced by + * this strategy pattern

+ *

Copyright : Copyright (c) 2004

+ *

Société : DBGS

+ * @author Huault : huault@free.fr + * @version $Revision: 1.4 $ + */ +public class MoveTextSetLeading extends OperatorProcessor +{ + + private static final Logger LOG = Logger.getLogger(MoveTextSetLeading.class); + + /** + * process : TD Move text position and set leading. + * @param operator The operator that is being executed. + * @param arguments List + * + * @throws IOException If there is an error during processing. + */ + public void process(PDFOperator operator, List arguments) throws IOException + { + //move text position and set leading + COSNumber y = (COSNumber)arguments.get( 1 ); + if (LOG.isDebugEnabled()) + { + COSNumber x = (COSNumber)arguments.get( 0 ); + LOG.debug(""); + } + + ArrayList args = new ArrayList(); + args.add(new COSFloat(-1*y.floatValue())); + context.processOperator("TL", args); + context.processOperator("Td", arguments); + + } +} diff --git a/src/main/java/org/pdfbox/util/operator/NextLine.java b/src/main/java/org/pdfbox/util/operator/NextLine.java new file mode 100644 index 0000000..daa35eb --- /dev/null +++ b/src/main/java/org/pdfbox/util/operator/NextLine.java @@ -0,0 +1,82 @@ +/** + * Copyright (c) 2004, www.pdfbox.org + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * 3. Neither the name of pdfbox; nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * http://www.pdfbox.org + * + */ +package org.pdfbox.util.operator; + +import org.apache.log4j.Logger; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +import org.pdfbox.cos.COSFloat; +import org.pdfbox.util.PDFOperator; + +/** + * + *

Titre : PDFEngine Modification.

+ *

Description : Structural modification of the PDFEngine class : the long sequence of + * conditions in processOperator is replaced by this strategy pattern

+ *

Copyright : Copyright (c) 2004

+ *

Société : DBGS

+ * @author Huault : huault@free.fr + * @version $Revision: 1.4 $ + */ +public class NextLine extends OperatorProcessor +{ + + private static final Logger LOG = Logger.getLogger(NextLine.class); + + /** + * process : T* Move to start of next text line. + * @param operator The operator that is being executed. + * @param arguments List + * + * @throws IOException If there is an error during processing. + */ + public void process(PDFOperator operator, List arguments) throws IOException + { + + if (LOG.isDebugEnabled()) + { + LOG.debug(""); + } + //move to start of next text line + ArrayList args = new ArrayList(); + args.add(new COSFloat(0.0f)); + // this must be -leading instead of just leading as written in the + // specification (p.369) the acrobat reader seems to implement it the same way + args.add(new COSFloat(-1*context.getGraphicsState().getTextState().getLeading())); + // use Td instead of repeating code + context.processOperator("Td", args); + + } +} diff --git a/src/main/java/org/pdfbox/util/operator/OperatorProcessor.java b/src/main/java/org/pdfbox/util/operator/OperatorProcessor.java new file mode 100644 index 0000000..27c21b9 --- /dev/null +++ b/src/main/java/org/pdfbox/util/operator/OperatorProcessor.java @@ -0,0 +1,93 @@ +/** + * Copyright (c) 2004, www.pdfbox.org + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * 3. 
Neither the name of pdfbox; nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * http://www.pdfbox.org + * + */ +package org.pdfbox.util.operator; + +import org.pdfbox.util.PDFOperator; +import org.pdfbox.util.PDFStreamEngine; +import java.util.List; +import java.io.IOException; + +/** + * + *

Titre : OperatorProcessor

+ *

Description : This class is the abstract strategy class + * in the GoF strategy pattern. After it is instantiated, you must always call +* the setContext method to initialise the OperatorProcessor

+ *

Copyright : Copyright (c) 2004

+ *

Société : DBGS

+ * @author Huault : huault@free.fr + * @version $Revision: 1.3 $ + */ +public abstract class OperatorProcessor +{ + + /** + * The stream engine processing context. + */ + protected PDFStreamEngine context = null; + + /** + * Constructor. + * + */ + protected OperatorProcessor() + { + } + + /** + * Get the context for processing. + * + * @return The processing context. + */ + protected PDFStreamEngine getContext() + { + return context; + } + + /** + * Set the processing context. + * + * @param ctx The context for processing. + */ + public void setContext(PDFStreamEngine ctx) + { + context = ctx; + } + + /** + * process the operator. + * @param operator The operator that is being processed. + * @param arguments arguments needed by this operator. + * + * @throws IOException If there is an error processing the operator. + */ + public abstract void process(PDFOperator operator, List arguments) throws IOException; +} diff --git a/src/main/java/org/pdfbox/util/operator/SetCharSpacing.java b/src/main/java/org/pdfbox/util/operator/SetCharSpacing.java new file mode 100644 index 0000000..6813dce --- /dev/null +++ b/src/main/java/org/pdfbox/util/operator/SetCharSpacing.java @@ -0,0 +1,79 @@ +/** + * Copyright (c) 2004, www.pdfbox.org + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * 3. Neither the name of pdfbox; nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * http://www.pdfbox.org + * + */ +package org.pdfbox.util.operator; + +import java.util.List; +import org.apache.log4j.Logger; +import org.pdfbox.cos.COSNumber; +import org.pdfbox.util.PDFOperator; + +/** + * + *

Titre : PDFEngine Modification.

+ *

Description : Structural modification of the PDFEngine class : + * the long sequence of conditions in processOperator is replaced by + * this strategy pattern

+ *

Copyright : Copyright (c) 2004

+ *

Société : DBGS

+ * @author Huault : huault@free.fr + * @version $Revision: 1.4 $ + */ +public class SetCharSpacing extends OperatorProcessor +{ + + private static final Logger LOG = Logger.getLogger(SetCharSpacing.class); + + /** + * process : Tc Set character spacing. + * @param operator The operator that is being executed. + * @param arguments List + */ + public void process(PDFOperator operator, List arguments) + { + //set character spacing + if( arguments.size() > 0 ) + { + //There are some documents which are incorrectly structured, and have + //a wrong number of arguments to this, so we will assume the last argument + //in the list + Object charSpacing = arguments.get( arguments.size()-1 ); + if( charSpacing instanceof COSNumber ) + { + COSNumber characterSpacing = (COSNumber)charSpacing; + if (LOG.isDebugEnabled()) + { + LOG.debug(""); + } + context.getGraphicsState().getTextState().setCharacterSpacing( characterSpacing.floatValue() ); + } + } + } +} diff --git a/src/main/java/org/pdfbox/util/operator/SetGraphicsStateParameters.java b/src/main/java/org/pdfbox/util/operator/SetGraphicsStateParameters.java new file mode 100644 index 0000000..e72e05d --- /dev/null +++ b/src/main/java/org/pdfbox/util/operator/SetGraphicsStateParameters.java @@ -0,0 +1,72 @@ +/** + * Copyright (c) 2004, www.pdfbox.org + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * 3. 
Neither the name of pdfbox; nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * http://www.pdfbox.org + * + */ +package org.pdfbox.util.operator; + +import java.util.List; +import org.apache.log4j.Logger; +import org.pdfbox.cos.COSName; +import org.pdfbox.pdmodel.graphics.PDExtendedGraphicsState; +import org.pdfbox.util.PDFOperator; + +import java.io.IOException; + +/** + *

Structural modification of the PDFEngine class : + * the long sequence of conditions in processOperator is replaced by + * this strategy pattern.

+ * + * @author Ben Litchfield (ben@csh.rit.edu) + * @version $Revision: 1.3 $ + */ + +public class SetGraphicsStateParameters extends OperatorProcessor +{ + private static final Logger LOG = Logger.getLogger(SetTextFont.class); + + /** + * gs Set parameters from graphics state parameter dictionary. + * @param operator The operator that is being executed. + * @param arguments List + * @throws IOException If an error occurs while processing the font. + */ + public void process(PDFOperator operator, List arguments) throws IOException + { + //set parameters from graphics state parameter dictionary + COSName graphicsName = (COSName)arguments.get( 0 ); + + if (LOG.isDebugEnabled()) + { + LOG.debug("" ); + } + PDExtendedGraphicsState gs = (PDExtendedGraphicsState)context.getGraphicsStates().get( graphicsName.getName() ); + gs.copyIntoGraphicsState( context.getGraphicsState() ); + } +} diff --git a/src/main/java/org/pdfbox/util/operator/SetHorizontalTextScaling.java b/src/main/java/org/pdfbox/util/operator/SetHorizontalTextScaling.java new file mode 100644 index 0000000..bfb9ebe --- /dev/null +++ b/src/main/java/org/pdfbox/util/operator/SetHorizontalTextScaling.java @@ -0,0 +1,64 @@ +/** + * Copyright (c) 2004, www.pdfbox.org + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * 3. Neither the name of pdfbox; nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * http://www.pdfbox.org + * + */ +package org.pdfbox.util.operator; + +import java.util.List; +import org.apache.log4j.Logger; +import org.pdfbox.cos.COSNumber; +import org.pdfbox.util.PDFOperator; + +import java.io.IOException; + +/** + *

Structural modification of the PDFEngine class : + * the long sequence of conditions in processOperator is replaced by + * this strategy pattern.

+ * + * @author Ben Litchfield (ben@csh.rit.edu) + * @version $Revision: 1.3 $ + */ + +public class SetHorizontalTextScaling extends OperatorProcessor +{ + private static final Logger LOG = Logger.getLogger(SetTextFont.class); + + /** + * Tz Set horizontal text scaling. + * @param operator The operator that is being executed. + * @param arguments List + * @throws IOException If an error occurs while processing the font. + */ + public void process(PDFOperator operator, List arguments) throws IOException + { + COSNumber scaling = (COSNumber)arguments.get(0); + context.getGraphicsState().getTextState().setHorizontalScalingPercent( scaling.floatValue() ); + } +} diff --git a/src/main/java/org/pdfbox/util/operator/SetLineWidth.java b/src/main/java/org/pdfbox/util/operator/SetLineWidth.java new file mode 100644 index 0000000..de0576d --- /dev/null +++ b/src/main/java/org/pdfbox/util/operator/SetLineWidth.java @@ -0,0 +1,64 @@ +/** + * Copyright (c) 2004, www.pdfbox.org + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * 3. Neither the name of pdfbox; nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * http://www.pdfbox.org + * + */ +package org.pdfbox.util.operator; + +import java.util.List; +import org.apache.log4j.Logger; +import org.pdfbox.cos.COSNumber; +import org.pdfbox.util.PDFOperator; + +import java.io.IOException; + +/** + *

Structural modification of the PDFEngine class : + * the long sequence of conditions in processOperator is replaced by + * this strategy pattern.

+ * + * @author Ben Litchfield (ben@csh.rit.edu) + * @version $Revision: 1.3 $ + */ + +public class SetLineWidth extends OperatorProcessor +{ + private static final Logger LOG = Logger.getLogger(SetTextFont.class); + + /** + * w Set line width. + * @param operator The operator that is being executed. + * @param arguments List + * @throws IOException If an error occurs while processing the font. + */ + public void process(PDFOperator operator, List arguments) throws IOException + { + COSNumber width = (COSNumber)arguments.get( 0 ); + context.getGraphicsState().setLineWidth( width.doubleValue() ); + } +} diff --git a/src/main/java/org/pdfbox/util/operator/SetMatrix.java b/src/main/java/org/pdfbox/util/operator/SetMatrix.java new file mode 100644 index 0000000..5d5f1f5 --- /dev/null +++ b/src/main/java/org/pdfbox/util/operator/SetMatrix.java @@ -0,0 +1,90 @@ +/** + * Copyright (c) 2004, www.pdfbox.org + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * 3. Neither the name of pdfbox; nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * http://www.pdfbox.org + * + */ +package org.pdfbox.util.operator; + +import java.util.List; +import org.apache.log4j.Logger; +import org.pdfbox.cos.COSNumber; +import org.pdfbox.util.Matrix; +import org.pdfbox.util.PDFOperator; + +/** + *

Title : PDFEngine Modification.

+ *

Description : Structural modification of the PDFEngine class : the long sequence of conditions + * in processOperator is replaced by this strategy pattern

+ *

Copyright : Copyright (c) 2004

+ *

Company : DBGS

+ * @author Huault : huault@free.fr + * @version $Revision: 1.3 $ + */ + +public class SetMatrix extends OperatorProcessor +{ + + private static final Logger LOG = Logger.getLogger(SetMatrix.class); + + /** + * Tm Set text matrix and text line matrix. + * @param operator The operator that is being executed. + * @param arguments List + */ + public void process(PDFOperator operator, List arguments) + { + //Set text matrix and text line matrix + COSNumber a = (COSNumber)arguments.get( 0 ); + COSNumber b = (COSNumber)arguments.get( 1 ); + COSNumber c = (COSNumber)arguments.get( 2 ); + COSNumber d = (COSNumber)arguments.get( 3 ); + COSNumber e = (COSNumber)arguments.get( 4 ); + COSNumber f = (COSNumber)arguments.get( 5 ); + + if (LOG.isDebugEnabled()) + { + LOG.debug(""); + } + + Matrix textMatrix = new Matrix(); + textMatrix.setValue( 0, 0, a.floatValue() ); + textMatrix.setValue( 0, 1, b.floatValue() ); + textMatrix.setValue( 1, 0, c.floatValue() ); + textMatrix.setValue( 1, 1, d.floatValue() ); + textMatrix.setValue( 2, 0, e.floatValue() ); + textMatrix.setValue( 2, 1, f.floatValue() ); + context.setTextMatrix( textMatrix ); + context.setTextLineMatrix( textMatrix.copy() ); + } +} diff --git a/src/main/java/org/pdfbox/util/operator/SetMoveAndShow.java b/src/main/java/org/pdfbox/util/operator/SetMoveAndShow.java new file mode 100644 index 0000000..b49206d --- /dev/null +++ b/src/main/java/org/pdfbox/util/operator/SetMoveAndShow.java @@ -0,0 +1,80 @@ +/** + * Copyright (c) 2004, www.pdfbox.org + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * 3. Neither the name of pdfbox; nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * http://www.pdfbox.org + * + */ +package org.pdfbox.util.operator; + +import java.util.List; +import org.apache.log4j.Logger; +import org.pdfbox.cos.COSNumber; +import org.pdfbox.cos.COSString; +import org.pdfbox.util.PDFOperator; + +import java.io.IOException; + +/** + *

Title : PDFEngine Modification.

+ *

Description : Structural modification of the PDFEngine class : the long sequence of conditions + * in processOperator is replaced by this strategy pattern

+ *

Copyright : Copyright (c) 2004

+ *

Company : DBGS

+ * @author Huault : huault@free.fr + * @version $Revision: 1.5 $ + */ + +public class SetMoveAndShow extends OperatorProcessor +{ + + private static final Logger LOG = Logger.getLogger(SetMoveAndShow.class); + + /** + * " Set word and character spacing, move to next line, and show text. + * @param operator The operator that is being executed. + * @param arguments List. + * @throws IOException If there is an error processing the operator. + */ + public void process(PDFOperator operator, List arguments) throws IOException + { + //Set word and character spacing, move to next line, and show text + // + if (LOG.isDebugEnabled()) + { + COSNumber wordSpacing = (COSNumber)arguments.get( 0 ); + COSNumber characterSpacing = (COSNumber)arguments.get( 1 ); + COSString string = (COSString)arguments.get( 2 ); + LOG.debug("<\" wordSpacing=\"" + wordSpacing + + "\", characterSpacing=\"" + characterSpacing + + "\", string=\"" + string.getString() + "\">"); + } + + context.processOperator("Tw", arguments.subList(0,1)); + context.processOperator("Tc", arguments.subList(1,2)); + context.processOperator("'", arguments.subList(2,3)); + } +} diff --git a/src/main/java/org/pdfbox/util/operator/SetNonStrokingCMYKColor.java b/src/main/java/org/pdfbox/util/operator/SetNonStrokingCMYKColor.java new file mode 100644 index 0000000..941fcd6 --- /dev/null +++ b/src/main/java/org/pdfbox/util/operator/SetNonStrokingCMYKColor.java @@ -0,0 +1,69 @@ +/** + * Copyright (c) 2005, www.pdfbox.org + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * 3. Neither the name of pdfbox; nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * http://www.pdfbox.org + * + */ +package org.pdfbox.util.operator; + +import java.util.List; + +import org.pdfbox.cos.COSNumber; +import org.pdfbox.pdmodel.graphics.color.PDColorSpace; +import org.pdfbox.pdmodel.graphics.color.PDColorSpaceInstance; +import org.pdfbox.pdmodel.graphics.color.PDDeviceCMYK; +import org.pdfbox.util.PDFOperator; + +import java.io.IOException; + +/** + *

Set the non stroking color using the DeviceCMYK color space.

+ * + * @author Ben Litchfield (ben@benlitchfield.com) + * @version $Revision: 1.2 $ + */ +public class SetNonStrokingCMYKColor extends OperatorProcessor +{ + /** + * cs Set color space for non stroking operations. + * @param operator The operator that is being executed. + * @param arguments List + * @throws IOException If an error occurs while processing the font. + */ + public void process(PDFOperator operator, List arguments) throws IOException + { + PDColorSpace cs = PDDeviceCMYK.INSTANCE; + PDColorSpaceInstance colorInstance = context.getGraphicsState().getNonStrokingColorSpace(); + colorInstance.setColorSpace( cs ); + float[] values = new float[4]; + for( int i=0; iSet the non stroking color space.

+ * + * @author Ben Litchfield (ben@benlitchfield.com) + * @version $Revision: 1.2 $ + */ +public class SetNonStrokingColorSpace extends OperatorProcessor +{ + private static final float[] EMPTY_FLOAT_ARRAY = new float[0]; + + /** + * cs Set color space for non stroking operations. + * @param operator The operator that is being executed. + * @param arguments List + * @throws IOException If an error occurs while processing the font. + */ + public void process(PDFOperator operator, List arguments) throws IOException + { +// (PDF 1.1) Set color space for stroking operations + COSName name = (COSName)arguments.get( 0 ); + PDColorSpace cs = (PDColorSpace)context.getColorSpaces().get( name.getName() ); + if( cs == null ) + { + cs = PDColorSpaceFactory.createColorSpace( name ); + } + PDColorSpaceInstance colorInstance = context.getGraphicsState().getNonStrokingColorSpace(); + colorInstance.setColorSpace( cs ); + int numComponents = cs.getNumberOfComponents(); + float[] values = EMPTY_FLOAT_ARRAY; + if( numComponents >= 0 ) + { + values = new float[numComponents]; + for( int i=0; iSet the non stroking color space.

+ * + * @author Ben Litchfield (ben@benlitchfield.com) + * @version $Revision: 1.2 $ + */ +public class SetNonStrokingRGBColor extends OperatorProcessor +{ + /** + * rg Set color space for non stroking operations. + * @param operator The operator that is being executed. + * @param arguments List + * @throws IOException If an error occurs while processing the font. + */ + public void process(PDFOperator operator, List arguments) throws IOException + { + PDColorSpace cs = PDDeviceRGB.INSTANCE; + PDColorSpaceInstance colorInstance = context.getGraphicsState().getNonStrokingColorSpace(); + colorInstance.setColorSpace( cs ); + float[] values = new float[3]; + for( int i=0; iStructal modification of the PDFEngine class : + * the long sequence of conditions in processOperator is remplaced by + * this strategy pattern.

+ * + * @author Ben Litchfield (ben@csh.rit.edu) + * @version $Revision: 1.2 $ + */ +public class SetStrokingCMYKColor extends OperatorProcessor +{ + /** + * CS Set color space for stroking operations. + * @param operator The operator that is being executed. + * @param arguments List + * @throws IOException If an error occurs while processing the font. + */ + public void process(PDFOperator operator, List arguments) throws IOException + { + PDColorSpace cs = PDDeviceCMYK.INSTANCE; + PDColorSpaceInstance colorInstance = context.getGraphicsState().getStrokingColorSpace(); + colorInstance.setColorSpace( cs ); + float[] values = new float[4]; + for( int i=0; iStructal modification of the PDFEngine class : + * the long sequence of conditions in processOperator is remplaced by + * this strategy pattern.

+ * + * @author Ben Litchfield (ben@csh.rit.edu) + * @version $Revision: 1.3 $ + */ + +public class SetStrokingColorSpace extends OperatorProcessor +{ + private static final float[] EMPTY_FLOAT_ARRAY = new float[0]; + + /** + * CS Set color space for stroking operations. + * @param operator The operator that is being executed. + * @param arguments List + * @throws IOException If an error occurs while processing the font. + */ + public void process(PDFOperator operator, List arguments) throws IOException + { + //(PDF 1.1) Set color space for stroking operations + COSName name = (COSName)arguments.get( 0 ); + PDColorSpace cs = (PDColorSpace)context.getColorSpaces().get( name.getName() ); + if( cs == null ) + { + cs = PDColorSpaceFactory.createColorSpace( name ); + } + PDColorSpaceInstance colorInstance = context.getGraphicsState().getStrokingColorSpace(); + colorInstance.setColorSpace( cs ); + int numComponents = cs.getNumberOfComponents(); + float[] values = EMPTY_FLOAT_ARRAY; + if( numComponents >= 0 ) + { + values = new float[numComponents]; + for( int i=0; iStructal modification of the PDFEngine class : + * the long sequence of conditions in processOperator is remplaced by + * this strategy pattern.

+ * + * @author Ben Litchfield (ben@csh.rit.edu) + * @version $Revision: 1.2 $ + */ +public class SetStrokingRGBColor extends OperatorProcessor +{ + /** + * RG Set color space for stroking operations. + * @param operator The operator that is being executed. + * @param arguments List + * @throws IOException If an error occurs while processing the font. + */ + public void process(PDFOperator operator, List arguments) throws IOException + { + PDColorSpace cs = PDDeviceRGB.INSTANCE; + PDColorSpaceInstance colorInstance = context.getGraphicsState().getStrokingColorSpace(); + colorInstance.setColorSpace( cs ); + float[] values = new float[3]; + for( int i=0; iTitre : PDFEngine Modification.

+ *

Description : Structural modification of the PDFEngine class : + * the long sequence of conditions in processOperator is replaced by + * this strategy pattern

+ *

Copyright : Copyright (c) 2004

+ *

Company : DBGS

+ * @author Huault : huault@free.fr + * @version $Revision: 1.4 $ + */ + +public class SetTextFont extends OperatorProcessor +{ + private static final Logger LOG = Logger.getLogger(SetTextFont.class); + + /** + * Tf selectfont Set text font and size. + * @param operator The operator that is being executed. + * @param arguments List + * @throws IOException If an error occurs while processing the font. + */ + public void process(PDFOperator operator, List arguments) throws IOException + { + //there are some documents that are incorrectly structured and + //arguments are in the wrong spot, so we will silently ignore them + //if there are no arguments + if( arguments.size() >= 2 ) + { + //set font and size + COSName fontName = (COSName)arguments.get( 0 ); + float fontSize = ((COSNumber)arguments.get( 1 ) ).floatValue(); + context.getGraphicsState().getTextState().setFontSize( fontSize ); + + if (LOG.isDebugEnabled()) + { + LOG.debug(""); + } + + //old way + //graphicsState.getTextState().getFont() = (COSObject)stream.getDictionaryObject( fontName ); + //if( graphicsState.getTextState().getFont() == null ) + //{ + // graphicsState.getTextState().getFont() = (COSObject)graphicsState.getTextState().getFont() + // Dictionary.getItem( fontName ); + //} + context.getGraphicsState().getTextState().setFont( (PDFont)context.getFonts().get( fontName.getName() ) ); + if( context.getGraphicsState().getTextState().getFont() == null ) + { + throw new IOException( "Error: Could not find font(" + fontName + ") in map=" + context.getFonts() ); + } + } + } + +} diff --git a/src/main/java/org/pdfbox/util/operator/SetTextLeading.java b/src/main/java/org/pdfbox/util/operator/SetTextLeading.java new file mode 100644 index 0000000..928172b --- /dev/null +++ b/src/main/java/org/pdfbox/util/operator/SetTextLeading.java @@ -0,0 +1,69 @@ +/** + * Copyright (c) 2004, www.pdfbox.org + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * 3. Neither the name of pdfbox; nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * http://www.pdfbox.org + * + */ +package org.pdfbox.util.operator; + +import java.util.List; +import org.apache.log4j.Logger; +import org.pdfbox.cos.COSNumber; +import org.pdfbox.util.PDFOperator; + +/** + *

Title : PDFEngine Modification.

+ *

Description : Structural modification of the PDFEngine class : +* the long sequence of conditions in processOperator is replaced +* by this strategy pattern

+ *

Copyright : Copyright (c) 2004

+ *

Company : DBGS

+ * @author Huault : huault@free.fr + * @version $Revision: 1.3 $ + */ + +public class SetTextLeading extends OperatorProcessor +{ + + private static final Logger LOG = Logger.getLogger(SetTextLeading.class); + + /** + * TL Set text leading. + * @param operator The operator that is being executed. + * @param arguments List + */ + public void process(PDFOperator operator, List arguments) + { + COSNumber leading = (COSNumber)arguments.get( 0 ); + context.getGraphicsState().getTextState().setLeading( leading.floatValue() ); + if (LOG.isDebugEnabled()) + { + LOG.debug(""); + } + } + +} diff --git a/src/main/java/org/pdfbox/util/operator/SetTextRenderingMode.java b/src/main/java/org/pdfbox/util/operator/SetTextRenderingMode.java new file mode 100644 index 0000000..19da1e2 --- /dev/null +++ b/src/main/java/org/pdfbox/util/operator/SetTextRenderingMode.java @@ -0,0 +1,64 @@ +/** + * Copyright (c) 2004, www.pdfbox.org + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * 3. Neither the name of pdfbox; nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * http://www.pdfbox.org + * + */ +package org.pdfbox.util.operator; + +import java.util.List; +import org.apache.log4j.Logger; +import org.pdfbox.cos.COSNumber; +import org.pdfbox.util.PDFOperator; + +import java.io.IOException; + +/** + *

Structural modification of the PDFEngine class : + * the long sequence of conditions in processOperator is replaced by + * this strategy pattern.

+ * + * @author Ben Litchfield (ben@csh.rit.edu) + * @version $Revision: 1.3 $ + */ + +public class SetTextRenderingMode extends OperatorProcessor +{ + private static final Logger LOG = Logger.getLogger(SetTextFont.class); + + /** + * Tr Set text rendering mode. + * @param operator The operator that is being executed. + * @param arguments List + * @throws IOException If an error occurs while processing the font. + */ + public void process(PDFOperator operator, List arguments) throws IOException + { + COSNumber mode = (COSNumber)arguments.get( 0 ); + context.getGraphicsState().getTextState().setRenderingMode( mode.intValue() ); + } +} diff --git a/src/main/java/org/pdfbox/util/operator/SetTextRise.java b/src/main/java/org/pdfbox/util/operator/SetTextRise.java new file mode 100644 index 0000000..0d1e884 --- /dev/null +++ b/src/main/java/org/pdfbox/util/operator/SetTextRise.java @@ -0,0 +1,64 @@ +/** + * Copyright (c) 2004, www.pdfbox.org + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * 3. Neither the name of pdfbox; nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * http://www.pdfbox.org + * + */ +package org.pdfbox.util.operator; + +import java.util.List; +import org.apache.log4j.Logger; +import org.pdfbox.cos.COSNumber; +import org.pdfbox.util.PDFOperator; + +import java.io.IOException; + +/** + *

Structural modification of the PDFEngine class : + * the long sequence of conditions in processOperator is replaced by + * this strategy pattern.

+ * + * @author Ben Litchfield (ben@csh.rit.edu) + * @version $Revision: 1.3 $ + */ + +public class SetTextRise extends OperatorProcessor +{ + private static final Logger LOG = Logger.getLogger(SetTextFont.class); + + /** + * Ts Set text rise. + * @param operator The operator that is being executed. + * @param arguments List + * @throws IOException If an error occurs while processing the font. + */ + public void process(PDFOperator operator, List arguments) throws IOException + { + COSNumber rise = (COSNumber)arguments.get(0); + context.getGraphicsState().getTextState().setRise( rise.floatValue() ); + } +} diff --git a/src/main/java/org/pdfbox/util/operator/SetWordSpacing.java b/src/main/java/org/pdfbox/util/operator/SetWordSpacing.java new file mode 100644 index 0000000..3a1e176 --- /dev/null +++ b/src/main/java/org/pdfbox/util/operator/SetWordSpacing.java @@ -0,0 +1,68 @@ +/** + * Copyright (c) 2004, www.pdfbox.org + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * 3. Neither the name of pdfbox; nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * http://www.pdfbox.org + * + */ +package org.pdfbox.util.operator; + +import java.util.List; +import org.apache.log4j.Logger; +import org.pdfbox.cos.COSNumber; +import org.pdfbox.util.PDFOperator; + +/** + *

Title : PDFEngine Modification.

+ *

Description : Structural modification of the PDFEngine class : the long sequence of + * conditions in processOperator is replaced by this strategy pattern

+ *

Copyright : Copyright (c) 2004

+ *

Company : DBGS

+ * @author Huault : huault@free.fr + * @version $Revision: 1.3 $ + */ + +public class SetWordSpacing extends OperatorProcessor +{ + private static final Logger LOG = Logger.getLogger(SetWordSpacing.class); + + /** + * Tw Set word spacing. + * @param operator The operator that is being executed. + * @param arguments List + */ + public void process(PDFOperator operator, List arguments) + { + //set word spacing + COSNumber wordSpacing = (COSNumber)arguments.get( 0 ); + if (LOG.isDebugEnabled()) + { + LOG.debug(""); + } + context.getGraphicsState().getTextState().setWordSpacing( wordSpacing.floatValue() ); + } + +} diff --git a/src/main/java/org/pdfbox/util/operator/ShowText.java b/src/main/java/org/pdfbox/util/operator/ShowText.java new file mode 100644 index 0000000..1889259 --- /dev/null +++ b/src/main/java/org/pdfbox/util/operator/ShowText.java @@ -0,0 +1,73 @@ +/** + * Copyright (c) 2004, www.pdfbox.org + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * 3. Neither the name of pdfbox; nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * http://www.pdfbox.org + * + */ +package org.pdfbox.util.operator; + +import java.util.List; +import org.apache.log4j.Logger; +import org.pdfbox.cos.COSString; +import org.pdfbox.util.PDFOperator; + +import java.io.IOException; + +/** + *

Title : PDFEngine Modification.

+ *

Description : Structural modification of the PDFEngine class : + * the long sequence of conditions in processOperator + * is replaced by this strategy pattern

+ *

Copyright : Copyright (c) 2004

+ *

Company : DBGS

+ * @author Huault : huault@free.fr + * @version $Revision: 1.3 $ + */ + +public class ShowText extends OperatorProcessor +{ + + private static final Logger LOG = Logger.getLogger(ShowText.class); + + /** + * Tj show Show text. + * @param operator The operator that is being executed. + * @param arguments List + * + * @throws IOException If there is an error processing this operator. + */ + public void process(PDFOperator operator, List arguments) throws IOException + { + COSString string = (COSString)arguments.get( 0 ); + context.showString( string.getBytes() ); + if (LOG.isDebugEnabled()) + { + LOG.debug(""); + } + } + +} diff --git a/src/main/java/org/pdfbox/util/operator/ShowTextGlyph.java b/src/main/java/org/pdfbox/util/operator/ShowTextGlyph.java new file mode 100644 index 0000000..cf22e11 --- /dev/null +++ b/src/main/java/org/pdfbox/util/operator/ShowTextGlyph.java @@ -0,0 +1,99 @@ +/** + * Copyright (c) 2004, www.pdfbox.org + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * 3. Neither the name of pdfbox; nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * http://www.pdfbox.org + * + */ +package org.pdfbox.util.operator; + +import java.util.List; +import org.apache.log4j.Logger; +import org.pdfbox.util.Matrix; +import org.pdfbox.util.PDFOperator; +import org.pdfbox.cos.COSArray; +import org.pdfbox.cos.COSBase; +import org.pdfbox.cos.COSNumber; +import java.io.IOException; +import org.pdfbox.cos.COSString; + +/** + *

Title : PDFEngine Modification.

+ *

Description : Structural modification of the PDFEngine class : the long sequence of + * conditions in processOperator is replaced by this strategy pattern

+ *

Copyright : Copyright (c) 2004

+ *

Company : DBGS

+ * @author Huault : huault@free.fr + * @version $Revision: 1.5 $ + */ + +public class ShowTextGlyph extends OperatorProcessor +{ + private static final Logger LOG = Logger.getLogger(ShowTextGlyph.class); + + /** + * TJ Show text, allowing individual glyph positioning. + * @param operator The operator that is being executed. + * @param arguments List + * @throws IOException If there is an error processing this operator. + */ + public void process(PDFOperator operator, List arguments) throws IOException + { + COSArray array = (COSArray)arguments.get( 0 ); + float adjustment=0; + for( int i=0; i + + + + + +This package contains implementations of all of the PDF operators. + + diff --git a/src/main/java/org/pdfbox/util/operator/pagedrawer/AppendRectangleToPath.java b/src/main/java/org/pdfbox/util/operator/pagedrawer/AppendRectangleToPath.java new file mode 100644 index 0000000..7d2d49f --- /dev/null +++ b/src/main/java/org/pdfbox/util/operator/pagedrawer/AppendRectangleToPath.java @@ -0,0 +1,77 @@ +/** + * Copyright (c) 2005, www.pdfbox.org + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * 3. Neither the name of pdfbox; nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * http://www.pdfbox.org + * + */ +package org.pdfbox.util.operator.pagedrawer; + +import java.awt.geom.Rectangle2D; +import java.util.List; + +import org.pdfbox.cos.COSNumber; +import org.pdfbox.pdfviewer.PageDrawer; +import org.pdfbox.util.PDFOperator; +import org.pdfbox.util.operator.OperatorProcessor; + +/** + * Implementation of content stream operator for page drawer. + * + * @author Ben Litchfield (ben@benlitchfield.com) + * @version $Revision: 1.1 $ + */ +public class AppendRectangleToPath extends OperatorProcessor +{ + + + /** + * process : re : append rectangle to path. + * @param operator The operator that is being executed. 
+ * @param arguments List + */ + public void process(PDFOperator operator, List arguments) + { + PageDrawer drawer = (PageDrawer)context; + + COSNumber x = (COSNumber)arguments.get( 0 ); + COSNumber y = (COSNumber)arguments.get( 1 ); + COSNumber w = (COSNumber)arguments.get( 2 ); + COSNumber h = (COSNumber)arguments.get( 3 ); + Rectangle2D rect = new Rectangle2D.Double( + x.doubleValue(), + drawer.fixY( x.doubleValue(), y.doubleValue())-h.doubleValue(), + w.doubleValue()+1, + h.doubleValue()+1); + drawer.getLinePath().reset(); + + drawer.getLinePath().append( rect, false ); + //graphics.drawRect((int)x.doubleValue(), (int)(pageSize.getHeight() - y.doubleValue()), + // (int)w.doubleValue(),(int)h.doubleValue() ); + //System.out.println( "" ); + } +} diff --git a/src/main/java/org/pdfbox/util/operator/pagedrawer/BeginInlineImage.java b/src/main/java/org/pdfbox/util/operator/pagedrawer/BeginInlineImage.java new file mode 100644 index 0000000..9031345 --- /dev/null +++ b/src/main/java/org/pdfbox/util/operator/pagedrawer/BeginInlineImage.java @@ -0,0 +1,114 @@ +/** + * Copyright (c) 2005, www.pdfbox.org + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * 3. Neither the name of pdfbox; nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * http://www.pdfbox.org + * + */ +package org.pdfbox.util.operator.pagedrawer; + +import java.awt.Graphics2D; +import java.awt.geom.AffineTransform; +import java.awt.image.BufferedImage; +import java.io.IOException; +import java.util.List; + +import org.pdfbox.pdfviewer.PageDrawer; +import org.pdfbox.pdmodel.graphics.xobject.PDInlinedImage; +import org.pdfbox.util.ImageParameters; +import org.pdfbox.util.Matrix; +import org.pdfbox.util.PDFOperator; +import org.pdfbox.util.operator.OperatorProcessor; + +/** + * Implementation of content stream operator for page drawer. + * + * @author Ben Litchfield (ben@benlitchfield.com) + * @version $Revision: 1.1 $ + */ +public class BeginInlineImage extends OperatorProcessor +{ + + + /** + * process : BI : begin inline image. + * @param operator The operator that is being executed. + * @param arguments List + * @throws IOException If there is an error displaying the inline image. 
+ */ + public void process(PDFOperator operator, List arguments) throws IOException + { + PageDrawer drawer = (PageDrawer)context; + Graphics2D graphics = drawer.getGraphics(); + //begin inline image object + ImageParameters params = operator.getImageParameters(); + PDInlinedImage image = new PDInlinedImage(); + image.setImageParameters( params ); + image.setImageData( operator.getImageData() ); + BufferedImage awtImage = image.createImage(); + + Matrix ctm = drawer.getGraphicsState().getCurrentTransformationMatrix(); + + int width = awtImage.getWidth(); + int height = awtImage.getHeight(); + + + AffineTransform at = new AffineTransform( + ctm.getValue(0,0)/width, + ctm.getValue(0,1), + ctm.getValue(1,0), + ctm.getValue(1,1)/height, + ctm.getValue(2,0), + ctm.getValue(2,1) + ); + //at.setToRotation((double)page.getRotation()); + + + // The transformation should be done + // 1 - Translation + // 2 - Rotation + // 3 - Scale or Skew + //AffineTransform at = new AffineTransform(); + + // Translation + //at = new AffineTransform(); + //at.setToTranslation((double)ctm.getValue(0,0), + // (double)ctm.getValue(0,1)); + + // Rotation + //AffineTransform toAdd = new AffineTransform(); + //toAdd.setToRotation(1.5705); + //toAdd.setToRotation(ctm.getValue(2,0)*(Math.PI/180)); + //at.concatenate(toAdd); + + // Scale / Skew? + //toAdd.setToScale(width, height); + //at.concatenate(toAdd); + //at.setToScale( width, height ); + graphics.drawImage( awtImage, at, null ); + //graphics.drawImage( awtImage,0,0, width,height,null); + } +} diff --git a/src/main/java/org/pdfbox/util/operator/pagedrawer/ClosePath.java b/src/main/java/org/pdfbox/util/operator/pagedrawer/ClosePath.java new file mode 100644 index 0000000..418af6d --- /dev/null +++ b/src/main/java/org/pdfbox/util/operator/pagedrawer/ClosePath.java @@ -0,0 +1,59 @@ +/** + * Copyright (c) 2004, www.pdfbox.org + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * 3. Neither the name of pdfbox; nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * http://www.pdfbox.org + * + */ +package org.pdfbox.util.operator.pagedrawer; + +import java.util.List; + +import org.pdfbox.pdfviewer.PageDrawer; +import org.pdfbox.util.PDFOperator; +import org.pdfbox.util.operator.OperatorProcessor; + +/** + * Implementation of content stream operator for page drawer. + * + * @author Ben Litchfield (ben@benlitchfield.com) + * @version $Revision: 1.1 $ + */ +public class ClosePath extends OperatorProcessor +{ + + + /** + * process : h : Close path. 
+ * @param operator The operator that is being executed. + * @param arguments List + */ + public void process(PDFOperator operator, List arguments) + { + PageDrawer drawer = (PageDrawer)context; + drawer.getLinePath().closePath(); + } +} diff --git a/src/main/java/org/pdfbox/util/operator/pagedrawer/CurveTo.java b/src/main/java/org/pdfbox/util/operator/pagedrawer/CurveTo.java new file mode 100644 index 0000000..c056b91 --- /dev/null +++ b/src/main/java/org/pdfbox/util/operator/pagedrawer/CurveTo.java @@ -0,0 +1,73 @@ +/** + * Copyright (c) 2005, www.pdfbox.org + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * 3. Neither the name of pdfbox; nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * http://www.pdfbox.org + * + */ +package org.pdfbox.util.operator.pagedrawer; + +import java.util.List; + +import org.pdfbox.cos.COSNumber; +import org.pdfbox.pdfviewer.PageDrawer; +import org.pdfbox.util.PDFOperator; +import org.pdfbox.util.operator.OperatorProcessor; + +/** + * Implementation of content stream operator for page drawer. + * + * @author Ben Litchfield (ben@benlitchfield.com) + * @version $Revision: 1.1 $ + */ +public class CurveTo extends OperatorProcessor +{ + + + /** + * process : c : Append curved segment to path. + * @param operator The operator that is being executed. 
+ * @param arguments List + */ + public void process(PDFOperator operator, List arguments) + { + PageDrawer drawer = (PageDrawer)context; + + COSNumber x1 = (COSNumber)arguments.get( 0 ); + COSNumber y1 = (COSNumber)arguments.get( 1 ); + COSNumber x2 = (COSNumber)arguments.get( 2 ); + COSNumber y2 = (COSNumber)arguments.get( 3 ); + COSNumber x3 = (COSNumber)arguments.get( 4 ); + COSNumber y3 = (COSNumber)arguments.get( 5 ); + float x1f = x1.floatValue(); + float y1f = (float)drawer.fixY( x1f, y1.floatValue() ); + float x2f = x2.floatValue(); + float y2f = (float)drawer.fixY( x2f, y2.floatValue() ); + float x3f = x3.floatValue(); + float y3f = (float)drawer.fixY( x3f, y3.floatValue() ); + drawer.getLinePath().curveTo(x1f,y1f,x2f,y2f,x3f,y3f); + } +} diff --git a/src/main/java/org/pdfbox/util/operator/pagedrawer/CurveToReplicateFinalPoint.java b/src/main/java/org/pdfbox/util/operator/pagedrawer/CurveToReplicateFinalPoint.java new file mode 100644 index 0000000..670cdb3 --- /dev/null +++ b/src/main/java/org/pdfbox/util/operator/pagedrawer/CurveToReplicateFinalPoint.java @@ -0,0 +1,69 @@ +/** + * Copyright (c) 2005, www.pdfbox.org + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * 3. Neither the name of pdfbox; nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * http://www.pdfbox.org + * + */ +package org.pdfbox.util.operator.pagedrawer; + +import java.util.List; + +import org.pdfbox.cos.COSNumber; +import org.pdfbox.pdfviewer.PageDrawer; +import org.pdfbox.util.PDFOperator; +import org.pdfbox.util.operator.OperatorProcessor; + +/** + * Implementation of content stream operator for page drawer. + * + * @author Ben Litchfield (ben@benlitchfield.com) + * @version $Revision: 1.1 $ + */ +public class CurveToReplicateFinalPoint extends OperatorProcessor +{ + + + /** + * process : y : Append curved segment to path (final point replicated). + * @param operator The operator that is being executed. 
+ * @param arguments List + */ + public void process(PDFOperator operator, List arguments) + { + PageDrawer drawer = (PageDrawer)context; + + COSNumber x1 = (COSNumber)arguments.get( 0 ); + COSNumber y1 = (COSNumber)arguments.get( 1 ); + COSNumber x3 = (COSNumber)arguments.get( 2 ); + COSNumber y3 = (COSNumber)arguments.get( 3 ); + float x1f = x1.floatValue(); + float y1f = (float)drawer.fixY( x1f, y1.floatValue() ); + float x3f = x3.floatValue(); + float y3f = (float)drawer.fixY( x3f, y3.floatValue() ); + drawer.getLinePath().curveTo(x1f,y1f,x3f,y3f,x3f,y3f); + } +} diff --git a/src/main/java/org/pdfbox/util/operator/pagedrawer/CurveToReplicateInitialPoint.java b/src/main/java/org/pdfbox/util/operator/pagedrawer/CurveToReplicateInitialPoint.java new file mode 100644 index 0000000..ff354d0 --- /dev/null +++ b/src/main/java/org/pdfbox/util/operator/pagedrawer/CurveToReplicateInitialPoint.java @@ -0,0 +1,76 @@ +/** + * Copyright (c) 2005, www.pdfbox.org + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * 3. Neither the name of pdfbox; nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * http://www.pdfbox.org + * + */ +package org.pdfbox.util.operator.pagedrawer; + +import java.awt.geom.GeneralPath; +import java.awt.geom.Point2D; +import java.util.List; + +import org.pdfbox.cos.COSNumber; +import org.pdfbox.pdfviewer.PageDrawer; +import org.pdfbox.util.PDFOperator; +import org.pdfbox.util.operator.OperatorProcessor; + +/** + * Implementation of content stream operator for page drawer. + * + * @author Ben Litchfield (ben@benlitchfield.com) + * @version $Revision: 1.1 $ + */ +public class CurveToReplicateInitialPoint extends OperatorProcessor +{ + + + /** + * process : v : Append curved segment to path (initial point replicated). + * @param operator The operator that is being executed. 
+ * @param arguments List + */ + public void process(PDFOperator operator, List arguments) + { + PageDrawer drawer = (PageDrawer)context; + + COSNumber x2 = (COSNumber)arguments.get( 0 ); + COSNumber y2 = (COSNumber)arguments.get( 1 ); + COSNumber x3 = (COSNumber)arguments.get( 2 ); + COSNumber y3 = (COSNumber)arguments.get( 3 ); + float x2f = x2.floatValue(); + float y2f = (float)drawer.fixY( x2f, y2.floatValue() ); + float x3f = x3.floatValue(); + float y3f = (float)drawer.fixY( x3f, y3.floatValue() ); + + GeneralPath path = drawer.getLinePath(); + Point2D currentPoint = path.getCurrentPoint(); + float currentX = (float)currentPoint.getX(); + float currentY = (float)currentPoint.getY(); + drawer.getLinePath().curveTo(currentX,currentY,x2f,y2f,x3f,y3f); + } +} diff --git a/src/main/java/org/pdfbox/util/operator/pagedrawer/FillEvenOddRule.java b/src/main/java/org/pdfbox/util/operator/pagedrawer/FillEvenOddRule.java new file mode 100644 index 0000000..da9e834 --- /dev/null +++ b/src/main/java/org/pdfbox/util/operator/pagedrawer/FillEvenOddRule.java @@ -0,0 +1,71 @@ +/** + * Copyright (c) 2005, www.pdfbox.org + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * 3. Neither the name of pdfbox; nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * http://www.pdfbox.org + * + */ +package org.pdfbox.util.operator.pagedrawer; + +import java.awt.Graphics2D; +import java.awt.RenderingHints; +import java.awt.geom.GeneralPath; +import java.util.List; + +import org.pdfbox.pdfviewer.PageDrawer; +import org.pdfbox.util.PDFOperator; +import org.pdfbox.util.operator.OperatorProcessor; + +/** + * Implementation of content stream operator for page drawer. + * + * @author Ben Litchfield (ben@benlitchfield.com) + * @version $Revision: 1.1 $ + */ +public class FillEvenOddRule extends OperatorProcessor +{ + + + /** + * process : f* : fill path using even odd rule. + * @param operator The operator that is being executed. 
+ * @param arguments List + */ + public void process(PDFOperator operator, List arguments) + { +// NOTE:changes here should probably also be made to FillNonZeroRule + PageDrawer drawer = (PageDrawer)context; + Graphics2D graphics = drawer.getGraphics(); + //linePath.closePath(); + graphics.setColor( drawer.getNonStrokingColor() ); + drawer.getLinePath().setWindingRule( GeneralPath.WIND_EVEN_ODD ); + graphics.setRenderingHint( RenderingHints.KEY_ANTIALIASING, RenderingHints.VALUE_ANTIALIAS_OFF ); + //else + //{ + graphics.fill( drawer.getLinePath() ); + //} + } +} diff --git a/src/main/java/org/pdfbox/util/operator/pagedrawer/FillNonZeroRule.java b/src/main/java/org/pdfbox/util/operator/pagedrawer/FillNonZeroRule.java new file mode 100644 index 0000000..56efa33 --- /dev/null +++ b/src/main/java/org/pdfbox/util/operator/pagedrawer/FillNonZeroRule.java @@ -0,0 +1,71 @@ +/** + * Copyright (c) 2005, www.pdfbox.org + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * 3. Neither the name of pdfbox; nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * http://www.pdfbox.org + * + */ +package org.pdfbox.util.operator.pagedrawer; + +import java.awt.Graphics2D; +import java.awt.RenderingHints; +import java.awt.geom.GeneralPath; +import java.util.List; + +import org.pdfbox.pdfviewer.PageDrawer; +import org.pdfbox.util.PDFOperator; +import org.pdfbox.util.operator.OperatorProcessor; + +/** + * Implementation of content stream operator for page drawer. + * + * @author Ben Litchfield (ben@benlitchfield.com) + * @version $Revision: 1.1 $ + */ +public class FillNonZeroRule extends OperatorProcessor +{ + + + /** + * process : F/f : fill path using non zero winding rule. + * @param operator The operator that is being executed. 
+ * @param arguments List + */ + public void process(PDFOperator operator, List arguments) + { + //NOTE:changes here should probably also be made to FillEvenOddRule + PageDrawer drawer = (PageDrawer)context; + Graphics2D graphics = drawer.getGraphics(); + //linePath.closePath(); + graphics.setColor( drawer.getNonStrokingColor() ); + drawer.getLinePath().setWindingRule( GeneralPath.WIND_NON_ZERO ); + graphics.setRenderingHint( RenderingHints.KEY_ANTIALIASING, RenderingHints.VALUE_ANTIALIAS_OFF ); + //else + //{ + graphics.fill( drawer.getLinePath() ); + //} + } +} diff --git a/src/main/java/org/pdfbox/util/operator/pagedrawer/Invoke.java b/src/main/java/org/pdfbox/util/operator/pagedrawer/Invoke.java new file mode 100644 index 0000000..3d99b74 --- /dev/null +++ b/src/main/java/org/pdfbox/util/operator/pagedrawer/Invoke.java @@ -0,0 +1,180 @@ +/** + * Copyright (c) 2005, www.pdfbox.org + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * 3. Neither the name of pdfbox; nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * http://www.pdfbox.org + * + */ +package org.pdfbox.util.operator.pagedrawer; + +import java.awt.Dimension; +import java.awt.Graphics2D; +import java.awt.geom.AffineTransform; +import java.awt.image.BufferedImage; +import java.io.IOException; +import java.util.List; +import java.util.Map; + +import org.apache.log4j.Logger; +import org.pdfbox.cos.COSName; +import org.pdfbox.pdfviewer.PageDrawer; +import org.pdfbox.pdmodel.PDPage; +import org.pdfbox.pdmodel.graphics.xobject.PDXObject; +import org.pdfbox.pdmodel.graphics.xobject.PDXObjectImage; +import org.pdfbox.util.Matrix; +import org.pdfbox.util.PDFOperator; +import org.pdfbox.util.operator.OperatorProcessor; + +/** + * Implementation of content stream operator for page drawer. + * + * @author Ben Litchfield (ben@benlitchfield.com) + * @version $Revision: 1.1 $ + */ +public class Invoke extends OperatorProcessor +{ + private static Logger log = Logger.getLogger( Invoke.class ); + + /** + * process : re : append rectangle to path. + * @param operator The operator that is being executed. + * @param arguments List + * @throws IOException If there is an error invoking the sub object. 
+ */ + public void process(PDFOperator operator, List arguments) throws IOException + { + PageDrawer drawer = (PageDrawer)context; + PDPage page = drawer.getPage(); + Dimension pageSize = drawer.getPageSize(); + Graphics2D graphics = drawer.getGraphics(); + COSName objectName = (COSName)arguments.get( 0 ); + Map xobjects = drawer.getResources().getXObjects(); + PDXObject xobject = (PDXObject)xobjects.get( objectName.getName() ); + if( xobject instanceof PDXObjectImage ) + { + PDXObjectImage image = (PDXObjectImage)xobject; + try + { + BufferedImage awtImage = image.getRGBImage(); + Matrix ctm = drawer.getGraphicsState().getCurrentTransformationMatrix(); + + int width = awtImage.getWidth(); + int height = awtImage.getHeight(); + + double rotationInRadians =(page.findRotation() * Math.PI)/180; + + + AffineTransform rotation = new AffineTransform(); + rotation.setToRotation( rotationInRadians ); + AffineTransform rotationInverse = rotation.createInverse(); + Matrix rotationInverseMatrix = new Matrix(); + rotationInverseMatrix.setFromAffineTransform( rotationInverse ); + Matrix rotationMatrix = new Matrix(); + rotationMatrix.setFromAffineTransform( rotation ); + + Matrix unrotatedCTM = ctm.multiply( rotationInverseMatrix ); + + Matrix scalingParams = unrotatedCTM.extractScaling(); + Matrix scalingMatrix = Matrix.getScaleInstance(1f/width,1f/height); + scalingParams = scalingParams.multiply( scalingMatrix ); + + Matrix translationParams = unrotatedCTM.extractTranslating(); + Matrix translationMatrix = null; + int pageRotation = page.findRotation(); + if( pageRotation == 0 ) + { + translationParams.setValue(2,1, -translationParams.getValue( 2,1 )); + translationMatrix = Matrix.getTranslatingInstance( + 0, (float)pageSize.getHeight()-height*scalingParams.getYScale() ); + } + else if( pageRotation == 90 ) + { + translationMatrix = Matrix.getTranslatingInstance( 0, (float)pageSize.getHeight() ); + } + else + { + //TODO need to figure out other cases + } + translationParams 
= translationParams.multiply( translationMatrix ); + + AffineTransform at = new AffineTransform( + scalingParams.getValue( 0,0), 0, + 0, scalingParams.getValue( 1, 1), + translationParams.getValue(2,0), translationParams.getValue( 2,1 ) + ); + + + + + //at.setToTranslation( pageSize.getHeight()-ctm.getValue(2,0),ctm.getValue(2,1) ); + //at.setToScale( ctm.getValue(0,0)/width, ctm.getValue(1,1)/height); + //at.setToRotation( (page.findRotation() * Math.PI)/180 ); + + + + //AffineTransform rotation = new AffineTransform(); + //rotation.rotate( (90*Math.PI)/180); + + /* + + // The transformation should be done + // 1 - Translation + // 2 - Rotation + // 3 - Scale or Skew + AffineTransform at = new AffineTransform(); + + // Translation + at = new AffineTransform(); + //at.setToTranslation((double)ctm.getValue(0,0), + // (double)ctm.getValue(0,1)); + + // Rotation + //AffineTransform toAdd = new AffineTransform(); + toAdd.setToRotation(1.5705); + toAdd.setToRotation(ctm.getValue(2,0)*(Math.PI/180)); + at.concatenate(toAdd); + */ + + // Scale / Skew? + //toAdd.setToScale(1, 1); + //at.concatenate(toAdd); + + graphics.drawImage( awtImage, at, null ); + } + catch( Exception e ) + { + e.printStackTrace(); + } + } + else + { + log.warn( "Unknown xobject type:" + xobject ); + } + + + //invoke named object. + } +} diff --git a/src/main/java/org/pdfbox/util/operator/pagedrawer/LineTo.java b/src/main/java/org/pdfbox/util/operator/pagedrawer/LineTo.java new file mode 100644 index 0000000..a70099b --- /dev/null +++ b/src/main/java/org/pdfbox/util/operator/pagedrawer/LineTo.java @@ -0,0 +1,65 @@ +/** + * Copyright (c) 2005, www.pdfbox.org + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * 3. Neither the name of pdfbox; nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * http://www.pdfbox.org + * + */ +package org.pdfbox.util.operator.pagedrawer; + +import java.util.List; + +import org.pdfbox.cos.COSNumber; +import org.pdfbox.pdfviewer.PageDrawer; +import org.pdfbox.util.PDFOperator; +import org.pdfbox.util.operator.OperatorProcessor; + +/** + * Implementation of content stream operator for page drawer. + * + * @author Ben Litchfield (ben@benlitchfield.com) + * @version $Revision: 1.1 $ + */ +public class LineTo extends OperatorProcessor +{ + + + /** + * process : l : Append straight line segment to path. + * @param operator The operator that is being executed. 
+ * @param arguments List + */ + public void process(PDFOperator operator, List arguments) + { + PageDrawer drawer = (PageDrawer)context; + + //append straight line segment from the current point to the point. + COSNumber x = (COSNumber)arguments.get( 0 ); + COSNumber y = (COSNumber)arguments.get( 1 ); + + drawer.getLinePath().lineTo( x.floatValue(), (float)drawer.fixY( x.doubleValue(), y.doubleValue()) ); + } +} diff --git a/src/main/java/org/pdfbox/util/operator/pagedrawer/MoveTo.java b/src/main/java/org/pdfbox/util/operator/pagedrawer/MoveTo.java new file mode 100644 index 0000000..e1c8f1e --- /dev/null +++ b/src/main/java/org/pdfbox/util/operator/pagedrawer/MoveTo.java @@ -0,0 +1,68 @@ +/** + * Copyright (c) 2005, www.pdfbox.org + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * 3. Neither the name of pdfbox; nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * http://www.pdfbox.org + * + */ +package org.pdfbox.util.operator.pagedrawer; + +import java.awt.geom.GeneralPath; +import java.util.List; + +import org.pdfbox.cos.COSNumber; +import org.pdfbox.pdfviewer.PageDrawer; +import org.pdfbox.util.PDFOperator; +import org.pdfbox.util.operator.OperatorProcessor; + +/** + * Implementation of content stream operator for page drawer. + * + * @author Ben Litchfield (ben@benlitchfield.com) + * @version $Revision: 1.1 $ + */ +public class MoveTo extends OperatorProcessor +{ + + + /** + * process : m : Begin new subpath. + * @param operator The operator that is being executed. + * @param arguments List + */ + public void process(PDFOperator operator, List arguments) + { + PageDrawer drawer = (PageDrawer)context; + + COSNumber x = (COSNumber)arguments.get( 0 ); + COSNumber y = (COSNumber)arguments.get( 1 ); + + drawer.getLineSubPaths().add( drawer.getLinePath() ); + GeneralPath newPath = new GeneralPath(); + newPath.moveTo( x.floatValue(), (float)drawer.fixY( x.doubleValue(), y.doubleValue()) ); + drawer.setLinePath( newPath ); + } +} diff --git a/src/main/java/org/pdfbox/util/operator/pagedrawer/SetLineWidth.java b/src/main/java/org/pdfbox/util/operator/pagedrawer/SetLineWidth.java new file mode 100644 index 0000000..4217451 --- /dev/null +++ b/src/main/java/org/pdfbox/util/operator/pagedrawer/SetLineWidth.java @@ -0,0 +1,65 @@ +/** + * Copyright (c) 2005, www.pdfbox.org + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * 3. Neither the name of pdfbox; nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * http://www.pdfbox.org + * + */ +package org.pdfbox.util.operator.pagedrawer; + +import java.util.List; +import org.pdfbox.pdfviewer.PageDrawer; +import org.pdfbox.util.PDFOperator; + +import java.awt.BasicStroke; +import java.io.IOException; + +/** + * Implementation of content stream operator for page drawer. 
+ * + * @author Ben Litchfield (ben@benlitchfield.com) + * @version $Revision: 1.1 $ + */ +public class SetLineWidth extends org.pdfbox.util.operator.SetLineWidth +{ + + /** + * w Set line width. + * @param operator The operator that is being executed. + * @param arguments List + * @throws IOException If an error occurs while processing the font. + */ + public void process(PDFOperator operator, List arguments) throws IOException + { + super.process( operator, arguments ); + float lineWidth = (float)context.getGraphicsState().getLineWidth(); + if( lineWidth == 0 ) + { + lineWidth = 1; + } + ((PageDrawer)context).getGraphics().setStroke( new BasicStroke( lineWidth ) ); + } +} diff --git a/src/main/java/org/pdfbox/util/operator/pagedrawer/SetNonStrokingCMYKColor.java b/src/main/java/org/pdfbox/util/operator/pagedrawer/SetNonStrokingCMYKColor.java new file mode 100644 index 0000000..d1c37fe --- /dev/null +++ b/src/main/java/org/pdfbox/util/operator/pagedrawer/SetNonStrokingCMYKColor.java @@ -0,0 +1,64 @@ +/** + * Copyright (c) 2005, www.pdfbox.org + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * 3. Neither the name of pdfbox; nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * http://www.pdfbox.org + * + */ +package org.pdfbox.util.operator.pagedrawer; + +import java.util.List; + +import java.awt.Color; +import java.io.IOException; + +import org.pdfbox.pdfviewer.PageDrawer; +import org.pdfbox.pdmodel.graphics.color.PDColorSpaceInstance; +import org.pdfbox.util.PDFOperator; + +/** + * Implementation of content stream operator for page drawer. + * + * @author Ben Litchfield (ben@benlitchfield.com) + * @version $Revision: 1.1 $ + */ +public class SetNonStrokingCMYKColor extends org.pdfbox.util.operator.SetNonStrokingCMYKColor +{ + /** + * k Set color space for non stroking operations. + * @param operator The operator that is being executed. + * @param arguments List + * @throws IOException If an error occurs while processing the font. 
+ */ + public void process(PDFOperator operator, List arguments) throws IOException + { + super.process( operator, arguments ); + PageDrawer drawer = (PageDrawer)context; + PDColorSpaceInstance colorInstance = drawer.getGraphicsState().getNonStrokingColorSpace(); + Color color = colorInstance.createColor(); + drawer.setNonStrokingColor( color ); + } +} diff --git a/src/main/java/org/pdfbox/util/operator/pagedrawer/SetNonStrokingColorSpace.java b/src/main/java/org/pdfbox/util/operator/pagedrawer/SetNonStrokingColorSpace.java new file mode 100644 index 0000000..e5abae8 --- /dev/null +++ b/src/main/java/org/pdfbox/util/operator/pagedrawer/SetNonStrokingColorSpace.java @@ -0,0 +1,71 @@ +/** + * Copyright (c) 2005, www.pdfbox.org + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * 3. Neither the name of pdfbox; nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * http://www.pdfbox.org + * + */ +package org.pdfbox.util.operator.pagedrawer; + +import java.util.List; +import org.pdfbox.pdfviewer.PageDrawer; +import org.pdfbox.pdmodel.graphics.color.PDColorSpaceInstance; +import org.pdfbox.util.PDFOperator; + + +import java.awt.Color; +import java.io.IOException; + +/** + * Implementation of content stream operator for page drawer. + * + * @author Ben Litchfield (ben@benlitchfield.com) + * @version $Revision: 1.1 $ + */ +public class SetNonStrokingColorSpace extends org.pdfbox.util.operator.SetNonStrokingColorSpace +{ + /** + * cs Set color space for non stroking operations. + * @param operator The operator that is being executed. + * @param arguments List + * @throws IOException If an error occurs while processing the font. 
+ */ + public void process(PDFOperator operator, List arguments) throws IOException + { + try + { + super.process( operator, arguments ); + PageDrawer drawer = (PageDrawer)context; + PDColorSpaceInstance colorInstance = drawer.getGraphicsState().getNonStrokingColorSpace(); + Color color = colorInstance.createColor(); + drawer.setNonStrokingColor( color ); + } + catch( IOException e ) + { + //ignore for now and continue drawing + } + } +} diff --git a/src/main/java/org/pdfbox/util/operator/pagedrawer/SetNonStrokingRGBColor.java b/src/main/java/org/pdfbox/util/operator/pagedrawer/SetNonStrokingRGBColor.java new file mode 100644 index 0000000..f816d1f --- /dev/null +++ b/src/main/java/org/pdfbox/util/operator/pagedrawer/SetNonStrokingRGBColor.java @@ -0,0 +1,65 @@ +/** + * Copyright (c) 2005, www.pdfbox.org + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * 3. Neither the name of pdfbox; nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * http://www.pdfbox.org + * + */ +package org.pdfbox.util.operator.pagedrawer; + +import java.util.List; + +import java.awt.Color; +import java.io.IOException; + +import org.pdfbox.cos.COSNumber; +import org.pdfbox.pdfviewer.PageDrawer; +import org.pdfbox.util.PDFOperator; + +/** + * Implementation of content stream operator for page drawer. + * + * @author Ben Litchfield (ben@benlitchfield.com) + * @version $Revision: 1.1 $ + */ +public class SetNonStrokingRGBColor extends org.pdfbox.util.operator.SetNonStrokingRGBColor +{ + /** + * rg Set color space for non stroking operations. + * @param operator The operator that is being executed. + * @param arguments List + * @throws IOException If an error occurs while processing the font. 
+ */ + public void process(PDFOperator operator, List arguments) throws IOException + { + super.process( operator, arguments ); + PageDrawer drawer = (PageDrawer)context; + COSNumber r = (COSNumber)arguments.get( 0 ); + COSNumber g = (COSNumber)arguments.get( 1 ); + COSNumber b = (COSNumber)arguments.get( 2 ); + drawer.setNonStrokingColor( new Color( r.floatValue(), g.floatValue(), b.floatValue() ) ); + } +} diff --git a/src/main/java/org/pdfbox/util/operator/pagedrawer/SetStrokingCMYKColor.java b/src/main/java/org/pdfbox/util/operator/pagedrawer/SetStrokingCMYKColor.java new file mode 100644 index 0000000..81fcef3 --- /dev/null +++ b/src/main/java/org/pdfbox/util/operator/pagedrawer/SetStrokingCMYKColor.java @@ -0,0 +1,64 @@ +/** + * Copyright (c) 2005, www.pdfbox.org + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * 3. Neither the name of pdfbox; nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * http://www.pdfbox.org + * + */ +package org.pdfbox.util.operator.pagedrawer; + +import java.util.List; + +import java.awt.Color; +import java.io.IOException; + +import org.pdfbox.pdfviewer.PageDrawer; +import org.pdfbox.pdmodel.graphics.color.PDColorSpaceInstance; +import org.pdfbox.util.PDFOperator; + +/** + * Implementation of content stream operator for page drawer. + * + * @author Ben Litchfield (ben@benlitchfield.com) + * @version $Revision: 1.1 $ + */ +public class SetStrokingCMYKColor extends org.pdfbox.util.operator.SetStrokingCMYKColor +{ + /** + * CS Set color space for stroking operations. + * @param operator The operator that is being executed. + * @param arguments List + * @throws IOException If an error occurs while processing the font. 
+ */ + public void process(PDFOperator operator, List arguments) throws IOException + { + super.process( operator, arguments ); + PageDrawer drawer = (PageDrawer)context; + PDColorSpaceInstance colorInstance = drawer.getGraphicsState().getNonStrokingColorSpace(); + Color color = colorInstance.createColor(); + drawer.setStrokingColor( color ); + } +} diff --git a/src/main/java/org/pdfbox/util/operator/pagedrawer/SetStrokingColorSpace.java b/src/main/java/org/pdfbox/util/operator/pagedrawer/SetStrokingColorSpace.java new file mode 100644 index 0000000..22c9c5c --- /dev/null +++ b/src/main/java/org/pdfbox/util/operator/pagedrawer/SetStrokingColorSpace.java @@ -0,0 +1,70 @@ +/** + * Copyright (c) 2005, www.pdfbox.org + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * 3. Neither the name of pdfbox; nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * http://www.pdfbox.org + * + */ +package org.pdfbox.util.operator.pagedrawer; + +import java.util.List; +import org.pdfbox.pdfviewer.PageDrawer; +import org.pdfbox.pdmodel.graphics.color.PDColorSpaceInstance; +import org.pdfbox.util.PDFOperator; + +import java.awt.Color; +import java.io.IOException; + +/** + * Implementation of content stream operator for page drawer. + * + * @author Ben Litchfield (ben@benlitchfield.com) + * @version $Revision: 1.1 $ + */ +public class SetStrokingColorSpace extends org.pdfbox.util.operator.SetNonStrokingColorSpace +{ + /** + * CS Set color space for stroking operations. + * @param operator The operator that is being executed. + * @param arguments List + * @throws IOException If an error occurs while processing the font. 
+ */ + public void process(PDFOperator operator, List arguments) throws IOException + { + super.process( operator, arguments ); + try + { + PageDrawer drawer = (PageDrawer)context; + PDColorSpaceInstance colorInstance = drawer.getGraphicsState().getNonStrokingColorSpace(); + Color color = colorInstance.createColor(); + drawer.setStrokingColor( color ); + } + catch( IOException e ) + { + //ignore for now and continue drawing + } + } +} diff --git a/src/main/java/org/pdfbox/util/operator/pagedrawer/SetStrokingRGBColor.java b/src/main/java/org/pdfbox/util/operator/pagedrawer/SetStrokingRGBColor.java new file mode 100644 index 0000000..8d7099e --- /dev/null +++ b/src/main/java/org/pdfbox/util/operator/pagedrawer/SetStrokingRGBColor.java @@ -0,0 +1,65 @@ +/** + * Copyright (c) 2005, www.pdfbox.org + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * 3. Neither the name of pdfbox; nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * http://www.pdfbox.org + * + */ +package org.pdfbox.util.operator.pagedrawer; + +import java.util.List; + +import java.awt.Color; +import java.io.IOException; + +import org.pdfbox.cos.COSNumber; +import org.pdfbox.pdfviewer.PageDrawer; +import org.pdfbox.util.PDFOperator; + +/** + * Implementation of content stream operator for page drawer. + * + * @author Ben Litchfield (ben@benlitchfield.com) + * @version $Revision: 1.1 $ + */ +public class SetStrokingRGBColor extends org.pdfbox.util.operator.SetStrokingRGBColor +{ + /** + * RG Set color space for stroking operations. + * @param operator The operator that is being executed. + * @param arguments List + * @throws IOException If an error occurs while processing the font. 
+ */ + public void process(PDFOperator operator, List arguments) throws IOException + { + super.process( operator, arguments ); + PageDrawer drawer = (PageDrawer)context; + COSNumber r = (COSNumber)arguments.get( 0 ); + COSNumber g = (COSNumber)arguments.get( 1 ); + COSNumber b = (COSNumber)arguments.get( 2 ); + drawer.setStrokingColor( new Color( r.floatValue(), g.floatValue(), b.floatValue() ) ); + } +} diff --git a/src/main/java/org/pdfbox/util/operator/pagedrawer/StrokePath.java b/src/main/java/org/pdfbox/util/operator/pagedrawer/StrokePath.java new file mode 100644 index 0000000..31a489c --- /dev/null +++ b/src/main/java/org/pdfbox/util/operator/pagedrawer/StrokePath.java @@ -0,0 +1,73 @@ +/** + * Copyright (c) 2005, www.pdfbox.org + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * 3. Neither the name of pdfbox; nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * http://www.pdfbox.org + * + */ +package org.pdfbox.util.operator.pagedrawer; + +import java.util.List; +import org.pdfbox.pdfviewer.PageDrawer; +import org.pdfbox.util.PDFOperator; + +import java.awt.Graphics2D; +import java.awt.geom.GeneralPath; +import java.io.IOException; + +/** + * Implementation of content stream operator for page drawer. + * + * @author Ben Litchfield (ben@benlitchfield.com) + * @version $Revision: 1.1 $ + */ +public class StrokePath extends org.pdfbox.util.operator.SetLineWidth +{ + + /** + * S stroke the path. + * @param operator The operator that is being executed. + * @param arguments List + * + * @throws IOException If an error occurs while processing the font. + */ + public void process(PDFOperator operator, List arguments) throws IOException + { + PageDrawer drawer = (PageDrawer)context; + Graphics2D graphics = ((PageDrawer)context).getGraphics(); + graphics.setColor( drawer.getStrokingColor() ); + List subPaths = drawer.getLineSubPaths(); + for( int i=0; i + + + + + +This package contains implementations of all of the PDF operators. + + diff --git a/src/main/java/org/pdfbox/util/package.html b/src/main/java/org/pdfbox/util/package.html new file mode 100644 index 0000000..f8948fb --- /dev/null +++ b/src/main/java/org/pdfbox/util/package.html @@ -0,0 +1,9 @@ + + + + + + +This package contains utility classes that are used by the PDFBox project. + + -- cgit v1.2.3