/** * Copyright (c) 2006 by Know-Center, Graz, Austria * * This software is the confidential and proprietary information of Know-Center, * Graz, Austria. You shall not disclose such Confidential Information and shall * use it only in accordance with the terms of the license agreement you entered * into with Know-Center. * * KNOW-CENTER MAKES NO REPRESENTATIONS OR WARRANTIES ABOUT THE SUITABILITY OF * THE SOFTWARE, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE * IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, OR * NON-INFRINGEMENT. KNOW-CENTER SHALL NOT BE LIABLE FOR ANY DAMAGES SUFFERED BY * LICENSEE AS A RESULT OF USING, MODIFYING OR DISTRIBUTING THIS SOFTWARE OR ITS * DERIVATIVES. * * $Id: PDFPage.java,v 1.5 2006/10/31 08:09:33 wprinz Exp $ */ package at.knowcenter.wag.egov.egiz.pdf; import java.io.IOException; import java.util.List; import java.util.Map; import org.apache.log4j.Logger; import org.pdfbox.cos.COSName; import org.pdfbox.cos.COSStream; import org.pdfbox.pdmodel.PDPage; import org.pdfbox.pdmodel.PDResources; import org.pdfbox.pdmodel.common.PDStream; import org.pdfbox.pdmodel.graphics.xobject.PDXObject; import org.pdfbox.pdmodel.graphics.xobject.PDXObjectForm; import org.pdfbox.util.Matrix; import org.pdfbox.util.PDFOperator; import org.pdfbox.util.PDFTextStripper; import org.pdfbox.util.TextPosition; import org.pdfbox.util.operator.OperatorProcessor; import at.knowcenter.wag.egov.egiz.cfg.ConfigLogger; /** * PDFPage is an inner class that is used to calculate the page length of a PDF * Document page. It extends the PDFTextStripper class and implement one * interested method: {@link PDFPage#showCharacter(TextPosition)}
* This method is called when processing the FileStream. By calling the method
 * {@link org.pdfbox.util.PDFStreamEngine#processStream(org.pdfbox.pdmodel.PDPage, org.pdfbox.pdmodel.PDResources, org.pdfbox.cos.COSStream)}
 * the implemented method showCharacter is called.
 *
 * @author wlackner
 * @see PDFTextStripper
 */
public class PDFPage extends PDFTextStripper
{
  /**
   * Logger shared by this class and its inner operator processor.
   */
  private static final Logger logger_ = ConfigLogger.getLogger(PDFPage.class);

  /**
   * The maximum (lowest) y position of a character. Starts at negative
   * infinity so that any real position replaces it.
   */
  protected float max_character_ypos = Float.NEGATIVE_INFINITY;

  /**
   * The maximum (lowest) y position of an image. Starts at negative infinity
   * so that any real position replaces it.
   */
  protected float max_image_ypos = Float.NEGATIVE_INFINITY;

  /**
   * The y coordinate of the footer line. PDF elements below this footer line
   * will not be regarded.
   */
  protected float footer_line = 0.0f;

  /**
   * Creates the page analyser and replaces the processor of the "Do" operator
   * with a {@link MyInvoke} instance bound to this object.
   *
   * @param footer_line
   *          The y coordinate of the footer line. PDF elements below this
   *          footer line will not be regarded.
   *
   * @throws IOException
   *           declared by the super class constructor
   */
  public PDFPage(float footer_line) throws IOException
  {
    // The no-argument super constructor is invoked implicitly.
    this.footer_line = footer_line;

    final OperatorProcessor xobjectInvoker = new MyInvoke();
    xobjectInvoker.setContext(this);
    operators.put("Do", xobjectInvoker);
  }

  // /**
  // * You should override this method if you want to perform an action when a
  // * string is being shown.
  // *
  // * @param string The string to display.
// * // * @throws IOException If there is an error showing the string // */ // public void showString( byte[] string ) throws IOException // { // float spaceWidth = 0; // float spacing = 0; // StringBuffer stringResult = new StringBuffer(string.length); // // float characterDisplacement = 0; // float spaceDisplacement = 0; // // PDGraphicsState graphicsState = getGraphicsState(); // float fontSize = graphicsState.getTextState().getFontSize(); // float horizontalScaling = // graphicsState.getTextState().getHorizontalScalingPercent()/100f; // float rise = graphicsState.getTextState().getRise(); // final float wordSpacing = graphicsState.getTextState().getWordSpacing(); // final float characterSpacing = // graphicsState.getTextState().getCharacterSpacing(); // float wordSpacingDisplacement = 0; // // PDFont font = graphicsState.getTextState().getFont(); // // //This will typically be 1000 but in the case of a type3 font // //this might be a different number // float glyphSpaceToTextSpaceFactor = 1f/font.getFontMatrix().getValue( 0, 0 // ); // Float averageWidth = (Float)fontToAverageWidths.get( font ); // if( averageWidth == null ) // { // averageWidth = new Float( font.getAverageFontWidth() ); // fontToAverageWidths.put( font, averageWidth ); // } // // Matrix initialMatrix = new Matrix(); // initialMatrix.setValue(0,0,1); // initialMatrix.setValue(0,1,0); // initialMatrix.setValue(0,2,0); // initialMatrix.setValue(1,0,0); // initialMatrix.setValue(1,1,1); // initialMatrix.setValue(1,2,0); // initialMatrix.setValue(2,0,0); // initialMatrix.setValue(2,1,rise); // initialMatrix.setValue(2,2,1); // // // //this // int codeLength = 1; // Matrix ctm = graphicsState.getCurrentTransformationMatrix(); // // //lets see what the space displacement should be // spaceDisplacement = (font.getFontWidth( SPACE_BYTES, 0, 1 // )/glyphSpaceToTextSpaceFactor); // if( spaceDisplacement == 0 ) // { // spaceDisplacement = // (averageWidth.floatValue()/glyphSpaceToTextSpaceFactor); // 
//The average space width appears to be higher than necessary // //so lets make it a little bit smaller. // spaceDisplacement *= .80f; // if( log.isDebugEnabled() ) // { // log.debug( "Font: Space From Average=" + spaceDisplacement ); // } // } // int pageRotation = page.findRotation(); // // // very strange.... the ctms are multiplied by right, but suddenly the // textM is multiplied from the left. // // but: PDF matrices are multiplied from left ==> ctm is wrong // Matrix trm = initialMatrix.multiply( textMatrix ).multiply( ctm ); // float x = trm.getValue(2,0); // float y = trm.getValue(2,1); // float flipped_y = -y + page.findMediaBox().getHeight(); // if( pageRotation == 0 ) // { // trm.setValue( 2,1, flipped_y ); // } // else if( pageRotation == 90 ) // { // trm.setValue( 2,0, y ); // trm.setValue( 2,1, x ); // } // else if( pageRotation == 270 ) // { // trm.setValue( 2,0, flipped_y ); // trm.setValue( 2,1, x ); // } // for( int i=0; i this.max_character_ypos) { this.max_character_ypos = current_y; //logger_.debug("text.character=" + character + ", y=" + current_y); // System.err.println(character + "|" + current_y); } //logger_.debug("text.character=" + character + ", y=" + current_y); // System.err.println(character + "|" + current_y); } // use this funtion getting an unsorted text output // public void showString(byte[] string) { // logger_.debug(new String(string)); // } /** * Returns the calculated page length. 
* * @return the max page length value */ public float getMaxPageLength() { float max_ypos = Float.NEGATIVE_INFINITY; if (this.max_character_ypos > this.max_image_ypos) { max_ypos = this.max_character_ypos; } else { max_ypos = this.max_image_ypos; } return max_ypos; } public class MyInvoke extends OperatorProcessor { public void process(PDFOperator operator, List arguments) throws IOException { COSName name = (COSName) arguments.get(0); //logger_.debug(""); // PDResources res = context.getResources(); Map xobjects = context.getXObjects(); PDXObject xobject = (PDXObject) xobjects.get(name.getName()); PDStream stream = xobject.getPDStream(); COSStream cos_stream = stream.getStream(); COSName subtype = (COSName) cos_stream.getDictionaryObject(COSName.SUBTYPE); if (subtype.equals(COSName.IMAGE)) { logger_.debug("XObject Image"); Matrix ctm = context.getGraphicsState().getCurrentTransformationMatrix(); logger_.debug("ctm = " + ctm); Pos [] coordinates = new Pos [] { new Pos(0, 0, 1), new Pos(1, 0, 1), new Pos(0, 1, 1), new Pos(1, 1, 1) }; Pos [] transformed_coordinates = transtormCoordinates(coordinates, ctm); float actual_lowest_point = Float.NaN; int pageRotation = page.findRotation(); logger_.debug("PageRotation = " + pageRotation); if (pageRotation == 0) { float min_y = findMinY(transformed_coordinates); logger_.debug("min_y = " + min_y); float page_height = page.findMediaBox().getHeight(); logger_.debug("page_height = " + page_height); actual_lowest_point = page_height - min_y; } if (pageRotation == 90) { float max_x = findMaxX(transformed_coordinates); logger_.debug("max_x = " + max_x); // float page_width = page.findMediaBox().getWidth(); // logger_.debug("page_width = " + page_width); actual_lowest_point = max_x; } if (pageRotation == 180) { float min_y = findMinY(transformed_coordinates); logger_.debug("min_y = " + min_y); actual_lowest_point = min_y; } if (pageRotation == 270) { float min_x = findMinX(transformed_coordinates); logger_.debug("min_x = " + min_x); 
// float page_width = page.findMediaBox().getWidth(); // logger_.debug("page_width = " + page_width); actual_lowest_point = min_x; } logger_.debug("actual_lowest_point = " + actual_lowest_point); if (actual_lowest_point > PDFPage.this.footer_line) { logger_.debug("image is below footer_line. footer_line = " + PDFPage.this.footer_line); return; } if (actual_lowest_point > PDFPage.this.max_image_ypos) { PDFPage.this.max_image_ypos = actual_lowest_point; } return; } if (xobject instanceof PDXObjectForm) { PDXObjectForm form = (PDXObjectForm) xobject; COSStream invoke = (COSStream) form.getCOSObject(); PDResources pdResources = form.getResources(); PDPage page = context.getCurrentPage(); if (pdResources == null) { pdResources = page.findResources(); } getContext().processSubStream(page, pdResources, invoke); } } } public static Pos [] transtormCoordinates (Pos [] coordinates, Matrix m) { Pos [] transformed = new Pos [coordinates.length]; for (int i = 0; i < coordinates.length; i++) { transformed[i] = transtormCoordinate(coordinates[i], m); } return transformed; } public static Pos transtormCoordinate (Pos pos, Matrix m) { Pos transformed = new Pos(); transformed.x = pos.x * m.getValue(0, 0) + pos.y * m.getValue(1, 0) + pos.z * m.getValue(2, 0); transformed.y = pos.x * m.getValue(0, 1) + pos.y * m.getValue(1, 1) + pos.z * m.getValue(2, 1); transformed.z = pos.x * m.getValue(0, 2) + pos.y * m.getValue(1, 2) + pos.z * m.getValue(2, 2); logger_.debug(" transformed " + pos + " --> " + transformed); return transformed; } public static float findMinY (Pos [] coordinates) { float min = Float.POSITIVE_INFINITY; for (int i = 0; i < coordinates.length; i++) { if (coordinates[i].y < min) { min = coordinates[i].y; } } return min; } public static float findMaxX (Pos [] coordinates) { float max = Float.NEGATIVE_INFINITY; for (int i = 0; i < coordinates.length; i++) { if (coordinates[i].x > max) { max = coordinates[i].x; } } return max; } public static float findMinX (Pos [] 
coordinates) { float min = Float.POSITIVE_INFINITY; for (int i = 0; i < coordinates.length; i++) { if (coordinates[i].x < min) { min = coordinates[i].x; } } return min; } }