/** * Copyright 2006 by Know-Center, Graz, Austria * PDF-AS has been contracted by the E-Government Innovation Center EGIZ, a * joint initiative of the Federal Chancellery Austria and Graz University of * Technology. * * Licensed under the EUPL, Version 1.1 or - as soon they will be approved by * the European Commission - subsequent versions of the EUPL (the "Licence"); * You may not use this work except in compliance with the Licence. * You may obtain a copy of the Licence at: * http://www.osor.eu/eupl/ * * Unless required by applicable law or agreed to in writing, software * distributed under the Licence is distributed on an "AS IS" basis, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the Licence for the specific language governing permissions and * limitations under the Licence. * * This product combines work with different licenses. See the "NOTICE" text * file for details on the various modules and licenses. * The "NOTICE" text file is part of the distribution. Any derivative works * that you distribute must include a readable copy of the "NOTICE" text file. * * $Id: PDFPage.java,v 1.5 2006/10/31 08:09:33 wprinz Exp $ */ package at.knowcenter.wag.egov.egiz.pdf; import java.awt.Rectangle; import java.awt.geom.GeneralPath; import java.io.IOException; import java.util.List; import java.util.Map; import org.apache.commons.lang.math.NumberUtils; import org.apache.log4j.Logger; import org.pdfbox.cos.COSName; import org.pdfbox.cos.COSStream; import org.pdfbox.pdmodel.PDPage; import org.pdfbox.pdmodel.PDResources; import org.pdfbox.pdmodel.common.PDRectangle; import org.pdfbox.pdmodel.common.PDStream; import org.pdfbox.pdmodel.graphics.xobject.PDXObject; import org.pdfbox.pdmodel.graphics.xobject.PDXObjectForm; import org.pdfbox.util.Matrix; import org.pdfbox.util.PDFOperator; import org.pdfbox.util.PDFTextStripper; import org.pdfbox.util.TextPosition; import org.pdfbox.util.operator.OperatorProcessor; import at.knowcenter.wag.egov.egiz.cfg.ConfigLogger; import at.knowcenter.wag.egov.egiz.cfg.SettingsReader; import at.knowcenter.wag.egov.egiz.exceptions.SettingsException; import at.knowcenter.wag.egov.egiz.pdf.operator.path.construction.ClosePath; import at.knowcenter.wag.egov.egiz.pdf.operator.path.construction.CurveTo; import at.knowcenter.wag.egov.egiz.pdf.operator.path.construction.CurveToReplicateFinalPoint; import at.knowcenter.wag.egov.egiz.pdf.operator.path.construction.CurveToReplicateInitialPoint; import at.knowcenter.wag.egov.egiz.pdf.operator.path.construction.LineTo; import at.knowcenter.wag.egov.egiz.pdf.operator.path.construction.MoveTo; import at.knowcenter.wag.egov.egiz.pdf.operator.path.painting.CloseAndStrokePath; import at.knowcenter.wag.egov.egiz.pdf.operator.path.painting.CloseFillEvenOddAndStrokePath; import at.knowcenter.wag.egov.egiz.pdf.operator.path.painting.CloseFillNonZeroAndStrokePath; import at.knowcenter.wag.egov.egiz.pdf.operator.path.painting.EndPath; import at.knowcenter.wag.egov.egiz.pdf.operator.path.painting.FillEvenOddAndStrokePath; import at.knowcenter.wag.egov.egiz.pdf.operator.path.painting.FillNonZeroAndStrokePath; import at.knowcenter.wag.egov.egiz.pdf.operator.path.painting.FillPathEvenOddRule; import at.knowcenter.wag.egov.egiz.pdf.operator.path.painting.FillPathNonZeroWindingNumberRule; import at.knowcenter.wag.egov.egiz.pdf.operator.path.painting.StrokePath; /** * PDFPage is an inner class that is used to calculate the page length of a PDF * Document page. It extends the PDFTextStripper class and implement one * interested method: {@link PDFPage#showCharacter(TextPosition)}
* This method is called when processing the FileStream. By calling the method * {@link org.pdfbox.util.PDFStreamEngine#processStream(org.pdfbox.pdmodel.PDPage, org.pdfbox.pdmodel.PDResources, org.pdfbox.cos.COSStream)} * the implemented method showCharacter is called. * * @author wlackner * @see PDFTextStripper */ public class PDFPage extends PDFTextStripper { /** * The logger definition. */ private static final Logger logger_ = ConfigLogger.getLogger(PDFPage.class); /** * The maximum (lowest) y position of a character. */ protected float max_character_ypos = Float.NEGATIVE_INFINITY; /** * The maximum (lowest y position of an image. */ protected float max_image_ypos = Float.NEGATIVE_INFINITY; /** * The effective page height. */ protected float effectivePageHeight; /** * The path currently being constructed. */ private GeneralPath currentPath = new GeneralPath(); /** * The lowest position of a drawn path (originating from top). */ private float maxPathRelatedYPositionFromTop = Float.NEGATIVE_INFINITY; /** * Constructor. * * @param effectivePageHeight * The height of the page to be evaluated. PDF elements outside * this height will not be considered. * * @throws IOException */ public PDFPage(float effectivePageHeight) throws IOException { super(); this.effectivePageHeight = effectivePageHeight; OperatorProcessor newInvoke = new MyInvoke(); newInvoke.setContext(this); operators.put("Do", newInvoke); boolean legacy = false; try { String leg = SettingsReader.getInstance().getSetting("legacy.pos", "false"); if (leg != null) { if ("true".equals(leg.trim())) { legacy = true; } } } catch (SettingsException e) { // TODO Auto-generated catch block e.printStackTrace(); } if (!legacy) { registerCustomPathOperators(); } } /** * Registers operators responsible for path construction and painting in * order to fix auto positioning on pages with path elements. * * @author Datentechnik Innovation GmbH */ @SuppressWarnings("unchecked") private void registerCustomPathOperators() { // *** path construction operators.put("m", new MoveTo(this)); operators.put("l", new LineTo(this)); operators.put("c", new CurveTo(this)); operators.put("y", new CurveToReplicateFinalPoint(this)); operators.put("v", new CurveToReplicateInitialPoint(this)); operators.put("h", new ClosePath(this)); // *** path painting // "S": stroke path operators.put("S", new StrokePath(this)); operators.put("s", new CloseAndStrokePath(this)); operators.put("f", new FillPathNonZeroWindingNumberRule(this)); operators.put("F", new FillPathNonZeroWindingNumberRule(this)); operators.put("f*", new FillPathEvenOddRule(this)); operators.put("b", new CloseFillNonZeroAndStrokePath(this)); operators.put("B", new FillNonZeroAndStrokePath(this)); operators.put("b*", new CloseFillEvenOddAndStrokePath(this)); operators.put("B*", new FillEvenOddAndStrokePath(this)); operators.put("n", new EndPath(this)); // Note: The graphic context // (org.pdfbox.pdmodel.graphics.PDGraphicsState) of the underlying // pdfbox library does // not yet support clipping. This prevents feasible usage of clipping // operators (W, W*). // operators.put("W", new ...(this)); // operators.put("W*", new ...(this)); } /** * Returns the path currently being constructed. * * @return The path currently being constructed. */ public GeneralPath getCurrentPath() { return currentPath; } /** * Sets the current path. * * @param currentPath * The new current path. */ public void setCurrentPath(GeneralPath currentPath) { this.currentPath = currentPath; } /** * Registers a rectangle that bounds the path currently being drawn. * * @param bounds * A rectangle depicting the bounds (coordinates originating from * bottom left). * @author Datentechnik Innovation GmbH */ public void registerPathBounds(Rectangle bounds) { if (!bounds.isEmpty()) { logger_.trace("Registering path bounds: " + bounds); // vertical start of rectangle (counting from top of page) float upperBoundYPositionFromTop; // vertical end of rectangle (counting from top of page) // this depicts the current end of path-related page content float lowerBoundYPositionFromTop; PDRectangle boundaryBox = page.findMediaBox(); float pageHeight; switch (page.findRotation()) { case 90: // CW pageHeight = boundaryBox.getWidth(); upperBoundYPositionFromTop = (float) bounds.getMinX(); lowerBoundYPositionFromTop = (float) bounds.getMaxX(); break; case 180: pageHeight = boundaryBox.getHeight(); upperBoundYPositionFromTop = (float) bounds.getMinY(); lowerBoundYPositionFromTop = (float) bounds.getMaxY(); break; case 270: // CCW pageHeight = boundaryBox.getWidth(); upperBoundYPositionFromTop = pageHeight - (float) bounds.getMaxX(); lowerBoundYPositionFromTop = pageHeight - (float) bounds.getMinX(); break; default: pageHeight = boundaryBox.getHeight(); upperBoundYPositionFromTop = pageHeight - (float) bounds.getMaxY(); lowerBoundYPositionFromTop = pageHeight - (float) bounds.getMinY(); break; } // new maximum ? if (lowerBoundYPositionFromTop > maxPathRelatedYPositionFromTop) { // Is the rectangle (at least partly) located above the footer // line? // (effective page height := page height - footer line) if (upperBoundYPositionFromTop <= effectivePageHeight) { // yes: update current end of path-related page content maxPathRelatedYPositionFromTop = lowerBoundYPositionFromTop; logger_.trace("New max path related y position (from top): " + maxPathRelatedYPositionFromTop); } else { // no: rectangle is fully located below the footer line -> // ignore logger_.trace("Ignoring path bound below the footer line."); } } } } protected void processOperator(PDFOperator operator, List arguments) throws IOException { // logger_.debug("operator = " + operator); super.processOperator(operator, arguments); } // exthex /** * A method provided as an event interface to allow a subclass to perform * some specific functionality when a character needs to be displayed. This * method is used to calculate the latest position of a text in the page. * Sorry for this missinterpretation of the method, but it is the only way * to do this (provided by PDFBox)!!! * * @param text * the character to be displayed -> calculate there y position. */ protected void showCharacter(TextPosition text) { float current_y = text.getY(); final String character = text.getCharacter(); int pageRotation = page.findRotation(); // logger_.debug("PageRotation = " + pageRotation); if (pageRotation == 0) { current_y = text.getY(); } if (pageRotation == 90) { current_y = text.getX(); } if (pageRotation == 180) { float page_height = page.findMediaBox().getHeight(); current_y = page_height - text.getY(); } if (pageRotation == 270) { float page_height = page.findMediaBox().getHeight(); current_y = page_height - text.getX(); } if (current_y > this.effectivePageHeight) { // logger_.debug("character is below footer_line. footer_line = " + // this.footer_line + ", text.character=" + character + ", y=" + // current_y); return; } // store ypos of the char if it is not empty if (!character.equals(" ") && current_y > this.max_character_ypos) { this.max_character_ypos = current_y; } } // use this funtion getting an unsorted text output // public void showString(byte[] string) { // logger_.debug(new String(string)); // } /** * Returns the calculated page length. * * @return the max page length value */ public float getMaxPageLength() { if (logger_.isDebugEnabled()) { logger_.debug("Determining page content length: text=" + max_character_ypos + ", image=" + max_image_ypos + ", path=" + maxPathRelatedYPositionFromTop); } return NumberUtils.max(max_character_ypos, max_image_ypos, maxPathRelatedYPositionFromTop); } public class MyInvoke extends OperatorProcessor { public void process(PDFOperator operator, List arguments) throws IOException { COSName name = (COSName) arguments.get(0); // PDResources res = context.getResources(); Map xobjects = context.getXObjects(); PDXObject xobject = (PDXObject) xobjects.get(name.getName()); PDStream stream = xobject.getPDStream(); COSStream cos_stream = stream.getStream(); COSName subtype = (COSName) cos_stream .getDictionaryObject(COSName.SUBTYPE); if (subtype.equals(COSName.IMAGE)) { logger_.debug("XObject Image"); Matrix ctm = context.getGraphicsState() .getCurrentTransformationMatrix(); logger_.debug("ctm = " + ctm); Pos[] coordinates = new Pos[] { new Pos(0, 0, 1), new Pos(1, 0, 1), new Pos(0, 1, 1), new Pos(1, 1, 1) }; Pos[] transformed_coordinates = transtormCoordinates( coordinates, ctm); /********************************************************** * pdf-as fix: calculating min and max point of an image to look * where the signature should be placed fix solves problems with * footer and images and placement of the signature in an image * only pdf document **********************************************************/ float actual_lowest_point = Float.NaN; float actual_starting_point = Float.NaN; int pageRotation = page.findRotation(); logger_.debug("PageRotation = " + pageRotation); if (pageRotation == 0) { float min_y = findMinY(transformed_coordinates); logger_.debug("min_y = " + min_y); float page_height = page.findMediaBox().getHeight(); logger_.debug("page_height = " + page_height); actual_lowest_point = page_height - min_y; actual_starting_point = page_height - findMaxY(transformed_coordinates); } if (pageRotation == 90) { float max_x = findMaxX(transformed_coordinates); logger_.debug("max_x = " + max_x); float page_width = page.findMediaBox().getWidth(); logger_.debug("page_width = " + page_width); actual_lowest_point = max_x; actual_starting_point = findMinX(transformed_coordinates); } if (pageRotation == 180) { float min_y = findMinY(transformed_coordinates); logger_.debug("min_y = " + min_y); float page_height = page.findMediaBox().getHeight(); actual_lowest_point = page_height - findMaxY(transformed_coordinates); actual_starting_point = page_height - min_y; } if (pageRotation == 270) { float min_x = findMinX(transformed_coordinates); logger_.debug("min_x = " + min_x); float page_width = page.findMediaBox().getWidth(); logger_.debug("page_width = " + page_width); actual_lowest_point = page_width - min_x; actual_starting_point = page_width - findMaxX(transformed_coordinates); } logger_.debug("actual_lowest_point = " + actual_lowest_point); if (actual_lowest_point > PDFPage.this.effectivePageHeight && actual_starting_point > PDFPage.this.effectivePageHeight) { logger_.debug("image is below footer_line"); return; } if (actual_lowest_point > PDFPage.this.max_image_ypos) { PDFPage.this.max_image_ypos = actual_lowest_point; } return; } if (xobject instanceof PDXObjectForm) { PDXObjectForm form = (PDXObjectForm) xobject; COSStream invoke = (COSStream) form.getCOSObject(); PDResources pdResources = form.getResources(); PDPage page = context.getCurrentPage(); if (pdResources == null) { pdResources = page.findResources(); } getContext().processSubStream(page, pdResources, invoke); } } } public static Pos[] transtormCoordinates(Pos[] coordinates, Matrix m) { Pos[] transformed = new Pos[coordinates.length]; for (int i = 0; i < coordinates.length; i++) { transformed[i] = transtormCoordinate(coordinates[i], m); } return transformed; } public static Pos transtormCoordinate(Pos pos, Matrix m) { Pos transformed = new Pos(); transformed.x = pos.x * m.getValue(0, 0) + pos.y * m.getValue(1, 0) + pos.z * m.getValue(2, 0); transformed.y = pos.x * m.getValue(0, 1) + pos.y * m.getValue(1, 1) + pos.z * m.getValue(2, 1); transformed.z = pos.x * m.getValue(0, 2) + pos.y * m.getValue(1, 2) + pos.z * m.getValue(2, 2); logger_.debug(" transformed " + pos + " --> " + transformed); return transformed; } public static float findMinY(Pos[] coordinates) { float min = Float.POSITIVE_INFINITY; for (int i = 0; i < coordinates.length; i++) { if (coordinates[i].y < min) { min = coordinates[i].y; } } return min; } public static float findMaxY(Pos[] coordinates) { float max = 0; for (int i = 0; i < coordinates.length; i++) { if (coordinates[i].y > max) { max = coordinates[i].y; } } return max; } public static float findMaxX(Pos[] coordinates) { float max = Float.NEGATIVE_INFINITY; for (int i = 0; i < coordinates.length; i++) { if (coordinates[i].x > max) { max = coordinates[i].x; } } return max; } public static float findMinX(Pos[] coordinates) { float min = Float.POSITIVE_INFINITY; for (int i = 0; i < coordinates.length; i++) { if (coordinates[i].x < min) { min = coordinates[i].x; } } return min; } }