/******************************************************************************* * Copyright 2014 by E-Government Innovation Center EGIZ, Graz, Austria * PDF-AS has been contracted by the E-Government Innovation Center EGIZ, a * joint initiative of the Federal Chancellery Austria and Graz University of * Technology. * * Licensed under the EUPL, Version 1.1 or - as soon they will be approved by * the European Commission - subsequent versions of the EUPL (the "Licence"); * You may not use this work except in compliance with the Licence. * You may obtain a copy of the Licence at: * http://www.osor.eu/eupl/ * * Unless required by applicable law or agreed to in writing, software * distributed under the Licence is distributed on an "AS IS" basis, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the Licence for the specific language governing permissions and * limitations under the Licence. * * This product combines work with different licenses. See the "NOTICE" text * file for details on the various modules and licenses. * The "NOTICE" text file is part of the distribution. Any derivative works * that you distribute must include a readable copy of the "NOTICE" text file. ******************************************************************************/ /** * Copyright 2006 by Know-Center, Graz, Austria * PDF-AS has been contracted by the E-Government Innovation Center EGIZ, a * joint initiative of the Federal Chancellery Austria and Graz University of * Technology. * * Licensed under the EUPL, Version 1.1 or - as soon they will be approved by * the European Commission - subsequent versions of the EUPL (the "Licence"); * You may not use this work except in compliance with the Licence. * You may obtain a copy of the Licence at: * http://www.osor.eu/eupl/ * * Unless required by applicable law or agreed to in writing, software * distributed under the Licence is distributed on an "AS IS" basis, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the Licence for the specific language governing permissions and * limitations under the Licence. * * This product combines work with different licenses. See the "NOTICE" text * file for details on the various modules and licenses. * The "NOTICE" text file is part of the distribution. Any derivative works * that you distribute must include a readable copy of the "NOTICE" text file. * * $Id: PDFPage.java,v 1.5 2006/10/31 08:09:33 wprinz Exp $ */ package at.knowcenter.wag.egov.egiz.pdf; import java.awt.Rectangle; import java.awt.geom.GeneralPath; import java.io.IOException; import java.util.List; import java.util.Map; import org.apache.commons.lang3.math.NumberUtils; import org.apache.pdfbox.cos.COSBase; import org.apache.pdfbox.cos.COSName; import org.apache.pdfbox.cos.COSStream; import org.apache.pdfbox.pdmodel.PDPage; import org.apache.pdfbox.pdmodel.PDResources; import org.apache.pdfbox.pdmodel.common.PDRectangle; import org.apache.pdfbox.pdmodel.common.PDStream; import org.apache.pdfbox.pdmodel.font.PDFont; import org.apache.pdfbox.pdmodel.graphics.xobject.PDXObject; import org.apache.pdfbox.pdmodel.graphics.xobject.PDXObjectForm; import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotation; import org.apache.pdfbox.util.Matrix; import org.apache.pdfbox.util.PDFOperator; import org.apache.pdfbox.util.PDFTextStripper; import org.apache.pdfbox.util.TextPosition; import org.apache.pdfbox.util.operator.OperatorProcessor; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import at.knowcenter.wag.egov.egiz.pdf.operator.path.construction.ClosePath; import at.knowcenter.wag.egov.egiz.pdf.operator.path.construction.CurveTo; import at.knowcenter.wag.egov.egiz.pdf.operator.path.construction.CurveToReplicateFinalPoint; import at.knowcenter.wag.egov.egiz.pdf.operator.path.construction.CurveToReplicateInitialPoint; import at.knowcenter.wag.egov.egiz.pdf.operator.path.construction.LineTo; import at.knowcenter.wag.egov.egiz.pdf.operator.path.construction.MoveTo; import at.knowcenter.wag.egov.egiz.pdf.operator.path.painting.CloseAndStrokePath; import at.knowcenter.wag.egov.egiz.pdf.operator.path.painting.CloseFillEvenOddAndStrokePath; import at.knowcenter.wag.egov.egiz.pdf.operator.path.painting.CloseFillNonZeroAndStrokePath; import at.knowcenter.wag.egov.egiz.pdf.operator.path.painting.EndPath; import at.knowcenter.wag.egov.egiz.pdf.operator.path.painting.FillEvenOddAndStrokePath; import at.knowcenter.wag.egov.egiz.pdf.operator.path.painting.FillNonZeroAndStrokePath; import at.knowcenter.wag.egov.egiz.pdf.operator.path.painting.FillPathEvenOddRule; import at.knowcenter.wag.egov.egiz.pdf.operator.path.painting.FillPathNonZeroWindingNumberRule; import at.knowcenter.wag.egov.egiz.pdf.operator.path.painting.StrokePath; /** * PDFPage is an inner class that is used to calculate the page length of a PDF * Document page. It extends the PDFTextStripper class and implement one * interested method: * {@link at.knowcenter.wag.egov.egiz.pdf.PDFPage#showCharacter(TextPosition)}
* This method is called when processing the FileStream. By calling the method * {@link org.apache.pdfbox.util.PDFStreamEngine#processStream(org.apache.pdfbox.pdmodel.PDPage, org.apache.pdfbox.pdmodel.PDResources, org.pdfbox.cos.COSStream)} * the implemented method showCharacter is called. * * @author wlackner * @see PDFTextStripper */ public class PDFPage extends PDFTextStripper { /** * The logger definition. */ private static final Logger logger = LoggerFactory.getLogger(PDFPage.class); /** * The maximum (lowest) y position of a character. */ protected float max_character_ypos = Float.NEGATIVE_INFINITY; /** * The maximum (lowest y position of an image. */ protected float max_image_ypos = Float.NEGATIVE_INFINITY; /** * The effective page height. */ protected float effectivePageHeight; /** * The path currently being constructed. */ private GeneralPath currentPath = new GeneralPath(); private boolean legacy40; /** * The lowest position of a drawn path (originating from top). */ private float maxPathRelatedYPositionFromTop = Float.NEGATIVE_INFINITY; /** * Constructor. * * @param effectivePageHeight * The height of the page to be evaluated. PDF elements outside * this height will not be considered. * * @throws java.io.IOException */ public PDFPage(float effectivePageHeight, boolean legacy32, boolean legacy40) throws IOException { super(); this.legacy40 = legacy40; this.effectivePageHeight = effectivePageHeight; OperatorProcessor newInvoke = new MyInvoke(this); newInvoke.setContext(this); this.registerOperatorProcessor("Do", newInvoke); if (!legacy32) { registerCustomPathOperators(); } } /** * Registers operators responsible for path construction and painting in * order to fix auto positioning on pages with path elements. * * @author Datentechnik Innovation GmbH */ private void registerCustomPathOperators() { // *** path construction this.registerOperatorProcessor("m", new MoveTo(this)); this.registerOperatorProcessor("l", new LineTo(this)); this.registerOperatorProcessor("c", new CurveTo(this)); this.registerOperatorProcessor("y", new CurveToReplicateFinalPoint(this)); this.registerOperatorProcessor("v", new CurveToReplicateInitialPoint( this)); this.registerOperatorProcessor("h", new ClosePath(this)); // *** path painting // "S": stroke path this.registerOperatorProcessor("S", new StrokePath(this)); this.registerOperatorProcessor("s", new CloseAndStrokePath(this)); this.registerOperatorProcessor("f", new FillPathNonZeroWindingNumberRule(this)); this.registerOperatorProcessor("F", new FillPathNonZeroWindingNumberRule(this)); this.registerOperatorProcessor("f*", new FillPathEvenOddRule(this)); this.registerOperatorProcessor("b", new CloseFillNonZeroAndStrokePath( this)); this.registerOperatorProcessor("B", new FillNonZeroAndStrokePath(this)); this.registerOperatorProcessor("b*", new CloseFillEvenOddAndStrokePath( this)); this.registerOperatorProcessor("B*", new FillEvenOddAndStrokePath(this)); this.registerOperatorProcessor("n", new EndPath(this)); // Note: The graphic context // (org.pdfbox.pdmodel.graphics.PDGraphicsState) of the underlying // pdfbox library does // not yet support clipping. This prevents feasible usage of clipping // operators (W, W*). // operators.put("W", new ...(this)); // operators.put("W*", new ...(this)); } /** * Returns the path currently being constructed. * * @return The path currently being constructed. */ public GeneralPath getCurrentPath() { return currentPath; } /** * Sets the current path. * * @param currentPath * The new current path. */ public void setCurrentPath(GeneralPath currentPath) { this.currentPath = currentPath; } /** * Registers a rectangle that bounds the path currently being drawn. * * @param bounds * A rectangle depicting the bounds (coordinates originating from * bottom left). * @author Datentechnik Innovation GmbH */ public void registerPathBounds(Rectangle bounds) { if (!bounds.isEmpty()) { logger.debug("Registering path bounds: " + bounds); // vertical start of rectangle (counting from top of page) float upperBoundYPositionFromTop; // vertical end of rectangle (counting from top of page) // this depicts the current end of path-related page content float lowerBoundYPositionFromTop; PDRectangle boundaryBox = this.getCurrentPage().findCropBox(); if (boundaryBox == null) { boundaryBox = this.getCurrentPage().findMediaBox(); } float pageHeight; switch (this.getCurrentPage().findRotation()) { case 90: // CW pageHeight = boundaryBox.getWidth(); upperBoundYPositionFromTop = (float) bounds.getMinX(); lowerBoundYPositionFromTop = (float) bounds.getMaxX(); break; case 180: pageHeight = boundaryBox.getHeight(); upperBoundYPositionFromTop = (float) bounds.getMinY(); lowerBoundYPositionFromTop = (float) bounds.getMaxY(); break; case 270: // CCW pageHeight = boundaryBox.getWidth(); upperBoundYPositionFromTop = pageHeight - (float) bounds.getMaxX(); lowerBoundYPositionFromTop = pageHeight - (float) bounds.getMinX(); break; default: pageHeight = boundaryBox.getHeight(); upperBoundYPositionFromTop = pageHeight - (float) bounds.getMaxY(); lowerBoundYPositionFromTop = pageHeight - (float) bounds.getMinY(); break; } // new maximum ? if (lowerBoundYPositionFromTop > maxPathRelatedYPositionFromTop) { // Is the rectangle (at least partly) located above the footer // line? // (effective page height := page height - footer line) if (upperBoundYPositionFromTop <= effectivePageHeight) { // yes: update current end of path-related page content maxPathRelatedYPositionFromTop = lowerBoundYPositionFromTop; logger.trace("New max path related y position (from top): " + maxPathRelatedYPositionFromTop); } else { // no: rectangle is fully located below the footer line -> // ignore logger.trace("Ignoring path bound below the footer line."); } } } } protected void processOperator(PDFOperator operator, List arguments) throws IOException { logger.trace("operator = " + operator); super.processOperator(operator, arguments); } @Override protected void processTextPosition(TextPosition text) { showCharacter(text); } // exthex /** * A method provided as an event interface to allow a subclass to perform * some specific functionality when a character needs to be displayed. This * method is used to calculate the latest position of a text in the page. * Sorry for this missinterpretation of the method, but it is the only way * to do this (provided by PDFBox)!!! * * @param text * the character to be displayed -> calculate there y position. */ protected void showCharacter(TextPosition text) { float current_y = text.getY(); final String character = text.getCharacter(); if (at.gv.egiz.pdfas.common.utils.StringUtils.whiteSpaceTrim(character) .isEmpty()) { return; } int pageRotation = this.getCurrentPage().findRotation(); // logger_.debug("PageRotation = " + pageRotation); /*if (pageRotation == 0) { current_y = text.getY(); } if (pageRotation == 90) { current_y = text.getY(); } if (pageRotation == 180) { current_y = text.getY(); } if (pageRotation == 270) { current_y = text.getY(); } if (current_y > this.effectivePageHeight) { this.max_character_ypos = this.effectivePageHeight; return; } // store ypos of the char if it is not empty if (current_y > this.max_character_ypos) { this.max_character_ypos = current_y; }*/ if (pageRotation == 0) { current_y = text.getY(); } if (pageRotation == 90) { current_y = text.getX(); } if (pageRotation == 180) { float page_height = this.getCurrentPage().findMediaBox().getHeight(); current_y = page_height - text.getY(); } if (pageRotation == 270) { float page_height = this.getCurrentPage().findMediaBox().getHeight(); current_y = page_height - text.getX(); } if (current_y > this.effectivePageHeight) { // logger_.debug("character is below footer_line. footer_line = " + // this.footer_line + ", text.character=" + character + ", y=" + // current_y); return; } // store ypos of the char if it is not empty if (current_y > this.max_character_ypos) { this.max_character_ypos = current_y; } } // use this funtion getting an unsorted text output // public void showString(byte[] string) { // logger_.debug(new String(string)); // } /** * Returns the calculated page length. * * @return the max page length value */ public float getMaxPageLength() { if (logger.isDebugEnabled()) { logger.debug("Determining page content length: text=" + max_character_ypos + ", image=" + max_image_ypos + ", path=" + maxPathRelatedYPositionFromTop); } return NumberUtils.max(max_character_ypos, max_image_ypos, maxPathRelatedYPositionFromTop); } @Override public Map getFonts() { COSBase fontObj = null; if (getCurrentPage().getResources() != null && getCurrentPage().getResources().getCOSDictionary() != null && getCurrentPage().getResources().getCOSDictionary() .getDictionaryObject(COSName.FONT) != null) { fontObj = getCurrentPage().getResources().getCOSDictionary() .getDictionaryObject(COSName.FONT); } Map fontMap = getCurrentPage().findResources() .getFonts(); if (fontObj != null) { getCurrentPage().getResources().getCOSDictionary() .setItem(COSName.FONT, fontObj); } return fontMap; } public class MyInvoke extends OperatorProcessor { private PDFPage mypage; public MyInvoke(PDFPage page) { this.mypage = page; } public void process(PDFOperator operator, List arguments) throws IOException { COSName name = (COSName) arguments.get(0); // PDResources res = context.getResources(); Map xobjects = context.getXObjects(); PDXObject xobject = xobjects.get(name.getName()); PDStream stream = xobject.getPDStream(); COSStream cos_stream = stream.getStream(); COSName subtype = (COSName) cos_stream .getDictionaryObject(COSName.SUBTYPE); if (subtype.equals(COSName.IMAGE)) { logger.debug("XObject Image"); Matrix ctm = context.getGraphicsState() .getCurrentTransformationMatrix(); logger.debug("ctm = " + ctm); Pos[] coordinates = new Pos[] { new Pos(0, 0, 1), new Pos(1, 0, 1), new Pos(0, 1, 1), new Pos(1, 1, 1) }; Pos[] transformed_coordinates = transtormCoordinates( coordinates, ctm); /********************************************************** * pdf-as fix: calculating min and max point of an image to look * where the signature should be placed fix solves problems with * footer and images and placement of the signature in an image * only pdf document **********************************************************/ float actual_lowest_point = Float.NaN; float actual_starting_point = Float.NaN; int pageRotation = this.mypage.getCurrentPage().findRotation(); logger.debug("PageRotation = " + pageRotation); if (pageRotation == 0) { float min_y = findMinY(transformed_coordinates); logger.debug("min_y = " + min_y); float page_height = this.mypage.getCurrentPage() .findMediaBox().getHeight(); logger.debug("page_height = " + page_height); actual_lowest_point = page_height - min_y; actual_starting_point = page_height - findMaxY(transformed_coordinates); } if (pageRotation == 90) { float max_x = findMaxX(transformed_coordinates); logger.debug("max_x = " + max_x); float page_width = this.mypage.getCurrentPage() .findMediaBox().getWidth(); logger.debug("page_width = " + page_width); actual_lowest_point = max_x; actual_starting_point = findMinX(transformed_coordinates); } if (pageRotation == 180) { float min_y = findMinY(transformed_coordinates); logger.debug("min_y = " + min_y); actual_lowest_point = findMaxY(transformed_coordinates); actual_starting_point = actual_lowest_point + min_y; } if (pageRotation == 270) { float min_x = findMinX(transformed_coordinates); logger.debug("min_x = " + min_x); float page_width = this.mypage.getCurrentPage() .findMediaBox().getWidth(); logger.debug("page_width = " + page_width); actual_lowest_point = page_width - min_x; actual_starting_point = page_width - findMaxX(transformed_coordinates); } logger.debug("actual_lowest_point = " + actual_lowest_point); if (actual_lowest_point > PDFPage.this.effectivePageHeight && actual_starting_point > PDFPage.this.effectivePageHeight) { logger.debug("image is below footer_line"); return; } if (actual_lowest_point > PDFPage.this.max_image_ypos) { PDFPage.this.max_image_ypos = actual_lowest_point; } return; } if (xobject instanceof PDXObjectForm) { PDXObjectForm form = (PDXObjectForm) xobject; COSStream invoke = (COSStream) form.getCOSObject(); PDResources pdResources = form.getResources(); PDPage page = context.getCurrentPage(); if (pdResources == null) { pdResources = page.findResources(); } getContext().processSubStream(page, pdResources, invoke); } } } public static Pos[] transtormCoordinates(Pos[] coordinates, Matrix m) { Pos[] transformed = new Pos[coordinates.length]; for (int i = 0; i < coordinates.length; i++) { transformed[i] = transtormCoordinate(coordinates[i], m); } return transformed; } public static Pos transtormCoordinate(Pos pos, Matrix m) { Pos transformed = new Pos(); transformed.x = pos.x * m.getValue(0, 0) + pos.y * m.getValue(1, 0) + pos.z * m.getValue(2, 0); transformed.y = pos.x * m.getValue(0, 1) + pos.y * m.getValue(1, 1) + pos.z * m.getValue(2, 1); transformed.z = pos.x * m.getValue(0, 2) + pos.y * m.getValue(1, 2) + pos.z * m.getValue(2, 2); logger.debug(" transformed " + pos + " --> " + transformed); return transformed; } public static float findMinY(Pos[] coordinates) { float min = Float.POSITIVE_INFINITY; for (int i = 0; i < coordinates.length; i++) { if (coordinates[i].y < min) { min = coordinates[i].y; } } return min; } public static float findMaxY(Pos[] coordinates) { float max = 0; for (int i = 0; i < coordinates.length; i++) { if (coordinates[i].y > max) { max = coordinates[i].y; } } return max; } public static float findMaxX(Pos[] coordinates) { float max = Float.NEGATIVE_INFINITY; for (int i = 0; i < coordinates.length; i++) { if (coordinates[i].x > max) { max = coordinates[i].x; } } return max; } public static float findMinX(Pos[] coordinates) { float min = Float.POSITIVE_INFINITY; for (int i = 0; i < coordinates.length; i++) { if (coordinates[i].x < min) { min = coordinates[i].x; } } return min; } public void processAnnotation(PDAnnotation anno) { float current_y = anno.getRectangle().getLowerLeftY(); float upper_y = 0; PDPage page = anno.getPage(); if (page == null) { page = getCurrentPage(); } if (page == null) { logger.warn("Annotation without page! The position might not be correct!"); return; } int pageRotation = page.findRotation(); // logger_.debug("PageRotation = " + pageRotation); if (pageRotation == 0) { float page_height = page.findMediaBox().getHeight(); current_y = page_height - anno.getRectangle().getLowerLeftY(); upper_y = page_height - anno.getRectangle().getUpperRightY(); } if (pageRotation == 90) { current_y = anno.getRectangle().getUpperRightX(); upper_y = anno.getRectangle().getLowerLeftX(); } if (pageRotation == 180) { current_y = anno.getRectangle().getUpperRightY(); upper_y = anno.getRectangle().getLowerLeftY(); } if (pageRotation == 270) { float page_width = page.findMediaBox().getWidth(); current_y = page_width - anno.getRectangle().getLowerLeftX(); upper_y = page_width - anno.getRectangle().getUpperRightX(); } if (current_y > this.effectivePageHeight) { if(!this.legacy40 && upper_y < this.effectivePageHeight) { // Bottom of annotation is below footer line, // but top of annotation is above footer line! // so no place left on this page! this.max_character_ypos = this.effectivePageHeight; } return; } // store ypos of the char if it is not empty if (current_y > this.max_character_ypos) { this.max_character_ypos = current_y; } } }