From 8ed8e4e931c1cb3d3c814a53a07f73b566c3b719 Mon Sep 17 00:00:00 2001 From: Alexander Marsalek Date: Fri, 2 Jul 2021 14:06:44 +0200 Subject: v4.2.0 --- .../at/knowcenter/wag/egov/egiz/pdf/PDFPage.java | 661 --------------------- 1 file changed, 661 deletions(-) delete mode 100644 pdf-as-pdfbox/src/main/java/at/knowcenter/wag/egov/egiz/pdf/PDFPage.java (limited to 'pdf-as-pdfbox/src/main/java/at/knowcenter/wag/egov/egiz/pdf/PDFPage.java') diff --git a/pdf-as-pdfbox/src/main/java/at/knowcenter/wag/egov/egiz/pdf/PDFPage.java b/pdf-as-pdfbox/src/main/java/at/knowcenter/wag/egov/egiz/pdf/PDFPage.java deleted file mode 100644 index d02f23b0..00000000 --- a/pdf-as-pdfbox/src/main/java/at/knowcenter/wag/egov/egiz/pdf/PDFPage.java +++ /dev/null @@ -1,661 +0,0 @@ -/******************************************************************************* - * Copyright 2014 by E-Government Innovation Center EGIZ, Graz, Austria - * PDF-AS has been contracted by the E-Government Innovation Center EGIZ, a - * joint initiative of the Federal Chancellery Austria and Graz University of - * Technology. - * - * Licensed under the EUPL, Version 1.1 or - as soon they will be approved by - * the European Commission - subsequent versions of the EUPL (the "Licence"); - * You may not use this work except in compliance with the Licence. - * You may obtain a copy of the Licence at: - * http://www.osor.eu/eupl/ - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the Licence is distributed on an "AS IS" basis, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the Licence for the specific language governing permissions and - * limitations under the Licence. - * - * This product combines work with different licenses. See the "NOTICE" text - * file for details on the various modules and licenses. - * The "NOTICE" text file is part of the distribution. Any derivative works - * that you distribute must include a readable copy of the "NOTICE" text file. - ******************************************************************************/ -/** - * Copyright 2006 by Know-Center, Graz, Austria - * PDF-AS has been contracted by the E-Government Innovation Center EGIZ, a - * joint initiative of the Federal Chancellery Austria and Graz University of - * Technology. - * - * Licensed under the EUPL, Version 1.1 or - as soon they will be approved by - * the European Commission - subsequent versions of the EUPL (the "Licence"); - * You may not use this work except in compliance with the Licence. - * You may obtain a copy of the Licence at: - * http://www.osor.eu/eupl/ - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the Licence is distributed on an "AS IS" basis, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the Licence for the specific language governing permissions and - * limitations under the Licence. - * - * This product combines work with different licenses. See the "NOTICE" text - * file for details on the various modules and licenses. - * The "NOTICE" text file is part of the distribution. Any derivative works - * that you distribute must include a readable copy of the "NOTICE" text file. - * - * $Id: PDFPage.java,v 1.5 2006/10/31 08:09:33 wprinz Exp $ - */ -package at.knowcenter.wag.egov.egiz.pdf; - -import java.awt.Rectangle; -import java.awt.geom.GeneralPath; -import java.io.IOException; -import java.util.List; -import java.util.Map; - -import org.apache.commons.lang3.math.NumberUtils; -import org.apache.pdfbox.cos.COSBase; -import org.apache.pdfbox.cos.COSName; -import org.apache.pdfbox.cos.COSStream; -import org.apache.pdfbox.pdmodel.PDPage; -import org.apache.pdfbox.pdmodel.PDResources; -import org.apache.pdfbox.pdmodel.common.PDRectangle; -import org.apache.pdfbox.pdmodel.common.PDStream; -import org.apache.pdfbox.pdmodel.font.PDFont; -import org.apache.pdfbox.pdmodel.graphics.xobject.PDXObject; -import org.apache.pdfbox.pdmodel.graphics.xobject.PDXObjectForm; -import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotation; -import org.apache.pdfbox.util.Matrix; -import org.apache.pdfbox.util.PDFOperator; -import org.apache.pdfbox.util.PDFTextStripper; -import org.apache.pdfbox.util.TextPosition; -import org.apache.pdfbox.util.operator.OperatorProcessor; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import at.knowcenter.wag.egov.egiz.pdf.operator.path.construction.ClosePath; -import at.knowcenter.wag.egov.egiz.pdf.operator.path.construction.CurveTo; -import at.knowcenter.wag.egov.egiz.pdf.operator.path.construction.CurveToReplicateFinalPoint; -import at.knowcenter.wag.egov.egiz.pdf.operator.path.construction.CurveToReplicateInitialPoint; -import at.knowcenter.wag.egov.egiz.pdf.operator.path.construction.LineTo; -import at.knowcenter.wag.egov.egiz.pdf.operator.path.construction.MoveTo; -import at.knowcenter.wag.egov.egiz.pdf.operator.path.painting.CloseAndStrokePath; -import at.knowcenter.wag.egov.egiz.pdf.operator.path.painting.CloseFillEvenOddAndStrokePath; -import at.knowcenter.wag.egov.egiz.pdf.operator.path.painting.CloseFillNonZeroAndStrokePath; -import at.knowcenter.wag.egov.egiz.pdf.operator.path.painting.EndPath; -import at.knowcenter.wag.egov.egiz.pdf.operator.path.painting.FillEvenOddAndStrokePath; -import at.knowcenter.wag.egov.egiz.pdf.operator.path.painting.FillNonZeroAndStrokePath; -import at.knowcenter.wag.egov.egiz.pdf.operator.path.painting.FillPathEvenOddRule; -import at.knowcenter.wag.egov.egiz.pdf.operator.path.painting.FillPathNonZeroWindingNumberRule; -import at.knowcenter.wag.egov.egiz.pdf.operator.path.painting.StrokePath; - -/** - * PDFPage is an inner class that is used to calculate the page length of a PDF - * Document page. It extends the PDFTextStripper class and implement one - * interested method: - * {@link at.knowcenter.wag.egov.egiz.pdf.PDFPage#showCharacter(TextPosition)}
- * This method is called when processing the FileStream. By calling the method - * {@link org.apache.pdfbox.util.PDFStreamEngine#processStream(org.apache.pdfbox.pdmodel.PDPage, org.apache.pdfbox.pdmodel.PDResources, org.pdfbox.cos.COSStream)} - * the implemented method showCharacter is called. - * - * @author wlackner - * @see PDFTextStripper - */ -public class PDFPage extends PDFTextStripper { - /** - * The logger definition. - */ - private static final Logger logger = LoggerFactory.getLogger(PDFPage.class); - - /** - * The maximum (lowest) y position of a character. - */ - protected float max_character_ypos = Float.NEGATIVE_INFINITY; - - /** - * The maximum (lowest y position of an image. - */ - protected float max_image_ypos = Float.NEGATIVE_INFINITY; - - /** - * The effective page height. - */ - protected float effectivePageHeight; - - /** - * The path currently being constructed. - */ - private GeneralPath currentPath = new GeneralPath(); - - private boolean legacy40; - - /** - * The lowest position of a drawn path (originating from top). - */ - private float maxPathRelatedYPositionFromTop = Float.NEGATIVE_INFINITY; - - /** - * Constructor. - * - * @param effectivePageHeight - * The height of the page to be evaluated. PDF elements outside - * this height will not be considered. - * - * @throws java.io.IOException - */ - public PDFPage(float effectivePageHeight, boolean legacy32, boolean legacy40) - throws IOException { - super(); - - this.legacy40 = legacy40; - - this.effectivePageHeight = effectivePageHeight; - - OperatorProcessor newInvoke = new MyInvoke(this); - newInvoke.setContext(this); - this.registerOperatorProcessor("Do", newInvoke); - - if (!legacy32) { - registerCustomPathOperators(); - } - } - - /** - * Registers operators responsible for path construction and painting in - * order to fix auto positioning on pages with path elements. - * - * @author Datentechnik Innovation GmbH - */ - private void registerCustomPathOperators() { - - // *** path construction - - this.registerOperatorProcessor("m", new MoveTo(this)); - this.registerOperatorProcessor("l", new LineTo(this)); - this.registerOperatorProcessor("c", new CurveTo(this)); - this.registerOperatorProcessor("y", - new CurveToReplicateFinalPoint(this)); - this.registerOperatorProcessor("v", new CurveToReplicateInitialPoint( - this)); - this.registerOperatorProcessor("h", new ClosePath(this)); - - // *** path painting - - // "S": stroke path - this.registerOperatorProcessor("S", new StrokePath(this)); - this.registerOperatorProcessor("s", new CloseAndStrokePath(this)); - this.registerOperatorProcessor("f", - new FillPathNonZeroWindingNumberRule(this)); - this.registerOperatorProcessor("F", - new FillPathNonZeroWindingNumberRule(this)); - this.registerOperatorProcessor("f*", new FillPathEvenOddRule(this)); - this.registerOperatorProcessor("b", new CloseFillNonZeroAndStrokePath( - this)); - this.registerOperatorProcessor("B", new FillNonZeroAndStrokePath(this)); - this.registerOperatorProcessor("b*", new CloseFillEvenOddAndStrokePath( - this)); - this.registerOperatorProcessor("B*", new FillEvenOddAndStrokePath(this)); - this.registerOperatorProcessor("n", new EndPath(this)); - - // Note: The graphic context - // (org.pdfbox.pdmodel.graphics.PDGraphicsState) of the underlying - // pdfbox library does - // not yet support clipping. This prevents feasible usage of clipping - // operators (W, W*). - // operators.put("W", new ...(this)); - // operators.put("W*", new ...(this)); - - } - - /** - * Returns the path currently being constructed. - * - * @return The path currently being constructed. - */ - public GeneralPath getCurrentPath() { - return currentPath; - } - - /** - * Sets the current path. - * - * @param currentPath - * The new current path. - */ - public void setCurrentPath(GeneralPath currentPath) { - this.currentPath = currentPath; - } - - /** - * Registers a rectangle that bounds the path currently being drawn. - * - * @param bounds - * A rectangle depicting the bounds (coordinates originating from - * bottom left). - * @author Datentechnik Innovation GmbH - */ - public void registerPathBounds(Rectangle bounds) { - if (!bounds.isEmpty()) { - logger.debug("Registering path bounds: " + bounds); - - // vertical start of rectangle (counting from top of page) - float upperBoundYPositionFromTop; - - // vertical end of rectangle (counting from top of page) - // this depicts the current end of path-related page content - float lowerBoundYPositionFromTop; - - PDRectangle boundaryBox = this.getCurrentPage().findCropBox(); - - if (boundaryBox == null) { - boundaryBox = this.getCurrentPage().findMediaBox(); - } - - float pageHeight; - - switch (this.getCurrentPage().findRotation()) { - case 90: // CW - pageHeight = boundaryBox.getWidth(); - upperBoundYPositionFromTop = (float) bounds.getMinX(); - lowerBoundYPositionFromTop = (float) bounds.getMaxX(); - break; - case 180: - pageHeight = boundaryBox.getHeight(); - upperBoundYPositionFromTop = (float) bounds.getMinY(); - lowerBoundYPositionFromTop = (float) bounds.getMaxY(); - break; - case 270: // CCW - pageHeight = boundaryBox.getWidth(); - upperBoundYPositionFromTop = pageHeight - - (float) bounds.getMaxX(); - lowerBoundYPositionFromTop = pageHeight - - (float) bounds.getMinX(); - break; - default: - pageHeight = boundaryBox.getHeight(); - upperBoundYPositionFromTop = pageHeight - - (float) bounds.getMaxY(); - lowerBoundYPositionFromTop = pageHeight - - (float) bounds.getMinY(); - break; - } - - // new maximum ? - if (lowerBoundYPositionFromTop > maxPathRelatedYPositionFromTop) { - // Is the rectangle (at least partly) located above the footer - // line? - // (effective page height := page height - footer line) - if (upperBoundYPositionFromTop <= effectivePageHeight) { - // yes: update current end of path-related page content - maxPathRelatedYPositionFromTop = lowerBoundYPositionFromTop; - logger.trace("New max path related y position (from top): " - + maxPathRelatedYPositionFromTop); - } else { - // no: rectangle is fully located below the footer line -> - // ignore - logger.trace("Ignoring path bound below the footer line."); - } - } - } - } - - protected void processOperator(PDFOperator operator, List arguments) - throws IOException { - logger.trace("operator = " + operator); - super.processOperator(operator, arguments); - } - - @Override - protected void processTextPosition(TextPosition text) { - showCharacter(text); - } - - // exthex - /** - * A method provided as an event interface to allow a subclass to perform - * some specific functionality when a character needs to be displayed. This - * method is used to calculate the latest position of a text in the page. - * Sorry for this missinterpretation of the method, but it is the only way - * to do this (provided by PDFBox)!!! - * - * @param text - * the character to be displayed -> calculate there y position. - */ - protected void showCharacter(TextPosition text) { - float current_y = text.getY(); - final String character = text.getCharacter(); - - if (at.gv.egiz.pdfas.common.utils.StringUtils.whiteSpaceTrim(character) - .isEmpty()) { - return; - } - - int pageRotation = this.getCurrentPage().findRotation(); - // logger_.debug("PageRotation = " + pageRotation); - /*if (pageRotation == 0) { - current_y = text.getY(); - } - if (pageRotation == 90) { - current_y = text.getY(); - } - if (pageRotation == 180) { - current_y = text.getY(); - } - if (pageRotation == 270) { - current_y = text.getY(); - } - - if (current_y > this.effectivePageHeight) { - this.max_character_ypos = this.effectivePageHeight; - return; - } - - // store ypos of the char if it is not empty - if (current_y > this.max_character_ypos) { - this.max_character_ypos = current_y; - }*/ - - if (pageRotation == 0) { - current_y = text.getY(); - } - if (pageRotation == 90) { - current_y = text.getX(); - } - if (pageRotation == 180) { - float page_height = this.getCurrentPage().findMediaBox().getHeight(); - current_y = page_height - text.getY(); - } - if (pageRotation == 270) { - float page_height = this.getCurrentPage().findMediaBox().getHeight(); - current_y = page_height - text.getX(); - } - - if (current_y > this.effectivePageHeight) { - // logger_.debug("character is below footer_line. footer_line = " + - // this.footer_line + ", text.character=" + character + ", y=" + - // current_y); - return; - } - - // store ypos of the char if it is not empty - if (current_y > this.max_character_ypos) { - this.max_character_ypos = current_y; - } - } - - // use this funtion getting an unsorted text output - // public void showString(byte[] string) { - // logger_.debug(new String(string)); - // } - - /** - * Returns the calculated page length. - * - * @return the max page length value - */ - public float getMaxPageLength() { - if (logger.isDebugEnabled()) { - logger.debug("Determining page content length: text=" - + max_character_ypos + ", image=" + max_image_ypos - + ", path=" + maxPathRelatedYPositionFromTop); - } - return NumberUtils.max(max_character_ypos, max_image_ypos, - maxPathRelatedYPositionFromTop); - } - - @Override - public Map getFonts() { - - COSBase fontObj = null; - - if (getCurrentPage().getResources() != null - && getCurrentPage().getResources().getCOSDictionary() != null - && getCurrentPage().getResources().getCOSDictionary() - .getDictionaryObject(COSName.FONT) != null) { - fontObj = getCurrentPage().getResources().getCOSDictionary() - .getDictionaryObject(COSName.FONT); - } - - Map fontMap = getCurrentPage().findResources() - .getFonts(); - - if (fontObj != null) { - getCurrentPage().getResources().getCOSDictionary() - .setItem(COSName.FONT, fontObj); - } - - return fontMap; - } - - public class MyInvoke extends OperatorProcessor { - - private PDFPage mypage; - - public MyInvoke(PDFPage page) { - this.mypage = page; - } - - public void process(PDFOperator operator, List arguments) - throws IOException { - COSName name = (COSName) arguments.get(0); - - // PDResources res = context.getResources(); - - Map xobjects = context.getXObjects(); - PDXObject xobject = xobjects.get(name.getName()); - - PDStream stream = xobject.getPDStream(); - COSStream cos_stream = stream.getStream(); - - COSName subtype = (COSName) cos_stream - .getDictionaryObject(COSName.SUBTYPE); - if (subtype.equals(COSName.IMAGE)) { - logger.debug("XObject Image"); - - Matrix ctm = context.getGraphicsState() - .getCurrentTransformationMatrix(); - logger.debug("ctm = " + ctm); - - Pos[] coordinates = new Pos[] { new Pos(0, 0, 1), - new Pos(1, 0, 1), new Pos(0, 1, 1), new Pos(1, 1, 1) }; - - Pos[] transformed_coordinates = transtormCoordinates( - coordinates, ctm); - - /********************************************************** - * pdf-as fix: calculating min and max point of an image to look - * where the signature should be placed fix solves problems with - * footer and images and placement of the signature in an image - * only pdf document - **********************************************************/ - - float actual_lowest_point = Float.NaN; - float actual_starting_point = Float.NaN; - - int pageRotation = this.mypage.getCurrentPage().findRotation(); - logger.debug("PageRotation = " + pageRotation); - if (pageRotation == 0) { - float min_y = findMinY(transformed_coordinates); - logger.debug("min_y = " + min_y); - float page_height = this.mypage.getCurrentPage() - .findMediaBox().getHeight(); - logger.debug("page_height = " + page_height); - - actual_lowest_point = page_height - min_y; - actual_starting_point = page_height - - findMaxY(transformed_coordinates); - } - if (pageRotation == 90) { - float max_x = findMaxX(transformed_coordinates); - logger.debug("max_x = " + max_x); - float page_width = this.mypage.getCurrentPage() - .findMediaBox().getWidth(); - logger.debug("page_width = " + page_width); - - actual_lowest_point = max_x; - actual_starting_point = findMinX(transformed_coordinates); - } - if (pageRotation == 180) { - float min_y = findMinY(transformed_coordinates); - logger.debug("min_y = " + min_y); - actual_lowest_point = findMaxY(transformed_coordinates); - actual_starting_point = actual_lowest_point + min_y; - } - if (pageRotation == 270) { - float min_x = findMinX(transformed_coordinates); - logger.debug("min_x = " + min_x); - - float page_width = this.mypage.getCurrentPage() - .findMediaBox().getWidth(); - logger.debug("page_width = " + page_width); - - actual_lowest_point = page_width - min_x; - actual_starting_point = page_width - - findMaxX(transformed_coordinates); - } - - logger.debug("actual_lowest_point = " + actual_lowest_point); - - if (actual_lowest_point > PDFPage.this.effectivePageHeight - && actual_starting_point > PDFPage.this.effectivePageHeight) { - logger.debug("image is below footer_line"); - return; - } - - if (actual_lowest_point > PDFPage.this.max_image_ypos) { - PDFPage.this.max_image_ypos = actual_lowest_point; - } - - return; - } - - if (xobject instanceof PDXObjectForm) { - PDXObjectForm form = (PDXObjectForm) xobject; - COSStream invoke = (COSStream) form.getCOSObject(); - PDResources pdResources = form.getResources(); - PDPage page = context.getCurrentPage(); - if (pdResources == null) { - pdResources = page.findResources(); - } - - getContext().processSubStream(page, pdResources, invoke); - } - } - } - - public static Pos[] transtormCoordinates(Pos[] coordinates, Matrix m) { - Pos[] transformed = new Pos[coordinates.length]; - for (int i = 0; i < coordinates.length; i++) { - transformed[i] = transtormCoordinate(coordinates[i], m); - } - return transformed; - } - - public static Pos transtormCoordinate(Pos pos, Matrix m) { - Pos transformed = new Pos(); - transformed.x = pos.x * m.getValue(0, 0) + pos.y * m.getValue(1, 0) - + pos.z * m.getValue(2, 0); - transformed.y = pos.x * m.getValue(0, 1) + pos.y * m.getValue(1, 1) - + pos.z * m.getValue(2, 1); - transformed.z = pos.x * m.getValue(0, 2) + pos.y * m.getValue(1, 2) - + pos.z * m.getValue(2, 2); - - logger.debug(" transformed " + pos + " --> " + transformed); - return transformed; - } - - public static float findMinY(Pos[] coordinates) { - float min = Float.POSITIVE_INFINITY; - for (int i = 0; i < coordinates.length; i++) { - if (coordinates[i].y < min) { - min = coordinates[i].y; - } - } - return min; - } - - public static float findMaxY(Pos[] coordinates) { - float max = 0; - for (int i = 0; i < coordinates.length; i++) { - if (coordinates[i].y > max) { - max = coordinates[i].y; - } - } - return max; - } - - public static float findMaxX(Pos[] coordinates) { - float max = Float.NEGATIVE_INFINITY; - for (int i = 0; i < coordinates.length; i++) { - if (coordinates[i].x > max) { - max = coordinates[i].x; - } - } - return max; - } - - public static float findMinX(Pos[] coordinates) { - float min = Float.POSITIVE_INFINITY; - for (int i = 0; i < coordinates.length; i++) { - if (coordinates[i].x < min) { - min = coordinates[i].x; - } - } - return min; - } - - public void processAnnotation(PDAnnotation anno) { - float current_y = anno.getRectangle().getLowerLeftY(); - float upper_y = 0; - PDPage page = anno.getPage(); - - if (page == null) { - page = getCurrentPage(); - } - - if (page == null) { - logger.warn("Annotation without page! The position might not be correct!"); - return; - } - - int pageRotation = page.findRotation(); - // logger_.debug("PageRotation = " + pageRotation); - if (pageRotation == 0) { - float page_height = page.findMediaBox().getHeight(); - current_y = page_height - anno.getRectangle().getLowerLeftY(); - upper_y = page_height - anno.getRectangle().getUpperRightY(); - } - if (pageRotation == 90) { - current_y = anno.getRectangle().getUpperRightX(); - upper_y = anno.getRectangle().getLowerLeftX(); - } - if (pageRotation == 180) { - current_y = anno.getRectangle().getUpperRightY(); - upper_y = anno.getRectangle().getLowerLeftY(); - } - if (pageRotation == 270) { - float page_width = page.findMediaBox().getWidth(); - current_y = page_width - anno.getRectangle().getLowerLeftX(); - upper_y = page_width - anno.getRectangle().getUpperRightX(); - } - - - - if (current_y > this.effectivePageHeight) { - if(!this.legacy40 && upper_y < this.effectivePageHeight) { - // Bottom of annotation is below footer line, - // but top of annotation is above footer line! - // so no place left on this page! - this.max_character_ypos = this.effectivePageHeight; - } - return; - } - - // store ypos of the char if it is not empty - if (current_y > this.max_character_ypos) { - this.max_character_ypos = current_y; - } - } - -} -- cgit v1.2.3