aboutsummaryrefslogtreecommitdiff
path: root/pdf-as-pdfbox/src/main/java/at/knowcenter/wag/egov/egiz/pdf/PDFPage.java
diff options
context:
space:
mode:
Diffstat (limited to 'pdf-as-pdfbox/src/main/java/at/knowcenter/wag/egov/egiz/pdf/PDFPage.java')
-rw-r--r--pdf-as-pdfbox/src/main/java/at/knowcenter/wag/egov/egiz/pdf/PDFPage.java661
1 files changed, 0 insertions, 661 deletions
diff --git a/pdf-as-pdfbox/src/main/java/at/knowcenter/wag/egov/egiz/pdf/PDFPage.java b/pdf-as-pdfbox/src/main/java/at/knowcenter/wag/egov/egiz/pdf/PDFPage.java
deleted file mode 100644
index d02f23b0..00000000
--- a/pdf-as-pdfbox/src/main/java/at/knowcenter/wag/egov/egiz/pdf/PDFPage.java
+++ /dev/null
@@ -1,661 +0,0 @@
-/*******************************************************************************
- * <copyright> Copyright 2014 by E-Government Innovation Center EGIZ, Graz, Austria </copyright>
- * PDF-AS has been contracted by the E-Government Innovation Center EGIZ, a
- * joint initiative of the Federal Chancellery Austria and Graz University of
- * Technology.
- *
- * Licensed under the EUPL, Version 1.1 or - as soon they will be approved by
- * the European Commission - subsequent versions of the EUPL (the "Licence");
- * You may not use this work except in compliance with the Licence.
- * You may obtain a copy of the Licence at:
- * http://www.osor.eu/eupl/
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the Licence is distributed on an "AS IS" basis,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the Licence for the specific language governing permissions and
- * limitations under the Licence.
- *
- * This product combines work with different licenses. See the "NOTICE" text
- * file for details on the various modules and licenses.
- * The "NOTICE" text file is part of the distribution. Any derivative works
- * that you distribute must include a readable copy of the "NOTICE" text file.
- ******************************************************************************/
-/**
- * <copyright> Copyright 2006 by Know-Center, Graz, Austria </copyright>
- * PDF-AS has been contracted by the E-Government Innovation Center EGIZ, a
- * joint initiative of the Federal Chancellery Austria and Graz University of
- * Technology.
- *
- * Licensed under the EUPL, Version 1.1 or - as soon they will be approved by
- * the European Commission - subsequent versions of the EUPL (the "Licence");
- * You may not use this work except in compliance with the Licence.
- * You may obtain a copy of the Licence at:
- * http://www.osor.eu/eupl/
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the Licence is distributed on an "AS IS" basis,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the Licence for the specific language governing permissions and
- * limitations under the Licence.
- *
- * This product combines work with different licenses. See the "NOTICE" text
- * file for details on the various modules and licenses.
- * The "NOTICE" text file is part of the distribution. Any derivative works
- * that you distribute must include a readable copy of the "NOTICE" text file.
- *
- * $Id: PDFPage.java,v 1.5 2006/10/31 08:09:33 wprinz Exp $
- */
-package at.knowcenter.wag.egov.egiz.pdf;
-
-import java.awt.Rectangle;
-import java.awt.geom.GeneralPath;
-import java.io.IOException;
-import java.util.List;
-import java.util.Map;
-
-import org.apache.commons.lang3.math.NumberUtils;
-import org.apache.pdfbox.cos.COSBase;
-import org.apache.pdfbox.cos.COSName;
-import org.apache.pdfbox.cos.COSStream;
-import org.apache.pdfbox.pdmodel.PDPage;
-import org.apache.pdfbox.pdmodel.PDResources;
-import org.apache.pdfbox.pdmodel.common.PDRectangle;
-import org.apache.pdfbox.pdmodel.common.PDStream;
-import org.apache.pdfbox.pdmodel.font.PDFont;
-import org.apache.pdfbox.pdmodel.graphics.xobject.PDXObject;
-import org.apache.pdfbox.pdmodel.graphics.xobject.PDXObjectForm;
-import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotation;
-import org.apache.pdfbox.util.Matrix;
-import org.apache.pdfbox.util.PDFOperator;
-import org.apache.pdfbox.util.PDFTextStripper;
-import org.apache.pdfbox.util.TextPosition;
-import org.apache.pdfbox.util.operator.OperatorProcessor;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import at.knowcenter.wag.egov.egiz.pdf.operator.path.construction.ClosePath;
-import at.knowcenter.wag.egov.egiz.pdf.operator.path.construction.CurveTo;
-import at.knowcenter.wag.egov.egiz.pdf.operator.path.construction.CurveToReplicateFinalPoint;
-import at.knowcenter.wag.egov.egiz.pdf.operator.path.construction.CurveToReplicateInitialPoint;
-import at.knowcenter.wag.egov.egiz.pdf.operator.path.construction.LineTo;
-import at.knowcenter.wag.egov.egiz.pdf.operator.path.construction.MoveTo;
-import at.knowcenter.wag.egov.egiz.pdf.operator.path.painting.CloseAndStrokePath;
-import at.knowcenter.wag.egov.egiz.pdf.operator.path.painting.CloseFillEvenOddAndStrokePath;
-import at.knowcenter.wag.egov.egiz.pdf.operator.path.painting.CloseFillNonZeroAndStrokePath;
-import at.knowcenter.wag.egov.egiz.pdf.operator.path.painting.EndPath;
-import at.knowcenter.wag.egov.egiz.pdf.operator.path.painting.FillEvenOddAndStrokePath;
-import at.knowcenter.wag.egov.egiz.pdf.operator.path.painting.FillNonZeroAndStrokePath;
-import at.knowcenter.wag.egov.egiz.pdf.operator.path.painting.FillPathEvenOddRule;
-import at.knowcenter.wag.egov.egiz.pdf.operator.path.painting.FillPathNonZeroWindingNumberRule;
-import at.knowcenter.wag.egov.egiz.pdf.operator.path.painting.StrokePath;
-
-/**
- * PDFPage is used to calculate the page length of a PDF document page. It
- * extends the PDFTextStripper class and implements one method of interest:
- * {@link at.knowcenter.wag.egov.egiz.pdf.PDFPage#showCharacter(TextPosition)}<br>
- * This method is called while the content stream is processed: calling
- * {@link org.apache.pdfbox.util.PDFStreamEngine#processStream(org.apache.pdfbox.pdmodel.PDPage, org.apache.pdfbox.pdmodel.PDResources, org.apache.pdfbox.cos.COSStream)}
- * results in the implemented method showCharacter being called.
- *
- * @author wlackner
- * @see PDFTextStripper
- */
-public class PDFPage extends PDFTextStripper {
- /**
- * The logger definition.
- */
- private static final Logger logger = LoggerFactory.getLogger(PDFPage.class);
-
- /**
- * The maximum (lowest) y position of a character.
- */
- protected float max_character_ypos = Float.NEGATIVE_INFINITY;
-
- /**
- * The maximum (lowest) y position of an image.
- */
- protected float max_image_ypos = Float.NEGATIVE_INFINITY;
-
- /**
- * The effective page height.
- */
- protected float effectivePageHeight;
-
- /**
- * The path currently being constructed.
- */
- private GeneralPath currentPath = new GeneralPath();
-
- private boolean legacy40;
-
- /**
- * The lowest position of a drawn path (originating from top).
- */
- private float maxPathRelatedYPositionFromTop = Float.NEGATIVE_INFINITY;
-
- /**
-  * Creates a page evaluator and wires up the operator processors it needs.
-  * <p>
-  * The "Do" operator is always routed to {@link MyInvoke}. The custom path
-  * construction/painting operators are registered only when
-  * {@code legacy32} is {@code false}.
-  *
-  * @param effectivePageHeight
-  *            The height of the page to be evaluated. PDF elements outside
-  *            this height will not be considered.
-  * @param legacy32
-  *            if {@code true}, the custom path operators are not registered.
-  * @param legacy40
-  *            stored on the instance; consulted by
-  *            {@link #processAnnotation(PDAnnotation)}.
-  *
-  * @throws java.io.IOException
-  *             declared by the constructor signature.
-  */
- public PDFPage(float effectivePageHeight, boolean legacy32, boolean legacy40)
-         throws IOException {
-     super();
-
-     this.effectivePageHeight = effectivePageHeight;
-     this.legacy40 = legacy40;
-
-     // XObject invocations ("Do") are handled by our own processor.
-     OperatorProcessor xObjectHandler = new MyInvoke(this);
-     xObjectHandler.setContext(this);
-     this.registerOperatorProcessor("Do", xObjectHandler);
-
-     if (legacy32) {
-         // legacy32 requested: leave the path operators unregistered
-         return;
-     }
-     registerCustomPathOperators();
- }
-
- /**
-  * Registers the operator processors for path construction and path
-  * painting. This exists to fix auto positioning on pages that contain
-  * path elements.
-  *
-  * @author Datentechnik Innovation GmbH
-  */
- private void registerCustomPathOperators() {
-
-     // ----- path construction -----
-     registerOperatorProcessor("m", new MoveTo(this));
-     registerOperatorProcessor("l", new LineTo(this));
-     registerOperatorProcessor("c", new CurveTo(this));
-     registerOperatorProcessor("y", new CurveToReplicateFinalPoint(this));
-     registerOperatorProcessor("v", new CurveToReplicateInitialPoint(this));
-     registerOperatorProcessor("h", new ClosePath(this));
-
-     // ----- path painting -----
-     registerOperatorProcessor("S", new StrokePath(this));
-     registerOperatorProcessor("s", new CloseAndStrokePath(this));
-     // "f" and "F" are both mapped to the non-zero winding number fill
-     registerOperatorProcessor("f", new FillPathNonZeroWindingNumberRule(this));
-     registerOperatorProcessor("F", new FillPathNonZeroWindingNumberRule(this));
-     registerOperatorProcessor("f*", new FillPathEvenOddRule(this));
-     registerOperatorProcessor("b", new CloseFillNonZeroAndStrokePath(this));
-     registerOperatorProcessor("B", new FillNonZeroAndStrokePath(this));
-     registerOperatorProcessor("b*", new CloseFillEvenOddAndStrokePath(this));
-     registerOperatorProcessor("B*", new FillEvenOddAndStrokePath(this));
-     registerOperatorProcessor("n", new EndPath(this));
-
-     // Clipping operators (W, W*) are intentionally not registered: the
-     // graphic context (org.pdfbox.pdmodel.graphics.PDGraphicsState) of the
-     // underlying pdfbox library does not yet support clipping, which
-     // prevents feasible usage of these operators.
-     // operators.put("W", new ...(this));
-     // operators.put("W*", new ...(this));
-
- }
-
- /**
-  * Gives access to the path that is being built up at the moment.
-  *
-  * @return the {@link GeneralPath} currently held by this page evaluator.
-  */
- public GeneralPath getCurrentPath() {
-     return this.currentPath;
- }
-
- /**
-  * Replaces the path that is being built up at the moment.
-  *
-  * @param currentPath
-  *            the path to use from now on; stored as-is (no copy is made).
-  */
- public void setCurrentPath(GeneralPath currentPath) {
-     this.currentPath = currentPath;
- }
-
- /**
-  * Takes the bounding rectangle of a drawn path into account when
-  * determining the lowest path-related content position of the page.
-  * <p>
-  * The rectangle is converted to "distance from top" coordinates depending
-  * on the page rotation. The recorded maximum is only raised when the
-  * rectangle starts at or above {@code effectivePageHeight}; rectangles
-  * lying completely below that line are ignored. Empty rectangles are
-  * ignored as well.
-  *
-  * @param bounds
-  *            A rectangle depicting the bounds (coordinates originating from
-  *            bottom left).
-  * @author Datentechnik Innovation GmbH
-  */
- public void registerPathBounds(Rectangle bounds) {
-     if (bounds.isEmpty()) {
-         return;
-     }
-     logger.debug("Registering path bounds: " + bounds);
-
-     // crop box is preferred, media box is the fallback
-     PDRectangle pageBox = this.getCurrentPage().findCropBox();
-     if (pageBox == null) {
-         pageBox = this.getCurrentPage().findMediaBox();
-     }
-
-     final int rotation = this.getCurrentPage().findRotation();
-
-     // for 90/270 the page width acts as the height
-     final boolean sideways = (rotation == 90 || rotation == 270);
-     final float pageExtent = sideways ? pageBox.getWidth() : pageBox.getHeight();
-
-     // vertical start / end of the rectangle, counted from top of page
-     final float topEdgeFromTop;
-     final float bottomEdgeFromTop;
-
-     if (rotation == 90) { // CW
-         topEdgeFromTop = (float) bounds.getMinX();
-         bottomEdgeFromTop = (float) bounds.getMaxX();
-     } else if (rotation == 180) {
-         topEdgeFromTop = (float) bounds.getMinY();
-         bottomEdgeFromTop = (float) bounds.getMaxY();
-     } else if (rotation == 270) { // CCW
-         topEdgeFromTop = pageExtent - (float) bounds.getMaxX();
-         bottomEdgeFromTop = pageExtent - (float) bounds.getMinX();
-     } else {
-         topEdgeFromTop = pageExtent - (float) bounds.getMaxY();
-         bottomEdgeFromTop = pageExtent - (float) bounds.getMinY();
-     }
-
-     // nothing to do unless this rectangle reaches lower than anything so far
-     if (!(bottomEdgeFromTop > maxPathRelatedYPositionFromTop)) {
-         return;
-     }
-
-     // (effective page height := page height - footer line)
-     if (topEdgeFromTop <= effectivePageHeight) {
-         // at least partly above the footer line: new end of path content
-         maxPathRelatedYPositionFromTop = bottomEdgeFromTop;
-         logger.trace("New max path related y position (from top): "
-                 + maxPathRelatedYPositionFromTop);
-     } else {
-         // completely below the footer line
-         logger.trace("Ignoring path bound below the footer line.");
-     }
- }
-
- /**
-  * Traces the operator about to be processed and then hands it to the
-  * superclass implementation.
-  *
-  * @param operator
-  *            the content stream operator to process.
-  * @param arguments
-  *            the operands of the operator.
-  * @throws IOException
-  *             propagated from the superclass implementation.
-  */
- @Override
- protected void processOperator(PDFOperator operator, List<COSBase> arguments)
-         throws IOException {
-     // This runs once per content stream operator; only build the message
-     // when trace output is actually enabled.
-     if (logger.isTraceEnabled()) {
-         logger.trace("operator = " + operator);
-     }
-     super.processOperator(operator, arguments);
- }
-
- /**
-  * Forwards every text position to {@link #showCharacter(TextPosition)}.
-  *
-  * @param text
-  *            the text position reported by the text stripper.
-  */
- @Override
- protected void processTextPosition(TextPosition text) {
-     this.showCharacter(text);
- }
-
- // exthex
- /**
- * A method provided as an event interface to allow a subclass to perform
- * some specific functionality when a character needs to be displayed. This
- * method is used to calculate the latest position of a text in the page.
- * Sorry for this missinterpretation of the method, but it is the only way
- * to do this (provided by PDFBox)!!!
- *
- * @param text
- * the character to be displayed -> calculate there y position.
- */
- protected void showCharacter(TextPosition text) {
- float current_y = text.getY();
- final String character = text.getCharacter();
-
- if (at.gv.egiz.pdfas.common.utils.StringUtils.whiteSpaceTrim(character)
- .isEmpty()) {
- return;
- }
-
- int pageRotation = this.getCurrentPage().findRotation();
- // logger_.debug("PageRotation = " + pageRotation);
- /*if (pageRotation == 0) {
- current_y = text.getY();
- }
- if (pageRotation == 90) {
- current_y = text.getY();
- }
- if (pageRotation == 180) {
- current_y = text.getY();
- }
- if (pageRotation == 270) {
- current_y = text.getY();
- }
-
- if (current_y > this.effectivePageHeight) {
- this.max_character_ypos = this.effectivePageHeight;
- return;
- }
-
- // store ypos of the char if it is not empty
- if (current_y > this.max_character_ypos) {
- this.max_character_ypos = current_y;
- }*/
-
- if (pageRotation == 0) {
- current_y = text.getY();
- }
- if (pageRotation == 90) {
- current_y = text.getX();
- }
- if (pageRotation == 180) {
- float page_height = this.getCurrentPage().findMediaBox().getHeight();
- current_y = page_height - text.getY();
- }
- if (pageRotation == 270) {
- float page_height = this.getCurrentPage().findMediaBox().getHeight();
- current_y = page_height - text.getX();
- }
-
- if (current_y > this.effectivePageHeight) {
- // logger_.debug("character is below footer_line. footer_line = " +
- // this.footer_line + ", text.character=" + character + ", y=" +
- // current_y);
- return;
- }
-
- // store ypos of the char if it is not empty
- if (current_y > this.max_character_ypos) {
- this.max_character_ypos = current_y;
- }
- }
-
- // use this function to get an unsorted text output
- // public void showString(byte[] string) {
- // logger_.debug(new String(string));
- // }
-
- /**
-  * Returns the calculated page length, i.e. the largest of the positions
-  * recorded for text, images and paths.
-  *
-  * @return the max page length value
-  */
- public float getMaxPageLength() {
-     if (logger.isDebugEnabled()) {
-         logger.debug("Determining page content length: text="
-                 + max_character_ypos + ", image=" + max_image_ypos
-                 + ", path=" + maxPathRelatedYPositionFromTop);
-     }
-     final float lowestContent = NumberUtils.max(max_character_ypos,
-             max_image_ypos, maxPathRelatedYPositionFromTop);
-     return lowestContent;
- }
-
- /**
-  * Returns the fonts obtained via {@code findResources()} of the current
-  * page.
-  * <p>
-  * The FONT entry of the page's own resource dictionary (if there is one)
-  * is remembered before the lookup and written back afterwards.
-  * NOTE(review): presumably the lookup can alter that entry - confirm
-  * against the PDFBox version in use.
-  *
-  * @return the font map of the current page.
-  */
- @Override
- public Map<String, PDFont> getFonts() {
-     final PDPage page = getCurrentPage();
-
-     // remember the page's own FONT entry, if any
-     COSBase savedFontEntry = null;
-     final PDResources ownResources = page.getResources();
-     if (ownResources != null && ownResources.getCOSDictionary() != null) {
-         savedFontEntry = ownResources.getCOSDictionary()
-                 .getDictionaryObject(COSName.FONT);
-     }
-
-     final Map<String, PDFont> fonts = page.findResources().getFonts();
-
-     // put the remembered entry back
-     if (savedFontEntry != null) {
-         page.getResources().getCOSDictionary()
-                 .setItem(COSName.FONT, savedFontEntry);
-     }
-
-     return fonts;
- }
-
- /**
-  * Operator processor for the "Do" operator (XObject invocation).
-  * <p>
-  * Image XObjects are not rendered; instead the unit square is transformed
-  * with the current transformation matrix to find out how far down the page
-  * the image reaches, and {@code max_image_ypos} of the enclosing
-  * {@link PDFPage} is updated accordingly. Form XObjects are processed
-  * recursively as sub streams.
-  */
- public class MyInvoke extends OperatorProcessor {
-
-     /** The page evaluator used to look up the current page. */
-     private PDFPage mypage;
-
-     /**
-      * @param page
-      *            the page evaluator this processor belongs to.
-      */
-     public MyInvoke(PDFPage page) {
-         this.mypage = page;
-     }
-
-     /**
-      * Handles one "Do" invocation.
-      *
-      * @param operator
-      *            the operator being processed.
-      * @param arguments
-      *            the operands; the first one is the name of the XObject.
-      * @throws IOException
-      *             if processing a form XObject sub stream fails.
-      */
-     @Override
-     public void process(PDFOperator operator, List<COSBase> arguments)
-             throws IOException {
-         COSName name = (COSName) arguments.get(0);
-
-         Map<String, PDXObject> xobjects = context.getXObjects();
-         PDXObject xobject = xobjects.get(name.getName());
-
-         if (xobject == null) {
-             // The content stream references an XObject that the resources
-             // do not define: nothing can be measured, so skip it instead
-             // of failing with a NullPointerException.
-             logger.debug("XObject " + name.getName() + " not found");
-             return;
-         }
-
-         PDStream stream = xobject.getPDStream();
-         COSStream cos_stream = stream.getStream();
-
-         COSName subtype = (COSName) cos_stream
-                 .getDictionaryObject(COSName.SUBTYPE);
-         // constant first: a missing subtype must not cause an NPE
-         if (COSName.IMAGE.equals(subtype)) {
-             logger.debug("XObject Image");
-
-             Matrix ctm = context.getGraphicsState()
-                     .getCurrentTransformationMatrix();
-             logger.debug("ctm = " + ctm);
-
-             // corners of the unit square the image is drawn into
-             Pos[] coordinates = new Pos[] { new Pos(0, 0, 1),
-                     new Pos(1, 0, 1), new Pos(0, 1, 1), new Pos(1, 1, 1) };
-
-             Pos[] transformed_coordinates = transtormCoordinates(
-                     coordinates, ctm);
-
-             /**********************************************************
-              * pdf-as fix: calculating min and max point of an image to look
-              * where the signature should be placed fix solves problems with
-              * footer and images and placement of the signature in an image
-              * only pdf document
-              **********************************************************/
-
-             // Both values are measured from the top of the (rotated) page.
-             // They stay NaN for rotations other than 0/90/180/270, in which
-             // case none of the comparisons below is true.
-             float actual_lowest_point = Float.NaN;
-             float actual_starting_point = Float.NaN;
-
-             int pageRotation = this.mypage.getCurrentPage().findRotation();
-             logger.debug("PageRotation = " + pageRotation);
-             if (pageRotation == 0) {
-                 float min_y = findMinY(transformed_coordinates);
-                 logger.debug("min_y = " + min_y);
-                 float page_height = this.mypage.getCurrentPage()
-                         .findMediaBox().getHeight();
-                 logger.debug("page_height = " + page_height);
-
-                 actual_lowest_point = page_height - min_y;
-                 actual_starting_point = page_height
-                         - findMaxY(transformed_coordinates);
-             }
-             if (pageRotation == 90) {
-                 float max_x = findMaxX(transformed_coordinates);
-                 logger.debug("max_x = " + max_x);
-                 float page_width = this.mypage.getCurrentPage()
-                         .findMediaBox().getWidth();
-                 logger.debug("page_width = " + page_width);
-
-                 actual_lowest_point = max_x;
-                 actual_starting_point = findMinX(transformed_coordinates);
-             }
-             if (pageRotation == 180) {
-                 float min_y = findMinY(transformed_coordinates);
-                 logger.debug("min_y = " + min_y);
-                 actual_lowest_point = findMaxY(transformed_coordinates);
-                 // With a 180 degree rotation the distance from the top is
-                 // the y coordinate itself, so the image starts at min_y.
-                 // (Mirrors the 90 degree case, which uses min_x / max_x.)
-                 actual_starting_point = min_y;
-             }
-             if (pageRotation == 270) {
-                 float min_x = findMinX(transformed_coordinates);
-                 logger.debug("min_x = " + min_x);
-
-                 float page_width = this.mypage.getCurrentPage()
-                         .findMediaBox().getWidth();
-                 logger.debug("page_width = " + page_width);
-
-                 actual_lowest_point = page_width - min_x;
-                 actual_starting_point = page_width
-                         - findMaxX(transformed_coordinates);
-             }
-
-             logger.debug("actual_lowest_point = " + actual_lowest_point);
-
-             // ignore images that lie completely below the footer line
-             if (actual_lowest_point > PDFPage.this.effectivePageHeight
-                     && actual_starting_point > PDFPage.this.effectivePageHeight) {
-                 logger.debug("image is below footer_line");
-                 return;
-             }
-
-             if (actual_lowest_point > PDFPage.this.max_image_ypos) {
-                 PDFPage.this.max_image_ypos = actual_lowest_point;
-             }
-
-             return;
-         }
-
-         if (xobject instanceof PDXObjectForm) {
-             PDXObjectForm form = (PDXObjectForm) xobject;
-             COSStream invoke = (COSStream) form.getCOSObject();
-             PDResources pdResources = form.getResources();
-             PDPage page = context.getCurrentPage();
-             if (pdResources == null) {
-                 // form without own resources: fall back to the page's
-                 pdResources = page.findResources();
-             }
-
-             getContext().processSubStream(page, pdResources, invoke);
-         }
-     }
- }
-
- /**
-  * Applies {@link #transtormCoordinate(Pos, Matrix)} to every element of
-  * the given array.
-  *
-  * @param coordinates
-  *            the positions to transform; the array itself is not modified.
-  * @param m
-  *            the matrix to apply.
-  * @return a new array of the same length holding the transformed
-  *         positions in the same order.
-  */
- public static Pos[] transtormCoordinates(Pos[] coordinates, Matrix m) {
-     final int count = coordinates.length;
-     final Pos[] result = new Pos[count];
-     int index = 0;
-     while (index < count) {
-         result[index] = transtormCoordinate(coordinates[index], m);
-         index++;
-     }
-     return result;
- }
-
- /**
-  * Multiplies the position, taken as row vector (x, y, z), with the given
-  * matrix and logs the result at debug level.
-  *
-  * @param pos
-  *            the position to transform; it is not modified.
-  * @param m
-  *            the matrix to apply.
-  * @return a new position holding the transformed components.
-  */
- public static Pos transtormCoordinate(Pos pos, Matrix m) {
-     final Pos result = new Pos();
-
-     // component k of the result is the row vector times column k of m
-     result.x = pos.x * m.getValue(0, 0)
-             + pos.y * m.getValue(1, 0)
-             + pos.z * m.getValue(2, 0);
-     result.y = pos.x * m.getValue(0, 1)
-             + pos.y * m.getValue(1, 1)
-             + pos.z * m.getValue(2, 1);
-     result.z = pos.x * m.getValue(0, 2)
-             + pos.y * m.getValue(1, 2)
-             + pos.z * m.getValue(2, 2);
-
-     logger.debug(" transformed " + pos + " --> " + result);
-     return result;
- }
-
- /**
-  * Determines the smallest y component among the given positions.
-  *
-  * @param coordinates
-  *            the positions to inspect.
-  * @return the smallest y value, or {@link Float#POSITIVE_INFINITY} if the
-  *         array is empty.
-  */
- public static float findMinY(Pos[] coordinates) {
-     float smallest = Float.POSITIVE_INFINITY;
-     for (Pos candidate : coordinates) {
-         if (candidate.y < smallest) {
-             smallest = candidate.y;
-         }
-     }
-     return smallest;
- }
-
- /**
-  * Determines the largest y component among the given positions.
-  * <p>
-  * The search starts at {@link Float#NEGATIVE_INFINITY} (like
-  * {@link #findMaxX(Pos[])}), so that inputs consisting only of negative y
-  * values yield their true maximum rather than 0.
-  *
-  * @param coordinates
-  *            the positions to inspect.
-  * @return the largest y value, or {@link Float#NEGATIVE_INFINITY} if the
-  *         array is empty.
-  */
- public static float findMaxY(Pos[] coordinates) {
-     float max = Float.NEGATIVE_INFINITY;
-     for (int i = 0; i < coordinates.length; i++) {
-         if (coordinates[i].y > max) {
-             max = coordinates[i].y;
-         }
-     }
-     return max;
- }
-
- /**
-  * Determines the largest x component among the given positions.
-  *
-  * @param coordinates
-  *            the positions to inspect.
-  * @return the largest x value, or {@link Float#NEGATIVE_INFINITY} if the
-  *         array is empty.
-  */
- public static float findMaxX(Pos[] coordinates) {
-     float largest = Float.NEGATIVE_INFINITY;
-     for (Pos candidate : coordinates) {
-         if (candidate.x > largest) {
-             largest = candidate.x;
-         }
-     }
-     return largest;
- }
-
- /**
-  * Determines the smallest x component among the given positions.
-  *
-  * @param coordinates
-  *            the positions to inspect.
-  * @return the smallest x value, or {@link Float#POSITIVE_INFINITY} if the
-  *         array is empty.
-  */
- public static float findMinX(Pos[] coordinates) {
-     float smallest = Float.POSITIVE_INFINITY;
-     for (Pos candidate : coordinates) {
-         if (candidate.x < smallest) {
-             smallest = candidate.x;
-         }
-     }
-     return smallest;
- }
-
- public void processAnnotation(PDAnnotation anno) {
- float current_y = anno.getRectangle().getLowerLeftY();
- float upper_y = 0;
- PDPage page = anno.getPage();
-
- if (page == null) {
- page = getCurrentPage();
- }
-
- if (page == null) {
- logger.warn("Annotation without page! The position might not be correct!");
- return;
- }
-
- int pageRotation = page.findRotation();
- // logger_.debug("PageRotation = " + pageRotation);
- if (pageRotation == 0) {
- float page_height = page.findMediaBox().getHeight();
- current_y = page_height - anno.getRectangle().getLowerLeftY();
- upper_y = page_height - anno.getRectangle().getUpperRightY();
- }
- if (pageRotation == 90) {
- current_y = anno.getRectangle().getUpperRightX();
- upper_y = anno.getRectangle().getLowerLeftX();
- }
- if (pageRotation == 180) {
- current_y = anno.getRectangle().getUpperRightY();
- upper_y = anno.getRectangle().getLowerLeftY();
- }
- if (pageRotation == 270) {
- float page_width = page.findMediaBox().getWidth();
- current_y = page_width - anno.getRectangle().getLowerLeftX();
- upper_y = page_width - anno.getRectangle().getUpperRightX();
- }
-
-
-
- if (current_y > this.effectivePageHeight) {
- if(!this.legacy40 && upper_y < this.effectivePageHeight) {
- // Bottom of annotation is below footer line,
- // but top of annotation is above footer line!
- // so no place left on this page!
- this.max_character_ypos = this.effectivePageHeight;
- }
- return;
- }
-
- // store ypos of the char if it is not empty
- if (current_y > this.max_character_ypos) {
- this.max_character_ypos = current_y;
- }
- }
-
-}