aboutsummaryrefslogtreecommitdiff
path: root/pdf-as-lib/src/main/java/at/knowcenter/wag/egov/egiz/pdf/PDFPage.java
diff options
context:
space:
mode:
Diffstat (limited to 'pdf-as-lib/src/main/java/at/knowcenter/wag/egov/egiz/pdf/PDFPage.java')
-rw-r--r--pdf-as-lib/src/main/java/at/knowcenter/wag/egov/egiz/pdf/PDFPage.java249
1 files changed, 193 insertions, 56 deletions
diff --git a/pdf-as-lib/src/main/java/at/knowcenter/wag/egov/egiz/pdf/PDFPage.java b/pdf-as-lib/src/main/java/at/knowcenter/wag/egov/egiz/pdf/PDFPage.java
index 7de89d2..1a89b7b 100644
--- a/pdf-as-lib/src/main/java/at/knowcenter/wag/egov/egiz/pdf/PDFPage.java
+++ b/pdf-as-lib/src/main/java/at/knowcenter/wag/egov/egiz/pdf/PDFPage.java
@@ -25,15 +25,19 @@
*/
package at.knowcenter.wag.egov.egiz.pdf;
+import java.awt.Rectangle;
+import java.awt.geom.GeneralPath;
import java.io.IOException;
import java.util.List;
import java.util.Map;
+import org.apache.commons.lang.math.NumberUtils;
import org.apache.log4j.Logger;
import org.pdfbox.cos.COSName;
import org.pdfbox.cos.COSStream;
import org.pdfbox.pdmodel.PDPage;
import org.pdfbox.pdmodel.PDResources;
+import org.pdfbox.pdmodel.common.PDRectangle;
import org.pdfbox.pdmodel.common.PDStream;
import org.pdfbox.pdmodel.graphics.xobject.PDXObject;
import org.pdfbox.pdmodel.graphics.xobject.PDXObjectForm;
@@ -44,6 +48,21 @@ import org.pdfbox.util.TextPosition;
import org.pdfbox.util.operator.OperatorProcessor;
import at.knowcenter.wag.egov.egiz.cfg.ConfigLogger;
+import at.knowcenter.wag.egov.egiz.pdf.operator.path.construction.ClosePath;
+import at.knowcenter.wag.egov.egiz.pdf.operator.path.construction.CurveTo;
+import at.knowcenter.wag.egov.egiz.pdf.operator.path.construction.CurveToReplicateFinalPoint;
+import at.knowcenter.wag.egov.egiz.pdf.operator.path.construction.CurveToReplicateInitialPoint;
+import at.knowcenter.wag.egov.egiz.pdf.operator.path.construction.LineTo;
+import at.knowcenter.wag.egov.egiz.pdf.operator.path.construction.MoveTo;
+import at.knowcenter.wag.egov.egiz.pdf.operator.path.painting.CloseAndStrokePath;
+import at.knowcenter.wag.egov.egiz.pdf.operator.path.painting.CloseFillEvenOddAndStrokePath;
+import at.knowcenter.wag.egov.egiz.pdf.operator.path.painting.CloseFillNonZeroAndStrokePath;
+import at.knowcenter.wag.egov.egiz.pdf.operator.path.painting.EndPath;
+import at.knowcenter.wag.egov.egiz.pdf.operator.path.painting.FillEvenOddAndStrokePath;
+import at.knowcenter.wag.egov.egiz.pdf.operator.path.painting.FillNonZeroAndStrokePath;
+import at.knowcenter.wag.egov.egiz.pdf.operator.path.painting.FillPathEvenOddRule;
+import at.knowcenter.wag.egov.egiz.pdf.operator.path.painting.FillPathNonZeroWindingNumberRule;
+import at.knowcenter.wag.egov.egiz.pdf.operator.path.painting.StrokePath;
/**
* PDFPage is an inner class that is used to calculate the page length of a PDF
@@ -52,7 +71,7 @@ import at.knowcenter.wag.egov.egiz.cfg.ConfigLogger;
* This method is called when processing the FileStream. By calling the method
* {@link org.pdfbox.util.PDFStreamEngine#processStream(org.pdfbox.pdmodel.PDPage, org.pdfbox.pdmodel.PDResources, org.pdfbox.cos.COSStream)}
* the implemented method showCharacter is called.
- *
+ *
* @author wlackner
* @see PDFTextStripper
*/
@@ -67,41 +86,167 @@ public class PDFPage extends PDFTextStripper
* The maximum (lowest) y position of a character.
*/
protected float max_character_ypos = Float.NEGATIVE_INFINITY;
-
+
/**
* The maximum (lowest) y position of an image.
*/
protected float max_image_ypos = Float.NEGATIVE_INFINITY;
-
+
/**
* The effective page height.
*/
protected float effectivePageHeight;
-
+
+ /**
+ * The path currently being constructed.
+ */
+ private GeneralPath currentPath = new GeneralPath();
+
+ /**
+ * The lowest position of a drawn path (originating from top).
+ */
+ private float maxPathRelatedYPositionFromTop = Float.NEGATIVE_INFINITY;
+
/**
* Constructor.
- *
+ *
* @param effectivePageHeight The height of the page to be evaluated. PDF elements outside this height will not be considered.
- *
+ *
* @throws IOException
*/
public PDFPage(float effectivePageHeight) throws IOException
{
super();
-
+
this.effectivePageHeight = effectivePageHeight;
-
+
+ // Install a MyInvoke instance as the processor for the "Do" operator.
OperatorProcessor newInvoke = new MyInvoke();
newInvoke.setContext(this);
operators.put("Do", newInvoke);
+
+ // Install the processors for the path construction and path painting operators.
+ registerCustomPathOperators();
}
+    /**
+     * Installs the operator processors for path construction and path painting. Evaluating drawn paths
+     * fixes auto positioning on pages that contain path elements.
+     *
+     * @author Datentechnik Innovation GmbH
+     */
+    private void registerCustomPathOperators() {
+        registerPathConstructionOperators();
+        registerPathPaintingOperators();
+
+        // Note: The graphic context (org.pdfbox.pdmodel.graphics.PDGraphicsState) of the underlying pdfbox
+        // library does not yet support clipping. This prevents feasible usage of the clipping operators
+        // (W, W*), so no processors are registered for them.
+    }
+
+    /**
+     * Installs the processors for the path construction operators (m, l, c, y, v, h).
+     */
+    @SuppressWarnings("unchecked")
+    private void registerPathConstructionOperators() {
+        operators.put("m", new MoveTo(this));
+        operators.put("l", new LineTo(this));
+        operators.put("c", new CurveTo(this));
+        operators.put("y", new CurveToReplicateFinalPoint(this));
+        operators.put("v", new CurveToReplicateInitialPoint(this));
+        operators.put("h", new ClosePath(this));
+    }
+
+    /**
+     * Installs the processors for the path painting operators (S, s, f, F, f*, b, B, b*, B*, n).
+     */
+    @SuppressWarnings("unchecked")
+    private void registerPathPaintingOperators() {
+        // stroking
+        operators.put("S", new StrokePath(this));
+        operators.put("s", new CloseAndStrokePath(this));
+
+        // filling; "f" and "F" are handled by the same processor type
+        operators.put("f", new FillPathNonZeroWindingNumberRule(this));
+        operators.put("F", new FillPathNonZeroWindingNumberRule(this));
+        operators.put("f*", new FillPathEvenOddRule(this));
+
+        // filling and stroking
+        operators.put("b", new CloseFillNonZeroAndStrokePath(this));
+        operators.put("B", new FillNonZeroAndStrokePath(this));
+        operators.put("b*", new CloseFillEvenOddAndStrokePath(this));
+        operators.put("B*", new FillEvenOddAndStrokePath(this));
+
+        // ending the path
+        operators.put("n", new EndPath(this));
+    }
+
+    /**
+     * Gives access to the path that is currently under construction.
+     *
+     * @return The {@link GeneralPath} instance currently held by this page (the stored reference itself,
+     *         not a copy).
+     */
+    public GeneralPath getCurrentPath() {
+        return this.currentPath;
+    }
+
+    /**
+     * Replaces the path that is currently under construction.
+     *
+     * @param currentPath
+     *            The path to be used as the current path from now on.
+     */
+    public void setCurrentPath(GeneralPath currentPath) {
+        this.currentPath = currentPath;
+    }
+
+    /**
+     * Registers a rectangle that bounds the path currently being drawn and, if appropriate, advances the
+     * lowest known position of path-related page content.
+     * <p>
+     * The rectangle is converted to distances from the top of the page, taking the page rotation
+     * (90, 180, 270, anything else is treated as unrotated) into account. The stored maximum is only
+     * updated when the rectangle reaches lower than everything registered so far and is not located
+     * entirely below the footer line (effective page height).
+     * </p>
+     *
+     * @param bounds
+     *            A rectangle depicting the bounds (coordinates originating from bottom left). A
+     *            <code>null</code> or empty rectangle is ignored.
+     * @author Datentechnik Innovation GmbH
+     */
+    public void registerPathBounds(Rectangle bounds) {
+        // nothing to evaluate for a missing or empty rectangle
+        if (bounds == null || bounds.isEmpty()) {
+            return;
+        }
+        if (logger_.isTraceEnabled()) {
+            logger_.trace("Registering path bounds: " + bounds);
+        }
+
+        PDRectangle boundaryBox = page.findMediaBox();
+
+        // vertical start of rectangle (counting from top of page)
+        final float upperBoundYPositionFromTop;
+
+        // vertical end of rectangle (counting from top of page)
+        // this depicts the current end of path-related page content
+        final float lowerBoundYPositionFromTop;
+
+        switch (page.findRotation()) {
+        case 90: // CW: distance from top equals the x coordinate
+            upperBoundYPositionFromTop = (float) bounds.getMinX();
+            lowerBoundYPositionFromTop = (float) bounds.getMaxX();
+            break;
+        case 180: // upside down: distance from top equals the y coordinate
+            upperBoundYPositionFromTop = (float) bounds.getMinY();
+            lowerBoundYPositionFromTop = (float) bounds.getMaxY();
+            break;
+        case 270: { // CCW: the width of the media box acts as page height
+            float pageHeight = boundaryBox.getWidth();
+            upperBoundYPositionFromTop = pageHeight - (float) bounds.getMaxX();
+            lowerBoundYPositionFromTop = pageHeight - (float) bounds.getMinX();
+            break;
+        }
+        default: { // not rotated
+            float pageHeight = boundaryBox.getHeight();
+            upperBoundYPositionFromTop = pageHeight - (float) bounds.getMaxY();
+            lowerBoundYPositionFromTop = pageHeight - (float) bounds.getMinY();
+            break;
+        }
+        }
+
+        // new maximum ?
+        if (lowerBoundYPositionFromTop > maxPathRelatedYPositionFromTop) {
+            // Is the rectangle (at least partly) located above the footer line?
+            // (effective page height := page height - footer line)
+            if (upperBoundYPositionFromTop <= effectivePageHeight) {
+                // yes: update current end of path-related page content
+                maxPathRelatedYPositionFromTop = lowerBoundYPositionFromTop;
+                if (logger_.isTraceEnabled()) {
+                    logger_.trace("New max path related y position (from top): " + maxPathRelatedYPositionFromTop);
+                }
+            } else {
+                // no: rectangle is fully located below the footer line -> ignore
+                logger_.trace("Ignoring path bound below the footer line.");
+            }
+        }
+    }
+ /**
+  * Processes a single operator by delegating to the superclass implementation. This override adds no
+  * behavior of its own; it only hosts a deactivated debug statement for tracing the processed operators.
+  *
+  * @param operator The operator to be processed.
+  * @param arguments The arguments of the operator.
+  * @throws IOException Declared for errors of the superclass implementation.
+  */
protected void processOperator(PDFOperator operator, List arguments) throws IOException
{
- //logger_.debug("operator = " + operator);
-
- super.processOperator(operator, arguments);
+// logger_.debug("operator = " + operator);
+ super.processOperator(operator, arguments);
}
// exthex
@@ -111,7 +256,7 @@ public class PDFPage extends PDFTextStripper
* is used to calculate the latest position of a text in the page. Sorry for
* this misinterpretation of the method, but it is the only way to do this
* (provided by PDFBox)!!!
- *
+ *
* @param text
* the character to be displayed -> calculate its y position.
*/
@@ -119,7 +264,7 @@ public class PDFPage extends PDFTextStripper
{
float current_y = text.getY();
final String character = text.getCharacter();
-
+
int pageRotation = page.findRotation();
//logger_.debug("PageRotation = " + pageRotation);
if (pageRotation == 0)
@@ -146,7 +291,7 @@ public class PDFPage extends PDFTextStripper
//logger_.debug("character is below footer_line. footer_line = " + this.footer_line + ", text.character=" + character + ", y=" + current_y);
return;
}
-
+
// store ypos of the char if it is not empty
if (!character.equals(" ") && current_y > this.max_character_ypos)
{
@@ -160,26 +305,18 @@ public class PDFPage extends PDFTextStripper
// logger_.debug(new String(string));
// }
- /**
- * Returns the calculated page length.
- *
- * @return the max page length value
- */
- public float getMaxPageLength()
- {
- float max_ypos = Float.NEGATIVE_INFINITY;
-
- if (this.max_character_ypos > this.max_image_ypos)
- {
- max_ypos = this.max_character_ypos;
- }
- else
- {
- max_ypos = this.max_image_ypos;
- }
-
- return max_ypos;
- }
+    /**
+     * Returns the calculated page length, i.e. the largest of the y positions recorded for text, images
+     * and paths. Each of these values is initialized with {@link Float#NEGATIVE_INFINITY}.
+     *
+     * @return the max page length value
+     */
+    public float getMaxPageLength() {
+        final float textEnd = this.max_character_ypos;
+        final float imageEnd = this.max_image_ypos;
+        final float pathEnd = this.maxPathRelatedYPositionFromTop;
+
+        if (logger_.isDebugEnabled()) {
+            String message = "Determining page content length: text=" + textEnd + ", image=" + imageEnd
+                    + ", path=" + pathEnd;
+            logger_.debug(message);
+        }
+
+        // the content ends where the lowest of the three content types ends
+        return NumberUtils.max(textEnd, imageEnd, pathEnd);
+    }
public class MyInvoke extends OperatorProcessor
{
@@ -192,26 +329,26 @@ public class PDFPage extends PDFTextStripper
Map xobjects = context.getXObjects();
PDXObject xobject = (PDXObject) xobjects.get(name.getName());
-
+
PDStream stream = xobject.getPDStream();
COSStream cos_stream = stream.getStream();
-
+
COSName subtype = (COSName) cos_stream.getDictionaryObject(COSName.SUBTYPE);
if (subtype.equals(COSName.IMAGE))
{
logger_.debug("XObject Image");
-
+
Matrix ctm = context.getGraphicsState().getCurrentTransformationMatrix();
logger_.debug("ctm = " + ctm);
-
+
Pos [] coordinates = new Pos [] {
new Pos(0, 0, 1),
new Pos(1, 0, 1),
new Pos(0, 1, 1),
new Pos(1, 1, 1) };
-
+
Pos [] transformed_coordinates = transtormCoordinates(coordinates, ctm);
-
+
/**********************************************************
* pdf-as fix:
* calculating min and max point of an image to look where
@@ -219,10 +356,10 @@ public class PDFPage extends PDFTextStripper
* fix solves problems with footer and images and
* placement of the signature in an image only pdf document
**********************************************************/
-
+
float actual_lowest_point = Float.NaN;
float actual_starting_point = Float.NaN;
-
+
int pageRotation = page.findRotation();
logger_.debug("PageRotation = " + pageRotation);
if (pageRotation == 0)
@@ -231,7 +368,7 @@ public class PDFPage extends PDFTextStripper
logger_.debug("min_y = " + min_y);
float page_height = page.findMediaBox().getHeight();
logger_.debug("page_height = " + page_height);
-
+
actual_lowest_point = page_height - min_y;
actual_starting_point = page_height - findMaxY(transformed_coordinates);
}
@@ -243,7 +380,7 @@ public class PDFPage extends PDFTextStripper
logger_.debug("page_width = " + page_width);
actual_lowest_point = max_x;
- actual_starting_point = findMinX(transformed_coordinates);
+ actual_starting_point = findMinX(transformed_coordinates);
}
if (pageRotation == 180)
{
@@ -257,17 +394,17 @@ public class PDFPage extends PDFTextStripper
{
float min_x = findMinX(transformed_coordinates);
logger_.debug("min_x = " + min_x);
-
+
float page_width = page.findMediaBox().getWidth();
logger_.debug("page_width = " + page_width);
-
+
actual_lowest_point = page_width - min_x;
- actual_starting_point = page_width - findMaxX(transformed_coordinates);
+ actual_starting_point = page_width - findMaxX(transformed_coordinates);
}
-
+
logger_.debug("actual_lowest_point = " + actual_lowest_point);
-
+
if (actual_lowest_point > PDFPage.this.effectivePageHeight && actual_starting_point > PDFPage.this.effectivePageHeight)
{
logger_.debug("image is below footer_line");
@@ -278,7 +415,7 @@ public class PDFPage extends PDFTextStripper
{
PDFPage.this.max_image_ypos = actual_lowest_point;
}
-
+
return;
}
@@ -297,7 +434,7 @@ public class PDFPage extends PDFTextStripper
}
}
}
-
+
public static Pos [] transtormCoordinates (Pos [] coordinates, Matrix m)
{
Pos [] transformed = new Pos [coordinates.length];
@@ -307,18 +444,18 @@ public class PDFPage extends PDFTextStripper
}
return transformed;
}
-
+
+ /**
+  * Transforms a single position with the given matrix. The position is treated as the row vector
+  * (x, y, z) and multiplied with the matrix: component i of the result is
+  * x * m(0, i) + y * m(1, i) + z * m(2, i). The given position is not modified; the original and
+  * the transformed position are logged on debug level.
+  *
+  * @param pos The position to be transformed.
+  * @param m The matrix to be applied.
+  * @return A new position holding the transformed coordinates.
+  */
public static Pos transtormCoordinate (Pos pos, Matrix m)
{
Pos transformed = new Pos();
transformed.x = pos.x * m.getValue(0, 0) + pos.y * m.getValue(1, 0) + pos.z * m.getValue(2, 0);
transformed.y = pos.x * m.getValue(0, 1) + pos.y * m.getValue(1, 1) + pos.z * m.getValue(2, 1);
transformed.z = pos.x * m.getValue(0, 2) + pos.y * m.getValue(1, 2) + pos.z * m.getValue(2, 2);
-
+
logger_.debug(" transformed " + pos + " --> " + transformed);
return transformed;
}
-
+
public static float findMinY (Pos [] coordinates)
{
float min = Float.POSITIVE_INFINITY;
@@ -331,7 +468,7 @@ public class PDFPage extends PDFTextStripper
}
return min;
}
-
+
public static float findMaxY(Pos[] coordinates) {
float max = 0;
for (int i = 0; i < coordinates.length; i++) {
@@ -341,7 +478,7 @@ public class PDFPage extends PDFTextStripper
}
return max;
}
-
+
public static float findMaxX (Pos [] coordinates)
{
float max = Float.NEGATIVE_INFINITY;