From c33195994e0a5e263ebb87402f9789cdda21a4b2 Mon Sep 17 00:00:00 2001 From: Andreas Fitzek Date: Tue, 30 Jul 2013 15:45:49 +0200 Subject: Fixed legacy parameter reading with trim --- .../at/knowcenter/wag/egov/egiz/pdf/PDFPage.java | 641 ++++++++++----------- 1 file changed, 319 insertions(+), 322 deletions(-) (limited to 'pdf-as-lib') diff --git a/pdf-as-lib/src/main/java/at/knowcenter/wag/egov/egiz/pdf/PDFPage.java b/pdf-as-lib/src/main/java/at/knowcenter/wag/egov/egiz/pdf/PDFPage.java index a851e18..138f334 100644 --- a/pdf-as-lib/src/main/java/at/knowcenter/wag/egov/egiz/pdf/PDFPage.java +++ b/pdf-as-lib/src/main/java/at/knowcenter/wag/egov/egiz/pdf/PDFPage.java @@ -73,80 +73,83 @@ import at.knowcenter.wag.egov.egiz.pdf.operator.path.painting.StrokePath; * This method is called when processing the FileStream. By calling the method * {@link org.pdfbox.util.PDFStreamEngine#processStream(org.pdfbox.pdmodel.PDPage, org.pdfbox.pdmodel.PDResources, org.pdfbox.cos.COSStream)} * the implemented method showCharacter is called. - * + * * @author wlackner * @see PDFTextStripper */ -public class PDFPage extends PDFTextStripper -{ - /** - * The logger definition. - */ - private static final Logger logger_ = ConfigLogger.getLogger(PDFPage.class); - - /** - * The maximum (lowest) y position of a character. - */ - protected float max_character_ypos = Float.NEGATIVE_INFINITY; - - /** - * The maximum (lowest y position of an image. - */ - protected float max_image_ypos = Float.NEGATIVE_INFINITY; - - /** - * The effective page height. - */ - protected float effectivePageHeight; - - /** - * The path currently being constructed. - */ - private GeneralPath currentPath = new GeneralPath(); - - /** - * The lowest position of a drawn path (originating from top). - */ - private float maxPathRelatedYPositionFromTop = Float.NEGATIVE_INFINITY; - - /** - * Constructor. - * - * @param effectivePageHeight The height of the page to be evaluated. PDF elements outside this height will not be considered. - * - * @throws IOException - */ - public PDFPage(float effectivePageHeight) throws IOException - { - super(); - - this.effectivePageHeight = effectivePageHeight; - - OperatorProcessor newInvoke = new MyInvoke(); - newInvoke.setContext(this); - operators.put("Do", newInvoke); - - boolean legacy = false; - - try { - String leg = SettingsReader.getInstance().getSetting("legacy_positioning", "false"); - if("true".equals(leg)) { - legacy = true; - } - } catch (SettingsException e) { - // TODO Auto-generated catch block - e.printStackTrace(); - } - - if(!legacy) { - registerCustomPathOperators(); - } - } +public class PDFPage extends PDFTextStripper { + /** + * The logger definition. + */ + private static final Logger logger_ = ConfigLogger.getLogger(PDFPage.class); /** - * Registers operators responsible for path construction and painting in order to fix auto positioning on pages with - * path elements. - * + * The maximum (lowest) y position of a character. + */ + protected float max_character_ypos = Float.NEGATIVE_INFINITY; + + /** + * The maximum (lowest y position of an image. + */ + protected float max_image_ypos = Float.NEGATIVE_INFINITY; + + /** + * The effective page height. + */ + protected float effectivePageHeight; + + /** + * The path currently being constructed. + */ + private GeneralPath currentPath = new GeneralPath(); + + /** + * The lowest position of a drawn path (originating from top). + */ + private float maxPathRelatedYPositionFromTop = Float.NEGATIVE_INFINITY; + + /** + * Constructor. + * + * @param effectivePageHeight + * The height of the page to be evaluated. PDF elements outside + * this height will not be considered. + * + * @throws IOException + */ + public PDFPage(float effectivePageHeight) throws IOException { + super(); + + this.effectivePageHeight = effectivePageHeight; + + OperatorProcessor newInvoke = new MyInvoke(); + newInvoke.setContext(this); + operators.put("Do", newInvoke); + + boolean legacy = false; + + try { + String leg = SettingsReader.getInstance().getSetting("legacy.pos", + "false"); + if (leg != null) { + if ("true".equals(leg.trim())) { + legacy = true; + } + } + } catch (SettingsException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } + + if (!legacy) { + registerCustomPathOperators(); + } + } + + /** + * Registers operators responsible for path construction and painting in + * order to fix auto positioning on pages with path elements. + * * @author Datentechnik Innovation GmbH */ @SuppressWarnings("unchecked") @@ -175,16 +178,19 @@ public class PDFPage extends PDFTextStripper operators.put("B*", new FillEvenOddAndStrokePath(this)); operators.put("n", new EndPath(this)); - // Note: The graphic context (org.pdfbox.pdmodel.graphics.PDGraphicsState) of the underlying pdfbox library does - // not yet support clipping. This prevents feasible usage of clipping operators (W, W*). -// operators.put("W", new ...(this)); -// operators.put("W*", new ...(this)); + // Note: The graphic context + // (org.pdfbox.pdmodel.graphics.PDGraphicsState) of the underlying + // pdfbox library does + // not yet support clipping. This prevents feasible usage of clipping + // operators (W, W*). + // operators.put("W", new ...(this)); + // operators.put("W*", new ...(this)); } /** * Returns the path currently being constructed. - * + * * @return The path currently being constructed. */ public GeneralPath getCurrentPath() { @@ -193,7 +199,9 @@ public class PDFPage extends PDFTextStripper /** * Sets the current path. - * @param currentPath The new current path. + * + * @param currentPath + * The new current path. */ public void setCurrentPath(GeneralPath currentPath) { this.currentPath = currentPath; @@ -201,9 +209,10 @@ public class PDFPage extends PDFTextStripper /** * Registers a rectangle that bounds the path currently being drawn. - * + * * @param bounds - * A rectangle depicting the bounds (coordinates originating from bottom left). + * A rectangle depicting the bounds (coordinates originating from + * bottom left). * @author Datentechnik Innovation GmbH */ public void registerPathBounds(Rectangle bounds) { @@ -233,257 +242,250 @@ public class PDFPage extends PDFTextStripper break; case 270: // CCW pageHeight = boundaryBox.getWidth(); - upperBoundYPositionFromTop = pageHeight - (float) bounds.getMaxX(); - lowerBoundYPositionFromTop = pageHeight - (float) bounds.getMinX(); + upperBoundYPositionFromTop = pageHeight + - (float) bounds.getMaxX(); + lowerBoundYPositionFromTop = pageHeight + - (float) bounds.getMinX(); break; default: pageHeight = boundaryBox.getHeight(); - upperBoundYPositionFromTop = pageHeight - (float) bounds.getMaxY(); - lowerBoundYPositionFromTop = pageHeight - (float) bounds.getMinY(); + upperBoundYPositionFromTop = pageHeight + - (float) bounds.getMaxY(); + lowerBoundYPositionFromTop = pageHeight + - (float) bounds.getMinY(); break; } // new maximum ? if (lowerBoundYPositionFromTop > maxPathRelatedYPositionFromTop) { - // Is the rectangle (at least partly) located above the footer line? + // Is the rectangle (at least partly) located above the footer + // line? // (effective page height := page height - footer line) if (upperBoundYPositionFromTop <= effectivePageHeight) { // yes: update current end of path-related page content maxPathRelatedYPositionFromTop = lowerBoundYPositionFromTop; - logger_.trace("New max path related y position (from top): " + maxPathRelatedYPositionFromTop); + logger_.trace("New max path related y position (from top): " + + maxPathRelatedYPositionFromTop); } else { - // no: rectangle is fully located below the footer line -> ignore + // no: rectangle is fully located below the footer line -> + // ignore logger_.trace("Ignoring path bound below the footer line."); } } } } - protected void processOperator(PDFOperator operator, List arguments) throws IOException - { -// logger_.debug("operator = " + operator); - super.processOperator(operator, arguments); - } - - // exthex - /** - * A method provided as an event interface to allow a subclass to perform some - * specific functionality when a character needs to be displayed. This method - * is used to calculate the latest position of a text in the page. Sorry for - * this missinterpretation of the method, but it is the only way to do this - * (provided by PDFBox)!!! - * - * @param text - * the character to be displayed -> calculate there y position. - */ - protected void showCharacter(TextPosition text) - { - float current_y = text.getY(); - final String character = text.getCharacter(); - - int pageRotation = page.findRotation(); - //logger_.debug("PageRotation = " + pageRotation); - if (pageRotation == 0) - { - current_y = text.getY(); - } - if (pageRotation == 90) - { - current_y = text.getX(); - } - if (pageRotation == 180) - { - float page_height = page.findMediaBox().getHeight(); - current_y = page_height - text.getY(); - } - if (pageRotation == 270) - { - float page_height = page.findMediaBox().getHeight(); - current_y = page_height - text.getX(); - } - - if (current_y > this.effectivePageHeight) - { - //logger_.debug("character is below footer_line. footer_line = " + this.footer_line + ", text.character=" + character + ", y=" + current_y); - return; - } - - // store ypos of the char if it is not empty - if (!character.equals(" ") && current_y > this.max_character_ypos) - { - this.max_character_ypos = current_y; - } - - } - - // use this funtion getting an unsorted text output - // public void showString(byte[] string) { - // logger_.debug(new String(string)); - // } + protected void processOperator(PDFOperator operator, List arguments) + throws IOException { + // logger_.debug("operator = " + operator); + super.processOperator(operator, arguments); + } + + // exthex + /** + * A method provided as an event interface to allow a subclass to perform + * some specific functionality when a character needs to be displayed. This + * method is used to calculate the latest position of a text in the page. + * Sorry for this missinterpretation of the method, but it is the only way + * to do this (provided by PDFBox)!!! + * + * @param text + * the character to be displayed -> calculate there y position. + */ + protected void showCharacter(TextPosition text) { + float current_y = text.getY(); + final String character = text.getCharacter(); + + int pageRotation = page.findRotation(); + // logger_.debug("PageRotation = " + pageRotation); + if (pageRotation == 0) { + current_y = text.getY(); + } + if (pageRotation == 90) { + current_y = text.getX(); + } + if (pageRotation == 180) { + float page_height = page.findMediaBox().getHeight(); + current_y = page_height - text.getY(); + } + if (pageRotation == 270) { + float page_height = page.findMediaBox().getHeight(); + current_y = page_height - text.getX(); + } + + if (current_y > this.effectivePageHeight) { + // logger_.debug("character is below footer_line. footer_line = " + + // this.footer_line + ", text.character=" + character + ", y=" + + // current_y); + return; + } + + // store ypos of the char if it is not empty + if (!character.equals(" ") && current_y > this.max_character_ypos) { + this.max_character_ypos = current_y; + } + + } + + // use this funtion getting an unsorted text output + // public void showString(byte[] string) { + // logger_.debug(new String(string)); + // } /** * Returns the calculated page length. - * + * * @return the max page length value */ public float getMaxPageLength() { if (logger_.isDebugEnabled()) { - logger_.debug("Determining page content length: text=" + max_character_ypos + ", image=" + max_image_ypos + logger_.debug("Determining page content length: text=" + + max_character_ypos + ", image=" + max_image_ypos + ", path=" + maxPathRelatedYPositionFromTop); } - return NumberUtils.max(max_character_ypos, max_image_ypos, maxPathRelatedYPositionFromTop); + return NumberUtils.max(max_character_ypos, max_image_ypos, + maxPathRelatedYPositionFromTop); + } + + public class MyInvoke extends OperatorProcessor { + + public void process(PDFOperator operator, List arguments) + throws IOException { + COSName name = (COSName) arguments.get(0); + + // PDResources res = context.getResources(); + + Map xobjects = context.getXObjects(); + PDXObject xobject = (PDXObject) xobjects.get(name.getName()); + + PDStream stream = xobject.getPDStream(); + COSStream cos_stream = stream.getStream(); + + COSName subtype = (COSName) cos_stream + .getDictionaryObject(COSName.SUBTYPE); + if (subtype.equals(COSName.IMAGE)) { + logger_.debug("XObject Image"); + + Matrix ctm = context.getGraphicsState() + .getCurrentTransformationMatrix(); + logger_.debug("ctm = " + ctm); + + Pos[] coordinates = new Pos[] { new Pos(0, 0, 1), + new Pos(1, 0, 1), new Pos(0, 1, 1), new Pos(1, 1, 1) }; + + Pos[] transformed_coordinates = transtormCoordinates( + coordinates, ctm); + + /********************************************************** + * pdf-as fix: calculating min and max point of an image to look + * where the signature should be placed fix solves problems with + * footer and images and placement of the signature in an image + * only pdf document + **********************************************************/ + + float actual_lowest_point = Float.NaN; + float actual_starting_point = Float.NaN; + + int pageRotation = page.findRotation(); + logger_.debug("PageRotation = " + pageRotation); + if (pageRotation == 0) { + float min_y = findMinY(transformed_coordinates); + logger_.debug("min_y = " + min_y); + float page_height = page.findMediaBox().getHeight(); + logger_.debug("page_height = " + page_height); + + actual_lowest_point = page_height - min_y; + actual_starting_point = page_height + - findMaxY(transformed_coordinates); + } + if (pageRotation == 90) { + float max_x = findMaxX(transformed_coordinates); + logger_.debug("max_x = " + max_x); + float page_width = page.findMediaBox().getWidth(); + logger_.debug("page_width = " + page_width); + + actual_lowest_point = max_x; + actual_starting_point = findMinX(transformed_coordinates); + } + if (pageRotation == 180) { + float min_y = findMinY(transformed_coordinates); + logger_.debug("min_y = " + min_y); + float page_height = page.findMediaBox().getHeight(); + actual_lowest_point = page_height + - findMaxY(transformed_coordinates); + actual_starting_point = page_height - min_y; + } + if (pageRotation == 270) { + float min_x = findMinX(transformed_coordinates); + logger_.debug("min_x = " + min_x); + + float page_width = page.findMediaBox().getWidth(); + logger_.debug("page_width = " + page_width); + + actual_lowest_point = page_width - min_x; + actual_starting_point = page_width + - findMaxX(transformed_coordinates); + } + + logger_.debug("actual_lowest_point = " + actual_lowest_point); + + if (actual_lowest_point > PDFPage.this.effectivePageHeight + && actual_starting_point > PDFPage.this.effectivePageHeight) { + logger_.debug("image is below footer_line"); + return; + } + + if (actual_lowest_point > PDFPage.this.max_image_ypos) { + PDFPage.this.max_image_ypos = actual_lowest_point; + } + + return; + } + + if (xobject instanceof PDXObjectForm) { + PDXObjectForm form = (PDXObjectForm) xobject; + COSStream invoke = (COSStream) form.getCOSObject(); + PDResources pdResources = form.getResources(); + PDPage page = context.getCurrentPage(); + if (pdResources == null) { + pdResources = page.findResources(); + } + + getContext().processSubStream(page, pdResources, invoke); + } + } + } + + public static Pos[] transtormCoordinates(Pos[] coordinates, Matrix m) { + Pos[] transformed = new Pos[coordinates.length]; + for (int i = 0; i < coordinates.length; i++) { + transformed[i] = transtormCoordinate(coordinates[i], m); + } + return transformed; + } + + public static Pos transtormCoordinate(Pos pos, Matrix m) { + Pos transformed = new Pos(); + transformed.x = pos.x * m.getValue(0, 0) + pos.y * m.getValue(1, 0) + + pos.z * m.getValue(2, 0); + transformed.y = pos.x * m.getValue(0, 1) + pos.y * m.getValue(1, 1) + + pos.z * m.getValue(2, 1); + transformed.z = pos.x * m.getValue(0, 2) + pos.y * m.getValue(1, 2) + + pos.z * m.getValue(2, 2); + + logger_.debug(" transformed " + pos + " --> " + transformed); + return transformed; } - public class MyInvoke extends OperatorProcessor - { - - public void process(PDFOperator operator, List arguments) throws IOException - { - COSName name = (COSName) arguments.get(0); - - // PDResources res = context.getResources(); - - Map xobjects = context.getXObjects(); - PDXObject xobject = (PDXObject) xobjects.get(name.getName()); - - PDStream stream = xobject.getPDStream(); - COSStream cos_stream = stream.getStream(); - - COSName subtype = (COSName) cos_stream.getDictionaryObject(COSName.SUBTYPE); - if (subtype.equals(COSName.IMAGE)) - { - logger_.debug("XObject Image"); - - Matrix ctm = context.getGraphicsState().getCurrentTransformationMatrix(); - logger_.debug("ctm = " + ctm); - - Pos [] coordinates = new Pos [] { - new Pos(0, 0, 1), - new Pos(1, 0, 1), - new Pos(0, 1, 1), - new Pos(1, 1, 1) }; - - Pos [] transformed_coordinates = transtormCoordinates(coordinates, ctm); - - /********************************************************** - * pdf-as fix: - * calculating min and max point of an image to look where - * the signature should be placed - * fix solves problems with footer and images and - * placement of the signature in an image only pdf document - **********************************************************/ - - float actual_lowest_point = Float.NaN; - float actual_starting_point = Float.NaN; - - int pageRotation = page.findRotation(); - logger_.debug("PageRotation = " + pageRotation); - if (pageRotation == 0) - { - float min_y = findMinY(transformed_coordinates); - logger_.debug("min_y = " + min_y); - float page_height = page.findMediaBox().getHeight(); - logger_.debug("page_height = " + page_height); - - actual_lowest_point = page_height - min_y; - actual_starting_point = page_height - findMaxY(transformed_coordinates); - } - if (pageRotation == 90) - { - float max_x = findMaxX(transformed_coordinates); - logger_.debug("max_x = " + max_x); - float page_width = page.findMediaBox().getWidth(); - logger_.debug("page_width = " + page_width); - - actual_lowest_point = max_x; - actual_starting_point = findMinX(transformed_coordinates); - } - if (pageRotation == 180) - { - float min_y = findMinY(transformed_coordinates); - logger_.debug("min_y = " + min_y); - float page_height = page.findMediaBox().getHeight(); - actual_lowest_point = page_height - findMaxY(transformed_coordinates); - actual_starting_point = page_height - min_y; - } - if (pageRotation == 270) - { - float min_x = findMinX(transformed_coordinates); - logger_.debug("min_x = " + min_x); - - float page_width = page.findMediaBox().getWidth(); - logger_.debug("page_width = " + page_width); - - actual_lowest_point = page_width - min_x; - actual_starting_point = page_width - findMaxX(transformed_coordinates); - } - - - logger_.debug("actual_lowest_point = " + actual_lowest_point); - - if (actual_lowest_point > PDFPage.this.effectivePageHeight && actual_starting_point > PDFPage.this.effectivePageHeight) - { - logger_.debug("image is below footer_line"); - return; - } - - if (actual_lowest_point > PDFPage.this.max_image_ypos) - { - PDFPage.this.max_image_ypos = actual_lowest_point; - } - - return; - } - - if (xobject instanceof PDXObjectForm) - { - PDXObjectForm form = (PDXObjectForm) xobject; - COSStream invoke = (COSStream) form.getCOSObject(); - PDResources pdResources = form.getResources(); - PDPage page = context.getCurrentPage(); - if (pdResources == null) - { - pdResources = page.findResources(); - } - - getContext().processSubStream(page, pdResources, invoke); - } - } - } - - public static Pos [] transtormCoordinates (Pos [] coordinates, Matrix m) - { - Pos [] transformed = new Pos [coordinates.length]; - for (int i = 0; i < coordinates.length; i++) - { - transformed[i] = transtormCoordinate(coordinates[i], m); - } - return transformed; - } - - public static Pos transtormCoordinate (Pos pos, Matrix m) - { - Pos transformed = new Pos(); - transformed.x = pos.x * m.getValue(0, 0) + pos.y * m.getValue(1, 0) + pos.z * m.getValue(2, 0); - transformed.y = pos.x * m.getValue(0, 1) + pos.y * m.getValue(1, 1) + pos.z * m.getValue(2, 1); - transformed.z = pos.x * m.getValue(0, 2) + pos.y * m.getValue(1, 2) + pos.z * m.getValue(2, 2); - - logger_.debug(" transformed " + pos + " --> " + transformed); - return transformed; - } - - public static float findMinY (Pos [] coordinates) - { - float min = Float.POSITIVE_INFINITY; - for (int i = 0; i < coordinates.length; i++) - { - if (coordinates[i].y < min) - { - min = coordinates[i].y; - } - } - return min; - } + public static float findMinY(Pos[] coordinates) { + float min = Float.POSITIVE_INFINITY; + for (int i = 0; i < coordinates.length; i++) { + if (coordinates[i].y < min) { + min = coordinates[i].y; + } + } + return min; + } public static float findMaxY(Pos[] coordinates) { float max = 0; @@ -495,29 +497,24 @@ public class PDFPage extends PDFTextStripper return max; } - public static float findMaxX (Pos [] coordinates) - { - float max = Float.NEGATIVE_INFINITY; - for (int i = 0; i < coordinates.length; i++) - { - if (coordinates[i].x > max) - { - max = coordinates[i].x; - } - } - return max; - } - public static float findMinX (Pos [] coordinates) - { - float min = Float.POSITIVE_INFINITY; - for (int i = 0; i < coordinates.length; i++) - { - if (coordinates[i].x < min) - { - min = coordinates[i].x; - } - } - return min; - } + public static float findMaxX(Pos[] coordinates) { + float max = Float.NEGATIVE_INFINITY; + for (int i = 0; i < coordinates.length; i++) { + if (coordinates[i].x > max) { + max = coordinates[i].x; + } + } + return max; + } + + public static float findMinX(Pos[] coordinates) { + float min = Float.POSITIVE_INFINITY; + for (int i = 0; i < coordinates.length; i++) { + if (coordinates[i].x < min) { + min = coordinates[i].x; + } + } + return min; + } } \ No newline at end of file -- cgit v1.2.3