From f891dca529c9dc199114ae4f0857d28812315b11 Mon Sep 17 00:00:00 2001 From: Andreas Fitzek Date: Thu, 3 Apr 2014 15:38:01 +0200 Subject: Fixed Positioning to recognize Annotations --- .../at/knowcenter/wag/egov/egiz/pdf/PDFPage.java | 31 +++++++++++ .../knowcenter/wag/egov/egiz/pdf/PDFUtilities.java | 62 +++++++++++++--------- 2 files changed, 68 insertions(+), 25 deletions(-) (limited to 'pdf-as-lib/src/main/java/at/knowcenter/wag/egov') diff --git a/pdf-as-lib/src/main/java/at/knowcenter/wag/egov/egiz/pdf/PDFPage.java b/pdf-as-lib/src/main/java/at/knowcenter/wag/egov/egiz/pdf/PDFPage.java index 540179b8..e482d50c 100644 --- a/pdf-as-lib/src/main/java/at/knowcenter/wag/egov/egiz/pdf/PDFPage.java +++ b/pdf-as-lib/src/main/java/at/knowcenter/wag/egov/egiz/pdf/PDFPage.java @@ -64,6 +64,7 @@ import org.apache.pdfbox.pdmodel.common.PDRectangle; import org.apache.pdfbox.pdmodel.common.PDStream; import org.apache.pdfbox.pdmodel.graphics.xobject.PDXObject; import org.apache.pdfbox.pdmodel.graphics.xobject.PDXObjectForm; +import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotation; import org.apache.pdfbox.util.Matrix; import org.apache.pdfbox.util.PDFOperator; import org.apache.pdfbox.util.PDFTextStripper; @@ -534,4 +535,34 @@ public class PDFPage extends PDFTextStripper { return min; } + public void processAnnotation(PDAnnotation anno) { + float current_y = anno.getRectangle().getLowerLeftY(); + + int pageRotation = this.getCurrentPage().findRotation(); + // logger_.debug("PageRotation = " + pageRotation); + if (pageRotation == 0) { + float page_height = this.getCurrentPage().findMediaBox().getHeight(); + current_y = page_height - anno.getRectangle().getLowerLeftY(); + } + if (pageRotation == 90) { + current_y = anno.getRectangle().getLowerLeftX(); + } + if (pageRotation == 180) { + current_y = anno.getRectangle().getUpperRightY(); + } + if (pageRotation == 270) { + float page_height = this.getCurrentPage().findMediaBox().getHeight(); + current_y = page_height 
- anno.getRectangle().getUpperRightX(); + } + + if (current_y > this.effectivePageHeight) { + return; + } + + // store ypos of the char if it is not empty + if (current_y > this.max_character_ypos) { + this.max_character_ypos = current_y; + } + } + } diff --git a/pdf-as-lib/src/main/java/at/knowcenter/wag/egov/egiz/pdf/PDFUtilities.java b/pdf-as-lib/src/main/java/at/knowcenter/wag/egov/egiz/pdf/PDFUtilities.java index c68f6229..3f5e67fc 100644 --- a/pdf-as-lib/src/main/java/at/knowcenter/wag/egov/egiz/pdf/PDFUtilities.java +++ b/pdf-as-lib/src/main/java/at/knowcenter/wag/egov/egiz/pdf/PDFUtilities.java @@ -49,47 +49,59 @@ package at.knowcenter.wag.egov.egiz.pdf; import java.io.IOException; +import java.util.Iterator; import java.util.List; import org.apache.pdfbox.cos.COSStream; import org.apache.pdfbox.pdmodel.PDDocument; import org.apache.pdfbox.pdmodel.PDPage; import org.apache.pdfbox.pdmodel.PDResources; +import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotation; import at.gv.egiz.pdfas.common.exceptions.PDFIOException; - /** * Contains useful helpers for accessing PDF documents. 
* * @author wprinz * @author mruhmer */ -public abstract class PDFUtilities -{ - public static float calculatePageLength(PDDocument document, int page, float effectivePageHeight, /*int pagerotation,*/ boolean legacy32) throws PDFIOException { - //int last_page_id = document.getNumberOfPages(); - List allPages = document.getDocumentCatalog().getAllPages(); - PDPage pdpage = (PDPage) allPages.get(page); - //pdpage.setRotation(pagerotation); - return calculatePageLength(pdpage, effectivePageHeight, legacy32); +public abstract class PDFUtilities { + public static float calculatePageLength(PDDocument document, int page, + float effectivePageHeight, /* int pagerotation, */boolean legacy32) + throws PDFIOException { + // int last_page_id = document.getNumberOfPages(); + List allPages = document.getDocumentCatalog().getAllPages(); + PDPage pdpage = (PDPage) allPages.get(page); + // pdpage.setRotation(pagerotation); + return calculatePageLength(pdpage, effectivePageHeight, legacy32); } - public static float calculatePageLength(PDPage page, float effectivePageHeight, boolean legacy32) throws PDFIOException - { - try{ - PDFPage my_page = new PDFPage(effectivePageHeight, legacy32); - PDResources resources = page.findResources(); - COSStream stream = page.getContents().getStream(); - //List articles = page.getThreadBeads(); - //my_page.processMyPage(page); - my_page.processStream(page, resources, stream); - return my_page.getMaxPageLength(); - } - catch (IOException e) - { - throw new PDFIOException("error.pdf.stamp.11", e); - } - } + public static float calculatePageLength(PDPage page, + float effectivePageHeight, boolean legacy32) throws PDFIOException { + try { + PDFPage my_page = new PDFPage(effectivePageHeight, legacy32); + PDResources resources = page.findResources(); + COSStream stream = page.getContents().getStream(); + // List articles = page.getThreadBeads(); + // my_page.processMyPage(page); + my_page.processStream(page, resources, stream); + + if (!legacy32) { + 
Iterator<PDAnnotation> annotationsIt = page.getAnnotations() + .iterator(); + + while (annotationsIt.hasNext()) { + PDAnnotation annotation = annotationsIt.next(); + if(!annotation.isInvisible()) { + my_page.processAnnotation(annotation); + } + } + } + return my_page.getMaxPageLength(); + } catch (IOException e) { + throw new PDFIOException("error.pdf.stamp.11", e); + } + } } -- cgit v1.2.3