From f0e215a1fb38f637b809be6f8619732e12a18356 Mon Sep 17 00:00:00 2001 From: knowcenter Date: Tue, 13 Mar 2007 12:50:21 +0000 Subject: rotion of pdf-documents now is handled correctly git-svn-id: https://joinup.ec.europa.eu/svn/pdf-as/trunk@53 7b5415b0-85f9-ee4d-85bd-d5d0c3b42d1c --- .../wag/egov/egiz/pdf/TextualSignature.java | 102 +++++++++++++++++---- 1 file changed, 84 insertions(+), 18 deletions(-) (limited to 'src/main/java/at/knowcenter/wag/egov') diff --git a/src/main/java/at/knowcenter/wag/egov/egiz/pdf/TextualSignature.java b/src/main/java/at/knowcenter/wag/egov/egiz/pdf/TextualSignature.java index 140a6c3..1a3b56b 100644 --- a/src/main/java/at/knowcenter/wag/egov/egiz/pdf/TextualSignature.java +++ b/src/main/java/at/knowcenter/wag/egov/egiz/pdf/TextualSignature.java @@ -23,12 +23,15 @@ import java.io.File; import java.io.IOException; import java.io.InputStream; +import org.apache.log4j.Logger; import org.pdfbox.pdfparser.PDFParser; import org.pdfbox.pdmodel.PDDocument; import org.pdfbox.util.PDFTextStripper; +import at.knowcenter.wag.egov.egiz.cfg.ConfigLogger; import at.knowcenter.wag.egov.egiz.cfg.SettingsReader; import at.knowcenter.wag.egov.egiz.exceptions.PresentableException; +import at.knowcenter.wag.egov.egiz.framework.signators.TextualSignator_1_0_0; import com.lowagie.text.Document; import com.lowagie.text.DocumentException; @@ -45,7 +48,10 @@ import com.lowagie.text.pdf.PdfWriter; */ public class TextualSignature { - + /** + * The logger definition. + */ + private static final Logger logger_ = ConfigLogger.getLogger(TextualSignature.class); /** * Extracts the document text from a given pdf. * @@ -59,6 +65,7 @@ public class TextualSignature { try { + int first_page_rotation = 0; // logger_.debug("===================================================="); // logger_.debug("extractText:"); @@ -68,28 +75,91 @@ public class TextualSignature // when being signed, but of course without adding content. - byte[] bytes = normalizePDF(pdf_stream); - - ByteArrayInputStream bais = new ByteArrayInputStream(bytes); - + // byte[] bytes = normalizePDF(pdf_stream); + //iText + + PdfReader reader = new PdfReader(pdf_stream); + + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + + // For some reason the Reader -> ImportPage -> Writer mechanism produces + // problems en mass. + // The text extractor may not be able to extract proper text from + // documents + // created with + // this method (although it works when a Table is appended)... very + // fragile. + + Document document = new Document(); + + PdfWriter writer = PdfWriter.getInstance(document, baos); + document.open(); + + PdfContentByte cb = writer.getDirectContent(); + for (int page_num = 1; page_num <= reader.getNumberOfPages(); page_num++) + { + //Rectangle new_size = reader.getPageSize(page_num); + //logger_.info("PageSize with no rotaion: Pagenr:"+page_num+" Size: "+new_size); + //document.setPageSize(new_size); + Rectangle new_size_withrot =reader.getPageSizeWithRotation(page_num); + if (page_num == 1) + { + //setFirstPageRotation(new_size_withrot.getRotation()); + first_page_rotation = new_size_withrot.getRotation(); + //logger_.info("iText first_page_rotation="+new_size_withrot.getRotation()); + } + //logger_.info("iText set PageSize of page:"+page_num+" to: "+new_size_withrot); + //document.setPageSize(new_size); + document.setPageSize(new_size_withrot); + document.newPage(); + + PdfImportedPage page = writer.getImportedPage(reader, page_num); + // note that this will add an xobject form to the doc. + // the xobject form contains the content of the page. + cb.addTemplate(page, 0, 0); + + // wprinz: debugging + // cb.beginText(); + // cb.setFontAndSize(BaseFont.createFont(BaseFont.HELVETICA, + // BaseFont.CP1252, BaseFont.NOT_EMBEDDED), 14); + // cb.showText("page " + page_num); + // cb.endText(); + // wprinz: end debugging + } + + document.close(); + + // for (int i = 1; i <= reader.getNumberOfPages(); i++) + // { + // Rectangle rect = reader.getBoxSize(i, "bleed"); + // logger_.debug("rect[" + i + "] = " + rect); + // } + + baos.close(); + byte[] normalizedPDF = baos.toByteArray(); + + ByteArrayInputStream bais = new ByteArrayInputStream(normalizedPDF); + //PDFBox-parser PDFParser parser = new PDFParser(bais); File temporary_dir = SettingsReader.getTemporaryDirectory(); + //logger_.info("temporary_dir="+temporary_dir.getAbsolutePath()); parser.setTempDirectory(temporary_dir); parser.parse(); - + PDDocument doc = parser.getPDDocument(); - + //System.out.println("pdfBox.getNumberOfPages()"+doc.getNumberOfPages()); + PDFTextStripper stripper = new PDFTextStripper(); stripper.setSortByPosition(false); + stripper.setGetFirstPageRotationFromThis(true); + stripper.setFirstPageRotation(first_page_rotation); + // stripper.setStartPage(4); // stripper.setEndPage(4); String text = stripper.getText(doc); doc.close(); - - // logger_.debug("text.length = " + text.length()); - // logger_.debug("===================================================="); - + //logger_.debug("TextualSignator extractTextTextual="+text); return text; } @@ -122,10 +192,9 @@ public class TextualSignature */ public static byte[] normalizePDF(InputStream input_pdf) throws IOException, DocumentException { + //iText PdfReader reader = new PdfReader(input_pdf); - ByteArrayOutputStream baos = new ByteArrayOutputStream(); - // For some reason the Reader -> ImportPage -> Writer mechanism produces // problems en mass. // The text extractor may not be able to extract proper text from @@ -142,12 +211,10 @@ public class TextualSignature PdfContentByte cb = writer.getDirectContent(); for (int page_num = 1; page_num <= reader.getNumberOfPages(); page_num++) { - Rectangle new_size = reader.getPageSize(page_num); - document.setPageSize(new_size); + Rectangle new_size_withrot =reader.getPageSizeWithRotation(page_num); + document.setPageSize(new_size_withrot); document.newPage(); - PdfImportedPage page = writer.getImportedPage(reader, page_num); - // note that this will add an xobject form to the doc. // the xobject form contains the content of the page. cb.addTemplate(page, 0, 0); @@ -171,7 +238,6 @@ public class TextualSignature baos.close(); byte[] normalizedPDF = baos.toByteArray(); - return normalizedPDF; } } -- cgit v1.2.3