aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorknowcenter <knowcenter@7b5415b0-85f9-ee4d-85bd-d5d0c3b42d1c>2007-03-13 12:50:21 +0000
committerknowcenter <knowcenter@7b5415b0-85f9-ee4d-85bd-d5d0c3b42d1c>2007-03-13 12:50:21 +0000
commitf0e215a1fb38f637b809be6f8619732e12a18356 (patch)
tree94e3402a4595d49aa5256d3e6763016bd46b42e6
parent16cf1fdcf1f92579494c53aa25478ab284a8b5d9 (diff)
downloadpdf-as-3-f0e215a1fb38f637b809be6f8619732e12a18356.tar.gz
pdf-as-3-f0e215a1fb38f637b809be6f8619732e12a18356.tar.bz2
pdf-as-3-f0e215a1fb38f637b809be6f8619732e12a18356.zip
rotation of pdf-documents is now handled correctly
git-svn-id: https://joinup.ec.europa.eu/svn/pdf-as/trunk@53 7b5415b0-85f9-ee4d-85bd-d5d0c3b42d1c
-rw-r--r--src/main/java/at/knowcenter/wag/egov/egiz/pdf/TextualSignature.java102
1 files changed, 84 insertions, 18 deletions
diff --git a/src/main/java/at/knowcenter/wag/egov/egiz/pdf/TextualSignature.java b/src/main/java/at/knowcenter/wag/egov/egiz/pdf/TextualSignature.java
index 140a6c3..1a3b56b 100644
--- a/src/main/java/at/knowcenter/wag/egov/egiz/pdf/TextualSignature.java
+++ b/src/main/java/at/knowcenter/wag/egov/egiz/pdf/TextualSignature.java
@@ -23,12 +23,15 @@ import java.io.File;
import java.io.IOException;
import java.io.InputStream;
+import org.apache.log4j.Logger;
import org.pdfbox.pdfparser.PDFParser;
import org.pdfbox.pdmodel.PDDocument;
import org.pdfbox.util.PDFTextStripper;
+import at.knowcenter.wag.egov.egiz.cfg.ConfigLogger;
import at.knowcenter.wag.egov.egiz.cfg.SettingsReader;
import at.knowcenter.wag.egov.egiz.exceptions.PresentableException;
+import at.knowcenter.wag.egov.egiz.framework.signators.TextualSignator_1_0_0;
import com.lowagie.text.Document;
import com.lowagie.text.DocumentException;
@@ -45,7 +48,10 @@ import com.lowagie.text.pdf.PdfWriter;
*/
public class TextualSignature
{
-
+ /**
+ * The logger definition.
+ */
+ private static final Logger logger_ = ConfigLogger.getLogger(TextualSignature.class);
/**
* Extracts the document text from a given pdf.
*
@@ -59,6 +65,7 @@ public class TextualSignature
{
try
{
+ int first_page_rotation = 0;
// logger_.debug("====================================================");
// logger_.debug("extractText:");
@@ -68,28 +75,91 @@ public class TextualSignature
// when being signed, but of course without adding content.
- byte[] bytes = normalizePDF(pdf_stream);
-
- ByteArrayInputStream bais = new ByteArrayInputStream(bytes);
-
+ // byte[] bytes = normalizePDF(pdf_stream);
+ //iText
+
+ PdfReader reader = new PdfReader(pdf_stream);
+
+ ByteArrayOutputStream baos = new ByteArrayOutputStream();
+
+ // For some reason the Reader -> ImportPage -> Writer mechanism produces
+ // problems en masse.
+ // The text extractor may not be able to extract proper text from
+ // documents
+ // created with
+ // this method (although it works when a Table is appended)... very
+ // fragile.
+
+ Document document = new Document();
+
+ PdfWriter writer = PdfWriter.getInstance(document, baos);
+ document.open();
+
+ PdfContentByte cb = writer.getDirectContent();
+ for (int page_num = 1; page_num <= reader.getNumberOfPages(); page_num++)
+ {
+ //Rectangle new_size = reader.getPageSize(page_num);
+ //logger_.info("PageSize with no rotaion: Pagenr:"+page_num+" Size: "+new_size);
+ //document.setPageSize(new_size);
+ Rectangle new_size_withrot =reader.getPageSizeWithRotation(page_num);
+ if (page_num == 1)
+ {
+ //setFirstPageRotation(new_size_withrot.getRotation());
+ first_page_rotation = new_size_withrot.getRotation();
+ //logger_.info("iText first_page_rotation="+new_size_withrot.getRotation());
+ }
+ //logger_.info("iText set PageSize of page:"+page_num+" to: "+new_size_withrot);
+ //document.setPageSize(new_size);
+ document.setPageSize(new_size_withrot);
+ document.newPage();
+
+ PdfImportedPage page = writer.getImportedPage(reader, page_num);
+ // note that this will add an xobject form to the doc.
+ // the xobject form contains the content of the page.
+ cb.addTemplate(page, 0, 0);
+
+ // wprinz: debugging
+ // cb.beginText();
+ // cb.setFontAndSize(BaseFont.createFont(BaseFont.HELVETICA,
+ // BaseFont.CP1252, BaseFont.NOT_EMBEDDED), 14);
+ // cb.showText("page " + page_num);
+ // cb.endText();
+ // wprinz: end debugging
+ }
+
+ document.close();
+
+ // for (int i = 1; i <= reader.getNumberOfPages(); i++)
+ // {
+ // Rectangle rect = reader.getBoxSize(i, "bleed");
+ // logger_.debug("rect[" + i + "] = " + rect);
+ // }
+
+ baos.close();
+ byte[] normalizedPDF = baos.toByteArray();
+
+ ByteArrayInputStream bais = new ByteArrayInputStream(normalizedPDF);
+ //PDFBox-parser
PDFParser parser = new PDFParser(bais);
File temporary_dir = SettingsReader.getTemporaryDirectory();
+ //logger_.info("temporary_dir="+temporary_dir.getAbsolutePath());
parser.setTempDirectory(temporary_dir);
parser.parse();
-
+
PDDocument doc = parser.getPDDocument();
-
+ //System.out.println("pdfBox.getNumberOfPages()"+doc.getNumberOfPages());
+
PDFTextStripper stripper = new PDFTextStripper();
stripper.setSortByPosition(false);
+ stripper.setGetFirstPageRotationFromThis(true);
+ stripper.setFirstPageRotation(first_page_rotation);
+
// stripper.setStartPage(4);
// stripper.setEndPage(4);
String text = stripper.getText(doc);
doc.close();
-
- // logger_.debug("text.length = " + text.length());
- // logger_.debug("====================================================");
-
+ //logger_.debug("TextualSignator extractTextTextual="+text);
return text;
}
@@ -122,10 +192,9 @@ public class TextualSignature
*/
public static byte[] normalizePDF(InputStream input_pdf) throws IOException, DocumentException
{
+ //iText
PdfReader reader = new PdfReader(input_pdf);
-
ByteArrayOutputStream baos = new ByteArrayOutputStream();
-
// For some reason the Reader -> ImportPage -> Writer mechanism produces
// problems en mass.
// The text extractor may not be able to extract proper text from
@@ -142,12 +211,10 @@ public class TextualSignature
PdfContentByte cb = writer.getDirectContent();
for (int page_num = 1; page_num <= reader.getNumberOfPages(); page_num++)
{
- Rectangle new_size = reader.getPageSize(page_num);
- document.setPageSize(new_size);
+ Rectangle new_size_withrot =reader.getPageSizeWithRotation(page_num);
+ document.setPageSize(new_size_withrot);
document.newPage();
-
PdfImportedPage page = writer.getImportedPage(reader, page_num);
-
// note that this will add an xobject form to the doc.
// the xobject form contains the content of the page.
cb.addTemplate(page, 0, 0);
@@ -171,7 +238,6 @@ public class TextualSignature
baos.close();
byte[] normalizedPDF = baos.toByteArray();
-
return normalizedPDF;
}
}