pdf-as-lib maven project files moved to pdf-as-lib

git-svn-id: https://joinup.ec.europa.eu/svn/pdf-as/pdf-as/trunk@926 7b5415b0-85f9-ee4d-85bd-d5d0c3b42d1c
author: tknall <tknall@7b5415b0-85f9-ee4d-85bd-d5d0c3b42d1c> 2013-01-09 15:41:29 +0000
committer: tknall <tknall@7b5415b0-85f9-ee4d-85bd-d5d0c3b42d1c> 2013-01-09 15:41:29 +0000
commit: 535a04fa05f739ec16dd81666e3b0f82dfbd442d (patch)
tree: 0804f301c1a9ceb303a8441b7b29244fc8eb7ff0 /src/main/java/at/knowcenter/wag/egov/egiz/pdf/TextualSignature.java
parent: 1efaf6fd5619dfa95c9d7e8c71eda4c2ffba4998 (diff)
download: pdf-as-3-535a04fa05f739ec16dd81666e3b0f82dfbd442d.tar.gz
pdf-as-3-535a04fa05f739ec16dd81666e3b0f82dfbd442d.tar.bz2
pdf-as-3-535a04fa05f739ec16dd81666e3b0f82dfbd442d.zip
1 files changed, 0 insertions, 282 deletions
diff --git a/src/main/java/at/knowcenter/wag/egov/egiz/pdf/TextualSignature.java b/src/main/java/at/knowcenter/wag/egov/egiz/pdf/TextualSignature.java
deleted file mode 100644
index 35a0768..0000000
--- a/src/main/java/at/knowcenter/wag/egov/egiz/pdf/TextualSignature.java
+++ /dev/null
@@ -1,282 +0,0 @@
-/**
- * <copyright> Copyright 2006 by Know-Center, Graz, Austria </copyright>
- * PDF-AS has been contracted by the E-Government Innovation Center EGIZ, a
- * joint initiative of the Federal Chancellery Austria and Graz University of
- * Technology.
- *
- * Licensed under the EUPL, Version 1.1 or - as soon they will be approved by
- * the European Commission - subsequent versions of the EUPL (the "Licence");
- * You may not use this work except in compliance with the Licence.
- * You may obtain a copy of the Licence at:
- * http://www.osor.eu/eupl/
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the Licence is distributed on an "AS IS" basis,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the Licence for the specific language governing permissions and
- * limitations under the Licence.
- *
- * This product combines work with different licenses. See the "NOTICE" text
- * file for details on the various modules and licenses.
- * The "NOTICE" text file is part of the distribution. Any derivative works
- * that you distribute must include a readable copy of the "NOTICE" text file.
- *
- * $Id: TextualSignature.java,v 1.4 2006/10/31 08:12:45 wprinz Exp $
- */
-package at.knowcenter.wag.egov.egiz.pdf;
-
-import java.io.ByteArrayInputStream;
-import java.io.ByteArrayOutputStream;
-import java.io.File;
-import java.io.IOException;
-
-import org.apache.log4j.Logger;
-import org.pdfbox.pdfparser.PDFParser;
-import org.pdfbox.pdmodel.PDDocument;
-import org.pdfbox.util.PDFTextStripper;
-
-import at.gv.egiz.pdfas.exceptions.ErrorCode;
-import at.gv.egiz.pdfas.framework.input.PdfDataSource;
-import at.gv.egiz.pdfas.performance.PerformanceCounters;
-import at.gv.egiz.pdfas.utils.PDFASUtils;
-import at.knowcenter.wag.egov.egiz.cfg.ConfigLogger;
-import at.knowcenter.wag.egov.egiz.cfg.SettingsReader;
-import at.knowcenter.wag.egov.egiz.exceptions.PDFDocumentException;
-
-import com.lowagie.text.Document;
-import com.lowagie.text.DocumentException;
-import com.lowagie.text.Rectangle;
-import com.lowagie.text.pdf.PdfContentByte;
-import com.lowagie.text.pdf.PdfImportedPage;
-import com.lowagie.text.pdf.PdfReader;
-import com.lowagie.text.pdf.PdfWriter;
-
-/**
- * Contains helper function for textual signatures.
- * 
- * @author wprinz
- */
-public class TextualSignature
-{
-  /**
-  * The logger definition.
-  */
-  private static final Logger logger_ = ConfigLogger.getLogger(TextualSignature.class);
-  
-  /**
-   * Extracts the document text from a given pdf.
-   * 
-   * @param pdf_stream
-   *          The pdf_input stream.
-   * @return Returns the extracted document text.
- * @throws PDFDocumentException 
-   * @throws TextExtractionException
-   *           Forwarded exception.
-   */
-  public static String extractTextTextual(PdfDataSource pdfDataSource, String encoding) throws PDFDocumentException 
-  {
-    PerformanceCounters.textExtractions.increment();
-    
-    try
-    {
-      int first_page_rotation = 0;
-      // logger_.debug("====================================================");
-      // logger_.debug("extractText:");
-
-      // For text extraction, create a temporary object with iText just as the
-      // one
-      // created
-      // when being signed, but of course without adding content.
-
-
-     // byte[] bytes = normalizePDF(pdf_stream);
-    	//iText  
-    	
-        byte [] pdf_data = pdfDataSource.getAsByteArray();
-        PdfReader reader = new PdfReader(pdf_data);
-        PDFASUtils.checkReaderPermissions(reader);
-        //pdf_stream.close();
-
-        // PERF: PDF normalization needs byte array - this is costy
-        ByteArrayOutputStream baos = new ByteArrayOutputStream(4096);
-
-        // For some reason the Reader -> ImportPage -> Writer mechanism produces
-        // problems en mass.
-        // The text extractor may not be able to extract proper text from
-        // documents
-        // created with
-        // this method (although it works when a Table is appended)... very
-        // fragile.
-
-        Document document = new Document();
-
-        PdfWriter writer = PdfWriter.getInstance(document, baos);
-        document.open();
-
-        PdfContentByte cb = writer.getDirectContent();
-        for (int page_num = 1; page_num <= reader.getNumberOfPages(); page_num++)
-        {
-          //Rectangle new_size = reader.getPageSize(page_num);
-          //logger_.info("PageSize with no rotaion: Pagenr:"+page_num+" Size: "+new_size);
-          //document.setPageSize(new_size);
-          Rectangle new_size_withrot =reader.getPageSizeWithRotation(page_num);
-          if (page_num == 1)
-          {
-        	//setFirstPageRotation(new_size_withrot.getRotation());
-        	first_page_rotation = new_size_withrot.getRotation();
-        	//logger_.info("iText first_page_rotation="+new_size_withrot.getRotation());
-          }
-          //logger_.info("iText set PageSize of page:"+page_num+" to: "+new_size_withrot); 
-          //document.setPageSize(new_size);
-          document.setPageSize(new_size_withrot);
-          document.newPage();
-
-          PdfImportedPage page = writer.getImportedPage(reader, page_num);
-          // note that this will add an xobject form to the doc.
-          // the xobject form contains the content of the page.
-          cb.addTemplate(page, 0, 0);
-
-          // wprinz: debugging
-          // cb.beginText();
-          // cb.setFontAndSize(BaseFont.createFont(BaseFont.HELVETICA,
-          // BaseFont.CP1252, BaseFont.NOT_EMBEDDED), 14);
-          // cb.showText("page " + page_num);
-          // cb.endText();
-          // wprinz: end debugging
-        }
-
-        document.close();
-
-        // for (int i = 1; i <= reader.getNumberOfPages(); i++)
-        // {
-        // Rectangle rect = reader.getBoxSize(i, "bleed");
-        // logger_.debug("rect[" + i + "] = " + rect);
-        // }
-
-        baos.close();
-        byte[] normalizedPDF = baos.toByteArray();
-
-      ByteArrayInputStream bais = new ByteArrayInputStream(normalizedPDF);
-      //PDFBox-parser
-      PDFParser parser = new PDFParser(bais);
-      File temporary_dir = SettingsReader.getTemporaryDirectory();
-      //logger_.info("temporary_dir="+temporary_dir.getAbsolutePath());
-      parser.setTempDirectory(temporary_dir);
-      parser.parse();
-      
-      PDDocument doc = parser.getPDDocument();
-      //System.out.println("pdfBox.getNumberOfPages()"+doc.getNumberOfPages());
-      
-      PDFTextStripper stripper = new PDFTextStripper();
-      stripper.setSortByPosition(false);
-      stripper.setGetFirstPageRotationFromThis(true);
-      stripper.setFirstPageRotation(first_page_rotation);
-      
-      // stripper.setStartPage(4);
-      // stripper.setEndPage(4);
-      logger_.debug("TextualSignator extractTextTextual: Begin stripping text");
-      String text;
-      try {
-    	  text = stripper.getText(doc, encoding);
-      } catch (Exception e) {
-    	  throw new PDFDocumentException(ErrorCode.TEXT_EXTRACTION_EXCEPTION, "Unable to extract textual content.", e);
-      }
-      logger_.debug("TextualSignator extractTextTextual: Stripping text ended");
-      
-      doc.close();
-      //logger_.debug("TextualSignator extractTextTextual="+text);
-      return text;
-
-    }
-    catch (IllegalArgumentException e)
-    {
-       throw new PDFDocumentException(ErrorCode.DOCUMENT_CANNOT_BE_READ, e);
-    }
-    catch (IOException e)
-    {
-      throw new PDFDocumentException(ErrorCode.DOCUMENT_CANNOT_BE_READ, e);
-    }
-    catch (DocumentException e)
-    {
-       throw new PDFDocumentException(ErrorCode.DOCUMENT_CANNOT_BE_READ, e);
-    }
-  }
-  
-  /**
-   * Normalizes a given binary PDF to a version PDFbox can handle correctly.
-   * 
-   * <p>
-   * PDFbox has serious problems with documents that use incremental updates or
-   * XObject forms. Therefor use this to remove incremental updates and create a
-   * streamlined document.
-   * </p>
-   * 
-   * <p>
-   * Note that this has nothing to do with text normalization. It just unifies
-   * the PDF documents that are fed into PDFbox for text extraction and page
-   * length determination.
-   * </p>
-   * 
-   * @param input_pdf
-   *          The input pdf to be normalized.
-   * @return Returns the normalized pdf.
-   * @throws IOException
-   * @throws DocumentException
- * @throws PDFDocumentException 
-   */
-  public static byte[] normalizePDF(PdfDataSource pdfDataSource) throws IOException, DocumentException, PDFDocumentException
-  {
-	  //iText
-    byte [] pdf_data = pdfDataSource.getAsByteArray();
-    PdfReader reader = new PdfReader(pdf_data);
-    PDFASUtils.checkReaderPermissions(reader);
-    //input_pdf.close();
-    
-    // PERF: PDF Normalization needs byte array
-    ByteArrayOutputStream baos = new ByteArrayOutputStream();
-    // For some reason the Reader -> ImportPage -> Writer mechanism produces
-    // problems en mass.
-    // The text extractor may not be able to extract proper text from
-    // documents
-    // created with
-    // this method (although it works when a Table is appended)... very
-    // fragile.
-
-    Document document = new Document();
-
-    PdfWriter writer = PdfWriter.getInstance(document, baos);
-    document.open();
-
-    PdfContentByte cb = writer.getDirectContent();
-    for (int page_num = 1; page_num <= reader.getNumberOfPages(); page_num++)
-    {
-      Rectangle new_size_withrot =reader.getPageSizeWithRotation(page_num);
-      document.setPageSize(new_size_withrot);
-      document.newPage();
-      PdfImportedPage page = writer.getImportedPage(reader, page_num);
-      // note that this will add an xobject form to the doc.
-      // the xobject form contains the content of the page.
-      cb.addTemplate(page, 0, 0);
-
-      // wprinz: debugging
-      // cb.beginText();
-      // cb.setFontAndSize(BaseFont.createFont(BaseFont.HELVETICA,
-      // BaseFont.CP1252, BaseFont.NOT_EMBEDDED), 14);
-      // cb.showText("page " + page_num);
-      // cb.endText();
-      // wprinz: end debugging
-    }
-
-    document.close();
-
-    // for (int i = 1; i <= reader.getNumberOfPages(); i++)
-    // {
-    // Rectangle rect = reader.getBoxSize(i, "bleed");
-    // logger_.debug("rect[" + i + "] = " + rect);
-    // }
-
-    baos.close();
-    byte[] normalizedPDF = baos.toByteArray();
-    return normalizedPDF;
-  }
-}
author	tknall <tknall@7b5415b0-85f9-ee4d-85bd-d5d0c3b42d1c>	2013-01-09 15:41:29 +0000
committer	tknall <tknall@7b5415b0-85f9-ee4d-85bd-d5d0c3b42d1c>	2013-01-09 15:41:29 +0000
commit	535a04fa05f739ec16dd81666e3b0f82dfbd442d (patch)
tree	0804f301c1a9ceb303a8441b7b29244fc8eb7ff0 /src/main/java/at/knowcenter/wag/egov/egiz/pdf/TextualSignature.java
parent	1efaf6fd5619dfa95c9d7e8c71eda4c2ffba4998 (diff)
download	pdf-as-3-535a04fa05f739ec16dd81666e3b0f82dfbd442d.tar.gz pdf-as-3-535a04fa05f739ec16dd81666e3b0f82dfbd442d.tar.bz2 pdf-as-3-535a04fa05f739ec16dd81666e3b0f82dfbd442d.zip