From 535a04fa05f739ec16dd81666e3b0f82dfbd442d Mon Sep 17 00:00:00 2001 From: tknall Date: Wed, 9 Jan 2013 15:41:29 +0000 Subject: pdf-as-lib maven project files moved to pdf-as-lib git-svn-id: https://joinup.ec.europa.eu/svn/pdf-as/pdf-as/trunk@926 7b5415b0-85f9-ee4d-85bd-d5d0c3b42d1c --- .../pdfas/impl/input/IncrementalUpdateParser.java | 92 ++++++++++++++++++++++ 1 file changed, 92 insertions(+) create mode 100644 pdf-as-lib/src/main/java/at/gv/egiz/pdfas/impl/input/IncrementalUpdateParser.java (limited to 'pdf-as-lib/src/main/java/at/gv/egiz/pdfas/impl/input/IncrementalUpdateParser.java') diff --git a/pdf-as-lib/src/main/java/at/gv/egiz/pdfas/impl/input/IncrementalUpdateParser.java b/pdf-as-lib/src/main/java/at/gv/egiz/pdfas/impl/input/IncrementalUpdateParser.java new file mode 100644 index 0000000..c1dcc03 --- /dev/null +++ b/pdf-as-lib/src/main/java/at/gv/egiz/pdfas/impl/input/IncrementalUpdateParser.java @@ -0,0 +1,92 @@ +/** + * Copyright 2006 by Know-Center, Graz, Austria + * PDF-AS has been contracted by the E-Government Innovation Center EGIZ, a + * joint initiative of the Federal Chancellery Austria and Graz University of + * Technology. + * + * Licensed under the EUPL, Version 1.1 or - as soon they will be approved by + * the European Commission - subsequent versions of the EUPL (the "Licence"); + * You may not use this work except in compliance with the Licence. + * You may obtain a copy of the Licence at: + * http://www.osor.eu/eupl/ + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the Licence is distributed on an "AS IS" basis, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the Licence for the specific language governing permissions and + * limitations under the Licence. + * + * This product combines work with different licenses. See the "NOTICE" text + * file for details on the various modules and licenses. + * The "NOTICE" text file is part of the distribution. Any derivative works + * that you distribute must include a readable copy of the "NOTICE" text file. + */ +package at.gv.egiz.pdfas.impl.input; + +import java.util.List; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; + +import at.gv.egiz.pdfas.exceptions.ErrorCode; +import at.gv.egiz.pdfas.framework.input.PdfDataSource; +import at.gv.egiz.pdfas.framework.input.PdfDataSourceHolder; +import at.gv.egiz.pdfas.framework.input.correction.Corrector; +import at.gv.egiz.pdfas.framework.input.correction.CorrectorFactory; +import at.gv.egiz.pdfas.impl.input.helper.DataSourceHelper; +import at.knowcenter.wag.egov.egiz.cfg.SettingsReader; +import at.knowcenter.wag.egov.egiz.exceptions.PDFDocumentException; +import at.knowcenter.wag.exactparser.ParseDocument; + +/** + * Parses the given PDF document into a list of Incremental Update blocks. + * @author wprinz + */ +public class IncrementalUpdateParser +{ + /** + * The log. + */ + private static final Log log = LogFactory.getLog(IncrementalUpdateParser.class); + + public static List parsePdfIntoIUBlocks (PdfDataSourceHolder pdfDataSource) throws PDFDocumentException + { + log.trace("parsePdfIntoIUBlocks:"); + + List blocks = null; + try + { + byte [] pdf = DataSourceHelper.convertDataSourceToByteArray(pdfDataSource.getDataSource()); + blocks = ParseDocument.parseDocument(pdf); + } + catch (Exception e) { + try { + log.debug("Error while parsing Document.", e); + boolean tryToCorrect = SettingsReader.getInstance().getSetting("correct_document_on_verify_if_necessary", "false").equals("true"); + if (tryToCorrect) { + log.info("Correcting document..."); + Corrector cor = CorrectorFactory.createCorrector(); + PdfDataSource correctedDS = cor.correctDocument(pdfDataSource.getDataSource()); + log.info("Correction finished."); + byte [] pdf = DataSourceHelper.convertDataSourceToByteArray(correctedDS); + blocks = ParseDocument.parseDocument(pdf); + pdfDataSource.setDataSource(correctedDS); + } else { + makeError(e); + } + + } catch (Exception e1) { + makeError(e); + } + } + + log.trace("parsePdfIntoIUBlocks finished."); + return blocks; + } + + private static void makeError(Exception e) throws PDFDocumentException { + log.error("Error while parsing Document into IU blocks.", e); + throw new PDFDocumentException(ErrorCode.DOCUMENT_CANNOT_BE_READ, e); + } + +} -- cgit v1.2.3