From 6031ba41fa8c054ebb1e86b1890c75d4df34c6ac Mon Sep 17 00:00:00 2001 From: pdanner Date: Mon, 29 Nov 2010 14:23:43 +0000 Subject: added correction on verify git-svn-id: https://joinup.ec.europa.eu/svn/pdf-as/trunk@629 7b5415b0-85f9-ee4d-85bd-d5d0c3b42d1c --- .../pdfas/impl/input/IncrementalUpdateParser.java | 43 +++++++++++++++++----- 1 file changed, 33 insertions(+), 10 deletions(-) diff --git a/src/main/java/at/gv/egiz/pdfas/impl/input/IncrementalUpdateParser.java b/src/main/java/at/gv/egiz/pdfas/impl/input/IncrementalUpdateParser.java index b4c2bef..096bb0b 100644 --- a/src/main/java/at/gv/egiz/pdfas/impl/input/IncrementalUpdateParser.java +++ b/src/main/java/at/gv/egiz/pdfas/impl/input/IncrementalUpdateParser.java @@ -5,13 +5,16 @@ package at.gv.egiz.pdfas.impl.input; import java.util.List; -import at.gv.egiz.pdfas.impl.input.helper.DataSourceHelper; -import at.gv.egiz.pdfas.exceptions.ErrorCode; -import at.gv.egiz.pdfas.framework.input.PdfDataSource; - import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; +import at.gv.egiz.pdfas.exceptions.ErrorCode; +import at.gv.egiz.pdfas.framework.input.PdfDataSource; +import at.gv.egiz.pdfas.framework.input.PdfDataSourceHolder; +import at.gv.egiz.pdfas.framework.input.correction.Corrector; +import at.gv.egiz.pdfas.framework.input.correction.CorrectorFactory; +import at.gv.egiz.pdfas.impl.input.helper.DataSourceHelper; +import at.knowcenter.wag.egov.egiz.cfg.SettingsReader; import at.knowcenter.wag.egov.egiz.exceptions.PDFDocumentException; import at.knowcenter.wag.exactparser.ParseDocument; @@ -26,24 +29,44 @@ public class IncrementalUpdateParser */ private static final Log log = LogFactory.getLog(IncrementalUpdateParser.class); - public static List parsePdfIntoIUBlocks (PdfDataSource pdfDataSource) throws PDFDocumentException + public static List parsePdfIntoIUBlocks (PdfDataSourceHolder pdfDataSource) throws PDFDocumentException { log.trace("parsePdfIntoIUBlocks:"); List blocks = null; try { - byte [] pdf = DataSourceHelper.convertDataSourceToByteArray(pdfDataSource); + byte [] pdf = DataSourceHelper.convertDataSourceToByteArray(pdfDataSource.getDataSource()); blocks = ParseDocument.parseDocument(pdf); } - catch (Exception e) - { - log.error("Error while parsing Document into IU blocks.", e); - throw new PDFDocumentException(ErrorCode.DOCUMENT_CANNOT_BE_READ, e); + catch (Exception e) { + try { + log.debug("Error while parsing Document.", e); + boolean tryToCorrect = SettingsReader.getInstance().getSetting("correct_document_on_verify_if_necessary", "false").equals("true"); + if (tryToCorrect) { + log.info("Correcting document..."); + Corrector cor = CorrectorFactory.createCorrector(); + PdfDataSource correctedDS = cor.correctDocument(pdfDataSource.getDataSource()); + log.info("Correction finished."); + byte [] pdf = DataSourceHelper.convertDataSourceToByteArray(correctedDS); + blocks = ParseDocument.parseDocument(pdf); + pdfDataSource.setDataSource(correctedDS); + } else { + makeError(e); + } + + } catch (Exception e1) { + makeError(e); + } } log.trace("parsePdfIntoIUBlocks finished."); return blocks; } + private static void makeError(Exception e) throws PDFDocumentException { + log.error("Error while parsing Document into IU blocks.", e); + throw new PDFDocumentException(ErrorCode.DOCUMENT_CANNOT_BE_READ, e); + } + } -- cgit v1.2.3