From 535a04fa05f739ec16dd81666e3b0f82dfbd442d Mon Sep 17 00:00:00 2001 From: tknall Date: Wed, 9 Jan 2013 15:41:29 +0000 Subject: pdf-as-lib maven project files moved to pdf-as-lib git-svn-id: https://joinup.ec.europa.eu/svn/pdf-as/pdf-as/trunk@926 7b5415b0-85f9-ee4d-85bd-d5d0c3b42d1c --- .../impl/input/correction/ExternalCorrector.java | 283 +++++++++++++++++++++ 1 file changed, 283 insertions(+) create mode 100644 pdf-as-lib/src/main/java/at/gv/egiz/pdfas/impl/input/correction/ExternalCorrector.java (limited to 'pdf-as-lib/src/main/java/at/gv/egiz/pdfas/impl/input/correction/ExternalCorrector.java') diff --git a/pdf-as-lib/src/main/java/at/gv/egiz/pdfas/impl/input/correction/ExternalCorrector.java b/pdf-as-lib/src/main/java/at/gv/egiz/pdfas/impl/input/correction/ExternalCorrector.java new file mode 100644 index 0000000..efd094a --- /dev/null +++ b/pdf-as-lib/src/main/java/at/gv/egiz/pdfas/impl/input/correction/ExternalCorrector.java @@ -0,0 +1,283 @@ +/** + * Copyright 2006 by Know-Center, Graz, Austria + * PDF-AS has been contracted by the E-Government Innovation Center EGIZ, a + * joint initiative of the Federal Chancellery Austria and Graz University of + * Technology. + * + * Licensed under the EUPL, Version 1.1 or - as soon they will be approved by + * the European Commission - subsequent versions of the EUPL (the "Licence"); + * You may not use this work except in compliance with the Licence. + * You may obtain a copy of the Licence at: + * http://www.osor.eu/eupl/ + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the Licence is distributed on an "AS IS" basis, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the Licence for the specific language governing permissions and + * limitations under the Licence. + * + * This product combines work with different licenses. See the "NOTICE" text + * file for details on the various modules and licenses. 
+ * The "NOTICE" text file is part of the distribution. Any derivative works + * that you distribute must include a readable copy of the "NOTICE" text file. + */ +package at.gv.egiz.pdfas.impl.input.correction; + +import java.io.BufferedReader; +import java.io.File; +import java.io.IOException; +import java.io.InputStreamReader; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; + +import at.gv.egiz.pdfas.exceptions.ErrorCode; +import at.gv.egiz.pdfas.exceptions.framework.CorrectorException; +import at.gv.egiz.pdfas.framework.input.PdfDataSource; +import at.gv.egiz.pdfas.framework.input.correction.Corrector; +import at.gv.egiz.pdfas.impl.input.FileBased; +import at.gv.egiz.pdfas.impl.input.FileBasedPdfDataSourceImpl; +import at.gv.egiz.pdfas.utils.TempDirHelper; +import at.knowcenter.wag.egov.egiz.cfg.SettingsReader; +import at.knowcenter.wag.egov.egiz.exceptions.SettingNotFoundException; +import at.knowcenter.wag.egov.egiz.exceptions.SettingsException; + +/** + * Corrects the document using an external commandline tool. + * + *

+ * Process.destroy after a certain timeout does not work if the executable is a + * Windows batch file. + *

+ * + * @author wprinz + */ +public class ExternalCorrector implements Corrector +{ + public static final String INPUT_DOCUMENT_REPLACE = "##input_document##"; + + public static final String OUTPUT_DOCUMENT_REPLACE = "##output_document##"; + + public static final String COMMANDLINE_KEY = "external_corrector_commandline"; + + public static final String TIMEOUT_KEY = "external_corrector_timeout"; + + protected static final int DEFAULT_TIMEOUT = 1000; + + /** + * The log. + */ + private static final Log log = LogFactory.getLog(ExternalCorrector.class); + + /** + * @see at.gv.egiz.pdfas.framework.input.correction.Corrector#correctDocument(at.gv.egiz.pdfas.framework.input.PdfDataSource) + */ + public PdfDataSource correctDocument(PdfDataSource document) throws CorrectorException + { + + try + { + String outName = null; + File in = null; + if (document instanceof FileBased) + { + FileBased fb = (FileBased) document; + in = fb.getFile(); + outName = in.getName() + "_correction_outfile.pdf"; + } + else + { + in = TempDirHelper.placeInputIntoTempDirFile(document.createInputStream(), "correction_infile.pdf"); + outName = "correction_outfile.pdf"; + } + + File out = TempDirHelper.formTempFile(outName); + + String commandline = SettingsReader.getInstance().getSetting(COMMANDLINE_KEY); + long timeout = SettingsReader.getInstance().getIntSetting(TIMEOUT_KEY, DEFAULT_TIMEOUT); + + String inF = in.getAbsolutePath(); + commandline = commandline.replaceFirst(INPUT_DOCUMENT_REPLACE, inF.replaceAll("\\\\", "\\\\\\\\")); + String outF = out.getAbsolutePath(); + commandline = commandline.replaceFirst(OUTPUT_DOCUMENT_REPLACE, outF.replaceAll("\\\\", "\\\\\\\\")); + + log.info(commandline); + + Process p = Runtime.getRuntime().exec(commandline); + + Thread outT = null; + Thread errT = null; + TimeoutThread tt = null; + BufferedReader outReader = null; + BufferedReader errReader = null; + + try + { + outReader = new BufferedReader(new InputStreamReader(p.getInputStream())); + errReader = 
new BufferedReader(new InputStreamReader(p.getErrorStream())); + + outT = new Thread(new ReaderPrinter(outReader, "STDOUT")); + errT = new Thread(new ReaderPrinter(errReader, "STDERR")); + + tt = new TimeoutThread(p, timeout, new Thread[] { outT, errT }); + + tt.start(); + outT.start(); + errT.start(); + + log.trace("Joining the STDOUT thread..."); + outT.join(); + log.trace("STDOUT thread joined."); + log.trace("Joining the STDERR thread..."); + errT.join(); + log.trace("STDERR thread joined."); + + log.trace("Waiting for process to end..."); + p.waitFor(); + log.trace("process has ended."); + + log.trace("Interrupting timeout thread..."); + tt.interrupt(); + log.trace("timeout thread has been interrupted."); + + int exitValue = p.exitValue(); + log.info("External Corrector exited with: " + exitValue); + + if (tt.isTimedOut()) + { + throw new CorrectorException(ErrorCode.EXTERNAL_CORRECTOR_TIMEOUT_REACHED, "The external corrector process timed out. timeout = " + timeout); + } + + PdfDataSource ds = new FileBasedPdfDataSourceImpl(out, (int) out.length()); + return ds; + } + finally + { + if (outT != null) + { + outT.interrupt(); + } + if (errT != null) + { + errT.interrupt(); + } + if (tt != null) + { + tt.interrupt(); + } + if (outReader != null) + { + outReader.close(); + } + if (errReader != null) + { + errReader.close(); + } + } + + } + catch (IOException e) + { + throw new CorrectorException(ErrorCode.CORRECTOR_EXCEPTION, e); + } + catch (InterruptedException e) + { + throw new CorrectorException(ErrorCode.CORRECTOR_EXCEPTION, e); + } + catch (SettingNotFoundException e) + { + throw new CorrectorException(ErrorCode.CORRECTOR_EXCEPTION, e); + } + catch (SettingsException e) + { + throw new CorrectorException(ErrorCode.CORRECTOR_EXCEPTION, e); + } + } + + protected static class ReaderPrinter implements Runnable + { + protected BufferedReader reader = null; + + protected String streamName = null; + + public ReaderPrinter(BufferedReader reader, String streamName) 
+ { + this.reader = reader; + this.streamName = streamName; + } + + public void run() + { + try + { + String line = null; + + while ((line = this.reader.readLine()) != null) + { + if (line != null) + { + log.info(streamName + ": " + line); + } + } + } + catch (IOException e) + { + log.error(e.getMessage(), e); + } + } + } + + protected static class TimeoutThread extends Thread + { + protected Process proc = null; + + protected long timeout = -1; + + protected boolean ranIntoTimeout = false; + + protected Thread[] threads; + + protected BufferedReader errReader; + + public TimeoutThread(Process proc, long timeout, Thread[] threadsToInterrupt) + { + this.proc = proc; + this.timeout = timeout; + this.threads = threadsToInterrupt; + } + + public void run() + { + try + { + Thread.sleep(this.timeout); + log.info("The timeout was reached. Destroying the process."); + proc.destroy(); + ranIntoTimeout = true; + log.trace("destroy has been called."); + log.trace("Interrupting threads..."); + for (int i = 0; i < this.threads.length; i++) + { + this.threads[i].interrupt(); + } + log.trace("threads have been interrupted."); + } + catch (InterruptedException e) + { + log.debug("Timeout thread interrupted. This means that the process finished successfully."); + } + } + + /** + * Tells, if the process ran into the timeout. + * + * @return Returns true if the timeout was reached. Returns false if the + * timeout was not reached. + */ + public boolean isTimedOut() + { + return this.ranIntoTimeout; + } + } + +} -- cgit v1.2.3