aboutsummaryrefslogtreecommitdiff
path: root/pdf-as-lib/src/main/java/at/gv/egiz/pdfas/impl/input/correction/ExternalCorrector.java
diff options
context:
space:
mode:
Diffstat (limited to 'pdf-as-lib/src/main/java/at/gv/egiz/pdfas/impl/input/correction/ExternalCorrector.java')
-rw-r--r--pdf-as-lib/src/main/java/at/gv/egiz/pdfas/impl/input/correction/ExternalCorrector.java283
1 files changed, 283 insertions, 0 deletions
diff --git a/pdf-as-lib/src/main/java/at/gv/egiz/pdfas/impl/input/correction/ExternalCorrector.java b/pdf-as-lib/src/main/java/at/gv/egiz/pdfas/impl/input/correction/ExternalCorrector.java
new file mode 100644
index 0000000..efd094a
--- /dev/null
+++ b/pdf-as-lib/src/main/java/at/gv/egiz/pdfas/impl/input/correction/ExternalCorrector.java
@@ -0,0 +1,283 @@
+/**
+ * <copyright> Copyright 2006 by Know-Center, Graz, Austria </copyright>
+ * PDF-AS has been contracted by the E-Government Innovation Center EGIZ, a
+ * joint initiative of the Federal Chancellery Austria and Graz University of
+ * Technology.
+ *
+ * Licensed under the EUPL, Version 1.1 or - as soon they will be approved by
+ * the European Commission - subsequent versions of the EUPL (the "Licence");
+ * You may not use this work except in compliance with the Licence.
+ * You may obtain a copy of the Licence at:
+ * http://www.osor.eu/eupl/
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the Licence is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the Licence for the specific language governing permissions and
+ * limitations under the Licence.
+ *
+ * This product combines work with different licenses. See the "NOTICE" text
+ * file for details on the various modules and licenses.
+ * The "NOTICE" text file is part of the distribution. Any derivative works
+ * that you distribute must include a readable copy of the "NOTICE" text file.
+ */
+package at.gv.egiz.pdfas.impl.input.correction;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.IOException;
+import java.io.InputStreamReader;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+
+import at.gv.egiz.pdfas.exceptions.ErrorCode;
+import at.gv.egiz.pdfas.exceptions.framework.CorrectorException;
+import at.gv.egiz.pdfas.framework.input.PdfDataSource;
+import at.gv.egiz.pdfas.framework.input.correction.Corrector;
+import at.gv.egiz.pdfas.impl.input.FileBased;
+import at.gv.egiz.pdfas.impl.input.FileBasedPdfDataSourceImpl;
+import at.gv.egiz.pdfas.utils.TempDirHelper;
+import at.knowcenter.wag.egov.egiz.cfg.SettingsReader;
+import at.knowcenter.wag.egov.egiz.exceptions.SettingNotFoundException;
+import at.knowcenter.wag.egov.egiz.exceptions.SettingsException;
+
+/**
+ * Corrects the document using an extrenal commandline tool.
+ *
+ * <p>
+ * Process.destroy after a certain timeout does not work if the executable is a
+ * Windows batch file.
+ * </p>
+ *
+ * @author wprinz
+ */
+public class ExternalCorrector implements Corrector
+{
+ public static final String INPUT_DOCUMENT_REPLACE = "##input_document##";
+
+ public static final String OUTPUT_DOCUMENT_REPLACE = "##output_document##";
+
+ public static final String COMMANDLINE_KEY = "external_corrector_commandline";
+
+ public static final String TIMEOUT_KEY = "external_corrector_timeout";
+
+ protected static final int DEFAULT_TIMEOUT = 1000;
+
+ /**
+ * The log.
+ */
+ private static final Log log = LogFactory.getLog(ExternalCorrector.class);
+
+ /**
+ * @see at.gv.egiz.pdfas.framework.input.correction.Corrector#correctDocument(at.gv.egiz.pdfas.framework.input.PdfDataSource)
+ */
+ public PdfDataSource correctDocument(PdfDataSource document) throws CorrectorException
+ {
+
+ try
+ {
+ String outName = null;
+ File in = null;
+ if (document instanceof FileBased)
+ {
+ FileBased fb = (FileBased) document;
+ in = fb.getFile();
+ outName = in.getName() + "_correction_outfile.pdf";
+ }
+ else
+ {
+ in = TempDirHelper.placeInputIntoTempDirFile(document.createInputStream(), "correction_infile.pdf");
+ outName = "correction_outfile.pdf";
+ }
+
+ File out = TempDirHelper.formTempFile(outName);
+
+ String commandline = SettingsReader.getInstance().getSetting(COMMANDLINE_KEY);
+ long timeout = SettingsReader.getInstance().getIntSetting(TIMEOUT_KEY, DEFAULT_TIMEOUT);
+
+ String inF = in.getAbsolutePath();
+ commandline = commandline.replaceFirst(INPUT_DOCUMENT_REPLACE, inF.replaceAll("\\\\", "\\\\\\\\"));
+ String outF = out.getAbsolutePath();
+ commandline = commandline.replaceFirst(OUTPUT_DOCUMENT_REPLACE, outF.replaceAll("\\\\", "\\\\\\\\"));
+
+ log.info(commandline);
+
+ Process p = Runtime.getRuntime().exec(commandline);
+
+ Thread outT = null;
+ Thread errT = null;
+ TimeoutThread tt = null;
+ BufferedReader outReader = null;
+ BufferedReader errReader = null;
+
+ try
+ {
+ outReader = new BufferedReader(new InputStreamReader(p.getInputStream()));
+ errReader = new BufferedReader(new InputStreamReader(p.getErrorStream()));
+
+ outT = new Thread(new ReaderPrinter(outReader, "STDOUT"));
+ errT = new Thread(new ReaderPrinter(errReader, "STDERR"));
+
+ tt = new TimeoutThread(p, timeout, new Thread[] { outT, errT });
+
+ tt.start();
+ outT.start();
+ errT.start();
+
+ log.trace("Joining the STDOUT thread...");
+ outT.join();
+ log.trace("STDOUT thread joined.");
+ log.trace("Joining the STDERR thread...");
+ errT.join();
+ log.trace("STDERR thread joined.");
+
+ log.trace("Waiting for process to end...");
+ p.waitFor();
+ log.trace("process has ended.");
+
+ log.trace("Interrupting timeout thread...");
+ tt.interrupt();
+ log.trace("timeout thread has been interrupted.");
+
+ int exitValue = p.exitValue();
+ log.info("External Corrector exited with: " + exitValue);
+
+ if (tt.isTimedOut())
+ {
+ throw new CorrectorException(ErrorCode.EXTERNAL_CORRECTOR_TIMEOUT_REACHED, "The external corrector process timed out. timeout = " + timeout);
+ }
+
+ PdfDataSource ds = new FileBasedPdfDataSourceImpl(out, (int) out.length());
+ return ds;
+ }
+ finally
+ {
+ if (outT != null)
+ {
+ outT.interrupt();
+ }
+ if (errT != null)
+ {
+ errT.interrupt();
+ }
+ if (tt != null)
+ {
+ tt.interrupt();
+ }
+ if (outReader != null)
+ {
+ outReader.close();
+ }
+ if (errReader != null)
+ {
+ errReader.close();
+ }
+ }
+
+ }
+ catch (IOException e)
+ {
+ throw new CorrectorException(ErrorCode.CORRECTOR_EXCEPTION, e);
+ }
+ catch (InterruptedException e)
+ {
+ throw new CorrectorException(ErrorCode.CORRECTOR_EXCEPTION, e);
+ }
+ catch (SettingNotFoundException e)
+ {
+ throw new CorrectorException(ErrorCode.CORRECTOR_EXCEPTION, e);
+ }
+ catch (SettingsException e)
+ {
+ throw new CorrectorException(ErrorCode.CORRECTOR_EXCEPTION, e);
+ }
+ }
+
+ protected static class ReaderPrinter implements Runnable
+ {
+ protected BufferedReader reader = null;
+
+ protected String streamName = null;
+
+ public ReaderPrinter(BufferedReader reader, String streamName)
+ {
+ this.reader = reader;
+ this.streamName = streamName;
+ }
+
+ public void run()
+ {
+ try
+ {
+ String line = null;
+
+ while ((line = this.reader.readLine()) != null)
+ {
+ if (line != null)
+ {
+ log.info(streamName + ": " + line);
+ }
+ }
+ }
+ catch (IOException e)
+ {
+ log.error(e.getMessage(), e);
+ }
+ }
+ }
+
+ protected static class TimeoutThread extends Thread
+ {
+ protected Process proc = null;
+
+ protected long timeout = -1;
+
+ protected boolean ranIntoTimeout = false;
+
+ protected Thread[] threads;
+
+ protected BufferedReader errReader;
+
+ public TimeoutThread(Process proc, long timeout, Thread[] threadsToInterrupt)
+ {
+ this.proc = proc;
+ this.timeout = timeout;
+ this.threads = threadsToInterrupt;
+ }
+
+ public void run()
+ {
+ try
+ {
+ Thread.sleep(this.timeout);
+ log.info("The timeout was reached. Destroying the process.");
+ proc.destroy();
+ ranIntoTimeout = true;
+ log.trace("destroy has been called.");
+ log.trace("Interrupting threads...");
+ for (int i = 0; i < this.threads.length; i++)
+ {
+ this.threads[i].interrupt();
+ }
+ log.trace("threads have been interrupted.");
+ }
+ catch (InterruptedException e)
+ {
+ log.debug("Timeout thread interrupted. This means that the process finished successfully.");
+ }
+ }
+
+ /**
+ * Tells, if the process ran into the timeout.
+ *
+ * @return Returns true if the timeout was reached. Returns false if the
+ * timeout was not reached.
+ */
+ public boolean isTimedOut()
+ {
+ return this.ranIntoTimeout;
+ }
+ }
+
+}