From 535a04fa05f739ec16dd81666e3b0f82dfbd442d Mon Sep 17 00:00:00 2001 From: tknall Date: Wed, 9 Jan 2013 15:41:29 +0000 Subject: pdf-as-lib maven project files moved to pdf-as-lib git-svn-id: https://joinup.ec.europa.eu/svn/pdf-as/pdf-as/trunk@926 7b5415b0-85f9-ee4d-85bd-d5d0c3b42d1c --- .../gv/egiz/pdfas/framework/input/DataSource.java | 70 +++++++++++++++++ .../pdfas/framework/input/ExtractionStage.java | 90 ++++++++++++++++++++++ .../egiz/pdfas/framework/input/PdfDataSource.java | 41 ++++++++++ .../pdfas/framework/input/PdfDataSourceHolder.java | 49 ++++++++++++ .../egiz/pdfas/framework/input/TextDataSource.java | 39 ++++++++++ .../framework/input/correction/Corrector.java | 61 +++++++++++++++ .../input/correction/CorrectorFactory.java | 63 +++++++++++++++ 7 files changed, 413 insertions(+) create mode 100644 pdf-as-lib/src/main/java/at/gv/egiz/pdfas/framework/input/DataSource.java create mode 100644 pdf-as-lib/src/main/java/at/gv/egiz/pdfas/framework/input/ExtractionStage.java create mode 100644 pdf-as-lib/src/main/java/at/gv/egiz/pdfas/framework/input/PdfDataSource.java create mode 100644 pdf-as-lib/src/main/java/at/gv/egiz/pdfas/framework/input/PdfDataSourceHolder.java create mode 100644 pdf-as-lib/src/main/java/at/gv/egiz/pdfas/framework/input/TextDataSource.java create mode 100644 pdf-as-lib/src/main/java/at/gv/egiz/pdfas/framework/input/correction/Corrector.java create mode 100644 pdf-as-lib/src/main/java/at/gv/egiz/pdfas/framework/input/correction/CorrectorFactory.java (limited to 'pdf-as-lib/src/main/java/at/gv/egiz/pdfas/framework/input') diff --git a/pdf-as-lib/src/main/java/at/gv/egiz/pdfas/framework/input/DataSource.java b/pdf-as-lib/src/main/java/at/gv/egiz/pdfas/framework/input/DataSource.java new file mode 100644 index 0000000..f834622 --- /dev/null +++ b/pdf-as-lib/src/main/java/at/gv/egiz/pdfas/framework/input/DataSource.java @@ -0,0 +1,70 @@ +/** + * Copyright 2006 by Know-Center, Graz, Austria + * PDF-AS has been contracted by the 
E-Government Innovation Center EGIZ, a + * joint initiative of the Federal Chancellery Austria and Graz University of + * Technology. + * + * Licensed under the EUPL, Version 1.1 or - as soon they will be approved by + * the European Commission - subsequent versions of the EUPL (the "Licence"); + * You may not use this work except in compliance with the Licence. + * You may obtain a copy of the Licence at: + * http://www.osor.eu/eupl/ + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the Licence is distributed on an "AS IS" basis, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the Licence for the specific language governing permissions and + * limitations under the Licence. + * + * This product combines work with different licenses. See the "NOTICE" text + * file for details on the various modules and licenses. + * The "NOTICE" text file is part of the distribution. Any derivative works + * that you distribute must include a readable copy of the "NOTICE" text file. + */ +package at.gv.egiz.pdfas.framework.input; + +import java.io.InputStream; + +/** + * The input document data source. + * + *

+ * Usually this is a PdfDataSource, but it may be a TextDataSource as well. + *

+ * + * @author wprinz + * + */ +public interface DataSource +{ + /** + * Creates a new InputStream that allows the document's binary data to be + * read from the beginning. + * + * @return Returns the InputStream with the binary data. + */ + public InputStream createInputStream(); + + /** + * Returns the length (number of bytes) of the stream. + * + * @return Returns the length (number of bytes) of the stream. + */ + public int getLength(); + + /** + * Returns the data of this DataSource as a byte array. + * + *

+ * Calling this method indicates that you need a byte array for random read access. + * The DataSource implementation should cache this byte array to avoid excessive memory usage. + *

+ *

+ * Performance analysis has shown that the libraries internally convert the streams to byte arrays and + * that file system access is very slow. + *

+ * + * @return Returns the data of this DataSource as a byte array. + */ + public byte [] getAsByteArray(); +} diff --git a/pdf-as-lib/src/main/java/at/gv/egiz/pdfas/framework/input/ExtractionStage.java b/pdf-as-lib/src/main/java/at/gv/egiz/pdfas/framework/input/ExtractionStage.java new file mode 100644 index 0000000..19065b0 --- /dev/null +++ b/pdf-as-lib/src/main/java/at/gv/egiz/pdfas/framework/input/ExtractionStage.java @@ -0,0 +1,90 @@ +/** + * Copyright 2006 by Know-Center, Graz, Austria + * PDF-AS has been contracted by the E-Government Innovation Center EGIZ, a + * joint initiative of the Federal Chancellery Austria and Graz University of + * Technology. + * + * Licensed under the EUPL, Version 1.1 or - as soon they will be approved by + * the European Commission - subsequent versions of the EUPL (the "Licence"); + * You may not use this work except in compliance with the Licence. + * You may obtain a copy of the Licence at: + * http://www.osor.eu/eupl/ + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the Licence is distributed on an "AS IS" basis, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the Licence for the specific language governing permissions and + * limitations under the Licence. + * + * This product combines work with different licenses. See the "NOTICE" text + * file for details on the various modules and licenses. + * The "NOTICE" text file is part of the distribution. Any derivative works + * that you distribute must include a readable copy of the "NOTICE" text file. 
+ */ +package at.gv.egiz.pdfas.framework.input; + +import java.util.List; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; + +import at.gv.egiz.pdfas.exceptions.ErrorCode; +import at.gv.egiz.pdfas.exceptions.framework.VerificationFilterException; +import at.gv.egiz.pdfas.framework.DataSourceHolder; +import at.gv.egiz.pdfas.framework.vfilter.VerificationFilter; +import at.gv.egiz.pdfas.framework.vfilter.VerificationFilterParameters; +import at.gv.egiz.pdfas.impl.input.IncrementalUpdateParser; +import at.gv.egiz.pdfas.impl.vfilter.VerificationFilterImpl; +import at.knowcenter.wag.egov.egiz.exceptions.PDFDocumentException; +import at.knowcenter.wag.egov.egiz.exceptions.PresentableException; + +/** + * Extracts all signatures from a given input DataSource. + * + * @author wprinz + */ +public class ExtractionStage +{ + /** + * The log. + */ + private static final Log log = LogFactory.getLog(ExtractionStage.class); + + /** + * Extracts the signature holders from the DataSource wrapped by the given holder. + * + * A PdfDataSource is first parsed into blocks; afterwards the holder receives the + * data source held by the intermediate PdfDataSourceHolder and the parameters are + * told whether that data source has been replaced. A TextDataSource is passed to + * the VerificationFilter directly. + * + * @param dataSource + * The holder of the input DataSource; its data source may be replaced. + * @param parameters + * The verification filter parameters; setBeenCorrected is called for PDF input. + * @return Returns the list of extracted signature holders. + * @throws PresentableException + * Thrown if the extraction fails; a VerificationFilterException is thrown if + * the wrapped DataSource is neither a PdfDataSource nor a TextDataSource. + */ + public List extractSignatureHolders(final DataSourceHolder dataSource, VerificationFilterParameters parameters) throws PresentableException + { + if (dataSource.getDataSource() instanceof PdfDataSource) + { + PdfDataSource pdfDataSource = (PdfDataSource) dataSource.getDataSource(); + + PdfDataSourceHolder pdsh = new PdfDataSourceHolder(pdfDataSource); + List blocks = parsePdfIntoBlocks(pdsh); + dataSource.setDataSource(pdsh.getDataSource()); + parameters.setBeenCorrected(pdsh.hasChanged()); + + VerificationFilter vf = new VerificationFilterImpl(); + List signatures = vf.extractSignatureHolders(pdsh.getDataSource(), blocks, parameters); + + return signatures; + } + + if (dataSource.getDataSource() instanceof TextDataSource) + { + TextDataSource textDataSource = (TextDataSource) dataSource.getDataSource(); + + VerificationFilter vf = new VerificationFilterImpl(); + List signatures = vf.extractSignaturHolders(textDataSource, parameters); + + return signatures; + } + + /* Report the class of the wrapped DataSource (not of the holder) and tolerate a missing one. */ + Object source = dataSource.getDataSource(); + String className = (source == null) ? "null" : source.getClass().getName(); + String msg = "The input DataSource is neither pdf 
nor text. class.name = " + className; + log.error(msg); + throw new VerificationFilterException(ErrorCode.DOCUMENT_CANNOT_BE_READ, msg); + } + + /** + * Parses the PDF held by the given holder into blocks by delegating to + * IncrementalUpdateParser.parsePdfIntoIUBlocks. + * + * @param pdfDataSource + * The holder of the PDF data source to be parsed. + * @return Returns the list of blocks produced by the IncrementalUpdateParser. + * @throws PDFDocumentException + * Forwarded from the IncrementalUpdateParser. + */ + protected List parsePdfIntoBlocks(PdfDataSourceHolder pdfDataSource) throws PDFDocumentException + { + List blocks = IncrementalUpdateParser.parsePdfIntoIUBlocks(pdfDataSource); + return blocks; + } +} diff --git a/pdf-as-lib/src/main/java/at/gv/egiz/pdfas/framework/input/PdfDataSource.java b/pdf-as-lib/src/main/java/at/gv/egiz/pdfas/framework/input/PdfDataSource.java new file mode 100644 index 0000000..ce3d658 --- /dev/null +++ b/pdf-as-lib/src/main/java/at/gv/egiz/pdfas/framework/input/PdfDataSource.java @@ -0,0 +1,41 @@ +/** + * Copyright 2006 by Know-Center, Graz, Austria + * PDF-AS has been contracted by the E-Government Innovation Center EGIZ, a + * joint initiative of the Federal Chancellery Austria and Graz University of + * Technology. + * + * Licensed under the EUPL, Version 1.1 or - as soon they will be approved by + * the European Commission - subsequent versions of the EUPL (the "Licence"); + * You may not use this work except in compliance with the Licence. + * You may obtain a copy of the Licence at: + * http://www.osor.eu/eupl/ + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the Licence is distributed on an "AS IS" basis, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the Licence for the specific language governing permissions and + * limitations under the Licence. + * + * This product combines work with different licenses. See the "NOTICE" text + * file for details on the various modules and licenses. + * The "NOTICE" text file is part of the distribution. Any derivative works + * that you distribute must include a readable copy of the "NOTICE" text file. + */ +package at.gv.egiz.pdfas.framework.input; + + +/** + * Represents the binary data of a PDF document. + * + *

+ * This interface allows PDF data to be handled in an abstract way so that the + * storage (byte array, disk etc.) of PDF documents can be separated from the + * algorithms. + *

+ * + * @author wprinz + */ +public interface PdfDataSource extends DataSource +{ + // Marker interface: declares no members of its own beyond those inherited from DataSource. +} diff --git a/pdf-as-lib/src/main/java/at/gv/egiz/pdfas/framework/input/PdfDataSourceHolder.java b/pdf-as-lib/src/main/java/at/gv/egiz/pdfas/framework/input/PdfDataSourceHolder.java new file mode 100644 index 0000000..14a4ff4 --- /dev/null +++ b/pdf-as-lib/src/main/java/at/gv/egiz/pdfas/framework/input/PdfDataSourceHolder.java @@ -0,0 +1,49 @@ +/** + * Copyright 2006 by Know-Center, Graz, Austria + * PDF-AS has been contracted by the E-Government Innovation Center EGIZ, a + * joint initiative of the Federal Chancellery Austria and Graz University of + * Technology. + * + * Licensed under the EUPL, Version 1.1 or - as soon they will be approved by + * the European Commission - subsequent versions of the EUPL (the "Licence"); + * You may not use this work except in compliance with the Licence. + * You may obtain a copy of the Licence at: + * http://www.osor.eu/eupl/ + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the Licence is distributed on an "AS IS" basis, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the Licence for the specific language governing permissions and + * limitations under the Licence. + * + * This product combines work with different licenses. See the "NOTICE" text + * file for details on the various modules and licenses. + * The "NOTICE" text file is part of the distribution. Any derivative works + * that you distribute must include a readable copy of the "NOTICE" text file. 
+ */ +package at.gv.egiz.pdfas.framework.input; + +/** + * Mutable holder for a PdfDataSource that records whether the held data source + * has been replaced through setDataSource since the holder was created. + */ +public class PdfDataSourceHolder { + /** The currently held data source. */ + private PdfDataSource dataSource; + /** Set to true by setDataSource; never reset by this class. */ + private boolean hasChanged = false; + + /** + * Creates a holder for the given data source; hasChanged() remains false. + * + * @param dataSource + * The initial data source. + */ + public PdfDataSourceHolder(PdfDataSource dataSource) { + this.dataSource = dataSource; + } + + /** + * Returns the currently held data source. + * + * @return Returns the currently held data source. + */ + public PdfDataSource getDataSource() { + return dataSource; + } + + /** + * Replaces the held data source and marks this holder as changed. + * + * @param dataSource + * The new data source. + */ + public void setDataSource(PdfDataSource dataSource) { + this.dataSource = dataSource; + this.hasChanged = true; + } + + /** + * Tells whether setDataSource has been called on this holder. + * + * @return Returns true if setDataSource has been called, false otherwise. + */ + public boolean hasChanged() { + return this.hasChanged; + } + + + +} diff --git a/pdf-as-lib/src/main/java/at/gv/egiz/pdfas/framework/input/TextDataSource.java b/pdf-as-lib/src/main/java/at/gv/egiz/pdfas/framework/input/TextDataSource.java new file mode 100644 index 0000000..7623896 --- /dev/null +++ b/pdf-as-lib/src/main/java/at/gv/egiz/pdfas/framework/input/TextDataSource.java @@ -0,0 +1,39 @@ +/** + * Copyright 2006 by Know-Center, Graz, Austria + * PDF-AS has been contracted by the E-Government Innovation Center EGIZ, a + * joint initiative of the Federal Chancellery Austria and Graz University of + * Technology. + * + * Licensed under the EUPL, Version 1.1 or - as soon they will be approved by + * the European Commission - subsequent versions of the EUPL (the "Licence"); + * You may not use this work except in compliance with the Licence. + * You may obtain a copy of the Licence at: + * http://www.osor.eu/eupl/ + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the Licence is distributed on an "AS IS" basis, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the Licence for the specific language governing permissions and + * limitations under the Licence. + * + * This product combines work with different licenses. See the "NOTICE" text + * file for details on the various modules and licenses. + * The "NOTICE" text file is part of the distribution. 
Any derivative works + * that you distribute must include a readable copy of the "NOTICE" text file. + */ +package at.gv.egiz.pdfas.framework.input; + +/** + * Represents free-text input that is to be processed. + * + * @author wprinz + */ +public interface TextDataSource extends DataSource +{ + /** + * Returns the text to be processed. + * @return Returns the text to be processed. + */ + public String getText(); + +} diff --git a/pdf-as-lib/src/main/java/at/gv/egiz/pdfas/framework/input/correction/Corrector.java b/pdf-as-lib/src/main/java/at/gv/egiz/pdfas/framework/input/correction/Corrector.java new file mode 100644 index 0000000..71f3ded --- /dev/null +++ b/pdf-as-lib/src/main/java/at/gv/egiz/pdfas/framework/input/correction/Corrector.java @@ -0,0 +1,61 @@ +/** + * Copyright 2006 by Know-Center, Graz, Austria + * PDF-AS has been contracted by the E-Government Innovation Center EGIZ, a + * joint initiative of the Federal Chancellery Austria and Graz University of + * Technology. + * + * Licensed under the EUPL, Version 1.1 or - as soon they will be approved by + * the European Commission - subsequent versions of the EUPL (the "Licence"); + * You may not use this work except in compliance with the Licence. + * You may obtain a copy of the Licence at: + * http://www.osor.eu/eupl/ + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the Licence is distributed on an "AS IS" basis, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the Licence for the specific language governing permissions and + * limitations under the Licence. + * + * This product combines work with different licenses. See the "NOTICE" text + * file for details on the various modules and licenses. + * The "NOTICE" text file is part of the distribution. Any derivative works + * that you distribute must include a readable copy of the "NOTICE" text file. 
+ */ +package at.gv.egiz.pdfas.framework.input.correction; + +import at.gv.egiz.pdfas.exceptions.framework.CorrectorException; +import at.gv.egiz.pdfas.framework.input.PdfDataSource; + +/** + * Interface for PDF correctors. + * + *

+ * Often PDF documents generated by various document-to-PDF converters have an + * invalid structure that upsets PDF-AS. The correction mechanism allows these + * documents to be corrected before being signed. + *

+ *

+ * A PDF corrector takes an incorrect PDF document and transforms it into a + * correct one. + *

+ *

+ * Note that correcting a document destroys all signatures in that document, so + * never correct an already signed document. + *

+ * + * @author wprinz + */ +public interface Corrector +{ + /** + * Transforms the given PDF document into a form that PDF-AS can use. + * + * @param document + * The (incorrect) PDF document. + * @return Returns the corrected PDF document. + * @throws CorrectorException + * Thrown if the document could not be corrected. + */ + public PdfDataSource correctDocument(PdfDataSource document) throws CorrectorException; + +} diff --git a/pdf-as-lib/src/main/java/at/gv/egiz/pdfas/framework/input/correction/CorrectorFactory.java b/pdf-as-lib/src/main/java/at/gv/egiz/pdfas/framework/input/correction/CorrectorFactory.java new file mode 100644 index 0000000..8c954f7 --- /dev/null +++ b/pdf-as-lib/src/main/java/at/gv/egiz/pdfas/framework/input/correction/CorrectorFactory.java @@ -0,0 +1,63 @@ +/** + * Copyright 2006 by Know-Center, Graz, Austria + * PDF-AS has been contracted by the E-Government Innovation Center EGIZ, a + * joint initiative of the Federal Chancellery Austria and Graz University of + * Technology. + * + * Licensed under the EUPL, Version 1.1 or - as soon they will be approved by + * the European Commission - subsequent versions of the EUPL (the "Licence"); + * You may not use this work except in compliance with the Licence. + * You may obtain a copy of the Licence at: + * http://www.osor.eu/eupl/ + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the Licence is distributed on an "AS IS" basis, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the Licence for the specific language governing permissions and + * limitations under the Licence. + * + * This product combines work with different licenses. See the "NOTICE" text + * file for details on the various modules and licenses. + * The "NOTICE" text file is part of the distribution. Any derivative works + * that you distribute must include a readable copy of the "NOTICE" text file. 
+ */ +package at.gv.egiz.pdfas.framework.input.correction; + +import at.gv.egiz.pdfas.impl.input.correction.ExternalCorrector; +import at.gv.egiz.pdfas.impl.input.correction.InternalCorrector; +import at.knowcenter.wag.egov.egiz.cfg.SettingsReader; +import at.knowcenter.wag.egov.egiz.exceptions.SettingsException; + +/** + * Factory for creating Correctors. + * + * @author wprinz + */ +public class CorrectorFactory +{ + /** + * The id that selects the InternalCorrector. + */ + public static final String INTERNAL_CORRECTOR = "internal"; + + /** + * The id that selects the ExternalCorrector. + */ + public static final String EXTERNAL_CORRECTOR = "external"; + + /** + * The settings key under which the corrector id is looked up. + */ + public static final String CORRECTOR_KEY = "corrector"; + + /** + * Creates the Corrector selected by the given id. + * + * @param id + * The corrector id, either INTERNAL_CORRECTOR or EXTERNAL_CORRECTOR. + * @return Returns a new Corrector instance. + * @throws SettingsException + * Thrown if the id is null or not one of the known corrector ids. + */ + public static Corrector createCorrector(String id) throws SettingsException + { + /* Constant-first comparison so that a null id ends in the SettingsException below instead of a NullPointerException. */ + if (INTERNAL_CORRECTOR.equals(id)) + { + return new InternalCorrector(); + } + if (EXTERNAL_CORRECTOR.equals(id)) + { + return new ExternalCorrector(); + } + throw new SettingsException("The corrector id '" + id + "' is not a valid corrector id."); + } + + /** + * Creates the Corrector whose id is read from the settings under CORRECTOR_KEY; + * INTERNAL_CORRECTOR is passed to the SettingsReader as the default value. + * + * @return Returns a new Corrector instance. + * @throws SettingsException + * Thrown if the resulting id is not a valid corrector id. + */ + public static Corrector createCorrector() throws SettingsException + { + String id = SettingsReader.getInstance().getSetting(CORRECTOR_KEY, INTERNAL_CORRECTOR); + return createCorrector(id); + } + +} -- cgit v1.2.3