aboutsummaryrefslogtreecommitdiff
path: root/pdf-as-lib/src/main/java/at/gv/egiz/pdfas/impl/input/FileBasedTextDataSourceImpl.java
diff options
context:
space:
mode:
Diffstat (limited to 'pdf-as-lib/src/main/java/at/gv/egiz/pdfas/impl/input/FileBasedTextDataSourceImpl.java')
-rw-r--r--pdf-as-lib/src/main/java/at/gv/egiz/pdfas/impl/input/FileBasedTextDataSourceImpl.java160
1 files changed, 160 insertions, 0 deletions
diff --git a/pdf-as-lib/src/main/java/at/gv/egiz/pdfas/impl/input/FileBasedTextDataSourceImpl.java b/pdf-as-lib/src/main/java/at/gv/egiz/pdfas/impl/input/FileBasedTextDataSourceImpl.java
new file mode 100644
index 0000000..5a84ce2
--- /dev/null
+++ b/pdf-as-lib/src/main/java/at/gv/egiz/pdfas/impl/input/FileBasedTextDataSourceImpl.java
@@ -0,0 +1,160 @@
+/**
+ * <copyright> Copyright 2006 by Know-Center, Graz, Austria </copyright>
+ * PDF-AS has been contracted by the E-Government Innovation Center EGIZ, a
+ * joint initiative of the Federal Chancellery Austria and Graz University of
+ * Technology.
+ *
+ * Licensed under the EUPL, Version 1.1 or - as soon they will be approved by
+ * the European Commission - subsequent versions of the EUPL (the "Licence");
+ * You may not use this work except in compliance with the Licence.
+ * You may obtain a copy of the Licence at:
+ * http://www.osor.eu/eupl/
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the Licence is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the Licence for the specific language governing permissions and
+ * limitations under the Licence.
+ *
+ * This product combines work with different licenses. See the "NOTICE" text
+ * file for details on the various modules and licenses.
+ * The "NOTICE" text file is part of the distribution. Any derivative works
+ * that you distribute must include a readable copy of the "NOTICE" text file.
+ */
+package at.gv.egiz.pdfas.impl.input;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.io.InputStream;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+
+import at.gv.egiz.pdfas.framework.input.TextDataSource;
+import at.gv.egiz.pdfas.impl.input.helper.DataSourceHelper;
+
+/**
+ * A {@link TextDataSource} whose content is read from a file on disk and
+ * decoded with a caller-supplied character encoding.
+ *
+ * <p>
+ * The file is not read at construction time; {@link #getText()} and
+ * {@link #createInputStream()} open it on demand. Only
+ * {@link #getAsByteArray()} keeps a cached copy of the content.
+ * </p>
+ *
+ * @author wprinz
+ */
+public class FileBasedTextDataSourceImpl implements TextDataSource, FileBased
+{
+  /**
+   * The log.
+   */
+  private static final Log log = LogFactory.getLog(FileBasedTextDataSourceImpl.class);
+
+  /**
+   * The file backing this data source.
+   */
+  protected File file = null;
+
+  /**
+   * The name of the character encoding used to decode the file's bytes in
+   * {@link #getText()}.
+   */
+  protected String characterEncoding = null;
+
+  /**
+   * Creates a text data source for the given file.
+   *
+   * @param file
+   *          The file to read the text from. Must exist and be readable.
+   * @param characterEncoding
+   *          The name of the character encoding of the file's content.
+   * @throws FileNotFoundException
+   *           If the file does not exist.
+   * @throws IOException
+   *           If the file exists but cannot be read.
+   */
+  public FileBasedTextDataSourceImpl(File file, String characterEncoding) throws IOException
+  {
+    if (!file.exists())
+    {
+      throw new FileNotFoundException("The file '" + file + "' does not exist.");
+    }
+    if (!file.canRead())
+    {
+      throw new IOException("The file '" + file + "' cannot be read.");
+    }
+
+    this.file = file;
+    this.characterEncoding = characterEncoding;
+  }
+
+  /**
+   * @see at.gv.egiz.pdfas.impl.input.FileBased#getFile()
+   */
+  public File getFile()
+  {
+    return this.file;
+  }
+
+  /**
+   * Returns the character encoding.
+   *
+   * @return Returns the character encoding.
+   */
+  public String getCharacterEncoding()
+  {
+    return this.characterEncoding;
+  }
+
+  /**
+   * Reads the file and decodes it using {@link #getCharacterEncoding()}.
+   *
+   * <p>
+   * At most {@link #getLength()} bytes are read. The stream is always closed,
+   * also when reading fails, and only the bytes that were actually read are
+   * decoded.
+   * </p>
+   *
+   * @return The decoded text, or <code>null</code> if the file could not be
+   *         opened, read, closed or decoded (the problem is logged).
+   * @see at.gv.egiz.pdfas.framework.input.TextDataSource#getText()
+   */
+  public String getText()
+  {
+    InputStream is = createInputStream();
+    if (is == null)
+    {
+      // createInputStream() signals failure by returning null (and has logged
+      // the cause); honour this method's "return null on failure" contract
+      // instead of running into a NullPointerException.
+      log.error("Couldn't read text for file " + getFile() + ". Returning null.");
+      return null;
+    }
+
+    try
+    {
+      try
+      {
+        byte[] data = new byte[getLength()];
+        int read = 0;
+        int n = 0;
+        // A single read() may deliver fewer bytes than requested, so keep
+        // reading until the buffer is full or the stream is exhausted.
+        while ((n = is.read(data, read, data.length - read)) > 0)
+        {
+          read += n;
+        }
+
+        // Decode only what was read: if the file turned out shorter than
+        // getLength() reported, the rest of the buffer is just zero padding.
+        return new String(data, 0, read, getCharacterEncoding());
+      }
+      finally
+      {
+        // Release the file handle on every path. An IOException thrown here
+        // is handled by the catch below, so a failing close still yields null.
+        is.close();
+      }
+    }
+    catch (IOException e)
+    {
+      log.error("Couldn't read text for file " + getFile() + ". Returning null.", e);
+
+      return null;
+    }
+  }
+
+  /**
+   * Opens a new stream on the file. The caller is responsible for closing it.
+   *
+   * @return A new {@link FileInputStream} for the file, or <code>null</code>
+   *         if the stream could not be created (the problem is logged).
+   * @see at.gv.egiz.pdfas.framework.input.DataSource#createInputStream()
+   */
+  public InputStream createInputStream()
+  {
+    try
+    {
+      FileInputStream fis = new FileInputStream(getFile());
+      return fis;
+    }
+    catch (IOException e)
+    {
+      log.error("Couldn't create InputStream for file " + getFile() + ". Returning null.", e);
+
+      return null;
+    }
+  }
+
+  /**
+   * Returns the current length of the file in bytes.
+   *
+   * <p>
+   * The value is the file's <code>long</code> length narrowed to
+   * <code>int</code>, so it is not meaningful for files larger than
+   * {@link Integer#MAX_VALUE} bytes.
+   * </p>
+   *
+   * @see at.gv.egiz.pdfas.framework.input.DataSource#getLength()
+   */
+  public int getLength()
+  {
+    return (int) getFile().length();
+  }
+
+  /**
+   * Content cached by {@link #getAsByteArray()}; <code>null</code> until that
+   * method has stored a result.
+   */
+  byte [] cache = null;
+
+  /**
+   * Returns the file content as a byte array. The result of the first
+   * conversion is cached and the same array instance is returned on later
+   * calls, so callers should not modify it. Access to the cache is not
+   * synchronized.
+   *
+   * <p>
+   * NOTE(review): the stream from {@link #createInputStream()} is handed
+   * straight to <code>DataSourceHelper</code>; presumably the helper reads it
+   * fully and closes it - confirm, and confirm how it reacts to a
+   * <code>null</code> stream.
+   * </p>
+   *
+   * @see at.gv.egiz.pdfas.framework.input.DataSource#getAsByteArray()
+   */
+  public byte[] getAsByteArray()
+  {
+    if (cache != null)
+    {
+      return cache;
+    }
+
+    cache = DataSourceHelper.convertInputStreamToByteArray(createInputStream());
+
+    return cache;
+  }
+
+}