From 535a04fa05f739ec16dd81666e3b0f82dfbd442d Mon Sep 17 00:00:00 2001 From: tknall Date: Wed, 9 Jan 2013 15:41:29 +0000 Subject: pdf-as-lib maven project files moved to pdf-as-lib git-svn-id: https://joinup.ec.europa.eu/svn/pdf-as/pdf-as/trunk@926 7b5415b0-85f9-ee4d-85bd-d5d0c3b42d1c --- .../impl/input/FileBasedTextDataSourceImpl.java | 160 +++++++++++++++++++++ 1 file changed, 160 insertions(+) create mode 100644 pdf-as-lib/src/main/java/at/gv/egiz/pdfas/impl/input/FileBasedTextDataSourceImpl.java (limited to 'pdf-as-lib/src/main/java/at/gv/egiz/pdfas/impl/input/FileBasedTextDataSourceImpl.java') diff --git a/pdf-as-lib/src/main/java/at/gv/egiz/pdfas/impl/input/FileBasedTextDataSourceImpl.java b/pdf-as-lib/src/main/java/at/gv/egiz/pdfas/impl/input/FileBasedTextDataSourceImpl.java new file mode 100644 index 0000000..5a84ce2 --- /dev/null +++ b/pdf-as-lib/src/main/java/at/gv/egiz/pdfas/impl/input/FileBasedTextDataSourceImpl.java @@ -0,0 +1,160 @@ +/** + * Copyright 2006 by Know-Center, Graz, Austria + * PDF-AS has been contracted by the E-Government Innovation Center EGIZ, a + * joint initiative of the Federal Chancellery Austria and Graz University of + * Technology. + * + * Licensed under the EUPL, Version 1.1 or - as soon they will be approved by + * the European Commission - subsequent versions of the EUPL (the "Licence"); + * You may not use this work except in compliance with the Licence. + * You may obtain a copy of the Licence at: + * http://www.osor.eu/eupl/ + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the Licence is distributed on an "AS IS" basis, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the Licence for the specific language governing permissions and + * limitations under the Licence. + * + * This product combines work with different licenses. See the "NOTICE" text + * file for details on the various modules and licenses. + * The "NOTICE" text file is part of the distribution. Any derivative works + * that you distribute must include a readable copy of the "NOTICE" text file. + */ +package at.gv.egiz.pdfas.impl.input; + +import java.io.File; +import java.io.FileInputStream; +import java.io.FileNotFoundException; +import java.io.IOException; +import java.io.InputStream; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; + +import at.gv.egiz.pdfas.framework.input.TextDataSource; +import at.gv.egiz.pdfas.impl.input.helper.DataSourceHelper; + +/** + * @author wprinz + * + */ +public class FileBasedTextDataSourceImpl implements TextDataSource, FileBased +{ + /** + * The log. + */ + private static final Log log = LogFactory.getLog(FileBasedTextDataSourceImpl.class); + + protected File file = null; + + protected String characterEncoding = null; + + public FileBasedTextDataSourceImpl(File file, String characterEncoding) throws IOException + { + if (!file.exists()) + { + throw new FileNotFoundException("The file '" + file + "' does not exist."); + } + if (!file.canRead()) + { + throw new IOException("The file '" + file + "' cannot be read."); + } + + this.file = file; + this.characterEncoding = characterEncoding; + } + + /** + * @see at.gv.egiz.pdfas.impl.input.FileBased#getFile() + */ + public File getFile() + { + return this.file; + } + + /** + * Returns the character encoding. + * + * @return Returns the character encoding. + */ + public String getCharacterEncoding() + { + return this.characterEncoding; + } + + /** + * @see at.gv.egiz.pdfas.framework.input.TextDataSource#getText() + */ + public String getText() + { + try + { + InputStream is = createInputStream(); + byte[] data = new byte[getLength()]; + int read = 0; + int n = 0; + while ((n = is.read(data, read, data.length - read)) > 0) + { + read += n; + } + is.close(); + + String text = new String(data, getCharacterEncoding()); + + data = null; + + return text; + } + catch (IOException e) + { + log.error("Couldn't read text for file " + getFile() + ". Returning null.", e); + + return null; + } + } + + /** + * @see at.gv.egiz.pdfas.framework.input.DataSource#createInputStream() + */ + public InputStream createInputStream() + { + try + { + FileInputStream fis = new FileInputStream(getFile()); + return fis; + } + catch (IOException e) + { + log.error("Couldn't create InputStream for file " + getFile() + ". Returning null.", e); + + return null; + } + } + + /** + * @see at.gv.egiz.pdfas.framework.input.DataSource#getLength() + */ + public int getLength() + { + return (int) getFile().length(); + } + + byte [] cache = null; + + /** + * @see at.gv.egiz.pdfas.framework.input.DataSource#getAsByteArray() + */ + public byte[] getAsByteArray() + { + if (cache != null) + { + return cache; + } + + cache = DataSourceHelper.convertInputStreamToByteArray(createInputStream()); + + return cache; + } + +} -- cgit v1.2.3