aboutsummaryrefslogtreecommitdiff
path: root/src/main/java/at/knowcenter/wag/egov/egiz/tools
diff options
context:
space:
mode:
author tknall <tknall@7b5415b0-85f9-ee4d-85bd-d5d0c3b42d1c> 2013-01-09 15:41:29 +0000
committer tknall <tknall@7b5415b0-85f9-ee4d-85bd-d5d0c3b42d1c> 2013-01-09 15:41:29 +0000
commit 535a04fa05f739ec16dd81666e3b0f82dfbd442d (patch)
tree 0804f301c1a9ceb303a8441b7b29244fc8eb7ff0 /src/main/java/at/knowcenter/wag/egov/egiz/tools
parent 1efaf6fd5619dfa95c9d7e8c71eda4c2ffba4998 (diff)
download pdf-as-3-535a04fa05f739ec16dd81666e3b0f82dfbd442d.tar.gz
pdf-as-3-535a04fa05f739ec16dd81666e3b0f82dfbd442d.tar.bz2
pdf-as-3-535a04fa05f739ec16dd81666e3b0f82dfbd442d.zip
pdf-as-lib maven project files moved to pdf-as-lib
git-svn-id: https://joinup.ec.europa.eu/svn/pdf-as/pdf-as/trunk@926 7b5415b0-85f9-ee4d-85bd-d5d0c3b42d1c
Diffstat (limited to 'src/main/java/at/knowcenter/wag/egov/egiz/tools')
-rw-r--r-- src/main/java/at/knowcenter/wag/egov/egiz/tools/CodingHelper.java 301
-rw-r--r-- src/main/java/at/knowcenter/wag/egov/egiz/tools/DebugHelper.java 90
-rw-r--r-- src/main/java/at/knowcenter/wag/egov/egiz/tools/FileHelper.java 125
-rw-r--r-- src/main/java/at/knowcenter/wag/egov/egiz/tools/Normalize.java 55
-rw-r--r-- src/main/java/at/knowcenter/wag/egov/egiz/tools/NormalizeV01.java 184
-rw-r--r-- src/main/java/at/knowcenter/wag/egov/egiz/tools/Normalizer.java 280
6 files changed, 0 insertions, 1035 deletions
diff --git a/src/main/java/at/knowcenter/wag/egov/egiz/tools/CodingHelper.java b/src/main/java/at/knowcenter/wag/egov/egiz/tools/CodingHelper.java
deleted file mode 100644
index 5132021..0000000
--- a/src/main/java/at/knowcenter/wag/egov/egiz/tools/CodingHelper.java
+++ /dev/null
@@ -1,301 +0,0 @@
-/**
- * <copyright> Copyright 2006 by Know-Center, Graz, Austria </copyright>
- * PDF-AS has been contracted by the E-Government Innovation Center EGIZ, a
- * joint initiative of the Federal Chancellery Austria and Graz University of
- * Technology.
- *
- * Licensed under the EUPL, Version 1.1 or - as soon they will be approved by
- * the European Commission - subsequent versions of the EUPL (the "Licence");
- * You may not use this work except in compliance with the Licence.
- * You may obtain a copy of the Licence at:
- * http://www.osor.eu/eupl/
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the Licence is distributed on an "AS IS" basis,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the Licence for the specific language governing permissions and
- * limitations under the Licence.
- *
- * This product combines work with different licenses. See the "NOTICE" text
- * file for details on the various modules and licenses.
- * The "NOTICE" text file is part of the distribution. Any derivative works
- * that you distribute must include a readable copy of the "NOTICE" text file.
- *
- * $Id: CodingHelper.java,v 1.6 2006/10/11 07:52:36 wprinz Exp $
- */
-package at.knowcenter.wag.egov.egiz.tools;
-
-import java.io.IOException;
-import java.io.UnsupportedEncodingException;
-import java.security.MessageDigest;
-import java.security.NoSuchAlgorithmException;
-
-import org.apache.commons.codec.binary.Base64;
-
-import at.gv.egiz.pdfas.framework.input.DataSource;
-import at.gv.egiz.pdfas.impl.input.helper.DataSourceHelper;
-
-/**
- * This class provides encoding and decoding methods and other coding methods.
- * All methods are static!
- *
- * @author wlackner
- */
-public class CodingHelper
-{
-
- /**
- * Static Base64 object
- */
- private static Base64 b64 = new Base64();
-
- /**
- * This method encodes a given Unicode (Java) String to UTF-8 bytes and then
- * encodes these UTF-8 bytes to a Base64 US-ASCII (Java) String.
- *
- * @param plain_string
- * to be encoded
- * @return the UTF-8 and Base64 encoded string
- */
- public static String encodeUTF8AsBase64(String plain_string)
- {
- try
- {
- byte[] utf8_bytes = plain_string.getBytes("UTF-8");
- byte[] base64_bytes = b64.encode(utf8_bytes);
- String encoded_string = new String(base64_bytes, "US-ASCII");
- return encoded_string;
- }
- catch (UnsupportedEncodingException e)
- {
- e.printStackTrace();
- throw new RuntimeException(e);
- }
- }
-
- /**
- * This method decodes the UTF-8 bytes from a Base64 US-ASCII (Java) String
- * and decodes the UTF-8 bytes to a unicode (Java) String.
- *
- * @param encoded_string
- * to be decoded
- * @return the Base64 and UTF-8 decoded string
- */
- public static String decodeUTF8FromBase64(String encoded_string)
- {
- try
- {
- byte[] base64_bytes = encoded_string.getBytes("US-ASCII");
- byte[] utf8_bytes = b64.decode(base64_bytes);
- String plain_string = new String(utf8_bytes, "UTF-8");
- return plain_string;
- }
- catch (UnsupportedEncodingException e)
- {
- e.printStackTrace();
- throw new RuntimeException(e);
- }
- }
-
- // /**
- // * This method encodes a given string UTF-8
- // *
- // * @param theString to be encoded
- // * @return the UTF-8 encoded string
- // */
- // public static byte[] encodeUTF8(String theString) {
- // byte[] utf8 = null;
- // try {
- // utf8 = theString.getBytes("UTF-8");
- // } catch (UnsupportedEncodingException e) {
- // e.printStackTrace();
- // }
- // return utf8;
- // }
-
- // /**
- // * This method decodes a given UTF-8 string
- // *
- // * @param theString to be decoded
- // * @return the decoded UTF-8 string
- // */
- // public static String decodeUTF8(String theString) {
- // byte[] ba = theString.getBytes();
- // String the_string = decodeUTF8(ba);
- // if (the_string != null) {
- // return the_string;
- // }
- // return theString;
- // }
-
- // /**
- // * This method decodes a given UTF-8 byte array
- // *
- // * @param ba the byte array to be decoded
- // * @return the decoded UTF-8 string
- // */
- // public static String decodeUTF8(byte[] ba) {
- // String the_string = null;
- // try {
- // the_string = new String(ba, "UTF-8");
- // } catch (UnsupportedEncodingException e) {
- // e.printStackTrace();
- // }
- // return the_string;
- // }
-
- /**
- * This method decodes a given Base64 string.
- *
- * <p>
- * Note that the given String must only contain Base64 characters. (The string
- * will be converted to a byte array of "US-ASCII" (7 bit) bytes and then this
- * byte array will be decoded using the Base64 algorithm.
- * </p>
- *
- * @param theString
- * to be decoded
- * @return a Base64 decoded byte array
- */
- public static byte[] decodeBase64(String theString)
- {
- try
- {
- byte[] base64_bytes = theString.getBytes("US-ASCII");
- return b64.decode(base64_bytes);
- }
- catch (UnsupportedEncodingException e)
- {
- e.printStackTrace();
- throw new RuntimeException("Very Strange: US-ASCII encoding not supported???", e);
- }
- }
-
- /**
- * This method decodes a given Base64 byte array
- *
- * @param ba
- * the byte array to be decoded
- * @return a Base64 decoded byte array
- */
- public static byte[] decodeBase64(byte[] ba)
- {
- return b64.decode(ba);
- }
-
- /**
- * This method encodes a given byte array as a Base64 string.
- *
- * @param plainString
- * the byte array to be encoded (despite its name this is binary data,
- * not a string)
- * @return the Base64 encoded string
- * @throws RuntimeException
- * if the US-ASCII encoding is not supported
- */
- public static String encodeBase64(byte[] plainString)
- {
- try
- {
- byte[] base64_bytes = b64.encode(plainString);
- // The encoded bytes are turned into a Java String using US-ASCII.
- return new String(base64_bytes, "US-ASCII");
- }
- catch (UnsupportedEncodingException e)
- {
- e.printStackTrace();
- throw new RuntimeException("Very Strange: US-ASCII encoding not supported???", e);
- }
- }
-
- // dferbas
- /**
- * This method builds a hash value of a given byte array.
- *
- * @param data
- * the byte array to build the hash value for
- * @param hashAlg hash algorithm for {@link MessageDigest} e.g. "SHA-1"
- * @return the calculated hash value as a byte array
- * @throws RuntimeException if the requested algorithm is not available
- * (wraps the {@link NoSuchAlgorithmException})
- * @see MessageDigest
- */
- public static byte[] buildDigest(byte[] data, String hashAlg)
- {
- MessageDigest digester = null;
- try
- {
- digester = MessageDigest.getInstance(hashAlg);
- digester.update(data);
- return digester.digest();
- }
- catch (NoSuchAlgorithmException e)
- {
- // An unknown algorithm name is reported as an unchecked exception.
- throw new RuntimeException(e);
- }
- }
-
- // dferbas
- /**
- * This method builds a hash value of the content of a given data source.
- * @param input the data source whose content is digested
- * @param hashAlg hash algorithm for {@link MessageDigest} e.g. "SHA-1"
- * @return the calculated hash value as a byte array
- * @throws IOException declared for the conversion of the data source to a
- * byte array (presumably a read failure - confirm against DataSourceHelper)
- */
- public static byte[] buildDigest(DataSource input, String hashAlg) throws IOException
- {
- // PERF: the data source is converted to a byte array before it is digested.
- byte [] data = DataSourceHelper.convertDataSourceToByteArray(input);
- return buildDigest(data, hashAlg);
- }
-
- /**
-  * Escapes the characters that are special in HTML: ampersand, less-than,
-  * greater-than and the double quotation mark.
-  *
-  * @param rawString
-  *          the string to be escaped, must not be null
-  * @return the HTML escaped string
-  */
- public static String htmlEscape(String rawString)
- {
-   // The ampersand has to be replaced first, otherwise the ampersands of the
-   // entities inserted below would be escaped a second time.
-   rawString = rawString.replaceAll("\\&", "&amp;");
-   rawString = rawString.replaceAll("\\<", "&lt;");
-   rawString = rawString.replaceAll("\\>", "&gt;");
-   // Fixed: the pattern used to be "\">" which could never match at this
-   // point because every '>' has already been replaced, so quotation marks
-   // were never escaped.
-   rawString = rawString.replaceAll("\"", "&quot;");
-   return rawString;
- }
-
- /**
- * This method checks whether a byte array is base64 conform, i.e. contains
- * no chars that are not base64 conform.
- *
- * @param byteArray
- * the array to test
- * @return true if the byte array is base64 conform, false otherwise
- */
- public static boolean isB64(byte[] byteArray)
- {
- try
- {
- return Base64.isArrayByteBase64(byteArray);
- }
- catch (ArrayIndexOutOfBoundsException e)
- {
- // NOTE(review): presumably raised by the Base64 table lookup for byte
- // values outside of its range; such input is reported as "not base64".
- return false;
- }
- }
-
- /**
-  * This method checks whether a string is base64 conform, i.e. contains no
-  * chars that are not base64 conform.
-  *
-  * @param string
-  *          the chars to test, must not be null
-  * @return true if the given string is base64 conform, false otherwise
-  * @throws RuntimeException
-  *           if the US-ASCII encoding is not supported
-  */
- public static boolean isB64(String string)
- {
-   try
-   {
-     // Fixed: the bytes used to be taken in the platform default charset, so
-     // the result depended on the machine. All other methods of this class
-     // convert Base64 text using US-ASCII, so the same charset is used here.
-     // NOTE(review): chars outside of US-ASCII are expected to end up as a
-     // non-Base64 replacement byte and therefore still yield false - confirm.
-     return Base64.isArrayByteBase64(string.getBytes("US-ASCII"));
-   }
-   catch (UnsupportedEncodingException e)
-   {
-     throw new RuntimeException("Very Strange: US-ASCII encoding not supported???", e);
-   }
-   catch (ArrayIndexOutOfBoundsException e)
-   {
-     return false;
-   }
- }
-} \ No newline at end of file
diff --git a/src/main/java/at/knowcenter/wag/egov/egiz/tools/DebugHelper.java b/src/main/java/at/knowcenter/wag/egov/egiz/tools/DebugHelper.java
deleted file mode 100644
index 762cb71..0000000
--- a/src/main/java/at/knowcenter/wag/egov/egiz/tools/DebugHelper.java
+++ /dev/null
@@ -1,90 +0,0 @@
-/**
- * <copyright> Copyright 2006 by Know-Center, Graz, Austria </copyright>
- * PDF-AS has been contracted by the E-Government Innovation Center EGIZ, a
- * joint initiative of the Federal Chancellery Austria and Graz University of
- * Technology.
- *
- * Licensed under the EUPL, Version 1.1 or - as soon they will be approved by
- * the European Commission - subsequent versions of the EUPL (the "Licence");
- * You may not use this work except in compliance with the Licence.
- * You may obtain a copy of the Licence at:
- * http://www.osor.eu/eupl/
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the Licence is distributed on an "AS IS" basis,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the Licence for the specific language governing permissions and
- * limitations under the Licence.
- *
- * This product combines work with different licenses. See the "NOTICE" text
- * file for details on the various modules and licenses.
- * The "NOTICE" text file is part of the distribution. Any derivative works
- * that you distribute must include a readable copy of the "NOTICE" text file.
- */
-package at.knowcenter.wag.egov.egiz.tools;
-
-import java.io.File;
-import java.io.FileOutputStream;
-import java.io.OutputStreamWriter;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-
-import at.knowcenter.wag.egov.egiz.sig.connectors.moa.EnvelopingBase64MOAConnector;
-
-/**
- * Contains useful helper methods for debugging.
- *
- * @author wprinz
- */
-public final class DebugHelper
-{
- /**
- * The log.
- */
- private static Log log = LogFactory.getLog(DebugHelper.class);
-
- /**
- * Tells, if Strings should be debugged to a file.
- *
- * <p>
- * If set to false, the corresponding methods will simply do nothing.
- * </p>
- */
- public static boolean debug_to_file = true;
-
- /**
- * The directory under which the debug files are to be stored.
- */
- public static File debug_dir = new File("C:\\wprinz\\Filer\\egiz2"); //$NON-NLS-1$
-
- /**
-  * Writes the given String to a file with the given file name in UTF-8
-  * encoding.
-  *
-  * <p>
-  * Does nothing if {@link #debug_to_file} is false. Failures are logged and
-  * not propagated to the caller.
-  * </p>
-  *
-  * @param str
-  *          The String to be written.
-  * @param file_name
-  *          The name of the file below {@link #debug_dir} the String will be
-  *          written to.
-  */
- public static void debugStringToFile(String str, String file_name)
- {
-   if (!debug_to_file)
-   {
-     return;
-   }
-
-   try
-   {
-     File file = new File(debug_dir, file_name);
-     FileOutputStream fos = new FileOutputStream(file);
-     OutputStreamWriter osw = new OutputStreamWriter(fos, "UTF-8"); //$NON-NLS-1$
-     try
-     {
-       osw.write(str);
-     }
-     finally
-     {
-       // Fixed: the writer used to stay open when write() failed, which
-       // leaked the file handle.
-       osw.close();
-     }
-   }
-   catch (Exception e)
-   {
-     log.error(e.getMessage(), e);
-   }
- }
-}
diff --git a/src/main/java/at/knowcenter/wag/egov/egiz/tools/FileHelper.java b/src/main/java/at/knowcenter/wag/egov/egiz/tools/FileHelper.java
deleted file mode 100644
index 17b98d7..0000000
--- a/src/main/java/at/knowcenter/wag/egov/egiz/tools/FileHelper.java
+++ /dev/null
@@ -1,125 +0,0 @@
-/**
- * <copyright> Copyright 2006 by Know-Center, Graz, Austria </copyright>
- * PDF-AS has been contracted by the E-Government Innovation Center EGIZ, a
- * joint initiative of the Federal Chancellery Austria and Graz University of
- * Technology.
- *
- * Licensed under the EUPL, Version 1.1 or - as soon they will be approved by
- * the European Commission - subsequent versions of the EUPL (the "Licence");
- * You may not use this work except in compliance with the Licence.
- * You may obtain a copy of the Licence at:
- * http://www.osor.eu/eupl/
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the Licence is distributed on an "AS IS" basis,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the Licence for the specific language governing permissions and
- * limitations under the Licence.
- *
- * This product combines work with different licenses. See the "NOTICE" text
- * file for details on the various modules and licenses.
- * The "NOTICE" text file is part of the distribution. Any derivative works
- * that you distribute must include a readable copy of the "NOTICE" text file.
- *
- * $Id: FileHelper.java,v 1.2 2006/05/15 12:05:21 wlackner Exp $
- */
-package at.knowcenter.wag.egov.egiz.tools;
-
-import java.io.BufferedReader;
-import java.io.BufferedWriter;
-import java.io.FileInputStream;
-import java.io.FileNotFoundException;
-import java.io.FileReader;
-import java.io.FileWriter;
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.InputStreamReader;
-
-import org.apache.log4j.Logger;
-
-import at.knowcenter.wag.egov.egiz.cfg.ConfigLogger;
-
-/**
- * This class provides file reader and writer methods. All methods are static!
- *
- * @author wlackner
- */
-public class FileHelper {
- /**
- * The logger definition.
- */
- private static final Logger logger_ = ConfigLogger.getLogger(FileHelper.class);
-
- /**
-  * Reads a UTF-8 encoded text file line by line and returns the concatenated
-  * lines. The line terminators are not part of the result.
-  *
-  * @param fileName the file to be read
-  * @return the content of the file without line terminators, the part read so
-  *         far if reading fails part-way, or null if the file could not be opened
-  */
- public static String readFromFile(String fileName) {
-   logger_.trace("Looking for file: " + fileName);
-   BufferedReader reader = null;
-   // Stays null until the file has been opened so that null signals "not opened".
-   StringBuffer content = null;
-   try {
-     reader = new BufferedReader(new InputStreamReader(new FileInputStream(fileName), "UTF-8"));
-     logger_.trace("Reading file: " + fileName);
-     // A buffer replaces the repeated string concatenation of the original loop.
-     content = new StringBuffer();
-     String line = null;
-     while ((line = reader.readLine()) != null) {
-       content.append(line);
-     }
-     logger_.debug("File successfully read: " + fileName);
-   } catch (FileNotFoundException e) {
-     logger_.debug("File not found: " + fileName);
-   } catch (IOException e) {
-     logger_.debug("Error reading file: " + fileName);
-   } finally {
-     // Fixed: the reader used to stay open when readLine() failed.
-     if (reader != null) {
-       try {
-         reader.close();
-       } catch (IOException e) {
-         logger_.debug("Error reading file: " + fileName);
-       }
-     }
-   }
-   return (content == null) ? null : content.toString();
- }
-
- /**
-  * Reads UTF-8 encoded text from an input stream line by line and returns the
-  * concatenated lines. The line terminators are not part of the result. The
-  * stream is closed by this method.
-  *
-  * @param inputStream the stream to be read, may be null
-  * @return the content of the stream without line terminators, the part read
-  *         so far if reading fails part-way, or null if inputStream is null
-  */
- public static String readFromInputStream(InputStream inputStream) {
-   if (inputStream == null) {
-     return null;
-   }
-   BufferedReader reader = null;
-   StringBuffer content = null;
-   try {
-     reader = new BufferedReader(new InputStreamReader(inputStream, "UTF-8"));
-     // A buffer replaces the repeated string concatenation of the original loop.
-     content = new StringBuffer();
-     String line = null;
-     while ((line = reader.readLine()) != null) {
-       content.append(line);
-     }
-   } catch (IOException e) {
-     logger_.debug("Error reading inputstream.");
-   } finally {
-     // Fixed: the reader (and with it the stream) used to stay open when
-     // readLine() failed, although it is closed after a successful read.
-     if (reader != null) {
-       try {
-         reader.close();
-       } catch (IOException e) {
-         logger_.debug("Error reading inputstream.");
-       }
-     }
-   }
-   return (content == null) ? null : content.toString();
- }
-
- /**
-  * This method writes the given content to a file, replacing existing content.
-  *
-  * <p>
-  * NOTE(review): FileWriter encodes with the platform default charset whereas
-  * the read methods of this class decode UTF-8 - confirm that this is intended.
-  * </p>
-  *
-  * @param fileName the file to be written
-  * @param fileString the content to be written
-  * @return true if the file could be written successfully, false otherwise
-  */
- public static boolean writeToFile(String fileName, String fileString) {
-   BufferedWriter writer = null;
-   try {
-     FileWriter fwriter = new FileWriter(fileName);
-     writer = new BufferedWriter(fwriter);
-     writer.write(fileString);
-     // close() flushes the buffer, so a failure here still counts as "not written".
-     writer.close();
-     writer = null;
-   } catch (IOException e) {
-     logger_.info("File:" + fileName + " can not be written. Cause:" + e.getMessage());
-     return false;
-   } finally {
-     // Fixed: the writer used to stay open when write() failed.
-     if (writer != null) {
-       try {
-         writer.close();
-       } catch (IOException ignored) {
-         // The failure that led here has already been reported.
-       }
-     }
-   }
-   return true;
- }
-} \ No newline at end of file
diff --git a/src/main/java/at/knowcenter/wag/egov/egiz/tools/Normalize.java b/src/main/java/at/knowcenter/wag/egov/egiz/tools/Normalize.java
deleted file mode 100644
index 2b0b8c2..0000000
--- a/src/main/java/at/knowcenter/wag/egov/egiz/tools/Normalize.java
+++ /dev/null
@@ -1,55 +0,0 @@
-/**
- * <copyright> Copyright 2006 by Know-Center, Graz, Austria </copyright>
- * PDF-AS has been contracted by the E-Government Innovation Center EGIZ, a
- * joint initiative of the Federal Chancellery Austria and Graz University of
- * Technology.
- *
- * Licensed under the EUPL, Version 1.1 or - as soon they will be approved by
- * the European Commission - subsequent versions of the EUPL (the "Licence");
- * You may not use this work except in compliance with the Licence.
- * You may obtain a copy of the Licence at:
- * http://www.osor.eu/eupl/
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the Licence is distributed on an "AS IS" basis,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the Licence for the specific language governing permissions and
- * limitations under the Licence.
- *
- * This product combines work with different licenses. See the "NOTICE" text
- * file for details on the various modules and licenses.
- * The "NOTICE" text file is part of the distribution. Any derivative works
- * that you distribute must include a readable copy of the "NOTICE" text file.
- *
- * $Id: Normalize.java,v 1.2 2006/05/15 12:05:21 wlackner Exp $
- */
-package at.knowcenter.wag.egov.egiz.tools;
-
-
-/**
- * Defines an interface to get access to different normalizer implementations.
- *
- * @author wlackner
- */
-public interface Normalize {
-
- /**
- * Normalize a given text.
- * @param rawText the raw text to normalize
- * @param keepMultipleLineBreaks if true multiple line breaks in a row will not be normalized to a single line break
- * @return the normalized string
- */
- public String normalize(String rawText, boolean keepMultipleLineBreaks);
- /**
- * Return the current normalizer version string.
- * @return the version string
- */
- public String getVersion();
-
- /**
- * Returns the normalizer line separator string.
- * @return the line separator string
- */
- public String getNormCR();
-
-}
diff --git a/src/main/java/at/knowcenter/wag/egov/egiz/tools/NormalizeV01.java b/src/main/java/at/knowcenter/wag/egov/egiz/tools/NormalizeV01.java
deleted file mode 100644
index 57b8e6f..0000000
--- a/src/main/java/at/knowcenter/wag/egov/egiz/tools/NormalizeV01.java
+++ /dev/null
@@ -1,184 +0,0 @@
-/**
- * <copyright> Copyright 2006 by Know-Center, Graz, Austria </copyright>
- * PDF-AS has been contracted by the E-Government Innovation Center EGIZ, a
- * joint initiative of the Federal Chancellery Austria and Graz University of
- * Technology.
- *
- * Licensed under the EUPL, Version 1.1 or - as soon they will be approved by
- * the European Commission - subsequent versions of the EUPL (the "Licence");
- * You may not use this work except in compliance with the Licence.
- * You may obtain a copy of the Licence at:
- * http://www.osor.eu/eupl/
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the Licence is distributed on an "AS IS" basis,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the Licence for the specific language governing permissions and
- * limitations under the Licence.
- *
- * This product combines work with different licenses. See the "NOTICE" text
- * file for details on the various modules and licenses.
- * The "NOTICE" text file is part of the distribution. Any derivative works
- * that you distribute must include a readable copy of the "NOTICE" text file.
- *
- * $Id: NormalizeV01.java,v 1.5 2006/10/31 08:20:56 wprinz Exp $
- */
-package at.knowcenter.wag.egov.egiz.tools;
-
-import java.io.Serializable;
-
-/**
- * This is the first version implementing a normalizer method. The normalize statements are
- * performed by using regular expressions.
- *
- * @author wlackner
- */
-public class NormalizeV01 implements Normalize, Serializable {
-// 04.11.2010 change by exthex - added keepMultipleLineBreaks parameter to normalize method
-// to allow multiple line breaks to not be normalized to a single one
- /**
- * SVUID.
- */
- private static final long serialVersionUID = 2302956630639871601L;
-
- /**
- * The space string
- */
- private final static String NORM_SP = " "; //\u0020
- /**
- * The line break string --> use only \n because XML-Parser ignores \r\n
- */
- private final static String NORM_CR = "\n"; //
- /**
- * The apostrophe string
- */
- private final static String NORM_AP = "'"; //\u0027
- /**
- * The quotation mark string
- */
- private final static String NORM_QU = "\""; //\u0022
- /**
- * The hypens string
- */
- private final static String NORM_HY = "-"; //\u002D
- /**
- * The current version string
- */
- protected static final String VERSION = "V01";
-
- /**
- * The empty constructor.
- */
- public NormalizeV01() {
- }
-
- /**
-  * The normalizer implementation. <br>
-  * Normalizer algorithm:
-  * <ol>
-  * <li>remove all NUL characters</li>
-  * <li>replace all tabs and form feeds with spaces</li>
-  * <li>replace runs of non breaking spaces with one space</li>
-  * <li>code Windows and Mac line breaks as \n</li>
-  * <li>code multiple line breaks (optionally separated by white space) as \r\r (only if keepMultipleLineBreaks == false)</li>
-  * <li>code all remaining line breaks as \r</li>
-  * <li>reduce multiple spaces to one space</li>
-  * <li>remove a space before and after a line break</li>
-  * <li>reduce \r\r to one line break (only if keepMultipleLineBreaks == false)</li>
-  * <li>remove one leading and one trailing space or line break in the string</li>
-  * <li>normalize line breaks</li>
-  * <li>normalize apostrophes</li>
-  * <li>normalize quotations</li>
-  * <li>normalize hyphens</li>
-  * </ol>
-  *
-  * @param rawText the text to normalize; null, the empty string and the literal text "null" yield the empty string
-  * @param keepMultipleLineBreaks if true, multiple newlines in a row will not be normalized to a single line break
-  * @return the normalized text, never null
-  * @see at.knowcenter.wag.egov.egiz.tools.Normalize#normalize(java.lang.String, boolean)
-  */
- public String normalize(String rawText, boolean keepMultipleLineBreaks) {
-   if (rawText == null || rawText.equals("null") || rawText.length() == 0) {
-     return "";
-   }
-   String normText = rawText;
-
-   // replace all null values
-   normText = normText.replaceAll("\u0000+", "");
-
-   // replace all Tabs and form feeds with spaces
-   normText = normText.replaceAll("[\t\f]", NORM_SP);
-
-   // replace all non breaking spaces with normal spaces
-   normText = normText.replaceAll("\u00a0+", NORM_SP);
-
-   // code all windows line breaks as \n
-   normText = normText.replaceAll("\r\n", "\n");
-
-   // code all mac line breaks as \n
-   normText = normText.replace('\r', '\n');
-
-   if (!keepMultipleLineBreaks)
-   {
-     // reduce all multiple line breaks into two line breaks, code multiple line break as \r\r
-     normText = normText.replaceAll("\n[\\s\n]*\n", "\r\r");
-   }
-
-   // replace all single line breaks with one line break
-   normText = normText.replace('\n', '\r');
-
-   // normalize spaces
-   normText = normText.replaceAll(" +", NORM_SP);
-
-   // remove spaces before and after a single line break
-   normText = normText.replaceAll(" ?\r ?", "\r");
-
-   if (keepMultipleLineBreaks)
-   {
-     // remove spaces before and after a multiple line breaks
-     normText = normText.replaceAll(" ?\r\r ?", "\r\r");
-   } else
-   {
-     normText = normText.replaceAll(" ?\r\r ?", "\r");
-   }
-
-   // Fixed: a text that consists of NUL characters only is empty by now and
-   // the charAt(0) below used to fail with a StringIndexOutOfBoundsException.
-   if (normText.length() == 0) {
-     return "";
-   }
-
-   // remove leading and trailing space or line break in the string
-   int start_idx = (normText.charAt(0) == ' ' || normText.charAt(0) == '\r' ? 1 : 0);
-   int end_idx = (normText.charAt(normText.length() - 1) == ' ' || normText.charAt(normText.length() - 1) == '\r' ? normText.length() - 1 : normText.length());
-   // a text of a single space or line break would give end_idx < start_idx
-   if (end_idx < start_idx) {
-     end_idx = start_idx;
-   }
-
-   normText = normText.substring(start_idx, end_idx);
-
-   // normalize line breaks
-   normText = normText.replaceAll("\r", NORM_CR);
-
-   // normalize apostrophes
-   normText = normText.replaceAll("[\u0060\u00B4\u2018\u2019\u201A\u201B]", NORM_AP);
-
-   // normalize quotations
-   normText = normText.replaceAll("[\u201C\u201D\u201E\u201F]", NORM_QU);
-
-   // normalize hyphens
-   normText = normText.replaceAll("[\u00AD\u2013\u2014]", NORM_HY);
-
-   return normText;
- }
-
- /**
- * Return the version string.
- *
- * @see at.knowcenter.wag.egov.egiz.tools.Normalize#getVersion()
- */
- public String getVersion() {
- return VERSION;
- }
-
- /**
- * Returns the normalizer line separator string.
- * @return the line separator string
- */
- public String getNormCR() {
- return NORM_CR;
- }
-} \ No newline at end of file
diff --git a/src/main/java/at/knowcenter/wag/egov/egiz/tools/Normalizer.java b/src/main/java/at/knowcenter/wag/egov/egiz/tools/Normalizer.java
deleted file mode 100644
index a08c12e..0000000
--- a/src/main/java/at/knowcenter/wag/egov/egiz/tools/Normalizer.java
+++ /dev/null
@@ -1,280 +0,0 @@
-/**
- * <copyright> Copyright 2006 by Know-Center, Graz, Austria </copyright>
- * PDF-AS has been contracted by the E-Government Innovation Center EGIZ, a
- * joint initiative of the Federal Chancellery Austria and Graz University of
- * Technology.
- *
- * Licensed under the EUPL, Version 1.1 or - as soon they will be approved by
- * the European Commission - subsequent versions of the EUPL (the "Licence");
- * You may not use this work except in compliance with the Licence.
- * You may obtain a copy of the Licence at:
- * http://www.osor.eu/eupl/
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the Licence is distributed on an "AS IS" basis,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the Licence for the specific language governing permissions and
- * limitations under the Licence.
- *
- * This product combines work with different licenses. See the "NOTICE" text
- * file for details on the various modules and licenses.
- * The "NOTICE" text file is part of the distribution. Any derivative works
- * that you distribute must include a readable copy of the "NOTICE" text file.
- *
- * $Id: Normalizer.java,v 1.5 2006/10/31 08:20:56 wprinz Exp $
- */
-package at.knowcenter.wag.egov.egiz.tools;
-
-import java.io.Serializable;
-
-import org.apache.log4j.Level;
-import org.apache.log4j.Logger;
-
-import at.knowcenter.wag.egov.egiz.cfg.ConfigLogger;
-import at.knowcenter.wag.egov.egiz.cfg.SettingsReader;
-import at.knowcenter.wag.egov.egiz.exceptions.NormalizeException;
-import at.knowcenter.wag.egov.egiz.exceptions.SettingsException;
-
-/**
- * This class provides wrapper methods to get an access to different normalizer implementations.
- * <br>
- * This class loads the corresponding implementation of a normalizer class and therefore acts
- * as a factory. The factory settings are read from the configuration file by calling the
- * SettingsReader.
- *
- * @author wlackner
- * @see at.knowcenter.wag.egov.egiz.tools.Normalizer
- * @see at.knowcenter.wag.egov.egiz.tools.NormalizeV01
- * @see at.knowcenter.wag.egov.egiz.cfg.SettingsReader
- */
-public class Normalizer implements Serializable {
-// 04.11.2010 changed by exthex - normalize methods use and propagate the keepMultipleNewlines parameter
-
- /**
- * SVUID.
- */
- private static final long serialVersionUID = 4201772508393848555L;
-
- /**
- * The current raw string to normalize
- */
- private String rawString_ = null;
- /**
- * The current normalisation version string
- */
- private String normVersion_ = null;
- /**
- * The normalized string cache
- */
- private String normString_ = null;
- /**
- * The reference to the normalizer implementation
- */
- private Normalize normalize_ = null;
-// /**
-// * A given Encoding, not used now
-// */
-// private String encoding_ = null;
- /**
- * The SettingsReader instance
- */
- private SettingsReader settings_ = null;
- /**
- * The factory class prefix
- */
- private final static String CLASS_PREFIX = ".Normalize";
- /**
- * The default version string
- */
- protected final static String DEFAULT_VERSION = "V01";
- /**
- * The settings key defined in the settings file
- *
- * @see SettingsReader
- */
- protected final static String SETTINGS_VERSION_KEY = "normalizer.version";
- /**
- * The logger definition.
- */
- private static final Logger logger_ = ConfigLogger.getLogger(Normalizer.class);
-
- /**
- * New Normalizer init by the raw string and a normalizer version.
- *
- * @param rawString the raw string to normalize
- * @param normVersion the nomalizer version that should be used
- * @throws NormalizeException ErrorCode:400
- */
- public Normalizer(String rawString, String normVersion) throws NormalizeException {
- rawString_ = rawString;
- normVersion_ = normVersion;
- init();
- }
-
- /**
- * New Normalizer init by the raw string.
- *
- * @param rawString the raw string to normalize
- * @throws NormalizeException ErrorCode:400
- */
- public Normalizer(String rawString) throws NormalizeException {
- rawString_ = rawString;
- init();
- }
-
- /**
- * The empty constructor.
- *
- * @throws NormalizeException ErrorCode:400
- */
- public Normalizer() throws NormalizeException {
- init();
- }
-
- /**
-  * Load the factory implementation. This method tries to load the configured normalizer library.
-  * The class name is built from the package of this class and the result of getClassName().
-  *
-  * @throws NormalizeException if the settings can not be loaded or if the normalizer class can
-  *           not be found, is not a Normalize implementation or can not be instantiated
-  */
- public void init() throws NormalizeException {
-   loadSettings();
-   String class_name = this.getClass().getPackage().getName() + getClassName();
-   Class normalize_class = null;
-   try {
-     normalize_class = Class.forName(class_name);
-   } catch (ClassNotFoundException e) {
-     if (logger_.isEnabledFor(Level.FATAL)) {
-       logger_.fatal("Class not found:" + class_name);
-     }
-     throw new NormalizeException("Can not load normalizer library", e);
-   }
-   // Fixed: a version string may resolve to an existing class that is no normalizer
-   // implementation (e.g. version "r" yields the name of this class). This used to end in an
-   // unchecked ClassCastException after the class had already been instantiated.
-   if (!Normalize.class.isAssignableFrom(normalize_class)) {
-     if (logger_.isEnabledFor(Level.FATAL)) {
-       logger_.fatal("Not a normalizer implementation:" + class_name);
-     }
-     throw new NormalizeException("Can not load normalizer library", new ClassCastException(class_name));
-   }
-   try {
-     normalize_ = (Normalize) normalize_class.newInstance();
-   } catch (InstantiationException e) {
-     if (logger_.isEnabledFor(Level.FATAL)) {
-       logger_.fatal("Can not instantiate:" + class_name);
-     }
-     throw new NormalizeException("Can not load normalizer library", e);
-   } catch (IllegalAccessException e) {
-     if (logger_.isEnabledFor(Level.FATAL)) {
-       logger_.fatal("Can not access:" + class_name);
-     }
-     throw new NormalizeException("Can not load normalizer library", e);
-   }
- }
-
- /**
- * Returns the underlying normalizer instance.
- * @author tknall
- */
- public Normalize getInstance() {
- return this.normalize_;
- }
-
- /**
- * Builds the class name suffix of the normalizer implementation. If no version has been set
- * yet, the version is read from the settings using SETTINGS_VERSION_KEY, passing
- * DEFAULT_VERSION as the default value.
- *
- * @return the class name relative to the package, i.e. CLASS_PREFIX followed by the version
- * (".NormalizeV01" for the default version); the caller prepends the package name
- */
- private String getClassName() {
- if (normVersion_ == null) {
- normVersion_ = settings_.getSetting(SETTINGS_VERSION_KEY, DEFAULT_VERSION);
- }
- return CLASS_PREFIX + normVersion_;
- }
-
- /*
- * public void setEncoding(String encoding) { encoding_ = encoding; }
- */
-
- /**
-  * Set the raw string to normalize. The cached normalized string is discarded.
-  *
-  * @param rawString the raw string to normalize
-  */
- public void setRawString(String rawString) {
-   rawString_ = rawString;
-   // Fixed: the cached result belongs to the previous raw string and must not be returned for
-   // the new one.
-   normString_ = null;
- }
-
-// /**
-// * Return the normalized string. If the chached value does not exist the normalize method from the
-// * current normalizer implementation is called.
-// *
-// * @return the normalized string
-// */
-// public String getNormalizedString() {
-// if (normString_ == null) {
-// normalize();
-// }
-// return normString_;
-// }
-
- /**
-  * Set a normalizer version. This activity loads the new requested normalizer implementation
-  * and discards the cached normalized string.
-  *
-  * @param normVersion the normalizer version to be used
-  * @throws NormalizeException ErrorCode:400
-  */
- public void setVersion(String normVersion) throws NormalizeException {
-   normVersion_ = normVersion;
-   init();
-   // Fixed: the cached result has been produced by the previous normalizer implementation.
-   normString_ = null;
- }
-
- /**
- * Return the current version string.
- *
- * @return the normaliser version string
- */
- public String getVersion() {
- return normVersion_;
- }
-
- /**
- * Wrapper method. Call the normalizer implementation method.
- *
- * @param rawString the raw string to normalize
- * @param keepMultipleNewlines
- * @return the normalized string
- * @see NormalizeV01
- */
- public String normalize(String rawString, boolean keepMultipleNewlines) {
- return normalize_.normalize(rawString, keepMultipleNewlines);
- }
-
- /**
-  * Wrapper method. Call the normalizer implementation method. Normalize the current raw string.
-  *
-  * @param keepMultipleNewlines passed on to the normalizer implementation
-  * @return the normalized string
-  * @see NormalizeV01
-  */
- public String normalize(boolean keepMultipleNewlines) {
-   // Fixed: the result used to be computed only once and was then returned for every later
-   // call, even if the flag or the raw string had changed in the meantime. The result is now
-   // always recomputed; the field is still updated for code that reads it.
-   normString_ = normalize(rawString_, keepMultipleNewlines);
-   return normString_;
- }
-
- /**
- * Returns the normalizer line separator string.
- * @return the line separator string
- */
- public String getNormCR() {
- return normalize_.getNormCR();
- }
-
- /**
- * load the class settings
- *
- * @throws NormalizeException
- * @see SettingsReader
- */
- private void loadSettings() throws NormalizeException {
- if (settings_ == null) {
- try {
- settings_ = SettingsReader.getInstance();
- } catch (SettingsException e) {
- String log_message = "Can not load normalizer settings. Cause:\n" + e.getMessage();
- logger_.error(log_message, e);
- throw new NormalizeException(log_message, e);
- }
- }
- }
-} \ No newline at end of file