diff options
author | tknall <tknall@7b5415b0-85f9-ee4d-85bd-d5d0c3b42d1c> | 2006-12-01 12:20:24 +0000 |
---|---|---|
committer | tknall <tknall@7b5415b0-85f9-ee4d-85bd-d5d0c3b42d1c> | 2006-12-01 12:20:24 +0000 |
commit | 6025b6016517c6d898d8957d1d7e03ba71431912 (patch) | |
tree | b15bd6fa5ffe9588a9bca3f2b8a7e358f83b6eba /src/main/java/at/knowcenter/wag/egov/egiz/tools | |
parent | d2c77e820ab4aba8235d71275755021347b3ad10 (diff) | |
download | pdf-as-3-6025b6016517c6d898d8957d1d7e03ba71431912.tar.gz pdf-as-3-6025b6016517c6d898d8957d1d7e03ba71431912.tar.bz2 pdf-as-3-6025b6016517c6d898d8957d1d7e03ba71431912.zip |
Initial import of release 2.2.REL-2.2@923
git-svn-id: https://joinup.ec.europa.eu/svn/pdf-as/trunk@4 7b5415b0-85f9-ee4d-85bd-d5d0c3b42d1c
Diffstat (limited to 'src/main/java/at/knowcenter/wag/egov/egiz/tools')
5 files changed, 844 insertions, 0 deletions
diff --git a/src/main/java/at/knowcenter/wag/egov/egiz/tools/CodingHelper.java b/src/main/java/at/knowcenter/wag/egov/egiz/tools/CodingHelper.java new file mode 100644 index 0000000..7908486 --- /dev/null +++ b/src/main/java/at/knowcenter/wag/egov/egiz/tools/CodingHelper.java @@ -0,0 +1,272 @@ +/*
+ * <copyright> Copyright (c) 2006 by Know-Center, Graz, Austria </copyright>
+ *
+ * This software is the confidential and proprietary information of Know-Center,
+ * Graz, Austria. You shall not disclose such Confidential Information and shall
+ * use it only in accordance with the terms of the license agreement you entered
+ * into with Know-Center.
+ *
+ * KNOW-CENTER MAKES NO REPRESENTATIONS OR WARRANTIES ABOUT THE SUITABILITY OF
+ * THE SOFTWARE, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, OR
+ * NON-INFRINGEMENT. KNOW-CENTER SHALL NOT BE LIABLE FOR ANY DAMAGES SUFFERED BY
+ * LICENSEE AS A RESULT OF USING, MODIFYING OR DISTRIBUTING THIS SOFTWARE OR ITS
+ * DERIVATIVES.
+ *
+ * $Id: CodingHelper.java,v 1.6 2006/10/11 07:52:36 wprinz Exp $
+ */
+package at.knowcenter.wag.egov.egiz.tools;
+
+import java.io.UnsupportedEncodingException;
+import java.security.MessageDigest;
+import java.security.NoSuchAlgorithmException;
+
+import org.apache.commons.codec.binary.Base64;
+
+/**
+ * This class provides encoding and decoding methods and other coding methods.
+ * All methods are static!
+ *
+ * @author wlackner
+ */
+public class CodingHelper
+{
+
+ /**
+ * Static Base64 object
+ */
+ private static Base64 b64 = new Base64();
+
+ /**
+ * This method encodes a given Unicode (Java) String to UTF-8 bytes and then
+ * encodes these UTF-8 bytes to a Base64 US-ASCII (Java) String.
+ *
+ * @param plain_string
+ * to be encoded
+ * @return the UTF-8 and Base64 encoded string
+ */
+ public static String encodeUTF8AsBase64(String plain_string)
+ {
+ try
+ {
+ byte[] utf8_bytes = plain_string.getBytes("UTF-8");
+ byte[] base64_bytes = b64.encode(utf8_bytes);
+ String encoded_string = new String(base64_bytes, "US-ASCII");
+ return encoded_string;
+ }
+ catch (UnsupportedEncodingException e)
+ {
+ e.printStackTrace();
+ throw new RuntimeException(e);
+ }
+ }
+
+ /**
+ * This method decodes the UTF-8 bytes from a Base64 US-ASCII (Java) String
+ * and decodes the UTF-8 bytes to a unicode (Java) String.
+ *
+ * @param encoded_string
+ * to be decoded
+ * @return the Base64 and UTF-8 decoded string
+ */
+ public static String decodeUTF8FromBase64(String encoded_string)
+ {
+ try
+ {
+ byte[] base64_bytes = encoded_string.getBytes("US-ASCII");
+ byte[] utf8_bytes = b64.decode(base64_bytes);
+ String plain_string = new String(utf8_bytes, "UTF-8");
+ return plain_string;
+ }
+ catch (UnsupportedEncodingException e)
+ {
+ e.printStackTrace();
+ throw new RuntimeException(e);
+ }
+ }
+
+ // /**
+ // * This method encodes a given string UTF-8
+ // *
+ // * @param theString to be encoded
+ // * @return the UTF-8 encoded string
+ // */
+ // public static byte[] encodeUTF8(String theString) {
+ // byte[] utf8 = null;
+ // try {
+ // utf8 = theString.getBytes("UTF-8");
+ // } catch (UnsupportedEncodingException e) {
+ // e.printStackTrace();
+ // }
+ // return utf8;
+ // }
+
+ // /**
+ // * This method decodes a given UTF-8 string
+ // *
+ // * @param theString to be decoded
+ // * @return the decoded UTF-8 string
+ // */
+ // public static String decodeUTF8(String theString) {
+ // byte[] ba = theString.getBytes();
+ // String the_string = decodeUTF8(ba);
+ // if (the_string != null) {
+ // return the_string;
+ // }
+ // return theString;
+ // }
+
+ // /**
+ // * This method decodes a given UTF-8 byte array
+ // *
+ // * @param ba the byte array to be decoded
+ // * @return the decoded UTF-8 string
+ // */
+ // public static String decodeUTF8(byte[] ba) {
+ // String the_string = null;
+ // try {
+ // the_string = new String(ba, "UTF-8");
+ // } catch (UnsupportedEncodingException e) {
+ // e.printStackTrace();
+ // }
+ // return the_string;
+ // }
+
+ /**
+ * This method decodes a given Base64 string.
+ *
+ * <p>
+ * Note that the given String must only contain Base64 characters. (The string
+ * will be converted to a byte array of "US-ASCII" (7 bit) bytes and then this
+ * byte array will be decoded using the Base64 algorithm.
+ * </p>
+ *
+ * @param theString
+ * to be decoded
+ * @return a Base64 decoded byte array
+ */
+ public static byte[] decodeBase64(String theString)
+ {
+ try
+ {
+ byte[] base64_bytes = theString.getBytes("US-ASCII");
+ return b64.decode(base64_bytes);
+ }
+ catch (UnsupportedEncodingException e)
+ {
+ e.printStackTrace();
+ throw new RuntimeException("Very Strange: US-ASCII encoding not supported???", e);
+ }
+ }
+
+ /**
+ * This method decodes a given Base64 byte array
+ *
+ * @param ba
+ * the byte array to be decoded
+ * @return a Base64 decoded byte array
+ */
+ public static byte[] decodeBase64(byte[] ba)
+ {
+ return b64.decode(ba);
+ }
+
+ /**
+ * This method encodes a given byte array Base64
+ *
+ * @param plainString
+ * the byte array to be encoded
+ * @return the Base64 encoded string
+ */
+ public static String encodeBase64(byte[] plainString)
+ {
+ try
+ {
+ byte[] base64_bytes = b64.encode(plainString);
+ return new String(base64_bytes, "US-ASCII");
+ }
+ catch (UnsupportedEncodingException e)
+ {
+ e.printStackTrace();
+ throw new RuntimeException("Very Strange: US-ASCII encoding not supported???", e);
+ }
+ }
+
+ /**
+ * This method builds an SHA-1 hash value of a given byte array.
+ *
+ * @param data
+ * the byte array to build the hash value for
+ * @return the calculated hash value as a byte array
+ * @see MessageDigest
+ */
+ public static byte[] buildDigest(byte[] data)
+ {
+ MessageDigest sha_1 = null;
+ try
+ {
+ sha_1 = MessageDigest.getInstance("SHA-1");
+ sha_1.update(data);
+ return sha_1.digest();
+ }
+ catch (NoSuchAlgorithmException e)
+ {
+ return null;
+ }
+ }
+
+ /**
+ * This method escapes a given string with HTML entities.
+ *
+ * @param rawString
+ * the string to escaped
+ * @return the HTML escaped string
+ */
+ public static String htmlEscape(String rawString)
+ {
+ rawString = rawString.replaceAll("\\&", "&");
+ rawString = rawString.replaceAll("\\<", "<");
+ rawString = rawString.replaceAll("\\>", ">");
+ rawString = rawString.replaceAll("\">", """);
+ return rawString;
+ }
+
+ /**
+ * This method checks, if a byte array contains chars that are not base64
+ * conform.
+ *
+ * @param byteArray
+ * the array to test
+ * @return boolean, if a byte array is base64 conform, false otherwise
+ */
+ public static boolean isB64(byte[] byteArray)
+ {
+ try
+ {
+ return Base64.isArrayByteBase64(byteArray);
+ }
+ catch (ArrayIndexOutOfBoundsException e)
+ {
+ return false;
+ }
+ }
+
+ /**
+ * This method checks, if a string contains chars that are not base64 conform.
+ *
+ * @param string
+ * the chars to test
+ * @return boolean, if the given string is base64 conform, false otherwise
+ */
+ public static boolean isB64(String string)
+ {
+ try
+ {
+ return Base64.isArrayByteBase64(string.getBytes());
+ }
+ catch (ArrayIndexOutOfBoundsException e)
+ {
+ return false;
+ }
+ }
+}
\ No newline at end of file diff --git a/src/main/java/at/knowcenter/wag/egov/egiz/tools/FileHelper.java b/src/main/java/at/knowcenter/wag/egov/egiz/tools/FileHelper.java new file mode 100644 index 0000000..0c1a420 --- /dev/null +++ b/src/main/java/at/knowcenter/wag/egov/egiz/tools/FileHelper.java @@ -0,0 +1,88 @@ +/*
+ * <copyright>
+ * Copyright (c) 2006 by Know-Center, Graz, Austria
+ * </copyright>
+ *
+ * This software is the confidential and proprietary information of Know-Center,
+ * Graz, Austria. You shall not disclose such Confidential Information and shall
+ * use it only in accordance with the terms of the license agreement you entered
+ * into with Know-Center.
+ *
+ * KNOW-CENTER MAKES NO REPRESENTATIONS OR WARRANTIES ABOUT THE SUITABILITY OF THE
+ * SOFTWARE, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE,
+ * OR NON-INFRINGEMENT. KNOW-CENTER SHALL NOT BE LIABLE FOR ANY DAMAGES
+ * SUFFERED BY LICENSEE AS A RESULT OF USING, MODIFYING OR DISTRIBUTING
+ * THIS SOFTWARE OR ITS DERIVATIVES.
+ *
+ * $Id: FileHelper.java,v 1.2 2006/05/15 12:05:21 wlackner Exp $
+ */
+package at.knowcenter.wag.egov.egiz.tools;
+
+import java.io.BufferedReader;
+import java.io.BufferedWriter;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.io.FileReader;
+import java.io.FileWriter;
+
+import org.apache.log4j.Logger;
+
+import at.knowcenter.wag.egov.egiz.cfg.ConfigLogger;
+
+/**
+ * This class provides file reader and writer methods. All methods are static!
+ *
+ * @author wlackner
+ */
+public class FileHelper {
+ /**
+ * The logger definition.
+ */
+ private static final Logger logger_ = ConfigLogger.getLogger(FileHelper.class);
+
+ /**
+ * This method reads a file by reading line by line.
+ *
+ * @param fileName the file to be read
+ * @return the content string of the file
+ */
+ public static String readFromFile(String fileName) {
+ String file_string = null;
+ try {
+ BufferedReader reader = new BufferedReader(new FileReader(fileName));
+ String line = null;
+ file_string = "";
+ while ((line = reader.readLine()) != null) {
+ file_string += line;
+ }
+ reader.close();
+ } catch (FileNotFoundException e) {
+ logger_.info("File not found:" + fileName);
+ } catch (IOException e) {
+ logger_.info("File can not read:" + fileName);
+ }
+ return file_string;
+ }
+
+ /**
+ * This method writes a file line by line.
+ *
+ * @param fileName the file to be written
+ * @param fileString the content to be written
+ * @return true if the file could be written sucessfully, false otherwise
+ */
+ public static boolean writeToFile(String fileName, String fileString) {
+ BufferedWriter writer;
+ try {
+ FileWriter fwriter = new FileWriter(fileName);
+ writer = new BufferedWriter(fwriter);
+ writer.write(fileString);
+ writer.close();
+ } catch (IOException e) {
+ logger_.info("File:" + fileName + " can not be written. Cause:" + e.getMessage());
+ return false;
+ }
+ return true;
+ }
+}
\ No newline at end of file diff --git a/src/main/java/at/knowcenter/wag/egov/egiz/tools/Normalize.java b/src/main/java/at/knowcenter/wag/egov/egiz/tools/Normalize.java new file mode 100644 index 0000000..a2df327 --- /dev/null +++ b/src/main/java/at/knowcenter/wag/egov/egiz/tools/Normalize.java @@ -0,0 +1,48 @@ +/*
+ * <copyright>
+ * Copyright (c) 2006 by Know-Center, Graz, Austria
+ * </copyright>
+ *
+ * This software is the confidential and proprietary information of Know-Center,
+ * Graz, Austria. You shall not disclose such Confidential Information and shall
+ * use it only in accordance with the terms of the license agreement you entered
+ * into with Know-Center.
+ *
+ * KNOW-CENTER MAKES NO REPRESENTATIONS OR WARRANTIES ABOUT THE SUITABILITY OF THE
+ * SOFTWARE, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE,
+ * OR NON-INFRINGEMENT. KNOW-CENTER SHALL NOT BE LIABLE FOR ANY DAMAGES
+ * SUFFERED BY LICENSEE AS A RESULT OF USING, MODIFYING OR DISTRIBUTING
+ * THIS SOFTWARE OR ITS DERIVATIVES.
+ *
+ * $Id: Normalize.java,v 1.2 2006/05/15 12:05:21 wlackner Exp $
+ */
+package at.knowcenter.wag.egov.egiz.tools;
+
+
+/**
+ * Defines an interface to get access to different normalizer implementations.
+ *
+ * @author wlackner
+ */
+public interface Normalize {
+
+ /**
+ * Normalize a given text.
+ * @param rawText the raw text to normalize
+ * @return the normalized string
+ */
+ public String normalize(String rawText);
+ /**
+ * Return the current normalizer version string.
+ * @return the version string
+ */
+ public String getVersion();
+
+ /**
+ * Returns the normalizer line separator string.
+ * @return the line separator string
+ */
+ public String getNormCR();
+
+}
diff --git a/src/main/java/at/knowcenter/wag/egov/egiz/tools/NormalizeV01.java b/src/main/java/at/knowcenter/wag/egov/egiz/tools/NormalizeV01.java new file mode 100644 index 0000000..d3af9b5 --- /dev/null +++ b/src/main/java/at/knowcenter/wag/egov/egiz/tools/NormalizeV01.java @@ -0,0 +1,166 @@ +/*
+ * <copyright>
+ * Copyright (c) 2006 by Know-Center, Graz, Austria
+ * </copyright>
+ *
+ * This software is the confidential and proprietary information of Know-Center,
+ * Graz, Austria. You shall not disclose such Confidential Information and shall
+ * use it only in accordance with the terms of the license agreement you entered
+ * into with Know-Center.
+ *
+ * KNOW-CENTER MAKES NO REPRESENTATIONS OR WARRANTIES ABOUT THE SUITABILITY OF THE
+ * SOFTWARE, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE,
+ * OR NON-INFRINGEMENT. KNOW-CENTER SHALL NOT BE LIABLE FOR ANY DAMAGES
+ * SUFFERED BY LICENSEE AS A RESULT OF USING, MODIFYING OR DISTRIBUTING
+ * THIS SOFTWARE OR ITS DERIVATIVES.
+ *
+ * $Id: NormalizeV01.java,v 1.5 2006/10/31 08:20:56 wprinz Exp $
+ */
+package at.knowcenter.wag.egov.egiz.tools;
+
+import java.io.Serializable;
+
+/**
+ * This ist the first version implementing a normalizer method. The normalize statements are
+ * performed by using regular expressions.
+ *
+ * @author wlackner
+ */
+public class NormalizeV01 implements Normalize, Serializable {
+
+ /**
+ * SVUID.
+ */
+ private static final long serialVersionUID = 2302956630639871601L;
+
+ /**
+ * The space string
+ */
+ private final static String NORM_SP = " "; //\u0020
+ /**
+ * The line break string --> use only \n because XML-Parser ignores \r\n
+ */
+ private final static String NORM_CR = "\n"; //
+ /**
+ * The apostrophe string
+ */
+ private final static String NORM_AP = "'"; //\u0027
+ /**
+ * The quotation mark string
+ */
+ private final static String NORM_QU = "\""; //\u0022
+ /**
+ * The hypens string
+ */
+ private final static String NORM_HY = "-"; //\u002D
+ /**
+ * The current version string
+ */
+ protected static final String VERSION = "V01";
+
+ /**
+ * The empty constructor.
+ */
+ public NormalizeV01() {
+ }
+
+ /**
+ * The normalizer implementation. <br>
+ * Normalizer algorithums:
+ * <ol>
+ * <li>code all multiple line breaks as \n\n</li>
+ * <li>replace all Tabs and form feeds with spaces</li>
+ * <li>code line breaks as \n</li>
+ * <li>reduce all multiple line breaks into one line break, code line break as \r</li>
+ * <li>replace all single line breaks with space</li>
+ * <li>normalize spaces</li>
+ * <li>remove spaces before and after a line break</li>
+ * <li>remove leading and trailing space or line break in the string</li>
+ * <li>normalize line breaks</li>
+ * <li>normalize apostrophes</li>
+ * <li>normalize quotations</li>
+ * <li>normalize hypens</li>
+ * </ol>
+ *
+ * @see at.knowcenter.wag.egov.egiz.tools.Normalize#normalize(java.lang.String)
+ */
+ public String normalize(String rawText) {
+ if (rawText == null || rawText.equals("null") || rawText.length() == 0) {
+ return "";
+ }
+ String normText = rawText;
+
+ // replace all null values
+ normText = normText.replaceAll("\u0000+", "");
+
+ // replace all Tabs and form feeds with spaces
+ normText = normText.replaceAll("[\t\f]", NORM_SP);
+
+ // replace all non breaking spaces with normal spaces
+ normText = normText.replaceAll("\u00a0+", NORM_SP);
+
+ // code all windows line breaks as \n
+ normText = normText.replaceAll("\r\n", "\n");
+
+ // code all mac line breaks as \n
+ normText = normText.replace('\r', '\n');
+
+ // reduce all multiple line breaks into two line breaks, code muliple line break as \r\r
+ normText = normText.replaceAll("\n[\\s\n]*\n", "\r\r");
+
+ // replace all single line breaks with one line break
+ normText = normText.replace('\n', '\r');
+
+ // normalize spaces
+ normText = normText.replaceAll(" +", NORM_SP);
+
+ // remove spaces before and after a single line break
+ normText = normText.replaceAll(" ?\r ?", "\r");
+
+ // remove spaces before and after a multiple line breaks
+ normText = normText.replaceAll(" ?\r\r ?", "\r");
+
+ // remove leading and trailing space or line break in the string
+ int start_idx = (normText.charAt(0) == ' ' || normText.charAt(0) == '\r' ? 1 : 0);
+ int end_idx = (normText.charAt(normText.length() - 1) == ' ' || normText.charAt(normText.length() - 1) == '\r' ? normText.length() - 1 : normText.length());
+ if (end_idx < start_idx) {
+ end_idx = start_idx;
+ }
+
+ // System.err.println("Start idx:" + start_idx + " End idx:" + end_idx + " Text length:" +
+ // normText_.length());
+ normText = normText.substring(start_idx, end_idx);
+
+ // normalize line breaks
+ normText = normText.replaceAll("\r", NORM_CR);
+
+ // normalize apostrophes
+ normText = normText.replaceAll("[\u0060\u00B4\u2018\u2019\u201A\u201B]", NORM_AP);
+
+ // normalize quotations
+ normText = normText.replaceAll("[\u201C\u201D\u201E\u201F]", NORM_QU);
+
+ // normalize hypens
+ normText = normText.replaceAll("[\u00AD\u2013\u2014]", NORM_HY);
+
+ return normText;
+ }
+
+ /**
+ * Return the version string.
+ *
+ * @see at.knowcenter.wag.egov.egiz.tools.Normalize#getVersion()
+ */
+ public String getVersion() {
+ return VERSION;
+ }
+
+ /**
+ * Returns the normalizer line separator string.
+ * @return the line separator string
+ */
+ public String getNormCR() {
+ return NORM_CR;
+ }
+}
\ No newline at end of file diff --git a/src/main/java/at/knowcenter/wag/egov/egiz/tools/Normalizer.java b/src/main/java/at/knowcenter/wag/egov/egiz/tools/Normalizer.java new file mode 100644 index 0000000..ab17e4a --- /dev/null +++ b/src/main/java/at/knowcenter/wag/egov/egiz/tools/Normalizer.java @@ -0,0 +1,270 @@ +/*
+ * <copyright>
+ * Copyright (c) 2006 by Know-Center, Graz, Austria
+ * </copyright>
+ *
+ * This software is the confidential and proprietary information of Know-Center,
+ * Graz, Austria. You shall not disclose such Confidential Information and shall
+ * use it only in accordance with the terms of the license agreement you entered
+ * into with Know-Center.
+ *
+ * KNOW-CENTER MAKES NO REPRESENTATIONS OR WARRANTIES ABOUT THE SUITABILITY OF THE
+ * SOFTWARE, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE,
+ * OR NON-INFRINGEMENT. KNOW-CENTER SHALL NOT BE LIABLE FOR ANY DAMAGES
+ * SUFFERED BY LICENSEE AS A RESULT OF USING, MODIFYING OR DISTRIBUTING
+ * THIS SOFTWARE OR ITS DERIVATIVES.
+ *
+ * $Id: Normalizer.java,v 1.5 2006/10/31 08:20:56 wprinz Exp $
+ */
+package at.knowcenter.wag.egov.egiz.tools;
+
+import java.io.Serializable;
+
+import org.apache.log4j.Level;
+import org.apache.log4j.Logger;
+
+import at.knowcenter.wag.egov.egiz.cfg.ConfigLogger;
+import at.knowcenter.wag.egov.egiz.cfg.SettingsReader;
+import at.knowcenter.wag.egov.egiz.exceptions.NormalizeException;
+import at.knowcenter.wag.egov.egiz.exceptions.SettingsException;
+
+/**
+ * This class provides wrapper methods to get an access to different normalizer implementations.
+ * <br>
+ * This class is to load the corresponding implementation of a normalizer class. Therefor it seams
+ * to be a factory. The factory settings are read from the configuration file calling the
+ * SettingsReader.
+ *
+ * @author wlackner
+ * @see at.knowcenter.wag.egov.egiz.tools.Normalizer
+ * @see at.knowcenter.wag.egov.egiz.tools.NormalizeV01
+ * @see at.knowcenter.wag.egov.egiz.cfg.SettingsReader
+ */
+public class Normalizer implements Serializable {
+
+ /**
+ * SVUID.
+ */
+ private static final long serialVersionUID = 4201772508393848555L;
+
+ /**
+ * The current raw string to normalize
+ */
+ private String rawString_ = null;
+ /**
+ * The current normalisation version string
+ */
+ private String normVersion_ = null;
+ /**
+ * The normalized string cache
+ */
+ private String normString_ = null;
+ /**
+ * The reference to the normalizer implementation
+ */
+ private Normalize normalize_ = null;
+// /**
+// * A given Encoding, not used now
+// */
+// private String encoding_ = null;
+ /**
+ * The SettingsReader instance
+ */
+ private SettingsReader settings_ = null;
+ /**
+ * The factory class prefix
+ */
+ private final static String CLASS_PREFIX = ".Normalize";
+ /**
+ * The default version string
+ */
+ protected final static String DEFAULT_VERSION = "V01";
+ /**
+ * The settings key defined in the settings file
+ *
+ * @see SettingsReader
+ */
+ protected final static String SETTINGS_VERSION_KEY = "normalizer.version";
+ /**
+ * The logger definition.
+ */
+ private static final Logger logger_ = ConfigLogger.getLogger(Normalizer.class);
+
+ /**
+ * New Normalizer init by the raw string and a normalizer version.
+ *
+ * @param rawString the raw string to normalize
+ * @param normVersion the nomalizer version that should be used
+ * @throws NormalizeException ErrorCode:400
+ */
+ public Normalizer(String rawString, String normVersion) throws NormalizeException {
+ rawString_ = rawString;
+ normVersion_ = normVersion;
+ init();
+ }
+
+ /**
+ * New Normalizer init by the raw string.
+ *
+ * @param rawString the raw string to normalize
+ * @throws NormalizeException ErrorCode:400
+ */
+ public Normalizer(String rawString) throws NormalizeException {
+ rawString_ = rawString;
+ init();
+ }
+
+ /**
+ * The empty constructor.
+ *
+ * @throws NormalizeException ErrorCode:400
+ */
+ public Normalizer() throws NormalizeException {
+ init();
+ }
+
+ /**
+ * Load the factory implementation. This method trys to load the configured normalizer library.
+ *
+ * @throws NormalizeException
+ */
+ public void init() throws NormalizeException {
+ loadSettings();
+ String class_name = this.getClass().getPackage().getName() + getClassName();
+ Class normalize_class = null;
+ try {
+ normalize_class = Class.forName(class_name);
+ } catch (ClassNotFoundException e) {
+ if (logger_.isEnabledFor(Level.FATAL)) {
+ logger_.fatal("Class not found:" + class_name);
+ }
+ NormalizeException ne = new NormalizeException(400, "Can not load normalizer library");
+ ne.setErrorCode(400);
+ throw ne;
+ }
+ try {
+ normalize_ = (Normalize) normalize_class.newInstance();
+ } catch (InstantiationException e) {
+ if (logger_.isEnabledFor(Level.FATAL)) {
+ logger_.fatal("Can not instantiate:" + class_name);
+ }
+ NormalizeException ne = new NormalizeException(400, "Can not load normalizer library");
+ ne.setErrorCode(400);
+ throw ne;
+ } catch (IllegalAccessException e) {
+ if (logger_.isEnabledFor(Level.FATAL)) {
+ logger_.fatal("Can not access:" + class_name);
+ }
+ NormalizeException ne = new NormalizeException(400, "Can not load normalizer library");
+ ne.setErrorCode(400);
+ throw ne;
+ }
+ }
+
+ /**
+ * Read the class postfix from the configuration file
+ *
+ * @return the full qualified class name
+ */
+ private String getClassName() {
+ if (normVersion_ == null) {
+ normVersion_ = settings_.getSetting(SETTINGS_VERSION_KEY, DEFAULT_VERSION);
+ }
+ return CLASS_PREFIX + normVersion_;
+ }
+
+ /*
+ * public void setEncoding(String encoding) { encoding_ = encoding; }
+ */
+
+ /**
+ * Set the raw string to normalize
+ */
+ public void setRawString(String rawString) {
+ rawString_ = rawString;
+ }
+
+ /**
+ * Return the normalized string. If the chached value does not exist the normalize method from the
+ * current normalizer implementation is called.
+ *
+ * @return the normalized string
+ */
+ public String getNormalizedString() {
+ if (normString_ == null) {
+ normalize();
+ }
+ return normString_;
+ }
+
+ /**
+ * Set a normalizer version. This activity load the new requested normalizer implementation.
+ *
+ * @param normVersion the normalizer version to be use
+ * @throws NormalizeException ErrorCode:400
+ */
+ public void setVersion(String normVersion) throws NormalizeException {
+ normVersion_ = normVersion;
+ init();
+ }
+
+ /**
+ * Return the current version string.
+ *
+ * @return the normaliser version string
+ */
+ public String getVersion() {
+ return normVersion_;
+ }
+
+ /**
+ * Wrapper method. Call the normalizer implementation method.
+ *
+ * @param rawString the raw string to normalize
+ * @return the normalized string
+ * @see NormalizeV01
+ */
+ public String normalize(String rawString) {
+ return normalize_.normalize(rawString);
+ }
+
+ /**
+ * Wrapper method. Call the normalizer implementation method. Normalize the current raw string.
+ *
+ * @return the normalized string
+ * @see NormalizeV01
+ */
+ public String normalize() {
+ if (normString_ == null) {
+ normString_ = normalize(rawString_);
+ }
+ return normString_;
+ }
+
+ /**
+ * Returns the normalizer line separator string.
+ * @return the line separator string
+ */
+ public String getNormCR() {
+ return normalize_.getNormCR();
+ }
+
+ /**
+ * load the class settings
+ *
+ * @throws NormalizeException
+ * @see SettingsReader
+ */
+ private void loadSettings() throws NormalizeException {
+ if (settings_ == null) {
+ try {
+ settings_ = SettingsReader.getInstance();
+ } catch (SettingsException e) {
+ String log_message = "Can not load normalizer settings. Cause:\n" + e.getMessage();
+ logger_.error(log_message);
+ throw new NormalizeException(400, log_message, e);
+ }
+ }
+ }
+}
\ No newline at end of file |