From 6025b6016517c6d898d8957d1d7e03ba71431912 Mon Sep 17 00:00:00 2001 From: tknall Date: Fri, 1 Dec 2006 12:20:24 +0000 Subject: Initial import of release 2.2. git-svn-id: https://joinup.ec.europa.eu/svn/pdf-as/trunk@4 7b5415b0-85f9-ee4d-85bd-d5d0c3b42d1c --- .../wag/egov/egiz/tools/CodingHelper.java | 272 +++++++++++++++++++++ .../knowcenter/wag/egov/egiz/tools/FileHelper.java | 88 +++++++ .../knowcenter/wag/egov/egiz/tools/Normalize.java | 48 ++++ .../wag/egov/egiz/tools/NormalizeV01.java | 166 +++++++++++++ .../knowcenter/wag/egov/egiz/tools/Normalizer.java | 270 ++++++++++++++++++++ 5 files changed, 844 insertions(+) create mode 100644 src/main/java/at/knowcenter/wag/egov/egiz/tools/CodingHelper.java create mode 100644 src/main/java/at/knowcenter/wag/egov/egiz/tools/FileHelper.java create mode 100644 src/main/java/at/knowcenter/wag/egov/egiz/tools/Normalize.java create mode 100644 src/main/java/at/knowcenter/wag/egov/egiz/tools/NormalizeV01.java create mode 100644 src/main/java/at/knowcenter/wag/egov/egiz/tools/Normalizer.java (limited to 'src/main/java/at/knowcenter/wag/egov/egiz/tools') diff --git a/src/main/java/at/knowcenter/wag/egov/egiz/tools/CodingHelper.java b/src/main/java/at/knowcenter/wag/egov/egiz/tools/CodingHelper.java new file mode 100644 index 0000000..7908486 --- /dev/null +++ b/src/main/java/at/knowcenter/wag/egov/egiz/tools/CodingHelper.java @@ -0,0 +1,272 @@ +/* + * Copyright (c) 2006 by Know-Center, Graz, Austria + * + * This software is the confidential and proprietary information of Know-Center, + * Graz, Austria. You shall not disclose such Confidential Information and shall + * use it only in accordance with the terms of the license agreement you entered + * into with Know-Center. + * + * KNOW-CENTER MAKES NO REPRESENTATIONS OR WARRANTIES ABOUT THE SUITABILITY OF + * THE SOFTWARE, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE + * IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, OR + * NON-INFRINGEMENT. KNOW-CENTER SHALL NOT BE LIABLE FOR ANY DAMAGES SUFFERED BY + * LICENSEE AS A RESULT OF USING, MODIFYING OR DISTRIBUTING THIS SOFTWARE OR ITS + * DERIVATIVES. + * + * $Id: CodingHelper.java,v 1.6 2006/10/11 07:52:36 wprinz Exp $ + */ +package at.knowcenter.wag.egov.egiz.tools; + +import java.io.UnsupportedEncodingException; +import java.security.MessageDigest; +import java.security.NoSuchAlgorithmException; + +import org.apache.commons.codec.binary.Base64; + +/** + * This class provides encoding and decoding methods and other coding methods. + * All methods are static! + * + * @author wlackner + */ +public class CodingHelper +{ + + /** + * Static Base64 object + */ + private static Base64 b64 = new Base64(); + + /** + * This method encodes a given Unicode (Java) String to UTF-8 bytes and then + * encodes these UTF-8 bytes to a Base64 US-ASCII (Java) String. + * + * @param plain_string + * to be encoded + * @return the UTF-8 and Base64 encoded string + */ + public static String encodeUTF8AsBase64(String plain_string) + { + try + { + byte[] utf8_bytes = plain_string.getBytes("UTF-8"); + byte[] base64_bytes = b64.encode(utf8_bytes); + String encoded_string = new String(base64_bytes, "US-ASCII"); + return encoded_string; + } + catch (UnsupportedEncodingException e) + { + e.printStackTrace(); + throw new RuntimeException(e); + } + } + + /** + * This method decodes the UTF-8 bytes from a Base64 US-ASCII (Java) String + * and decodes the UTF-8 bytes to a unicode (Java) String. + * + * @param encoded_string + * to be decoded + * @return the Base64 and UTF-8 decoded string + */ + public static String decodeUTF8FromBase64(String encoded_string) + { + try + { + byte[] base64_bytes = encoded_string.getBytes("US-ASCII"); + byte[] utf8_bytes = b64.decode(base64_bytes); + String plain_string = new String(utf8_bytes, "UTF-8"); + return plain_string; + } + catch (UnsupportedEncodingException e) + { + e.printStackTrace(); + throw new RuntimeException(e); + } + } + + // /** + // * This method encodes a given string UTF-8 + // * + // * @param theString to be encoded + // * @return the UTF-8 encoded string + // */ + // public static byte[] encodeUTF8(String theString) { + // byte[] utf8 = null; + // try { + // utf8 = theString.getBytes("UTF-8"); + // } catch (UnsupportedEncodingException e) { + // e.printStackTrace(); + // } + // return utf8; + // } + + // /** + // * This method decodes a given UTF-8 string + // * + // * @param theString to be decoded + // * @return the decoded UTF-8 string + // */ + // public static String decodeUTF8(String theString) { + // byte[] ba = theString.getBytes(); + // String the_string = decodeUTF8(ba); + // if (the_string != null) { + // return the_string; + // } + // return theString; + // } + + // /** + // * This method decodes a given UTF-8 byte array + // * + // * @param ba the byte array to be decoded + // * @return the decoded UTF-8 string + // */ + // public static String decodeUTF8(byte[] ba) { + // String the_string = null; + // try { + // the_string = new String(ba, "UTF-8"); + // } catch (UnsupportedEncodingException e) { + // e.printStackTrace(); + // } + // return the_string; + // } + + /** + * This method decodes a given Base64 string. + * + *

+ * Note that the given String must only contain Base64 characters. (The string + * will be converted to a byte array of "US-ASCII" (7 bit) bytes and then this + * byte array will be decoded using the Base64 algorithm. + *

+ * + * @param theString + * to be decoded + * @return a Base64 decoded byte array + */ + public static byte[] decodeBase64(String theString) + { + try + { + byte[] base64_bytes = theString.getBytes("US-ASCII"); + return b64.decode(base64_bytes); + } + catch (UnsupportedEncodingException e) + { + e.printStackTrace(); + throw new RuntimeException("Very Strange: US-ASCII encoding not supported???", e); + } + } + + /** + * This method decodes a given Base64 byte array + * + * @param ba + * the byte array to be decoded + * @return a Base64 decoded byte array + */ + public static byte[] decodeBase64(byte[] ba) + { + return b64.decode(ba); + } + + /** + * This method encodes a given byte array Base64 + * + * @param plainString + * the byte array to be encoded + * @return the Base64 encoded string + */ + public static String encodeBase64(byte[] plainString) + { + try + { + byte[] base64_bytes = b64.encode(plainString); + return new String(base64_bytes, "US-ASCII"); + } + catch (UnsupportedEncodingException e) + { + e.printStackTrace(); + throw new RuntimeException("Very Strange: US-ASCII encoding not supported???", e); + } + } + + /** + * This method builds an SHA-1 hash value of a given byte array. + * + * @param data + * the byte array to build the hash value for + * @return the calculated hash value as a byte array + * @see MessageDigest + */ + public static byte[] buildDigest(byte[] data) + { + MessageDigest sha_1 = null; + try + { + sha_1 = MessageDigest.getInstance("SHA-1"); + sha_1.update(data); + return sha_1.digest(); + } + catch (NoSuchAlgorithmException e) + { + return null; + } + } + + /** + * This method escapes a given string with HTML entities. + * + * @param rawString + * the string to escaped + * @return the HTML escaped string + */ + public static String htmlEscape(String rawString) + { + rawString = rawString.replaceAll("\\&", "&"); + rawString = rawString.replaceAll("\\<", "<"); + rawString = rawString.replaceAll("\\>", ">"); + rawString = rawString.replaceAll("\">", """); + return rawString; + } + + /** + * This method checks, if a byte array contains chars that are not base64 + * conform. + * + * @param byteArray + * the array to test + * @return boolean, if a byte array is base64 conform, false otherwise + */ + public static boolean isB64(byte[] byteArray) + { + try + { + return Base64.isArrayByteBase64(byteArray); + } + catch (ArrayIndexOutOfBoundsException e) + { + return false; + } + } + + /** + * This method checks, if a string contains chars that are not base64 conform. + * + * @param string + * the chars to test + * @return boolean, if the given string is base64 conform, false otherwise + */ + public static boolean isB64(String string) + { + try + { + return Base64.isArrayByteBase64(string.getBytes()); + } + catch (ArrayIndexOutOfBoundsException e) + { + return false; + } + } +} \ No newline at end of file diff --git a/src/main/java/at/knowcenter/wag/egov/egiz/tools/FileHelper.java b/src/main/java/at/knowcenter/wag/egov/egiz/tools/FileHelper.java new file mode 100644 index 0000000..0c1a420 --- /dev/null +++ b/src/main/java/at/knowcenter/wag/egov/egiz/tools/FileHelper.java @@ -0,0 +1,88 @@ +/* + * + * Copyright (c) 2006 by Know-Center, Graz, Austria + * + * + * This software is the confidential and proprietary information of Know-Center, + * Graz, Austria. You shall not disclose such Confidential Information and shall + * use it only in accordance with the terms of the license agreement you entered + * into with Know-Center. + * + * KNOW-CENTER MAKES NO REPRESENTATIONS OR WARRANTIES ABOUT THE SUITABILITY OF THE + * SOFTWARE, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE + * IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, + * OR NON-INFRINGEMENT. KNOW-CENTER SHALL NOT BE LIABLE FOR ANY DAMAGES + * SUFFERED BY LICENSEE AS A RESULT OF USING, MODIFYING OR DISTRIBUTING + * THIS SOFTWARE OR ITS DERIVATIVES. + * + * $Id: FileHelper.java,v 1.2 2006/05/15 12:05:21 wlackner Exp $ + */ +package at.knowcenter.wag.egov.egiz.tools; + +import java.io.BufferedReader; +import java.io.BufferedWriter; +import java.io.FileNotFoundException; +import java.io.IOException; +import java.io.FileReader; +import java.io.FileWriter; + +import org.apache.log4j.Logger; + +import at.knowcenter.wag.egov.egiz.cfg.ConfigLogger; + +/** + * This class provides file reader and writer methods. All methods are static! + * + * @author wlackner + */ +public class FileHelper { + /** + * The logger definition. + */ + private static final Logger logger_ = ConfigLogger.getLogger(FileHelper.class); + + /** + * This method reads a file by reading line by line. + * + * @param fileName the file to be read + * @return the content string of the file + */ + public static String readFromFile(String fileName) { + String file_string = null; + try { + BufferedReader reader = new BufferedReader(new FileReader(fileName)); + String line = null; + file_string = ""; + while ((line = reader.readLine()) != null) { + file_string += line; + } + reader.close(); + } catch (FileNotFoundException e) { + logger_.info("File not found:" + fileName); + } catch (IOException e) { + logger_.info("File can not read:" + fileName); + } + return file_string; + } + + /** + * This method writes a file line by line. + * + * @param fileName the file to be written + * @param fileString the content to be written + * @return true if the file could be written sucessfully, false otherwise + */ + public static boolean writeToFile(String fileName, String fileString) { + BufferedWriter writer; + try { + FileWriter fwriter = new FileWriter(fileName); + writer = new BufferedWriter(fwriter); + writer.write(fileString); + writer.close(); + } catch (IOException e) { + logger_.info("File:" + fileName + " can not be written. Cause:" + e.getMessage()); + return false; + } + return true; + } +} \ No newline at end of file diff --git a/src/main/java/at/knowcenter/wag/egov/egiz/tools/Normalize.java b/src/main/java/at/knowcenter/wag/egov/egiz/tools/Normalize.java new file mode 100644 index 0000000..a2df327 --- /dev/null +++ b/src/main/java/at/knowcenter/wag/egov/egiz/tools/Normalize.java @@ -0,0 +1,48 @@ +/* + * + * Copyright (c) 2006 by Know-Center, Graz, Austria + * + * + * This software is the confidential and proprietary information of Know-Center, + * Graz, Austria. You shall not disclose such Confidential Information and shall + * use it only in accordance with the terms of the license agreement you entered + * into with Know-Center. + * + * KNOW-CENTER MAKES NO REPRESENTATIONS OR WARRANTIES ABOUT THE SUITABILITY OF THE + * SOFTWARE, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE + * IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, + * OR NON-INFRINGEMENT. KNOW-CENTER SHALL NOT BE LIABLE FOR ANY DAMAGES + * SUFFERED BY LICENSEE AS A RESULT OF USING, MODIFYING OR DISTRIBUTING + * THIS SOFTWARE OR ITS DERIVATIVES. + * + * $Id: Normalize.java,v 1.2 2006/05/15 12:05:21 wlackner Exp $ + */ +package at.knowcenter.wag.egov.egiz.tools; + + +/** + * Defines an interface to get access to different normalizer implementations. + * + * @author wlackner + */ +public interface Normalize { + + /** + * Normalize a given text. + * @param rawText the raw text to normalize + * @return the normalized string + */ + public String normalize(String rawText); + /** + * Return the current normalizer version string. + * @return the version string + */ + public String getVersion(); + + /** + * Returns the normalizer line separator string. + * @return the line separator string + */ + public String getNormCR(); + +} diff --git a/src/main/java/at/knowcenter/wag/egov/egiz/tools/NormalizeV01.java b/src/main/java/at/knowcenter/wag/egov/egiz/tools/NormalizeV01.java new file mode 100644 index 0000000..d3af9b5 --- /dev/null +++ b/src/main/java/at/knowcenter/wag/egov/egiz/tools/NormalizeV01.java @@ -0,0 +1,166 @@ +/* + * + * Copyright (c) 2006 by Know-Center, Graz, Austria + * + * + * This software is the confidential and proprietary information of Know-Center, + * Graz, Austria. You shall not disclose such Confidential Information and shall + * use it only in accordance with the terms of the license agreement you entered + * into with Know-Center. + * + * KNOW-CENTER MAKES NO REPRESENTATIONS OR WARRANTIES ABOUT THE SUITABILITY OF THE + * SOFTWARE, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE + * IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, + * OR NON-INFRINGEMENT. KNOW-CENTER SHALL NOT BE LIABLE FOR ANY DAMAGES + * SUFFERED BY LICENSEE AS A RESULT OF USING, MODIFYING OR DISTRIBUTING + * THIS SOFTWARE OR ITS DERIVATIVES. + * + * $Id: NormalizeV01.java,v 1.5 2006/10/31 08:20:56 wprinz Exp $ + */ +package at.knowcenter.wag.egov.egiz.tools; + +import java.io.Serializable; + +/** + * This ist the first version implementing a normalizer method. The normalize statements are + * performed by using regular expressions. + * + * @author wlackner + */ +public class NormalizeV01 implements Normalize, Serializable { + + /** + * SVUID. + */ + private static final long serialVersionUID = 2302956630639871601L; + + /** + * The space string + */ + private final static String NORM_SP = " "; //\u0020 + /** + * The line break string --> use only \n because XML-Parser ignores \r\n + */ + private final static String NORM_CR = "\n"; // + /** + * The apostrophe string + */ + private final static String NORM_AP = "'"; //\u0027 + /** + * The quotation mark string + */ + private final static String NORM_QU = "\""; //\u0022 + /** + * The hypens string + */ + private final static String NORM_HY = "-"; //\u002D + /** + * The current version string + */ + protected static final String VERSION = "V01"; + + /** + * The empty constructor. + */ + public NormalizeV01() { + } + + /** + * The normalizer implementation.
+ * Normalizer algorithums: + *
    + *
  1. code all multiple line breaks as \n\n
  2. + *
  3. replace all Tabs and form feeds with spaces
  4. + *
  5. code line breaks as \n
  6. + *
  7. reduce all multiple line breaks into one line break, code line break as \r
  8. + *
  9. replace all single line breaks with space
  10. + *
  11. normalize spaces
  12. + *
  13. remove spaces before and after a line break
  14. + *
  15. remove leading and trailing space or line break in the string
  16. + *
  17. normalize line breaks
  18. + *
  19. normalize apostrophes
  20. + *
  21. normalize quotations
  22. + *
  23. normalize hypens
  24. + *
+ * + * @see at.knowcenter.wag.egov.egiz.tools.Normalize#normalize(java.lang.String) + */ + public String normalize(String rawText) { + if (rawText == null || rawText.equals("null") || rawText.length() == 0) { + return ""; + } + String normText = rawText; + + // replace all null values + normText = normText.replaceAll("\u0000+", ""); + + // replace all Tabs and form feeds with spaces + normText = normText.replaceAll("[\t\f]", NORM_SP); + + // replace all non breaking spaces with normal spaces + normText = normText.replaceAll("\u00a0+", NORM_SP); + + // code all windows line breaks as \n + normText = normText.replaceAll("\r\n", "\n"); + + // code all mac line breaks as \n + normText = normText.replace('\r', '\n'); + + // reduce all multiple line breaks into two line breaks, code muliple line break as \r\r + normText = normText.replaceAll("\n[\\s\n]*\n", "\r\r"); + + // replace all single line breaks with one line break + normText = normText.replace('\n', '\r'); + + // normalize spaces + normText = normText.replaceAll(" +", NORM_SP); + + // remove spaces before and after a single line break + normText = normText.replaceAll(" ?\r ?", "\r"); + + // remove spaces before and after a multiple line breaks + normText = normText.replaceAll(" ?\r\r ?", "\r"); + + // remove leading and trailing space or line break in the string + int start_idx = (normText.charAt(0) == ' ' || normText.charAt(0) == '\r' ? 1 : 0); + int end_idx = (normText.charAt(normText.length() - 1) == ' ' || normText.charAt(normText.length() - 1) == '\r' ? normText.length() - 1 : normText.length()); + if (end_idx < start_idx) { + end_idx = start_idx; + } + + // System.err.println("Start idx:" + start_idx + " End idx:" + end_idx + " Text length:" + + // normText_.length()); + normText = normText.substring(start_idx, end_idx); + + // normalize line breaks + normText = normText.replaceAll("\r", NORM_CR); + + // normalize apostrophes + normText = normText.replaceAll("[\u0060\u00B4\u2018\u2019\u201A\u201B]", NORM_AP); + + // normalize quotations + normText = normText.replaceAll("[\u201C\u201D\u201E\u201F]", NORM_QU); + + // normalize hypens + normText = normText.replaceAll("[\u00AD\u2013\u2014]", NORM_HY); + + return normText; + } + + /** + * Return the version string. + * + * @see at.knowcenter.wag.egov.egiz.tools.Normalize#getVersion() + */ + public String getVersion() { + return VERSION; + } + + /** + * Returns the normalizer line separator string. + * @return the line separator string + */ + public String getNormCR() { + return NORM_CR; + } +} \ No newline at end of file diff --git a/src/main/java/at/knowcenter/wag/egov/egiz/tools/Normalizer.java b/src/main/java/at/knowcenter/wag/egov/egiz/tools/Normalizer.java new file mode 100644 index 0000000..ab17e4a --- /dev/null +++ b/src/main/java/at/knowcenter/wag/egov/egiz/tools/Normalizer.java @@ -0,0 +1,270 @@ +/* + * + * Copyright (c) 2006 by Know-Center, Graz, Austria + * + * + * This software is the confidential and proprietary information of Know-Center, + * Graz, Austria. You shall not disclose such Confidential Information and shall + * use it only in accordance with the terms of the license agreement you entered + * into with Know-Center. + * + * KNOW-CENTER MAKES NO REPRESENTATIONS OR WARRANTIES ABOUT THE SUITABILITY OF THE + * SOFTWARE, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE + * IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, + * OR NON-INFRINGEMENT. KNOW-CENTER SHALL NOT BE LIABLE FOR ANY DAMAGES + * SUFFERED BY LICENSEE AS A RESULT OF USING, MODIFYING OR DISTRIBUTING + * THIS SOFTWARE OR ITS DERIVATIVES. + * + * $Id: Normalizer.java,v 1.5 2006/10/31 08:20:56 wprinz Exp $ + */ +package at.knowcenter.wag.egov.egiz.tools; + +import java.io.Serializable; + +import org.apache.log4j.Level; +import org.apache.log4j.Logger; + +import at.knowcenter.wag.egov.egiz.cfg.ConfigLogger; +import at.knowcenter.wag.egov.egiz.cfg.SettingsReader; +import at.knowcenter.wag.egov.egiz.exceptions.NormalizeException; +import at.knowcenter.wag.egov.egiz.exceptions.SettingsException; + +/** + * This class provides wrapper methods to get an access to different normalizer implementations. + *
+ * This class is to load the corresponding implementation of a normalizer class. Therefor it seams + * to be a factory. The factory settings are read from the configuration file calling the + * SettingsReader. + * + * @author wlackner + * @see at.knowcenter.wag.egov.egiz.tools.Normalizer + * @see at.knowcenter.wag.egov.egiz.tools.NormalizeV01 + * @see at.knowcenter.wag.egov.egiz.cfg.SettingsReader + */ +public class Normalizer implements Serializable { + + /** + * SVUID. + */ + private static final long serialVersionUID = 4201772508393848555L; + + /** + * The current raw string to normalize + */ + private String rawString_ = null; + /** + * The current normalisation version string + */ + private String normVersion_ = null; + /** + * The normalized string cache + */ + private String normString_ = null; + /** + * The reference to the normalizer implementation + */ + private Normalize normalize_ = null; +// /** +// * A given Encoding, not used now +// */ +// private String encoding_ = null; + /** + * The SettingsReader instance + */ + private SettingsReader settings_ = null; + /** + * The factory class prefix + */ + private final static String CLASS_PREFIX = ".Normalize"; + /** + * The default version string + */ + protected final static String DEFAULT_VERSION = "V01"; + /** + * The settings key defined in the settings file + * + * @see SettingsReader + */ + protected final static String SETTINGS_VERSION_KEY = "normalizer.version"; + /** + * The logger definition. + */ + private static final Logger logger_ = ConfigLogger.getLogger(Normalizer.class); + + /** + * New Normalizer init by the raw string and a normalizer version. + * + * @param rawString the raw string to normalize + * @param normVersion the nomalizer version that should be used + * @throws NormalizeException ErrorCode:400 + */ + public Normalizer(String rawString, String normVersion) throws NormalizeException { + rawString_ = rawString; + normVersion_ = normVersion; + init(); + } + + /** + * New Normalizer init by the raw string. + * + * @param rawString the raw string to normalize + * @throws NormalizeException ErrorCode:400 + */ + public Normalizer(String rawString) throws NormalizeException { + rawString_ = rawString; + init(); + } + + /** + * The empty constructor. + * + * @throws NormalizeException ErrorCode:400 + */ + public Normalizer() throws NormalizeException { + init(); + } + + /** + * Load the factory implementation. This method trys to load the configured normalizer library. + * + * @throws NormalizeException + */ + public void init() throws NormalizeException { + loadSettings(); + String class_name = this.getClass().getPackage().getName() + getClassName(); + Class normalize_class = null; + try { + normalize_class = Class.forName(class_name); + } catch (ClassNotFoundException e) { + if (logger_.isEnabledFor(Level.FATAL)) { + logger_.fatal("Class not found:" + class_name); + } + NormalizeException ne = new NormalizeException(400, "Can not load normalizer library"); + ne.setErrorCode(400); + throw ne; + } + try { + normalize_ = (Normalize) normalize_class.newInstance(); + } catch (InstantiationException e) { + if (logger_.isEnabledFor(Level.FATAL)) { + logger_.fatal("Can not instantiate:" + class_name); + } + NormalizeException ne = new NormalizeException(400, "Can not load normalizer library"); + ne.setErrorCode(400); + throw ne; + } catch (IllegalAccessException e) { + if (logger_.isEnabledFor(Level.FATAL)) { + logger_.fatal("Can not access:" + class_name); + } + NormalizeException ne = new NormalizeException(400, "Can not load normalizer library"); + ne.setErrorCode(400); + throw ne; + } + } + + /** + * Read the class postfix from the configuration file + * + * @return the full qualified class name + */ + private String getClassName() { + if (normVersion_ == null) { + normVersion_ = settings_.getSetting(SETTINGS_VERSION_KEY, DEFAULT_VERSION); + } + return CLASS_PREFIX + normVersion_; + } + + /* + * public void setEncoding(String encoding) { encoding_ = encoding; } + */ + + /** + * Set the raw string to normalize + */ + public void setRawString(String rawString) { + rawString_ = rawString; + } + + /** + * Return the normalized string. If the chached value does not exist the normalize method from the + * current normalizer implementation is called. + * + * @return the normalized string + */ + public String getNormalizedString() { + if (normString_ == null) { + normalize(); + } + return normString_; + } + + /** + * Set a normalizer version. This activity load the new requested normalizer implementation. + * + * @param normVersion the normalizer version to be use + * @throws NormalizeException ErrorCode:400 + */ + public void setVersion(String normVersion) throws NormalizeException { + normVersion_ = normVersion; + init(); + } + + /** + * Return the current version string. + * + * @return the normaliser version string + */ + public String getVersion() { + return normVersion_; + } + + /** + * Wrapper method. Call the normalizer implementation method. + * + * @param rawString the raw string to normalize + * @return the normalized string + * @see NormalizeV01 + */ + public String normalize(String rawString) { + return normalize_.normalize(rawString); + } + + /** + * Wrapper method. Call the normalizer implementation method. Normalize the current raw string. + * + * @return the normalized string + * @see NormalizeV01 + */ + public String normalize() { + if (normString_ == null) { + normString_ = normalize(rawString_); + } + return normString_; + } + + /** + * Returns the normalizer line separator string. + * @return the line separator string + */ + public String getNormCR() { + return normalize_.getNormCR(); + } + + /** + * load the class settings + * + * @throws NormalizeException + * @see SettingsReader + */ + private void loadSettings() throws NormalizeException { + if (settings_ == null) { + try { + settings_ = SettingsReader.getInstance(); + } catch (SettingsException e) { + String log_message = "Can not load normalizer settings. Cause:\n" + e.getMessage(); + logger_.error(log_message); + throw new NormalizeException(400, log_message, e); + } + } + } +} \ No newline at end of file -- cgit v1.2.3