aboutsummaryrefslogtreecommitdiff
path: root/src/main/java/at/knowcenter/wag/egov/egiz/tools
diff options
context:
space:
mode:
Diffstat (limited to 'src/main/java/at/knowcenter/wag/egov/egiz/tools')
-rw-r--r--src/main/java/at/knowcenter/wag/egov/egiz/tools/Normalize.java3
-rw-r--r--src/main/java/at/knowcenter/wag/egov/egiz/tools/NormalizeV01.java26
-rw-r--r--src/main/java/at/knowcenter/wag/egov/egiz/tools/Normalizer.java34
3 files changed, 39 insertions, 24 deletions
diff --git a/src/main/java/at/knowcenter/wag/egov/egiz/tools/Normalize.java b/src/main/java/at/knowcenter/wag/egov/egiz/tools/Normalize.java
index a2df327..80cb286 100644
--- a/src/main/java/at/knowcenter/wag/egov/egiz/tools/Normalize.java
+++ b/src/main/java/at/knowcenter/wag/egov/egiz/tools/Normalize.java
@@ -30,9 +30,10 @@ public interface Normalize {
/**
* Normalize a given text.
* @param rawText the raw text to normalize
+ * @param keepMultipleLineBreaks if true multiple line breaks in a row will not be normalized to a single line break
* @return the normalized string
*/
- public String normalize(String rawText);
+ public String normalize(String rawText, boolean keepMultipleLineBreaks);
/**
* Return the current normalizer version string.
* @return the version string
diff --git a/src/main/java/at/knowcenter/wag/egov/egiz/tools/NormalizeV01.java b/src/main/java/at/knowcenter/wag/egov/egiz/tools/NormalizeV01.java
index d3af9b5..c8f10d9 100644
--- a/src/main/java/at/knowcenter/wag/egov/egiz/tools/NormalizeV01.java
+++ b/src/main/java/at/knowcenter/wag/egov/egiz/tools/NormalizeV01.java
@@ -28,7 +28,8 @@ import java.io.Serializable;
* @author wlackner
*/
public class NormalizeV01 implements Normalize, Serializable {
-
+// 04.11.2010 change by exthex - added keepMultipleLineBreaks parameter to normalize method
+// to allow multiple line breaks to not be normalized to a single one
/**
* SVUID.
*/
@@ -72,7 +73,7 @@ public class NormalizeV01 implements Normalize, Serializable {
* <li>code all multiple line breaks as \n\n</li>
* <li>replace all Tabs and form feeds with spaces</li>
* <li>code line breaks as \n</li>
- * <li>reduce all multiple line breaks into one line break, code line break as \r</li>
+ * <li>reduce all multiple line breaks into one line break (only if keepMultipleLineBreaks == false), code line break as \r</li>
* <li>replace all single line breaks with space</li>
* <li>normalize spaces</li>
* <li>remove spaces before and after a line break</li>
@@ -83,9 +84,11 @@ public class NormalizeV01 implements Normalize, Serializable {
* <li>normalize hypens</li>
* </ol>
*
+ * @param rawText the text to normalize
+ * @param keepMultipleLineBreaks if true, multiple newlines in a row will not be normalized to a single line break
* @see at.knowcenter.wag.egov.egiz.tools.Normalize#normalize(java.lang.String)
*/
- public String normalize(String rawText) {
+ public String normalize(String rawText, boolean keepMultipleLineBreaks) {
if (rawText == null || rawText.equals("null") || rawText.length() == 0) {
return "";
}
@@ -106,8 +109,11 @@ public class NormalizeV01 implements Normalize, Serializable {
// code all mac line breaks as \n
normText = normText.replace('\r', '\n');
- // reduce all multiple line breaks into two line breaks, code muliple line break as \r\r
- normText = normText.replaceAll("\n[\\s\n]*\n", "\r\r");
+ if (!keepMultipleLineBreaks)
+ {
+ // reduce all multiple line breaks into two line breaks, code multiple line break as \r\r
+ normText = normText.replaceAll("\n[\\s\n]*\n", "\r\r");
+ }
// replace all single line breaks with one line break
normText = normText.replace('\n', '\r');
@@ -118,8 +124,14 @@ public class NormalizeV01 implements Normalize, Serializable {
// remove spaces before and after a single line break
normText = normText.replaceAll(" ?\r ?", "\r");
- // remove spaces before and after a multiple line breaks
- normText = normText.replaceAll(" ?\r\r ?", "\r");
+ if (keepMultipleLineBreaks)
+ {
+ // remove spaces before and after a multiple line break, keeping the double line break
+ normText = normText.replaceAll(" ?\r\r ?", "\r\r");
+ } else
+ {
+ normText = normText.replaceAll(" ?\r\r ?", "\r");
+ }
// remove leading and trailing space or line break in the string
int start_idx = (normText.charAt(0) == ' ' || normText.charAt(0) == '\r' ? 1 : 0);
diff --git a/src/main/java/at/knowcenter/wag/egov/egiz/tools/Normalizer.java b/src/main/java/at/knowcenter/wag/egov/egiz/tools/Normalizer.java
index 3dafb31..d5550f8 100644
--- a/src/main/java/at/knowcenter/wag/egov/egiz/tools/Normalizer.java
+++ b/src/main/java/at/knowcenter/wag/egov/egiz/tools/Normalizer.java
@@ -42,6 +42,7 @@ import at.knowcenter.wag.egov.egiz.exceptions.SettingsException;
* @see at.knowcenter.wag.egov.egiz.cfg.SettingsReader
*/
public class Normalizer implements Serializable {
+// 04.11.2010 changed by exthex - normalize methods use and propagate the keepMultipleNewlines parameter
/**
* SVUID.
@@ -187,18 +188,18 @@ public class Normalizer implements Serializable {
rawString_ = rawString;
}
- /**
- * Return the normalized string. If the chached value does not exist the normalize method from the
- * current normalizer implementation is called.
- *
- * @return the normalized string
- */
- public String getNormalizedString() {
- if (normString_ == null) {
- normalize();
- }
- return normString_;
- }
+// /**
+// * Return the normalized string. If the chached value does not exist the normalize method from the
+// * current normalizer implementation is called.
+// *
+// * @return the normalized string
+// */
+// public String getNormalizedString() {
+// if (normString_ == null) {
+// normalize();
+// }
+// return normString_;
+// }
/**
* Set a normalizer version. This activity load the new requested normalizer implementation.
@@ -224,11 +225,12 @@ public class Normalizer implements Serializable {
* Wrapper method. Call the normalizer implementation method.
*
* @param rawString the raw string to normalize
+ * @param keepMultipleNewlines if true, multiple line breaks in a row will not be normalized to a single line break
* @return the normalized string
* @see NormalizeV01
*/
- public String normalize(String rawString) {
- return normalize_.normalize(rawString);
+ public String normalize(String rawString, boolean keepMultipleNewlines) {
+ return normalize_.normalize(rawString, keepMultipleNewlines);
}
/**
@@ -237,9 +239,9 @@ public class Normalizer implements Serializable {
* @return the normalized string
* @see NormalizeV01
*/
- public String normalize() {
+ public String normalize(boolean keepMultipleNewlines) {
if (normString_ == null) {
- normString_ = normalize(rawString_);
+ normString_ = normalize(rawString_, keepMultipleNewlines);
}
return normString_;
}