aboutsummaryrefslogtreecommitdiff
path: root/src/main/java/at/knowcenter/wag/egov/egiz/tools/NormalizeV01.java
diff options
context:
space:
mode:
authorpdanner <pdanner@7b5415b0-85f9-ee4d-85bd-d5d0c3b42d1c>2010-11-04 16:35:10 +0000
committerpdanner <pdanner@7b5415b0-85f9-ee4d-85bd-d5d0c3b42d1c>2010-11-04 16:35:10 +0000
commit89a45b322b16d3c3a949b552f4fb4f07f2914817 (patch)
tree4cfbc1a02fae77cb9180d51061a31f24236bfe0e /src/main/java/at/knowcenter/wag/egov/egiz/tools/NormalizeV01.java
parentab57bf058107ad9ea251dfb8e3fc576834ea9f68 (diff)
downloadpdf-as-3-89a45b322b16d3c3a949b552f4fb4f07f2914817.tar.gz
pdf-as-3-89a45b322b16d3c3a949b552f4fb4f07f2914817.tar.bz2
pdf-as-3-89a45b322b16d3c3a949b552f4fb4f07f2914817.zip
- Added image[valign|halign] config parameters to allow separate alignment of images.
- Normalization of value cell values does not remove multiple newlines any more
git-svn-id: https://joinup.ec.europa.eu/svn/pdf-as/trunk@601 7b5415b0-85f9-ee4d-85bd-d5d0c3b42d1c
Diffstat (limited to 'src/main/java/at/knowcenter/wag/egov/egiz/tools/NormalizeV01.java')
-rw-r--r--src/main/java/at/knowcenter/wag/egov/egiz/tools/NormalizeV01.java26
1 files changed, 19 insertions, 7 deletions
diff --git a/src/main/java/at/knowcenter/wag/egov/egiz/tools/NormalizeV01.java b/src/main/java/at/knowcenter/wag/egov/egiz/tools/NormalizeV01.java
index d3af9b5..c8f10d9 100644
--- a/src/main/java/at/knowcenter/wag/egov/egiz/tools/NormalizeV01.java
+++ b/src/main/java/at/knowcenter/wag/egov/egiz/tools/NormalizeV01.java
@@ -28,7 +28,8 @@ import java.io.Serializable;
* @author wlackner
*/
public class NormalizeV01 implements Normalize, Serializable {
-
+// 04.11.2010 change by exthex - added keepMultipleLineBreaks parameter to normalize method
+// to allow multiple line breaks to not be normalized to a single one
/**
* SVUID.
*/
@@ -72,7 +73,7 @@ public class NormalizeV01 implements Normalize, Serializable {
* <li>code all multiple line breaks as \n\n</li>
* <li>replace all Tabs and form feeds with spaces</li>
* <li>code line breaks as \n</li>
- * <li>reduce all multiple line breaks into one line break, code line break as \r</li>
+ * <li>reduce all multiple line breaks into one line break (only if keepMultipleLineBreaks == false), code line break as \r</li>
* <li>replace all single line breaks with space</li>
* <li>normalize spaces</li>
* <li>remove spaces before and after a line break</li>
@@ -83,9 +84,11 @@ public class NormalizeV01 implements Normalize, Serializable {
* <li>normalize hypens</li>
* </ol>
*
+ * @param rawText the text to normalize
+ * @param keepMultipleLineBreaks if true, multiple newlines in a row will not be normalized to a single line break
* @see at.knowcenter.wag.egov.egiz.tools.Normalize#normalize(java.lang.String)
*/
- public String normalize(String rawText) {
+ public String normalize(String rawText, boolean keepMultipleLineBreaks) {
if (rawText == null || rawText.equals("null") || rawText.length() == 0) {
return "";
}
@@ -106,8 +109,11 @@ public class NormalizeV01 implements Normalize, Serializable {
// code all mac line breaks as \n
normText = normText.replace('\r', '\n');
- // reduce all multiple line breaks into two line breaks, code muliple line break as \r\r
- normText = normText.replaceAll("\n[\\s\n]*\n", "\r\r");
+ if (!keepMultipleLineBreaks)
+ {
+ // reduce all multiple line breaks into two line breaks, code multiple line break as \r\r
+ normText = normText.replaceAll("\n[\\s\n]*\n", "\r\r");
+ }
// replace all single line breaks with one line break
normText = normText.replace('\n', '\r');
@@ -118,8 +124,14 @@ public class NormalizeV01 implements Normalize, Serializable {
// remove spaces before and after a single line break
normText = normText.replaceAll(" ?\r ?", "\r");
- // remove spaces before and after a multiple line breaks
- normText = normText.replaceAll(" ?\r\r ?", "\r");
+ if (keepMultipleLineBreaks)
+ {
+ // remove spaces before and after multiple line breaks
+ normText = normText.replaceAll(" ?\r\r ?", "\r\r");
+ } else
+ {
+ normText = normText.replaceAll(" ?\r\r ?", "\r");
+ }
// remove leading and trailing space or line break in the string
int start_idx = (normText.charAt(0) == ' ' || normText.charAt(0) == '\r' ? 1 : 0);