aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--pdf-as-common/src/main/java/at/gv/egiz/pdfas/common/utils/StringUtils.java33
-rw-r--r--pdf-as-lib/src/main/java/at/knowcenter/wag/egov/egiz/pdf/PDFPage.java4
2 files changed, 35 insertions, 2 deletions
diff --git a/pdf-as-common/src/main/java/at/gv/egiz/pdfas/common/utils/StringUtils.java b/pdf-as-common/src/main/java/at/gv/egiz/pdfas/common/utils/StringUtils.java
index c110b93d..dbea3c70 100644
--- a/pdf-as-common/src/main/java/at/gv/egiz/pdfas/common/utils/StringUtils.java
+++ b/pdf-as-common/src/main/java/at/gv/egiz/pdfas/common/utils/StringUtils.java
@@ -98,4 +98,37 @@ public class StringUtils {
String text = new String(replace_bytes, "windows-1252");
return text;
}
+
+ public static String whiteSpaceTrim(String string) { // runs startStrip, then endStrip, on the input
+ String str = startStrip(string); // both helpers return a null argument unchanged
+ return endStrip(str);
+ }
+
+ private static String startStrip(final String str) { // drops leading characters accepted by isEmptySpace
+ int strLen;
+ if (str == null || (strLen = str.length()) == 0) {
+ return str; // null and "" are handed back unchanged
+ }
+ int start = 0;
+ while (start != strLen && isEmptySpace(str.charAt(start))) {
+ start++; // advance to the first character that is not empty space
+ }
+ return str.substring(start); // yields "" when every character was empty space
+ }
+
+ private static String endStrip(final String str) { // drops trailing characters accepted by isEmptySpace
+ int end;
+ if (str == null || (end = str.length()) == 0) {
+ return str; // null and "" are handed back unchanged
+ }
+ while (end != 0 && isEmptySpace(str.charAt(end - 1))) {
+ end--; // scan backwards; incrementing here would make charAt(end - 1) run past the string and throw
+ }

+ return str.substring(0, end); // yields "" when every character was empty space
+ }
+
+ private static boolean isEmptySpace(char c) { // true if either JDK predicate classifies c as space
+ return Character.isWhitespace(c) || Character.isSpaceChar(c); // isSpaceChar adds Unicode space separators such as non-breaking space, which isWhitespace excludes
+ }
}
diff --git a/pdf-as-lib/src/main/java/at/knowcenter/wag/egov/egiz/pdf/PDFPage.java b/pdf-as-lib/src/main/java/at/knowcenter/wag/egov/egiz/pdf/PDFPage.java
index 6911c698..84574b41 100644
--- a/pdf-as-lib/src/main/java/at/knowcenter/wag/egov/egiz/pdf/PDFPage.java
+++ b/pdf-as-lib/src/main/java/at/knowcenter/wag/egov/egiz/pdf/PDFPage.java
@@ -56,7 +56,6 @@ import java.util.Map;
import org.apache.commons.lang3.math.NumberUtils;
import org.apache.pdfbox.cos.COSBase;
-import org.apache.pdfbox.cos.COSDictionary;
import org.apache.pdfbox.cos.COSName;
import org.apache.pdfbox.cos.COSStream;
import org.apache.pdfbox.pdmodel.PDPage;
@@ -335,7 +334,8 @@ public class PDFPage extends PDFTextStripper {
}
// store ypos of the char if it is not empty
- if (!character.equals(" ") && current_y > this.max_character_ypos) {
+ if (!at.gv.egiz.pdfas.common.utils.StringUtils.whiteSpaceTrim(character).isEmpty() &&
+ current_y > this.max_character_ypos) {
this.max_character_ypos = current_y;
}