diff options
-rw-r--r-- | pdf-as-common/src/main/java/at/gv/egiz/pdfas/common/utils/StringUtils.java | 33 | ||||
-rw-r--r-- | pdf-as-lib/src/main/java/at/knowcenter/wag/egov/egiz/pdf/PDFPage.java | 4 |
2 files changed, 35 insertions, 2 deletions
diff --git a/pdf-as-common/src/main/java/at/gv/egiz/pdfas/common/utils/StringUtils.java b/pdf-as-common/src/main/java/at/gv/egiz/pdfas/common/utils/StringUtils.java
index c110b93d..dbea3c70 100644
--- a/pdf-as-common/src/main/java/at/gv/egiz/pdfas/common/utils/StringUtils.java
+++ b/pdf-as-common/src/main/java/at/gv/egiz/pdfas/common/utils/StringUtils.java
@@ -98,4 +98,37 @@ public class StringUtils {
         String text = new String(replace_bytes, "windows-1252");
         return text;
     }
+
+    public static String whiteSpaceTrim(String string) { // strips both ends; null is returned as null
+        String str = startStrip(string);
+        return endStrip(str);
+    }
+
+    private static String startStrip(final String str) { // drops leading "empty space" characters
+        int strLen;
+        if (str == null || (strLen = str.length()) == 0) {
+            return str;
+        }
+        int start = 0;
+        while (start != strLen && isEmptySpace(str.charAt(start))) {
+            start++;
+        }
+        return str.substring(start);
+    }
+
+    private static String endStrip(final String str) { // drops trailing "empty space" characters
+        int end;
+        if (str == null || (end = str.length()) == 0) {
+            return str;
+        }
+        while (end != 0 && isEmptySpace(str.charAt(end - 1))) {
+            end--; // walk backwards; incrementing here would run past length() and throw
+        }
+
+        return str.substring(0, end);
+    }
+
+    private static boolean isEmptySpace(char c) { // isSpaceChar adds no-break spaces that isWhitespace excludes
+        return Character.isWhitespace(c) || Character.isSpaceChar(c);
+    }
 }
diff --git a/pdf-as-lib/src/main/java/at/knowcenter/wag/egov/egiz/pdf/PDFPage.java b/pdf-as-lib/src/main/java/at/knowcenter/wag/egov/egiz/pdf/PDFPage.java
index 6911c698..84574b41 100644
--- a/pdf-as-lib/src/main/java/at/knowcenter/wag/egov/egiz/pdf/PDFPage.java
+++ b/pdf-as-lib/src/main/java/at/knowcenter/wag/egov/egiz/pdf/PDFPage.java
@@ -56,7 +56,6 @@ import java.util.Map;
 
 import org.apache.commons.lang3.math.NumberUtils;
 import org.apache.pdfbox.cos.COSBase;
-import org.apache.pdfbox.cos.COSDictionary;
 import org.apache.pdfbox.cos.COSName;
 import org.apache.pdfbox.cos.COSStream;
 import org.apache.pdfbox.pdmodel.PDPage;
@@ -335,7 +334,8 @@ public class PDFPage extends PDFTextStripper {
             }
 
             // store ypos of the char if it is not empty
-            if (!character.equals(" ") && current_y > this.max_character_ypos) {
+            if (!at.gv.egiz.pdfas.common.utils.StringUtils.whiteSpaceTrim(character).isEmpty() &&
+                    current_y > this.max_character_ypos) {
                 this.max_character_ypos = current_y;
             }
 