diff options
| -rw-r--r-- | pdf-as-common/src/main/java/at/gv/egiz/pdfas/common/utils/StringUtils.java | 33 | ||||
| -rw-r--r-- | pdf-as-lib/src/main/java/at/knowcenter/wag/egov/egiz/pdf/PDFPage.java | 4 | 
2 files changed, 35 insertions, 2 deletions
diff --git a/pdf-as-common/src/main/java/at/gv/egiz/pdfas/common/utils/StringUtils.java b/pdf-as-common/src/main/java/at/gv/egiz/pdfas/common/utils/StringUtils.java
index c110b93d..dbea3c70 100644
--- a/pdf-as-common/src/main/java/at/gv/egiz/pdfas/common/utils/StringUtils.java
+++ b/pdf-as-common/src/main/java/at/gv/egiz/pdfas/common/utils/StringUtils.java
@@ -98,4 +98,37 @@ public class StringUtils {
 		String text = new String(replace_bytes, "windows-1252");
 		return text;
 	}
+	
+	public static String whiteSpaceTrim(String string) {
+		String str = startStrip(string);
+		return endStrip(str);
+	}
+
+	private static String startStrip(final String str) {
+		int strLen;
+		if (str == null || (strLen = str.length()) == 0) {
+			return str;
+		}
+		int start = 0;
+		while (start != strLen && isEmptySpace(str.charAt(start))) {
+			start++;
+		}
+		return str.substring(start);
+	}
+	
+	private static String endStrip(final String str) {
+		int end;
+		if (str == null || (end = str.length()) == 0) {
+			return str;
+		}
+		while (end != 0 && isEmptySpace(str.charAt(end - 1))) {
+			end--; // walk backwards over trailing whitespace; incrementing here would run past the end of str
+		}
+
+		return str.substring(0, end);
+	}
+	
+	private static boolean isEmptySpace(char c) {
+		return Character.isWhitespace(c) || Character.isSpaceChar(c);
+	}
 }
diff --git a/pdf-as-lib/src/main/java/at/knowcenter/wag/egov/egiz/pdf/PDFPage.java b/pdf-as-lib/src/main/java/at/knowcenter/wag/egov/egiz/pdf/PDFPage.java
index 6911c698..84574b41 100644
--- a/pdf-as-lib/src/main/java/at/knowcenter/wag/egov/egiz/pdf/PDFPage.java
+++ b/pdf-as-lib/src/main/java/at/knowcenter/wag/egov/egiz/pdf/PDFPage.java
@@ -56,7 +56,6 @@ import java.util.Map;
 
 import org.apache.commons.lang3.math.NumberUtils;
 import org.apache.pdfbox.cos.COSBase;
-import org.apache.pdfbox.cos.COSDictionary;
 import org.apache.pdfbox.cos.COSName;
 import org.apache.pdfbox.cos.COSStream;
 import org.apache.pdfbox.pdmodel.PDPage;
@@ -335,7 +334,8 @@ public class PDFPage extends PDFTextStripper {
 		}
 
 		// store ypos of the char if it is not empty
-		if (!character.equals(" ") && current_y > this.max_character_ypos) {
+		if (!at.gv.egiz.pdfas.common.utils.StringUtils.whiteSpaceTrim(character).isEmpty() && 
+				current_y > this.max_character_ypos) {
 			this.max_character_ypos = current_y;
 		}
 
