From 707a1511f9771b97bb630cf46d9b9d2c7d222ed9 Mon Sep 17 00:00:00 2001 From: Andreas Fitzek Date: Fri, 3 Oct 2014 10:16:14 +0200 Subject: Fixed whitespace error in pdf text stream --- .../java/at/gv/egiz/pdfas/utils/PDFASUtils.java | 132 +++++++++++++++------ .../at/knowcenter/wag/egov/egiz/pdf/PDFPage.java | 3 +- 2 files changed, 99 insertions(+), 36 deletions(-) diff --git a/pdf-as-lib/src/main/java/at/gv/egiz/pdfas/utils/PDFASUtils.java b/pdf-as-lib/src/main/java/at/gv/egiz/pdfas/utils/PDFASUtils.java index a2332dd..89dfef2 100644 --- a/pdf-as-lib/src/main/java/at/gv/egiz/pdfas/utils/PDFASUtils.java +++ b/pdf-as-lib/src/main/java/at/gv/egiz/pdfas/utils/PDFASUtils.java @@ -69,57 +69,69 @@ public class PDFASUtils { * @throws PDFDocumentException * Thrown if document has not been opened with full permissions. */ - private static void checkReaderPermissions(PdfReader pdfReader) throws PDFDocumentException { + private static void checkReaderPermissions(PdfReader pdfReader) + throws PDFDocumentException { if (pdfReader.isEncrypted()) { - throw new PDFDocumentException(ErrorCode.DOCUMENT_IS_PROTECTED, "Document is encrypted."); + throw new PDFDocumentException(ErrorCode.DOCUMENT_IS_PROTECTED, + "Document is encrypted."); } if (!pdfReader.isOpenedWithFullPermissions()) { - throw new PDFDocumentException(ErrorCode.DOCUMENT_IS_PROTECTED, "Document is protected."); + throw new PDFDocumentException(ErrorCode.DOCUMENT_IS_PROTECTED, + "Document is protected."); } } /** - * Verifies that the document is not encrypted and/or protected. In case no restrictions have been applied to the - * document a pdf reader is returned. + * Verifies that the document is not encrypted and/or protected. In case no + * restrictions have been applied to the document a pdf reader is returned. * * @param dataSource * The document data source. * @throws PDFDocumentException - * Thrown if the document could not be opened with full permissions. 
+ * Thrown if the document could not be opened with full + * permissions. */ - public static PdfReader createPdfReaderCheckingPermissions(DataSource dataSource) throws PDFDocumentException { - return createPdfReaderCheckingPermissions(dataSource.createInputStream()); + public static PdfReader createPdfReaderCheckingPermissions( + DataSource dataSource) throws PDFDocumentException { + return createPdfReaderCheckingPermissions(dataSource + .createInputStream()); } /** - * Verifies that the document is not encrypted and/or protected. In case no restrictions have been applied to the - * document a pdf reader is returned. + * Verifies that the document is not encrypted and/or protected. In case no + * restrictions have been applied to the document a pdf reader is returned. * * @param dataSource * The document data source. * @throws PDFDocumentException - * Thrown if the document could not be opened with full permissions. + * Thrown if the document could not be opened with full + * permissions. */ - public static PdfReader createPdfReaderCheckingPermissions(at.gv.egiz.pdfas.framework.input.DataSource dataSource) + public static PdfReader createPdfReaderCheckingPermissions( + at.gv.egiz.pdfas.framework.input.DataSource dataSource) throws PDFDocumentException { - return createPdfReaderCheckingPermissions(dataSource.createInputStream()); + return createPdfReaderCheckingPermissions(dataSource + .createInputStream()); } /** - * Verifies that the document is not encrypted and/or protected. In case no restrictions have been applied to the - * document a pdf reader is returned. + * Verifies that the document is not encrypted and/or protected. In case no + * restrictions have been applied to the document a pdf reader is returned. * * @param inputStream * The document data input stream. * @throws PDFDocumentException - * Thrown if the document could not be opened with full permissions. + * Thrown if the document could not be opened with full + * permissions. 
*/ - public static PdfReader createPdfReaderCheckingPermissions(InputStream inputStream) throws PDFDocumentException { + public static PdfReader createPdfReaderCheckingPermissions( + InputStream inputStream) throws PDFDocumentException { PdfReader reader = null; try { // try to parse document // If fully encrypted, PdfReader will fail; - // It should throw a BadPasswordException, but unfortunately does not (throws an IOException instead, + // It should throw a BadPasswordException, but unfortunately does + // not (throws an IOException instead, // internally catching BadPAsswordException; see comments below). reader = new PdfReader(inputStream); checkReaderPermissions(reader); @@ -127,21 +139,30 @@ public class PDFASUtils { } catch (BadPasswordException e) { // will never be reached with itext-2.1.5-rev3628-pdfas:v1.1 // just added for later versions... (see comments below) - // itext-2.1.5-rev3628-pdfas:v1.2 correctly throws BadPasswordException - throw new PDFDocumentException(ErrorCode.DOCUMENT_IS_PROTECTED, "Document is protected."); + // itext-2.1.5-rev3628-pdfas:v1.2 correctly throws + // BadPasswordException + throw new PDFDocumentException(ErrorCode.DOCUMENT_IS_PROTECTED, + "Document is protected."); } catch (PDFDocumentException e) { throw e; } catch (Exception e) { final String EX_MSG_FOR_ENCRYPTED_DOCUMENT = "Bad user Password"; - // Inspecting the exception message seems to be the only way when using itext-2.1.5-rev3628-pdfas:v1.1: - // itext neither externally throws a BadPasswordException nor passed the cause..., + // Inspecting the exception message seems to be the only way when + // using itext-2.1.5-rev3628-pdfas:v1.1: + // itext neither externally throws a BadPasswordException nor passed + // the cause..., // later versions do! - // String "Bad user Password" is set in com.lowagie.text.pdf.BadPasswordException so this approach will - // work as long as the underlying itext library is not beeing updated. 
- if (StringUtils.containsIgnoreCase(e.getMessage(), EX_MSG_FOR_ENCRYPTED_DOCUMENT)) { - throw new PDFDocumentException(ErrorCode.DOCUMENT_IS_PROTECTED, "Document is protected."); + // String "Bad user Password" is set in + // com.lowagie.text.pdf.BadPasswordException so this approach will + // work as long as the underlying itext library is not beeing + // updated. + if (StringUtils.containsIgnoreCase(e.getMessage(), + EX_MSG_FOR_ENCRYPTED_DOCUMENT)) { + throw new PDFDocumentException(ErrorCode.DOCUMENT_IS_PROTECTED, + "Document is protected."); } - throw new PDFDocumentException(ErrorCode.DOCUMENT_CANNOT_BE_READ, "Unable to parse document."); + throw new PDFDocumentException(ErrorCode.DOCUMENT_CANNOT_BE_READ, + "Unable to parse document."); } finally { IOUtils.closeQuietly(inputStream); closeQuietly(reader); @@ -216,12 +237,15 @@ public class PDFASUtils { return PDFASUtils.toFile(new ByteArrayInputStream(data), file); } - public static boolean toFile(InputStream inputStream, File file) throws IOException { + public static boolean toFile(InputStream inputStream, File file) + throws IOException { boolean result = false; BufferedOutputStream bufferedOutputStream = null; try { - bufferedOutputStream = new BufferedOutputStream(new FileOutputStream(file)); - ConfigUtils.writeInputStreamToOutputStream(inputStream, bufferedOutputStream); + bufferedOutputStream = new BufferedOutputStream( + new FileOutputStream(file)); + ConfigUtils.writeInputStreamToOutputStream(inputStream, + bufferedOutputStream); } finally { if (bufferedOutputStream != null) { try { @@ -236,21 +260,26 @@ public class PDFASUtils { } /** - * Returns {@code true} if the given {@code profileId} is PDF/A-1b enabled, {@code false} if not. + * Returns {@code true} if the given {@code profileId} is PDF/A-1b enabled, + * {@code false} if not. * * @param profileId * The signature profile. - * @return {@code true} if the given {@code profileId} is PDF/A-1b enabled, {@code false} if not. 
+ * @return {@code true} if the given {@code profileId} is PDF/A-1b enabled, + * {@code false} if not. */ public static boolean isPdfAEnabled(String profileId) { if (profileId == null) { - throw new NullPointerException("Profile identifier must not be null."); + throw new NullPointerException( + "Profile identifier must not be null."); } if (StringUtils.isEmpty(profileId)) { - throw new IllegalArgumentException("Profile identifier must not be empty."); + throw new IllegalArgumentException( + "Profile identifier must not be empty."); } try { - String pdfa = SettingsReader.getInstance().getSetting("sig_obj." + profileId + ".key." + CFG_KEY_PDFA, + String pdfa = SettingsReader.getInstance().getSetting( + "sig_obj." + profileId + ".key." + CFG_KEY_PDFA, "default." + CFG_KEY_PDFA, "false"); return BooleanUtils.toBoolean(pdfa); } catch (Exception e) { @@ -259,4 +288,37 @@ public class PDFASUtils { } } + public static String whiteSpaceTrim(String string) { + String str = StringUtils.strip(string); + str = startStrip(str); + return endStrip(str); + } + + private static String startStrip(final String str) { + int strLen; + if (str == null || (strLen = str.length()) == 0) { + return str; + } + int start = 0; + while (start != strLen && isEmptySpace(str.charAt(start))) { + start++; + } + return str.substring(start); + } + + private static String endStrip(final String str) { + int end; + if (str == null || (end = str.length()) == 0) { + return str; + } + while (end != 0 && isEmptySpace(str.charAt(end - 1))) { + end--; + } + + return str.substring(0, end); + } + + private static boolean isEmptySpace(char c) { + return Character.isWhitespace(c) || Character.isSpaceChar(c); + } } diff --git a/pdf-as-lib/src/main/java/at/knowcenter/wag/egov/egiz/pdf/PDFPage.java b/pdf-as-lib/src/main/java/at/knowcenter/wag/egov/egiz/pdf/PDFPage.java index 32a043d..dc13a28 100644 --- a/pdf-as-lib/src/main/java/at/knowcenter/wag/egov/egiz/pdf/PDFPage.java +++ 
b/pdf-as-lib/src/main/java/at/knowcenter/wag/egov/egiz/pdf/PDFPage.java @@ -47,6 +47,7 @@ import org.pdfbox.util.PDFTextStripper; import org.pdfbox.util.TextPosition; import org.pdfbox.util.operator.OperatorProcessor; +import at.gv.egiz.pdfas.utils.PDFASUtils; import at.knowcenter.wag.egov.egiz.cfg.ConfigLogger; import at.knowcenter.wag.egov.egiz.cfg.SettingsReader; import at.knowcenter.wag.egov.egiz.exceptions.SettingsException; @@ -306,7 +307,7 @@ public class PDFPage extends PDFTextStripper { } // store ypos of the char if it is not empty - if (!character.equals(" ") && current_y > this.max_character_ypos) { + if (!PDFASUtils.whiteSpaceTrim(character).isEmpty() && current_y > this.max_character_ypos) { this.max_character_ypos = current_y; } -- cgit v1.2.3