From 53130153222382e2610c227909e446e7ac23827e Mon Sep 17 00:00:00 2001 From: Andreas Fitzek Date: Tue, 15 Apr 2014 16:03:26 +0200 Subject: Fixed String encoding if CP1252 not applicable, use URL encoding --- .../at/gv/egiz/pdfas/common/utils/StringUtils.java | 88 +++++++++++++----- .../lib/impl/stamping/pdfbox/PDFBoxTable.java | 103 ++++++++++++++------- 2 files changed, 131 insertions(+), 60 deletions(-) diff --git a/pdf-as-common/src/main/java/at/gv/egiz/pdfas/common/utils/StringUtils.java b/pdf-as-common/src/main/java/at/gv/egiz/pdfas/common/utils/StringUtils.java index 35cdf904..d0a9fee7 100644 --- a/pdf-as-common/src/main/java/at/gv/egiz/pdfas/common/utils/StringUtils.java +++ b/pdf-as-common/src/main/java/at/gv/egiz/pdfas/common/utils/StringUtils.java @@ -23,35 +23,73 @@ ******************************************************************************/ package at.gv.egiz.pdfas.common.utils; +import java.io.UnsupportedEncodingException; +import java.net.URLEncoder; import java.util.Formatter; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + /** - * Created with IntelliJ IDEA. - * User: afitzek - * Date: 8/28/13 - * Time: 12:42 PM - * To change this template use File | Settings | File Templates. + * Created with IntelliJ IDEA. User: afitzek Date: 8/28/13 Time: 12:42 PM To + * change this template use File | Settings | File Templates. */ public class StringUtils { - public static String bytesToHexString(byte[] bytes) { - StringBuilder sb = new StringBuilder(bytes.length * 2); - - Formatter formatter = new Formatter(sb); - for (byte b : bytes) { - formatter.format("%02x", b); - } - formatter.close(); - - return sb.toString(); - } - - public static String extractLastID(String id) { - int lastIDX = id.lastIndexOf('.'); - String result = id; - if(lastIDX > 0) { - result = id.substring(lastIDX+1); - } - return result; - } + private static final Logger logger = LoggerFactory + .getLogger(StringUtils.class); + + public static String bytesToHexString(byte[] bytes) { + StringBuilder sb = new StringBuilder(bytes.length * 2); + + Formatter formatter = new Formatter(sb); + for (byte b : bytes) { + formatter.format("%02x", b); + } + formatter.close(); + + return sb.toString(); + } + + public static String extractLastID(String id) { + int lastIDX = id.lastIndexOf('.'); + String result = id; + if (lastIDX > 0) { + result = id.substring(lastIDX + 1); + } + return result; + } + + public static String convertStringToPDFFormat(String value) + throws UnsupportedEncodingException { + byte[] replace_bytes = applyWinAnsiEncoding(value); + + String restored_value = unapplyWinAnsiEncoding(replace_bytes); + if (!value.equals(restored_value)) { + // Cannot encode String with CP1252 have to use URL encoding ... + return URLEncoder.encode(value, "UTF-8"); + } + return value; + } + + public static byte[] applyWinAnsiEncoding(String text) + throws UnsupportedEncodingException { + byte[] replace_bytes; + replace_bytes = text.getBytes("windows-1252");// CP1252 = + // WinAnsiEncoding + return replace_bytes; + } + + /** + * Unapplies the WinAnsi encoding. + * + * @param replace_bytes + * The bytes. + * @return Returns the decoded String. + * @throws UnsupportedEncodingException + */ + public static String unapplyWinAnsiEncoding(byte[] replace_bytes) throws UnsupportedEncodingException { + String text = new String(replace_bytes, "windows-1252"); + return text; + } } diff --git a/pdf-as-lib/src/main/java/at/gv/egiz/pdfas/lib/impl/stamping/pdfbox/PDFBoxTable.java b/pdf-as-lib/src/main/java/at/gv/egiz/pdfas/lib/impl/stamping/pdfbox/PDFBoxTable.java index 9cde5fe6..7274cdf2 100644 --- a/pdf-as-lib/src/main/java/at/gv/egiz/pdfas/lib/impl/stamping/pdfbox/PDFBoxTable.java +++ b/pdf-as-lib/src/main/java/at/gv/egiz/pdfas/lib/impl/stamping/pdfbox/PDFBoxTable.java @@ -2,6 +2,7 @@ package at.gv.egiz.pdfas.lib.impl.stamping.pdfbox; import java.awt.Color; import java.io.IOException; +import java.io.UnsupportedEncodingException; import java.util.ArrayList; import java.util.List; @@ -10,7 +11,10 @@ import org.apache.pdfbox.pdmodel.font.PDType1Font; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import at.gv.egiz.pdfas.common.exceptions.PdfAsException; +import at.gv.egiz.pdfas.common.exceptions.PdfAsWrappedIOException; import at.gv.egiz.pdfas.common.settings.ISettings; +import at.gv.egiz.pdfas.common.utils.StringUtils; import at.knowcenter.wag.egov.egiz.table.Entry; import at.knowcenter.wag.egov.egiz.table.Style; import at.knowcenter.wag.egov.egiz.table.Table; @@ -36,10 +40,37 @@ public class PDFBoxTable { float[] rowHeights; float[] colWidths; + private void normalizeContent(Table abstractTable) throws PdfAsException { + try { + int rows = abstractTable.getRows().size(); + for (int i = 0; i < rows; i++) { + ArrayList row = this.table.getRows().get(i); + for (int j = 0; j < row.size(); j++) { + Entry cell = (Entry) row.get(j); + + switch (cell.getType()) { + case Entry.TYPE_CAPTION: + case Entry.TYPE_VALUE: + String value = (String) cell.getValue(); + cell.setValue(StringUtils + .convertStringToPDFFormat(value)); + break; + } + } + } + } catch (UnsupportedEncodingException e) { + throw new PdfAsException("Unsupported Encoding", e); + } + } + private void initializeStyle(Table abstractTable, PDFBoxTable parent) throws IOException { this.table = abstractTable; - + try { + normalizeContent(abstractTable); + } catch(PdfAsException e) { + throw new PdfAsWrappedIOException(e); + } if (abstractTable.getStyle() != null) { style = abstractTable.getStyle(); } @@ -284,39 +315,41 @@ public class PDFBoxTable { lines.add(cLineValue.trim()); return lines.toArray(new String[0]); } - -// private String[] breakString(String value, PDFont f, float maxwidth) throws IOException { -// String[] words = value.split(" "); -// List lines = new ArrayList(); -// int cLine = 0; -// String cLineValue = ""; -// for (int i = 0; i < words.length; i++) { -// String word = words[i]; -// String[] lineBreaks = word.split("\n"); -// if (lineBreaks.length > 1) { -// for (int j = 0; j < lineBreaks.length; j++) { -// String subword = lineBreaks[j]; -// // if (cLine + subword.length() > maxline) { -// lines.add(cLineValue.trim()); -// cLineValue = ""; -// cLine = 0; -// // } -// cLineValue += subword + " "; -// cLine += subword.length(); -// } -// } else { -// if (f.getStringWidth(cLineValue + word) > maxwidth && cLineValue.length() != 0) { -// lines.add(cLineValue.trim()); -// cLineValue = ""; -// cLine = 0; -// } -// cLineValue += word + " "; -// cLine += word.length(); -// } -// } -// lines.add(cLineValue.trim()); -// return lines.toArray(new String[0]); -// } + + // private String[] breakString(String value, PDFont f, float maxwidth) + // throws IOException { + // String[] words = value.split(" "); + // List lines = new ArrayList(); + // int cLine = 0; + // String cLineValue = ""; + // for (int i = 0; i < words.length; i++) { + // String word = words[i]; + // String[] lineBreaks = word.split("\n"); + // if (lineBreaks.length > 1) { + // for (int j = 0; j < lineBreaks.length; j++) { + // String subword = lineBreaks[j]; + // // if (cLine + subword.length() > maxline) { + // lines.add(cLineValue.trim()); + // cLineValue = ""; + // cLine = 0; + // // } + // cLineValue += subword + " "; + // cLine += subword.length(); + // } + // } else { + // if (f.getStringWidth(cLineValue + word) > maxwidth && cLineValue.length() + // != 0) { + // lines.add(cLineValue.trim()); + // cLineValue = ""; + // cLine = 0; + // } + // cLineValue += word + " "; + // cLine += word.length(); + // } + // } + // lines.add(cLineValue.trim()); + // return lines.toArray(new String[0]); + // } private float getCellHeight(Entry cell, float width) throws IOException { boolean isValue = true; @@ -349,7 +382,7 @@ public class PDFBoxTable { float fheight = c.getFontDescriptor().getFontBoundingBox() .getHeight() / 1000 * fontSize; - + String[] lines = breakString(string, maxcharcount); cell.setValue(concatLines(lines)); return fheight * lines.length; -- cgit v1.2.3