pdf-as-lib maven project files moved to pdf-as-lib

git-svn-id: https://joinup.ec.europa.eu/svn/pdf-as/pdf-as/trunk@926 7b5415b0-85f9-ee4d-85bd-d5d0c3b42d1c
author: tknall <tknall@7b5415b0-85f9-ee4d-85bd-d5d0c3b42d1c> 2013-01-09 15:41:29 +0000
committer: tknall <tknall@7b5415b0-85f9-ee4d-85bd-d5d0c3b42d1c> 2013-01-09 15:41:29 +0000
commit: 535a04fa05f739ec16dd81666e3b0f82dfbd442d (patch)
tree: 0804f301c1a9ceb303a8441b7b29244fc8eb7ff0 /src/main/java/at/knowcenter/wag/egov/egiz/pdf/Placeholder.java
parent: 1efaf6fd5619dfa95c9d7e8c71eda4c2ffba4998 (diff)
download: pdf-as-3-535a04fa05f739ec16dd81666e3b0f82dfbd442d.tar.gz
pdf-as-3-535a04fa05f739ec16dd81666e3b0f82dfbd442d.tar.bz2
pdf-as-3-535a04fa05f739ec16dd81666e3b0f82dfbd442d.zip
1 files changed, 0 insertions, 572 deletions
diff --git a/src/main/java/at/knowcenter/wag/egov/egiz/pdf/Placeholder.java b/src/main/java/at/knowcenter/wag/egov/egiz/pdf/Placeholder.java
deleted file mode 100644
index 9249985..0000000
--- a/src/main/java/at/knowcenter/wag/egov/egiz/pdf/Placeholder.java
+++ /dev/null
@@ -1,572 +0,0 @@
-/**
- * <copyright> Copyright 2006 by Know-Center, Graz, Austria </copyright>
- * PDF-AS has been contracted by the E-Government Innovation Center EGIZ, a
- * joint initiative of the Federal Chancellery Austria and Graz University of
- * Technology.
- *
- * Licensed under the EUPL, Version 1.1 or - as soon they will be approved by
- * the European Commission - subsequent versions of the EUPL (the "Licence");
- * You may not use this work except in compliance with the Licence.
- * You may obtain a copy of the Licence at:
- * http://www.osor.eu/eupl/
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the Licence is distributed on an "AS IS" basis,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the Licence for the specific language governing permissions and
- * limitations under the Licence.
- *
- * This product combines work with different licenses. See the "NOTICE" text
- * file for details on the various modules and licenses.
- * The "NOTICE" text file is part of the distribution. Any derivative works
- * that you distribute must include a readable copy of the "NOTICE" text file.
- *
- * $Id: Placeholder.java,v 1.5 2006/10/31 08:17:50 wprinz Exp $
- */
-package at.knowcenter.wag.egov.egiz.pdf;
-
-import java.io.ByteArrayOutputStream;
-import java.io.IOException;
-import java.io.UnsupportedEncodingException;
-import java.util.ArrayList;
-import java.util.Iterator;
-import java.util.List;
-
-import javax.sound.midi.SysexMessage;
-
-import org.apache.commons.codec.net.URLCodec;
-import org.apache.log4j.Logger;
-
-import at.knowcenter.wag.egov.egiz.cfg.ConfigLogger;
-import at.knowcenter.wag.egov.egiz.exceptions.PDFDocumentException;
-import at.knowcenter.wag.egov.egiz.exceptions.PlaceholderException;
-import at.knowcenter.wag.exactparser.ByteArrayUtils;
-
-/**
- * Helper class that provides functionality for dealing with placeholders and
- * replacements in pdf.
- * 
- * @author wprinz
- */
-public abstract class Placeholder
-{
-  /**
-   * The logger definition.
-   */
-  private static final Logger logger_ = ConfigLogger.getLogger(Placeholder.class);
-
-  /**
-   * Escapes the String to be a suitable Literal String..
-   * 
-   * @param data
-   *          The String to be escaped.
-   * @return Returns the escaped PDF String.
-   */
-  public static byte[] escapePDFString(byte[] data)
-  {
-    try
-    {
-      ByteArrayOutputStream baos = new ByteArrayOutputStream();
-      for (int i = 0; i < data.length; i++)
-      {
-        byte[] escaped_bytes = escapeByte(data[i]);
-        baos.write(escaped_bytes);
-      }
-      return baos.toByteArray();
-    }
-    catch (IOException e)
-    {
-      logger_.error(e.getMessage(), e);
-      return null;
-    }
-  }
-
-  /**
-   * Unescapes the PDF String.
-   * 
-   * @param data
-   *          The escaped String.
-   * @return Returns the unescaped String.
-   */
-  public static byte[] unescapePDFString(byte[] data)
-  {
-    ByteArrayOutputStream baos = new ByteArrayOutputStream();
-    for (int i = 0; i < data.length; i++)
-    {
-      if (data[i] == '\\' && data[i + 1] == '\\')
-      {
-        baos.write('\\');
-        i++;
-        continue;
-      }
-      if (data[i] == '\\' && data[i + 1] == '(')
-
-      {
-        baos.write('(');
-        i++;
-        continue;
-      }
-      if (data[i] == '\\' && data[i + 1] == ')')
-      {
-        baos.write(')');
-        i++;
-        continue;
-      }
-      baos.write(data[i]);
-    }
-    return baos.toByteArray();
-  }
-
-  /**
-   * Reconstructs the string from a partition of placeholders.
-   * 
-   * @param pdf
-   *          The PDF to read the string from.
-   * @param sis
-   *          The list of StringInfo objects that specify the bytes of the
-   *          string in the pdf.
-   * @return Returns the extracted and reconverted string.
-   * @throws IOException
-   *           Forwarded exception.
-   */
-  public static String reconstructStringFromPartition(byte[] pdf, List sis,
-      byte[] enc) throws IOException
-  {
-    ByteArrayOutputStream baos = new ByteArrayOutputStream();
-
-    Iterator it = sis.iterator();
-    while (it.hasNext())
-    {
-      StringInfo si = (StringInfo) it.next();
-
-      for (int i = si.string_start; i < si.string_start + si.string_length; i++)
-      {
-        if (pdf[i] != 0)
-        {
-          baos.write(pdf[i]);
-        }
-      }
-    }
-
-    baos.close();
-    byte[] bytes = baos.toByteArray();
-
-    byte[] unescaped_bytes = unescapePDFString(bytes);
-
-    if (!ByteArrayUtils.compareByteArrays(enc, 0, BinarySignature.ENCODING_WIN) && !ByteArrayUtils.compareByteArrays(enc, 0, BinarySignature.ENCODING_URL))
-    {
-      String enc_str = new String(enc, "US-ASCII");
-      logger_.warn("The encoding " + enc_str + " is not known by this application - trying to proceed anyways.");
-    }
-
-    String text = new String(unescaped_bytes, "windows-1252");
-
-    String str = text;
-    if (ByteArrayUtils.compareByteArrays(enc, 0, BinarySignature.ENCODING_URL))
-    {
-      str = unapplyURLEncoding(str);
-    }
-
-    return str;
-  }
-
-  /**
-   * Prepares the given String to a byte array that can be substituted into the
-   * placeholder.
-   * 
-   * @param text
-   *          The text to be prepared for substitution.
-   * @return Returns the prepared byte array.
-   */
-  public static byte[] applyWinAnsiEncoding(String text)
-  {
-    // text = text.replace("\\", "\\\\");
-    // text = text.replace("(", "\\(");
-    // text = text.replace(")", "\\)");
-
-    byte[] replace_bytes;
-    try
-    {
-      replace_bytes = text.getBytes("windows-1252");// CP1252 = WinAnsiEncoding
-
-      // test the opposite way:
-      // String restored_string = new String (replace_bytes, "windows-1252");
-      // if (!restored_string.equals(text))
-      // {
-      // String url_encoded = URLEncoder.encode(text);
-      // replace_bytes = url_encoded.getBytes("windows-1252");
-      // }
-    }
-    catch (UnsupportedEncodingException e)
-    {
-      logger_.error(e.getMessage(), e);
-      return null;
-    }
-    return replace_bytes;
-  }
-
-  /**
-   * Unapplies the WinAnsi encoding.
-   * 
-   * @param replace_bytes
-   *          The bytes.
-   * @return Returns the decoded String.
-   */
-  public static String unapplyWinAnsiEncoding(byte[] replace_bytes)
-  {
-    try
-    {
-      String text = new String(replace_bytes, "windows-1252");
-
-      return text;
-    }
-    catch (UnsupportedEncodingException e)
-    {
-      logger_.error(e.getMessage(), e);
-      return null;
-    }
-
-  }
-
-  /**
-   * Applies the URL encoding to the text.
-   * 
-   * @param text
-   *          The text
-   * @return Returns the URL and WinAnsi encoded text.
-   */
-  public static byte[] applyURLEncoding(String text)
-  {
-    URLCodec utf8_url_codec = new URLCodec("UTF-8");
-    String url_encoded = null;
-    try
-    {
-      url_encoded = utf8_url_codec.encode(text, "UTF-8");
-    }
-    catch (UnsupportedEncodingException e)
-    {
-      throw new RuntimeException("Couldn't url encode : " + text, e);
-    }
-    // String url_encoded = URLEncoder.encode(text);
-    return applyWinAnsiEncoding(url_encoded);
-  }
-
-  /**
-   * Unapplies the WinAnsi and URL encoding.
-   * 
-   * @param winansi_str
-   *          The Winansi and URL text.
-   * @return Returns the decoded text.
-   */
-  public static String unapplyURLEncoding(String winansi_str)
-  {
-    URLCodec utf8_url_codec = new URLCodec("UTF-8");
-    String url_decoded = null;
-    try
-    {
-      url_decoded = utf8_url_codec.decode(winansi_str, "UTF-8");
-    }
-    catch (Exception e)
-    {
-      throw new RuntimeException("Couldn't url decode : " + winansi_str, e);
-    }
-    // String url_decoded = URLDecoder.decode(winansi_str);
-    return url_decoded;
-  }
-
-  /**
-   * Restores the String from a previously prepared byte array.
-   * 
-   * @param pdf_string
-   *          The byte array.
-   * @return Returns the unprepared String.
-   */
-  public static String unprepareAndUnescapeString(byte[] pdf_string)
-  {
-    try
-    {
-      String text = new String(pdf_string, "windows-1252");
-
-      // This makes problems when "+" appears.
-      // if (isURLEncoded(text))
-      // {
-      // text = URLDecoder.decode(text);
-      // }
-
-//      text = text.replace("\\)", ")");
-//      text = text.replace("\\(", "(");
-//      text = text.replace("\\\\", "\\");
-      
-      // TODO: replace jdk1.5-code with jdf1.4-code (should be tested)
-      /* */
-      text = text.replaceAll("\\\\\\)", ")");
-      text = text.replaceAll("\\\\\\(", "(");
-      text = text.replaceAll("\\\\\\\\", "\\\\");
-      
-
-      return text;
-    }
-    catch (UnsupportedEncodingException e)
-    {
-      logger_.error(e.getMessage(), e);
-      return null;
-    }
-  }
-
-  /**
-   * Checks the presence of typical URL encoded characters to tell if the string
-   * is URL encoded.
-   * 
-   * <p>
-   * This heuristic checks if there are any non URL encoded characters in the
-   * String, like ASCII control characters, which aren't allowed in the
-   * URLEncoding characterset.
-   * </p>
-   * 
-   * @param text
-   *          The text under suspicion.
-   * @return Returns true if the String is URL encoded, false otherwise.
-   */
-  protected static boolean isURLEncoded(String text)
-  {
-    if (text.indexOf(' ') >= 0)
-    {
-      return false;
-    }
-    for (int i = 0; i < text.length(); i++)
-    {
-      char c = text.charAt(i);
-      if (0x00 <= c && c <= 0x1f)
-      {
-        return false;
-      }
-      if (c == 0x7F)
-      {
-        return false;
-      }
-      if (0x80 <= c)
-      {
-        return false;
-      }
-    }
-    return true;
-  }
-
-  /**
-   * Tells, if a break can occur behind the given character.
-   * 
-   * @param character
-   *          The character.
-   * @return Returns true, if a break may occur behind the character, false
-   *         otherwise.
-   */
-  protected static boolean canBreakAfter(byte character)
-  {
-     return (character == ' ' || character == '.' || character == ',' || character == ';' || character == '-' || character == '\n') ;
-  }
-
-  /**
-   * Scans the given PDF content stream for literal PDF strings.
-   * 
-   * @param pdf
-   *          The PDF.
-   * @param stream_start
-   *          The start of the content stream to be scanned.
-   * @param stream_next
-   *          The end of the content stream.
-   * @return Returns a list of StringInfo objects specifying the strings that
-   *         could be found.
-   */
-  public static List parseStrings(byte[] pdf, int stream_start, int stream_next)
-  {
-    List strings = new ArrayList();
-    StringInfo cur_string = null;
-    for (int i = stream_start; i < stream_next; i++)
-    {
-      byte cur_byte = pdf[i];
-
-      if (cur_byte == '(' && pdf[i - 1] != '\\')
-      {
-        cur_string = new StringInfo();
-        cur_string.pdf = pdf;
-        cur_string.string_start = i + 1;
-        cur_string.string_length = -1;
-        // logger_.debug("String start = " + cur_string.string_start);
-        continue;
-      }
-      if (cur_byte == ')' && pdf[i - 1] != '\\')
-      {
-        cur_string.string_length = i - cur_string.string_start;
-        // logger_.debug("String length = " + cur_string.string_length);
-        strings.add(cur_string);
-
-        cur_string = null;
-        continue;
-      }
-    }
-
-    return strings;
-  }
-
-  /**
-   * Escapes the data byte if necessary.
-   * 
-   * <p>
-   * Before bytes can be written into the pdf Strings, they have to be escaped.
-   * Special care has to be taken that escaped sequences are not split due to
-   * line breaks. This could have fatal consequences and usually renders the
-   * whole document invalid.
-   * </p>
-   * 
-   * @param data
-   *          The data byte to be escaped.
-   * @return Returns a new byte array escaping the data byte. If the byte needs
-   *         not to be escaped, this new array will contain only the original
-   *         data byte.
-   */
-  public static byte[] escapeByte(byte data)
-  {
-    if (data == '\\')
-    {
-      return new byte[] { '\\', '\\' };
-    }
-    if (data == '(')
-    {
-      return new byte[] { '\\', '(' };
-    }
-    if (data == ')')
-    {
-      return new byte[] { '\\', ')' };
-    }
-    return new byte[] { data };
-  }
-
-  /**
-   * Replaces the placeholder with the given String breaking lines with a given
-   * tolerance.
-   * 
-   * @param pdf
-   *          The PDF.
-   * @param sis
-   *          The list of StringInfo objects describing the positions where the
-   *          String should be filled in.
-   * @param replace_bytes
-   *          The unescaped bytes to be filled in. Escaping is performed by this
-   *          method.
-   * @param tolerance
-   *          The tolerance for line wrapping. The tolerance counts from the end
-   *          of a StringInfo backwards to its start. If a word that starts
-   *          within the tolerance doesn't fit, it is wrapped into the next
-   *          line.
-   * @throws PDFDocumentException
-   *           Forwarded exception.
-   */
-  public static void replacePlaceholderWithTolerance(byte[] pdf, List sis,
-      byte[] replace_bytes, int tolerance) throws PDFDocumentException
-  {
-    try
-    {
-      // String rep_str = new String(replace_bytes);
-
-      SplitStrings ss = new SplitStrings(pdf, sis);
-
-      int read_index = 0;
-      while (read_index < replace_bytes.length)
-      {
-        if (!ss.isValidLine())
-        {
-          break;
-        }
-
-        byte[] token = readToken(replace_bytes, read_index);
-        // String token_str = new String(token);
-        byte[] escaped_token = escapeToken(token);
-
-        if (ss.fits(escaped_token))
-        {
-          ss.write(escaped_token);
-          read_index += token.length;
-          continue;
-        }
-        else
-        {
-          if (ss.getAvailable() < tolerance)
-          {
-            ss.newline();
-            continue;
-          }
-          else
-          {
-            // break the token
-            for (; read_index < replace_bytes.length; read_index++)
-            {
-              byte data = replace_bytes[read_index];
-
-              byte[] escaped_data = escapeByte(data);
-
-              if (ss.fits(escaped_data))
-              {
-                ss.write(escaped_data);
-              }
-              else
-              {
-                ss.newline();
-                break;
-              }
-            }
-            continue;
-
-          }
-        }
-      }
-      ss.fillRest();
-
-      if (read_index < replace_bytes.length)
-      {
-        logger_.error("The replace string was longer than the reserved placeholder.");
-        throw new PlaceholderException(null, replace_bytes.length - read_index);
-      }
-
-    }
-    catch (IOException e)
-    {
-      throw new PDFDocumentException(201, e);
-    }
-
-  }
-
-  protected static byte[] readToken(byte[] bytes, int index)
-  {
-    ByteArrayOutputStream baos = new ByteArrayOutputStream();
-    for (; index < bytes.length; index++)
-    {
-      byte data = bytes[index];
-
-     
-      // byte [] escaped_data = escapeByte(data);
-      baos.write(data);
-
-      if (canBreakAfter(data))
-      {
-        break;
-      }
-    }
-
-    return baos.toByteArray();
-  }
-
-
-
-protected static byte[] escapeToken(byte[] token) throws IOException
-  {
-    ByteArrayOutputStream baos = new ByteArrayOutputStream();
-
-    for (int i = 0; i < token.length; i++)
-    {
-      byte[] escaped_data = escapeByte(token[i]);
-      baos.write(escaped_data);
-    }
-
-    return baos.toByteArray();
-  }
-}
author	tknall <tknall@7b5415b0-85f9-ee4d-85bd-d5d0c3b42d1c>	2013-01-09 15:41:29 +0000
committer	tknall <tknall@7b5415b0-85f9-ee4d-85bd-d5d0c3b42d1c>	2013-01-09 15:41:29 +0000
commit	535a04fa05f739ec16dd81666e3b0f82dfbd442d (patch)
tree	0804f301c1a9ceb303a8441b7b29244fc8eb7ff0 /src/main/java/at/knowcenter/wag/egov/egiz/pdf/Placeholder.java
parent	1efaf6fd5619dfa95c9d7e8c71eda4c2ffba4998 (diff)
download	pdf-as-3-535a04fa05f739ec16dd81666e3b0f82dfbd442d.tar.gz pdf-as-3-535a04fa05f739ec16dd81666e3b0f82dfbd442d.tar.bz2 pdf-as-3-535a04fa05f739ec16dd81666e3b0f82dfbd442d.zip