From 535a04fa05f739ec16dd81666e3b0f82dfbd442d Mon Sep 17 00:00:00 2001
From: tknall <tknall@7b5415b0-85f9-ee4d-85bd-d5d0c3b42d1c>
Date: Wed, 9 Jan 2013 15:41:29 +0000
Subject: pdf-as-lib maven project files moved to pdf-as-lib

git-svn-id: https://joinup.ec.europa.eu/svn/pdf-as/pdf-as/trunk@926 7b5415b0-85f9-ee4d-85bd-d5d0c3b42d1c
---
 .../wag/exactparser/parsing/PDFUtils.java          | 1405 ++++++++++++++++++++
 1 file changed, 1405 insertions(+)
 create mode 100644 pdf-as-lib/src/main/java/at/knowcenter/wag/exactparser/parsing/PDFUtils.java

(limited to 'pdf-as-lib/src/main/java/at/knowcenter/wag/exactparser/parsing/PDFUtils.java')
diff --git a/pdf-as-lib/src/main/java/at/knowcenter/wag/exactparser/parsing/PDFUtils.java b/pdf-as-lib/src/main/java/at/knowcenter/wag/exactparser/parsing/PDFUtils.java
new file mode 100644
index 0000000..de356c9
--- /dev/null
+++ b/pdf-as-lib/src/main/java/at/knowcenter/wag/exactparser/parsing/PDFUtils.java
@@ -0,0 +1,1405 @@
+/**
+ * <copyright> Copyright 2006 by Know-Center, Graz, Austria </copyright>
+ * PDF-AS has been contracted by the E-Government Innovation Center EGIZ, a
+ * joint initiative of the Federal Chancellery Austria and Graz University of
+ * Technology.
+ *
+ * Licensed under the EUPL, Version 1.1 or - as soon they will be approved by
+ * the European Commission - subsequent versions of the EUPL (the "Licence");
+ * You may not use this work except in compliance with the Licence.
+ * You may obtain a copy of the Licence at:
+ * http://www.osor.eu/eupl/
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the Licence is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the Licence for the specific language governing permissions and
+ * limitations under the Licence.
+ *
+ * This product combines work with different licenses. See the "NOTICE" text
+ * file for details on the various modules and licenses.
+ * The "NOTICE" text file is part of the distribution. Any derivative works
+ * that you distribute must include a readable copy of the "NOTICE" text file.
+ *
+ * $Id: PDFUtils.java,v 1.1 2006/08/25 17:00:59 wprinz Exp $
+ */
+package at.knowcenter.wag.exactparser.parsing;
+
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+
+import org.apache.log4j.Logger;
+
+import at.knowcenter.wag.exactparser.ByteArrayUtils;
+import at.knowcenter.wag.exactparser.parsing.results.ArrayParseResult;
+import at.knowcenter.wag.exactparser.parsing.results.BooleanParseResult;
+import at.knowcenter.wag.exactparser.parsing.results.DictionaryParseResult;
+import at.knowcenter.wag.exactparser.parsing.results.EOFParseResult;
+import at.knowcenter.wag.exactparser.parsing.results.FooterParseResult;
+import at.knowcenter.wag.exactparser.parsing.results.HeaderParseResult;
+import at.knowcenter.wag.exactparser.parsing.results.HexStringParseResult;
+import at.knowcenter.wag.exactparser.parsing.results.IndirectObjectReferenceParseResult;
+import at.knowcenter.wag.exactparser.parsing.results.IntegerParseResult;
+import at.knowcenter.wag.exactparser.parsing.results.LiteralStringParseResult;
+import at.knowcenter.wag.exactparser.parsing.results.NameParseResult;
+import at.knowcenter.wag.exactparser.parsing.results.NullParseResult;
+import at.knowcenter.wag.exactparser.parsing.results.NumberParseResult;
+import at.knowcenter.wag.exactparser.parsing.results.ObjectHeaderParseResult;
+import at.knowcenter.wag.exactparser.parsing.results.ObjectParseResult;
+import at.knowcenter.wag.exactparser.parsing.results.ParseResult;
+import at.knowcenter.wag.exactparser.parsing.results.StartXRefParseResult;
+import at.knowcenter.wag.exactparser.parsing.results.StreamParseResult;
+import at.knowcenter.wag.exactparser.parsing.results.TrailerParseResult;
+import at.knowcenter.wag.exactparser.parsing.results.XRefLineParseResult;
+import at.knowcenter.wag.exactparser.parsing.results.XRefSectionParseResult;
+import at.knowcenter.wag.exactparser.parsing.results.XRefSubSectionParseResult;
+
+
+
+/**
+ * Abstract class that contains several static utility methods for parsing and
+ * analyzing PDF documents on the lowest level.
+ * 
+ * <p>
+ * Most operations require random access to the PDF data (mostly to verify the
+ * syntax). So the whole PDF document has to be provided as a byte array. The
+ * term "pdf+index" states a specific position index within this byte array.
+ * </p>
+ * 
+ * @author wprinz
+ * 
+ */
+public abstract class PDFUtils
+{
+   private static Logger log = Logger.getLogger(PDFUtils.class);
+
+  public static boolean isWhitespace(final byte data)
+  { // true if data is listed in PDFNames.WHITESPACE_CHARACTERS
+    return ByteArrayUtils.contains(PDFNames.WHITESPACE_CHARACTERS, data);
+  }
+
+  public static boolean isDelimiter(final byte data)
+  { // true if data is listed in PDFNames.DELIMITER_CHARACTERS
+    return ByteArrayUtils.contains(PDFNames.DELIMITER_CHARACTERS, data);
+  }
+
+  protected static boolean isRegular(final byte data)
+  { // a regular byte is one that is neither whitespace nor a delimiter
+    return !isWhitespace(data) && !isDelimiter(data);
+  }
+
+  /**
+   * Advances past a run of whitespace.
+   * 
+   * <p>
+   * The run may be empty; line terminators count as whitespace and are
+   * skipped as well.
+   * </p>
+   * <p>
+   * No bounds check is made: if every byte from index to the end of data is
+   * whitespace, the scan runs off the array (ArrayIndexOutOfBoundsException).
+   * </p>
+   * 
+   * @param data
+   *          The PDF data.
+   * @param index
+   *          The index at which to start skipping.
+   * @return Returns the index of the first non whitespace byte at or after
+   *         index; equal to index if that byte is not whitespace.
+   */
+  public static int skipWhitespace(final byte[] data, final int index)
+  {
+    int pos = index;
+    for (; isWhitespace(data[pos]); pos++)
+    {
+      // nothing to do: the loop header steps over the whitespace byte
+    }
+    return pos;
+  }
+
+  /**
+   * Advances to the next whitespace byte.
+   * 
+   * <p>
+   * Steps over non whitespace bytes (possibly none); no bounds check is made.
+   * </p>
+   * 
+   * @param data
+   *          The PDF data.
+   * @param index
+   *          The index at which to start scanning.
+   * @return Returns the index of the first whitespace byte at or after index;
+   *         equal to index if that byte already is whitespace.
+   */
+  public static int skipToWhitespace(final byte[] data, final int index)
+  {
+    int pos = index;
+    for (; !isWhitespace(data[pos]); pos++)
+    {
+      // nothing to do: the loop header steps over the non whitespace byte
+    }
+    return pos;
+  }
+
+  protected static final byte[] LINE_TERMINATOR_CRLF = {
+      PDFNames.WHITESPACE_CR, PDFNames.WHITESPACE_LF }; // CR directly followed by LF
+  // A lone CR; some applications terminate lines this way.
+  protected static final byte[] LINE_TERMINATOR_CRALONE = { PDFNames.WHITESPACE_CR };
+  // A single LF.
+  protected static final byte[] LINE_TERMINATOR_LF = { PDFNames.WHITESPACE_LF };
+
+  /**
+   * Tells whether a line terminator starts at data+index.
+   */
+  public static boolean isNewline(final byte[] data, final int index)
+  {
+    // A newline is LF, CR LF, or - tolerated because some applications emit
+    // it - a lone CR. The candidates are probed in exactly that order.
+    final byte[][] terminators = { LINE_TERMINATOR_LF, LINE_TERMINATOR_CRLF,
+        LINE_TERMINATOR_CRALONE };
+    for (int i = 0; i < terminators.length; i++)
+    {
+      if (ByteArrayUtils.compareByteArrays(data, index, terminators[i]))
+      {
+        return true;
+      }
+    }
+    return false;
+  }
+
+  /**
+   * Steps over the line terminator that starts at data+index.
+   * @return Returns the index right behind the terminator; if there is none,
+   *         index itself (after a failed assertion when those are enabled).
+   */
+  public static int skipNewline(final byte[] data, final int index)
+  {
+    // probed in this order: LF, CR LF, lone CR (used by some applications)
+    final byte[][] terminators = { LINE_TERMINATOR_LF, LINE_TERMINATOR_CRLF,
+        LINE_TERMINATOR_CRALONE };
+    for (int i = 0; i < terminators.length; i++)
+    {
+      if (ByteArrayUtils.compareByteArrays(data, index, terminators[i]))
+      {
+        return index + terminators[i].length;
+      }
+    }
+    assert false : "don't call this if you don't expect a newline - call skipWhitespace instead";
+    return index;
+  }
+
+  /**
+   * Scans forward from data+index to the end of the current line. This
+   * method makes no bounds check of its own while searching.
+   * 
+   * @param data
+   *          The PDF data.
+   * @param index
+   *          The index at which to start scanning.
+   * @return Returns the index right behind the first line terminator (LF,
+   *         CR LF or lone CR) found at or after index.
+   */
+  public static int skipToNewline(final byte[] data, final int index)
+  {
+    // Every kind of terminator has to be probed at the moving position;
+    // probing CR LF and CR at the fixed start index never finds them later on.
+    int current_index = index;
+    while (!isNewline(data, current_index))
+    {
+      current_index++;
+    }
+    return skipNewline(data, current_index);
+  }
+
+  /**
+   * Parses the boolean keyword at pdf+index.
+   * 
+   * @param pdf
+   *          The PDF data.
+   * @param index
+   *          The index at which the keyword is expected to start.
+   * @return Returns the parsed value with start_index and next_index set.
+   */
+  public static BooleanParseResult parseBoolean(final byte[] pdf,
+      final int index)
+  {
+    final BooleanParseResult result = new BooleanParseResult();
+    result.start_index = index;
+
+    // PDFNames.TRUE_STR is probed first, then PDFNames.FALSE_STR; anything else is an error.
+    if (ByteArrayUtils.compareByteArrays(pdf, index, PDFNames.TRUE_STR))
+    {
+      result.value = true;
+      result.next_index = index + PDFNames.TRUE_STR.length;
+    }
+    else if (ByteArrayUtils.compareByteArrays(pdf, index, PDFNames.FALSE_STR))
+    {
+      result.value = false;
+      result.next_index = index + PDFNames.FALSE_STR.length;
+    }
+    else
+    {
+      throw new RuntimeException("Boolean couldn't be parsed at index " + index);
+    }
+    return result;
+  }
+
+  public static boolean isSign(final byte data)
+  { // the sign bytes are ASCII '+' and '-'
+    return '+' == data || '-' == data;
+  }
+
+  public static boolean isNumeric(final byte data)
+  { // the numeric bytes are the ASCII digits '0' to '9'
+    return data >= '0' && data <= '9';
+  }
+
+  /**
+   * Reads the non-negative integer number that starts at data+index.
+   * Delegates to parseNumberFromByteArray and hands back its integer value.
+   * 
+   * @param data
+   *          The data.
+   * @param index
+   *          The index.
+   * @return Returns the read number.
+   */
+  public static int readNumberFromByteArray(final byte[] data, final int index)
+  {
+    final NumberParseResult parsed = parseNumberFromByteArray(data, index);
+    // a negative number is a caller error (checked only with assertions enabled)
+    assert !(parsed.number < 0);
+    return parsed.number;
+  }
+
+  /**
+   * Parses an unsigned integer.
+   * 
+   * <p>
+   * Consumes the block of successive digit bytes starting at pdf+index. A
+   * leading sign (even '+') is not accepted.
+   * </p>
+   * 
+   * @param pdf
+   *          The PDF data.
+   * @param index
+   *          The index of the first digit.
+   * @return Returns the value plus the start index and the index behind it.
+   */
+  public static IntegerParseResult parseUnsignedInteger(final byte[] pdf,
+      final int index)
+  {
+    assert isNumeric(pdf[index]);
+
+    // collect the run of digit bytes that starts at index
+    final StringBuffer digits = new StringBuffer();
+    int end = index;
+    for (; isNumeric(pdf[end]); end++)
+    {
+      digits.append((char) pdf[end]);
+    }
+
+    // TODO: make better
+    // Integer.parseInt throws a NumberFormatException when the run is empty
+    // (reachable only with assertions disabled) or does not fit into an int.
+    final int parsed = Integer.parseInt(digits.toString());
+
+    // a digits-only string never parses to a negative value; sanity check
+    assert parsed >= 0;
+
+    final IntegerParseResult result = new IntegerParseResult();
+    result.start_index = index;
+    result.next_index = end;
+    result.number = parsed;
+    return result;
+  }
+
+  /**
+   * Parses a (potentially) signed integer.
+   * 
+   * <p>
+   * A single optional sign byte ('+' or '-') may precede the block of
+   * successive digits, which is read by parseUnsignedInteger.
+   * </p>
+   * 
+   * @param pdf
+   *          The PDF data.
+   * @param index
+   *          The index of the sign or, if there is none, of the first digit.
+   * @return Returns the signed value; start_index is index (sign included).
+   */
+  public static IntegerParseResult parseInteger(final byte[] pdf,
+      final int index)
+  {
+    final byte first = pdf[index];
+    assert isSign(first) || isNumeric(first);
+
+    int sign;
+    int digits_start;
+    switch (first)
+    {
+    case '-':
+      sign = -1;
+      digits_start = index + 1;
+      break;
+    case '+':
+      sign = +1;
+      digits_start = index + 1;
+      break;
+    default:
+      // no sign byte: the digits begin right at index
+      sign = +1;
+      digits_start = index;
+      break;
+    }
+
+    final IntegerParseResult result = parseUnsignedInteger(pdf, digits_start);
+    result.start_index = index;
+    result.number *= sign;
+    return result;
+  }
+
+  /**
+   * Parses an arbitrary number.
+   * 
+   * <p>
+   * The number is an optional sign byte ('+' or '-') followed by a run of
+   * digits and '.' bytes; the first byte of any other kind ends it.
+   * </p>
+   * <p>
+   * If the run can be read as an int, the signed value is stored in the
+   * number field of the result. Otherwise (decimal point, int overflow) it
+   * is read as a float and stored in the floating field; the other field is
+   * left as NumberParseResult created it.
+   * </p>
+   * <p>
+   * The only index this method assigns on the result is next_index.
+   * </p>
+   * 
+   * @param pdf
+   *          The PDF data.
+   * @param index
+   *          The index of the sign or of the first digit.
+   * @return Returns the result of the parsing operation.
+   * @throws NumberFormatException
+   *           if the run is neither a valid int nor a valid float.
+   */
+  public static NumberParseResult parseNumberFromByteArray(final byte[] pdf,
+      int index)
+  {
+    final byte first = pdf[index];
+    assert isSign(first) || isNumeric(first);
+
+    // an optional sign byte precedes the digits
+    final int sign = (first == '-') ? -1 : +1;
+    int pos = (first == '+' || first == '-') ? index + 1 : index;
+
+    // gather digits and decimal points; any other byte ends the number
+    final StringBuffer text = new StringBuffer();
+    while (isNumeric(pdf[pos]) || pdf[pos] == '.')
+    {
+      text.append((char) pdf[pos]);
+      pos++;
+    }
+
+    final NumberParseResult result = new NumberParseResult();
+    result.next_index = pos;
+    // TODO: make better
+    final String number = text.toString();
+    try
+    {
+      result.number = Integer.parseInt(number) * sign;
+    }
+    catch (NumberFormatException e)
+    {
+      // not a plain int (decimal point, overflow, ...): store it as float instead
+      result.floating = Float.parseFloat(number) * sign;
+    }
+    return result;
+  }
+
+  /**
+   * Locates the last "startxref" entry of the document by delegating to
+   * ByteArrayUtils.lastIndexOf with PDFNames.STARTXREF_STR.
+   * 
+   * @param pdf
+   *          The complete PDF file data.
+   * @return Returns the offset (byte index) of the "startxref" entry.
+   */
+  public static int findLastStartXRef(final byte[] pdf)
+  {
+    return ByteArrayUtils.lastIndexOf(pdf, PDFNames.STARTXREF_STR);
+  }
+
+  /**
+   * Parses the xref section at pdf+index.
+   * 
+   * <p>
+   * An xref section starts with the 'xref' keyword and contains one or more
+   * xref sub-sections. Parsing proceeds sub-section by sub-section and stops
+   * at the 'trailer' keyword, which is not consumed.
+   * </p>
+   * <p>
+   * The keyword itself and the newline behind it are verified by assertions
+   * only, i.e. not at all unless assertions are enabled.
+   * </p>
+   * <p>
+   * The trailer test is made before the first sub-section is parsed, so a
+   * section without sub-sections yields a result nothing was appended to.
+   * </p>
+   * 
+   * @param pdf
+   *          The PDF data.
+   * @param index
+   *          The start index of the xref table.
+   * @return Returns the parsed section; its next_index is the index of the
+   *         'trailer' keyword.
+   */
+  public static XRefSectionParseResult parseXRefSection(final byte[] pdf,
+      final int index)
+  {
+    final XRefSectionParseResult section = new XRefSectionParseResult();
+    section.start_index = index;
+
+    final int after_keyword = index + PDFNames.XREF_STR.length;
+    assert ByteArrayUtils.compareByteArrays(pdf, index, PDFNames.XREF_STR);
+    assert isNewline(pdf, after_keyword);
+
+    // Sub-sections follow one another until the trailer keyword shows up.
+    int pos = skipWhitespace(pdf, after_keyword);
+    while (!ByteArrayUtils.compareByteArrays(pdf, pos, PDFNames.TRAILER_STR))
+    {
+      final XRefSubSectionParseResult sub = parseXRefSubSection(pdf, pos);
+      section.appendXRefSubSection(sub);
+      pos = sub.next_index;
+    }
+
+    section.next_index = pos;
+    assert ByteArrayUtils.compareByteArrays(pdf, section.next_index, PDFNames.TRAILER_STR);
+    return section;
+  }
+
+  /**
+   * Parses a xref sub-section: a header holding the first object number and
+   * the number of objects, followed by that many xref lines.
+   * @param pdf
+   *          The PDF data.
+   * @param index
+   *          The index of the sub-section's first object number.
+   * @return Returns the sub-section with all of its xref lines appended.
+   */
+  public static XRefSubSectionParseResult parseXRefSubSection(final byte[] pdf,
+      final int index)
+  {
+    XRefSubSectionParseResult sspr = new XRefSubSectionParseResult();
+    sspr.start_index = index;
+    // header, part 1: number of the first object covered by this sub-section
+    NumberParseResult start_obj_num_npr = parseNumberFromByteArray(pdf, sspr.start_index);
+    sspr.start_obj_number = start_obj_num_npr.number;
+    assert sspr.start_obj_number >= 0;
+
+    assert isWhitespace(pdf[start_obj_num_npr.next_index]);
+    int num_obj_index = skipWhitespace(pdf, start_obj_num_npr.next_index);
+    // header, part 2: how many xref lines follow
+    NumberParseResult num_obj_npr = parseNumberFromByteArray(pdf, num_obj_index);
+    sspr.num_objects = num_obj_npr.number;
+
+    // any whitespace is accepted behind the header, not just a newline
+    assert isWhitespace(pdf[num_obj_npr.next_index]);
+    int start_of_line = skipWhitespace(pdf, num_obj_npr.next_index);
+    // (hence skipWhitespace rather than skipNewline)
+
+    for (int i = 0; i < sspr.num_objects; i++)
+    {
+      final int cur_object_number = sspr.start_obj_number + i;
+
+      XRefLineParseResult lpr = parseXrefLine(pdf, start_of_line);
+      sspr.appendXRefLine(lpr);
+
+      // For a line with usage 'n' the object header at the line's offset is
+      // parsed; its object and generation number must agree with the line
+      // (the comparison itself is made by assertions only).
+
+      if (lpr.object_usage == 'n')
+      {
+        // check the line - this simple check may make problems with object
+        // streams and xref streams
+        ObjectHeaderParseResult ohpr = parseObjectHeader(pdf, lpr.object_offset);
+        assert ohpr.object_number == cur_object_number;
+        assert ohpr.generation_number == lpr.generation_number;
+      }
+
+      start_of_line = lpr.next_index;
+    }
+
+    sspr.next_index = start_of_line;
+    return sspr;
+  }
+
+  /**
+   * Parses a single 20 bytes xref line at pdf+index: 10-digit offset, SPACE,
+   * 5-digit generation number, SPACE, usage flag ('n' or 'f'), 2-byte EOL.
+   * @param pdf
+   *          The PDF data.
+   * @param index
+   *          The index of the first offset digit.
+   * @return Returns the result of the parsing operation.
+   */
+  public static XRefLineParseResult parseXrefLine(final byte[] pdf,
+      final int index)
+  {
+    XRefLineParseResult lpr = new XRefLineParseResult();
+
+    lpr.start_index = index;
+    // NOTE: the line layout is verified by assertions only; without them nothing is rejected.
+    IntegerParseResult object_offset_ipr = parseUnsignedInteger(pdf, lpr.start_index);
+    lpr.object_offset = object_offset_ipr.number;
+    assert lpr.object_offset >= 0;
+    assert lpr.object_offset < pdf.length;
+    assert object_offset_ipr.next_index == lpr.start_index + 10;
+
+    assert pdf[object_offset_ipr.next_index] == PDFNames.WHITESPACE_SP;
+    // ^ The standard explicitly says 1 single SPACE here, which is why the
+    // generation number is taken to start at next_index + 1 without any
+    // whitespace skipping. The asserted positions start_index + 10 and
+    // start_index + 16 imply a 10-digit offset and a 5-digit generation number.
+    int generation_number_index = object_offset_ipr.next_index + 1;
+
+    IntegerParseResult generation_number_ipr = parseUnsignedInteger(pdf, generation_number_index);
+    lpr.generation_number = generation_number_ipr.number;
+    assert generation_number_ipr.next_index == lpr.start_index + 16;
+
+    assert pdf[generation_number_ipr.next_index] == PDFNames.WHITESPACE_SP;
+    int usage_index = generation_number_ipr.next_index + 1;
+
+    lpr.object_usage = pdf[usage_index];
+    assert lpr.object_usage == 'n' || lpr.object_usage == 'f';
+    // end of line: either SP followed by CR or LF, or CR LF - two bytes in both cases
+    if (pdf[usage_index + 1] == PDFNames.WHITESPACE_SP)
+    {
+      assert pdf[usage_index + 2] == PDFNames.WHITESPACE_CR || pdf[usage_index + 2] == PDFNames.WHITESPACE_LF;
+    }
+    else
+    {
+      assert pdf[usage_index + 1] == PDFNames.WHITESPACE_CR;
+      assert pdf[usage_index + 2] == PDFNames.WHITESPACE_LF;
+    }
+
+    lpr.next_index = usage_index + 3;
+
+    assert lpr.next_index == lpr.start_index + 20;
+
+    return lpr;
+  }
+
+  public static int indexOfName(final byte[] pdf, List names, byte[] sought)
+  { // position of the first entry whose name bytes in pdf compare equal to sought, or -1
+    int position = 0;
+    for (Iterator it = names.iterator(); it.hasNext(); position++)
+    {
+      final NameParseResult candidate = (NameParseResult) it.next();
+      if (ByteArrayUtils.compareByteArrays(pdf, candidate.name_start_index, sought))
+      {
+        return position;
+      }
+    }
+    return -1;
+  }
+
+  public static TrailerParseResult parseTrailer(final byte[] pdf,
+      final int index)
+  { // Parses the trailer at pdf+index: the trailer keyword followed by a dictionary.
+    TrailerParseResult tpr = new TrailerParseResult();
+    tpr.start_index = index;
+    tpr.has_predecessor = false;
+    // has_predecessor stays false unless the dictionary holds a Prev entry (see below)
+    assert ByteArrayUtils.compareByteArrays(pdf, tpr.start_index, PDFNames.TRAILER_STR);
+
+    // whitespace behind the keyword is skipped but not demanded (no assertion here)
+    tpr.contents_index = skipWhitespace(pdf, tpr.start_index + PDFNames.TRAILER_STR.length);
+    // contents_index is already past all whitespace, so this second skip changes nothing
+    int trailer_dict_index = skipWhitespace(pdf, tpr.contents_index);
+
+    assert ByteArrayUtils.compareByteArrays(pdf, trailer_dict_index, PDFNames.DICT_START_STR);
+
+    tpr.dpr = parseDictionary(pdf, trailer_dict_index);
+
+    int cur_index = tpr.dpr.next_index;
+    // Info, Root and Prev are optional: they are only read if their name is present.
+    int info_index = indexOfName(pdf, tpr.dpr.names, PDFNames.INFO_STR);
+    if (info_index >= 0)
+    {
+      tpr.info = (IndirectObjectReferenceParseResult) tpr.dpr.values.get(info_index);
+    }
+
+    int root_index = indexOfName(pdf, tpr.dpr.names, PDFNames.ROOT_STR);
+    if (root_index >= 0)
+    {
+      tpr.root = (IndirectObjectReferenceParseResult) tpr.dpr.values.get(root_index);
+    }
+    // Size is read unconditionally; if it is absent, indexOfName hands -1 to values.get.
+    tpr.size = ((NumberParseResult) tpr.dpr.values.get(indexOfName(pdf, tpr.dpr.names, PDFNames.SIZE_STR))).number;
+
+    int prev_index = indexOfName(pdf, tpr.dpr.names, PDFNames.PREV_STR);
+    if (prev_index >= 0)
+    {
+      tpr.has_predecessor = true;
+      tpr.setPrev(((NumberParseResult) tpr.dpr.values.get(prev_index)).number);
+    }
+
+    // Former hand-written scan of the trailer entries, kept for reference only:
+    // int cur_index = skipWhitespace(pdf, trailer_dict_index +
+    // PDFNames.DICT_START_STR.length);
+    // for (;;) {
+    // if (ByteArrayUtils.compareByteArrays(pdf, cur_index,
+    // PDFNames.DICT_END_STR)) {
+    // cur_index += PDFNames.DICT_END_STR.length;
+    // break;
+    // }
+    //
+    // assert pdf[cur_index] == PDFNames.DELIMITER_NAME;
+    // cur_index++;
+    //
+    // if (ByteArrayUtils.compareByteArrays(pdf, cur_index, PDFNames.INFO_STR))
+    // {
+    // assert isWhitespace(pdf[cur_index + PDFNames.INFO_STR.length]);
+    // int ir_index = skipWhitespace(pdf, cur_index + PDFNames.INFO_STR.length);
+    //
+    // IndirectObjectReferenceParseResult iorpr =
+    // parseIndirectObjectReference(pdf, ir_index);
+    // tpr.info = iorpr;
+    //
+    // cur_index = skipWhitespace(pdf, iorpr.next_index);
+    // continue;
+    // }
+    //
+    // if (ByteArrayUtils.compareByteArrays(pdf, cur_index, PDFNames.ROOT_STR))
+    // {
+    // assert isWhitespace(pdf[cur_index + PDFNames.ROOT_STR.length]);
+    // int ir_index = skipWhitespace(pdf, cur_index + PDFNames.ROOT_STR.length);
+    //
+    // IndirectObjectReferenceParseResult iorpr =
+    // parseIndirectObjectReference(pdf, ir_index);
+    // tpr.root = iorpr;
+    //
+    // cur_index = skipWhitespace(pdf, iorpr.next_index);
+    // continue;
+    // }
+    //
+    // if (ByteArrayUtils.compareByteArrays(pdf, cur_index, PDFNames.SIZE_STR))
+    // {
+    // assert isWhitespace(pdf[cur_index + PDFNames.SIZE_STR.length]);
+    // int size_index = skipWhitespace(pdf, cur_index +
+    // PDFNames.SIZE_STR.length);
+    //
+    // NumberParseResult npr = parseNumberFromByteArray(pdf, size_index);
+    // tpr.size = npr.number;
+    // assert tpr.size > 0;
+    //
+    // cur_index = skipWhitespace(pdf, npr.next_index);
+    // continue;
+    // }
+    //
+    // if (ByteArrayUtils.compareByteArrays(pdf, cur_index, PDFNames.PREV_STR))
+    // {
+    // assert isWhitespace(pdf[cur_index + PDFNames.PREV_STR.length]);
+    // int prev_index = skipWhitespace(pdf, cur_index +
+    // PDFNames.PREV_STR.length);
+    //
+    // NumberParseResult npr = parseNumberFromByteArray(pdf, prev_index);
+    // tpr.has_predecessor = true;
+    // tpr.setPrev(npr.number);
+    // assert tpr.getPrev() >= 0;
+    // assert tpr.getPrev() < pdf.length;
+    //
+    // assert ByteArrayUtils.compareByteArrays(pdf, tpr.getPrev(),
+    // PDFNames.XREF_STR);
+    //
+    // cur_index = skipWhitespace(pdf, npr.next_index);
+    // continue;
+    // }
+    //
+    // // unrecognized type
+    // // skip to next delimiter
+    // // TODO: this will not work with nested dicts. - already deprecated
+    // while (pdf[cur_index] != PDFNames.DELIMITER_NAME) {
+    // cur_index++;
+    // }
+    // }
+
+    tpr.contents_end_index = cur_index;
+    tpr.next_index = skipWhitespace(pdf, tpr.contents_end_index);
+    // the trailer is expected to be followed by the startxref keyword (assertion only)
+    assert ByteArrayUtils.compareByteArrays(pdf, tpr.next_index, PDFNames.STARTXREF_STR);
+    return tpr;
+  }
+
+  /**
+   * Parses the startxref section at pdf+index.
+   * 
+   * @param pdf
+   *          The complete PDF file data.
+   * @param index
+   *          The index of the startxref section.
+   * @return Returns the result of the parsing operation.
+   */
+  public static StartXRefParseResult parseStartXRef(final byte[] pdf,
+      final int index)
+  {
+    StartXRefParseResult spr = new StartXRefParseResult();
+    spr.next_index = index;
+    // NOTE(review): the value above is overwritten further down; start_index may have been intended - confirm
+    assert ByteArrayUtils.compareByteArrays(pdf, index, PDFNames.STARTXREF_STR);
+    assert isNewline(pdf, index + PDFNames.STARTXREF_STR.length);
+
+    int index_of_number = skipWhitespace(pdf, index + PDFNames.STARTXREF_STR.length);
+    // (all whitespace behind the keyword is skipped, not only the asserted newline)
+    NumberParseResult npr = parseNumberFromByteArray(pdf, index_of_number);
+    spr.xref_index = npr.number;
+
+    assert isNewline(pdf, npr.next_index);
+    spr.next_index = skipWhitespace(pdf, npr.next_index);
+    // next_index is expected to sit on PDFNames.EOF_STR now (asserted below)
+
+    assert ByteArrayUtils.compareByteArrays(pdf, spr.next_index, PDFNames.EOF_STR);
+
+    assert spr.xref_index >= 0;
+    assert spr.xref_index < pdf.length;
+
+    // A linearized document sets the startxref value of the first page's footer
+    // to 0.
+    if (spr.xref_index != 0)
+    {
+      assert ByteArrayUtils.compareByteArrays(pdf, spr.xref_index, PDFNames.XREF_STR);
+    }
+
+    return spr;
+  }
+
+  /**
+   * Parses the End Of File (EOF) marker at pdf+index.
+   * 
+   * @param pdf
+   *          The PDF data.
+   * @param index
+   *          The index at which the marker starts.
+   * @return Returns the result; eof_end_index and next_index are identical.
+   */
+  public static EOFParseResult parseEOF(final byte[] pdf, final int index)
+  {
+    final EOFParseResult result = new EOFParseResult();
+    result.start_index = index;
+
+    // the marker itself is verified by assertion only
+    assert ByteArrayUtils.compareByteArrays(pdf, index, PDFNames.EOF_STR);
+
+    // Note: The EOF marker is not necessarily terminated with a newline, so
+    // nothing behind the marker is consumed and next_index is simply the end
+    // of the marker.
+    // (One could explicitly determine a newline here.)
+    final int marker_end = index + PDFNames.EOF_STR.length;
+    result.eof_end_index = marker_end;
+    result.next_index = marker_end;
+
+    return result;
+  }
+
+  /**
+   * Tells whether pdf+index starts an indirect object reference, i.e. an
+   * object number, a generation number and the reference keyword.
+   * 
+   * @param pdf
+   *          The PDF data.
+   * @param index
+   *          The index at which the reference would start.
+   * @return Returns true if a positive object number, a non-negative
+   *         generation number and PDFNames.REFERENCE_STR follow one another.
+   */
+  public static boolean isIndirectObjectReference(final byte[] pdf,
+      final int index)
+  {
+    // pure probe: no result objects are built, only the verdict is returned
+    // object number: has to start with a digit and be greater than zero
+    if (!isNumeric(pdf[index]))
+    {
+      return false;
+    }
+    final NumberParseResult object_number_npr = parseNumberFromByteArray(pdf, index);
+    if (object_number_npr.number <= 0)
+    {
+      return false;
+    }
+    if (!isWhitespace(pdf[object_number_npr.next_index]))
+    {
+      return false;
+    }
+
+    // generation number: has to start with a digit and must not be negative
+    final int generation_number_index = skipWhitespace(pdf, object_number_npr.next_index);
+    if (!isNumeric(pdf[generation_number_index]))
+    {
+      return false;
+    }
+    final NumberParseResult generation_number_npr = parseNumberFromByteArray(pdf, generation_number_index);
+    if (generation_number_npr.number < 0)
+    {
+      return false;
+    }
+    if (!isWhitespace(pdf[generation_number_npr.next_index]))
+    {
+      return false;
+    }
+
+    // finally the reference keyword, separated by at least one whitespace byte
+    final int R_index = skipWhitespace(pdf, generation_number_npr.next_index);
+    return ByteArrayUtils.compareByteArrays(pdf, R_index, PDFNames.REFERENCE_STR);
+  }
+
+  /**
+   * Parses the indirect object reference at pdf+index.
+   * 
+   * @param pdf
+   *          The PDF data.
+   * @param index
+   *          The index of the first digit of the object number.
+   * @return Returns the reference and the index behind the reference keyword.
+   */
+  public static IndirectObjectReferenceParseResult parseIndirectObjectReference(
+      final byte[] pdf, final int index)
+  {
+    assert isIndirectObjectReference(pdf, index);
+
+    final IndirectObjectReferenceParseResult result = new IndirectObjectReferenceParseResult();
+    final IndirectObjectReference reference = new IndirectObjectReference();
+    result.ior = reference;
+    result.start_index = index;
+
+    // object number, followed by whitespace
+    final NumberParseResult object_number = parseNumberFromByteArray(pdf, index);
+    reference.object_number = object_number.number;
+    assert reference.object_number > 0;
+    assert isWhitespace(pdf[object_number.next_index]);
+
+    // generation number, followed by whitespace
+    final int generation_index = skipWhitespace(pdf, object_number.next_index);
+    final NumberParseResult generation_number = parseNumberFromByteArray(pdf, generation_index);
+    reference.generation_number = generation_number.number;
+    assert reference.generation_number >= 0;
+    assert isWhitespace(pdf[generation_number.next_index]);
+
+    // reference keyword
+    final int keyword_index = skipWhitespace(pdf, generation_number.next_index);
+    assert ByteArrayUtils.compareByteArrays(pdf, keyword_index, PDFNames.REFERENCE_STR);
+
+    result.next_index = keyword_index + PDFNames.REFERENCE_STR.length;
+    return result;
+  }
+
+  /**
+   * Parses the object header at pdf+index.
+   * 
+   * @param pdf
+   *          The PDF data.
+   * @param index
+   *          The index of the first digit of the object number.
+   * @return Returns both numbers and the index of the object's content.
+   */
+  public static ObjectHeaderParseResult parseObjectHeader(final byte[] pdf,
+      final int index)
+  {
+    final ObjectHeaderParseResult header = new ObjectHeaderParseResult();
+    header.start_index = index;
+
+    // object number, followed by whitespace
+    final NumberParseResult object_number = parseNumberFromByteArray(pdf, index);
+    header.object_number = object_number.number;
+    assert header.object_number > 0;
+    assert isWhitespace(pdf[object_number.next_index]);
+
+    // generation number, followed by whitespace
+    final int generation_index = skipWhitespace(pdf, object_number.next_index);
+    final NumberParseResult generation_number = parseNumberFromByteArray(pdf, generation_index);
+    header.generation_number = generation_number.number;
+    assert header.generation_number >= 0;
+    assert isWhitespace(pdf[generation_number.next_index]);
+
+    // object keyword (PDFNames.OBJ_STR)
+    final int keyword_index = skipWhitespace(pdf, generation_number.next_index);
+    assert ByteArrayUtils.compareByteArrays(pdf, keyword_index, PDFNames.OBJ_STR);
+
+    // Not all PDF writers put a newline behind the keyword. Therefore no
+    // newline is demanded; whatever whitespace follows (possibly none) is
+    // skipped instead.
+    header.next_index = skipWhitespace(pdf, keyword_index + PDFNames.OBJ_STR.length);
+
+    return header;
+  }
+
+  /**
+   * Parses the indirect object (header, content, end keyword) at pdf+index.
+   * 
+   * @param pdf
+   *          The PDF data.
+   * @param index
+   *          The index of the object header.
+   * @return Returns the header, the content object and the surrounding indices.
+   */
+  public static ObjectParseResult parseObject(final byte[] pdf, final int index)
+  {
+    final ObjectParseResult result = new ObjectParseResult();
+    result.start_index = index;
+    // header
+    result.header = parseObjectHeader(pdf, index);
+    result.content_index = result.header.next_index;
+
+    // content: one object of whatever type, surrounded by optional whitespace
+    result.object = parseUnknownObject(pdf, skipWhitespace(pdf, result.content_index));
+    result.end_of_content_index = skipWhitespace(pdf, result.object.next_index);
+    // end keyword; a newline behind it is neither required nor consumed
+    assert ByteArrayUtils.compareByteArrays(pdf, result.end_of_content_index, PDFNames.ENDOBJ_STR);
+    result.next_index = result.end_of_content_index + PDFNames.ENDOBJ_STR.length;
+    return result;
+  }
+
+  /**
+   * Parses the object at pdf+index without knowing its type in advance.
+   *
+   * <p>
+   * The type is determined from the first bytes, checked in this order:
+   * dictionary (parsed as a stream if the stream keyword follows the
+   * dictionary), null, boolean, indirect object reference or number, literal
+   * string, hexadecimal string, array and name.
+   * </p>
+   *
+   * @param pdf
+   *          The PDF data.
+   * @param index
+   *          The index of the first byte of the object.
+   * @return Returns the result of the parsing operation.
+   * @throws RuntimeException
+   *           Thrown if the byte at index does not start any of the objects
+   *           listed above.
+   */
+  public static ParseResult parseUnknownObject(final byte[] pdf, final int index)
+  {
+    if (ByteArrayUtils.compareByteArrays(pdf, index, PDFNames.DICT_START_STR))
+    {
+      final DictionaryParseResult dpr = parseDictionary(pdf, index);
+
+      // a dictionary that is followed by the stream keyword belongs to a stream
+      final int possible_stream_index = skipWhitespace(pdf, dpr.next_index);
+      if (ByteArrayUtils.compareByteArrays(pdf, possible_stream_index, PDFNames.STREAM_STR))
+      {
+        return parseStream(pdf, possible_stream_index, dpr);
+      }
+
+      return dpr;
+    }
+
+    if (ByteArrayUtils.compareByteArrays(pdf, index, PDFNames.NULL_STR))
+    {
+      return parseNull(pdf, index);
+    }
+
+    if (ByteArrayUtils.compareByteArrays(pdf, index, PDFNames.TRUE_STR) || ByteArrayUtils.compareByteArrays(pdf, index, PDFNames.FALSE_STR))
+    {
+      return parseBoolean(pdf, index);
+    }
+
+    final byte first_byte = pdf[index];
+
+    if (isNumeric(first_byte) || isSign(first_byte))
+    {
+      // An indirect object reference starts with a number as well, so it has
+      // to be tried first - if this fails, parse a plain number.
+      if (isIndirectObjectReference(pdf, index))
+      {
+        return parseIndirectObjectReference(pdf, index);
+      }
+
+      return parseNumberFromByteArray(pdf, index);
+    }
+
+    // the remaining object types are identified by their opening delimiter
+    switch (first_byte)
+    {
+    case PDFNames.DELIMITER_STRING_OPEN:
+      return parseLiteralString(pdf, index);
+    case PDFNames.DELIMITER_HEXSTRING_OPEN:
+      return parseHexString(pdf, index);
+    case PDFNames.DELIMITER_ARRAY_OPEN:
+      return parseArray(pdf, index);
+    case PDFNames.DELIMITER_NAME:
+      return parseName(pdf, index);
+    default:
+      // the opening quote around the byte value was missing in the message
+      throw new RuntimeException("Unknown first_byte '" + first_byte + "' when parsing an unknown object at index=" + index + ".");
+    }
+  }
+
+  /**
+   * Parses a literal string.
+   * 
+   * <p>
+   * A literal string is a string of ASCII characters enclosed by '(' and ')'.
+   * Balanced pairs of '(' and ')' are allowed within the string. Unbalanced '('
+   * or ')' must be escaped as '\(' or '\)'.
+   * </p>
+   * 
+   * @param pdf
+   *          The PDF data.
+   * @param index
+   *          The index.
+   * @return Returns the result of the parsing operation.
+   */
+  public static LiteralStringParseResult parseLiteralString(final byte[] pdf,
+      final int index)
+  {
+    LiteralStringParseResult lspr = new LiteralStringParseResult();
+    lspr.start_index = index;
+
+    assert pdf[lspr.start_index] == PDFNames.DELIMITER_STRING_OPEN;
+
+    lspr.content_start_index = lspr.start_index + 1;
+
+    int cur_index = lspr.content_start_index;
+    // number of unescaped '(' inside the content that are still waiting for
+    // their matching ')'
+    int parenthesis_stack = 0;
+    for (;;)
+    {
+      final byte cur_byte = pdf[cur_index];
+
+      if (cur_byte == '\\')
+      {
+        // A backslash escapes the byte that follows it, whatever that byte is.
+        // Treating only "\(" and "\)" as escapes misreads an escaped
+        // backslash: in "(abc\\)" the second backslash would be taken as
+        // escaping the closing parenthesis and the end of the string would be
+        // missed. Skipping both bytes is sufficient for the longer escape
+        // forms too, because only parentheses matter for finding the end.
+        cur_index += 2;
+        continue;
+      }
+
+      if (cur_byte == PDFNames.DELIMITER_STRING_OPEN)
+      {
+        parenthesis_stack++;
+      }
+      else if (cur_byte == PDFNames.DELIMITER_STRING_CLOSE)
+      {
+        if (parenthesis_stack == 0)
+        {
+          // an unescaped ')' without a pending '(' terminates the string
+          break;
+        }
+
+        parenthesis_stack--;
+      }
+
+      cur_index++;
+    }
+
+    lspr.content_end_index = cur_index;
+    assert pdf[lspr.content_end_index] == PDFNames.DELIMITER_STRING_CLOSE;
+
+    lspr.next_index = lspr.content_end_index + 1;
+
+    return lspr;
+  }
+
+  /**
+   * Tells if the given byte is a hexadecimal digit, i.e. a byte accepted by
+   * isNumeric or one of the letters a-f or A-F.
+   *
+   * @param data
+   *          The byte to be checked.
+   * @return Returns true if the byte is a hexadecimal digit, false otherwise.
+   */
+  protected static boolean isHex(final byte data)
+  {
+    // The upper case range has to end at 'F'. Ending it at 'f' accepted every
+    // byte between 'A' and 'f', including 'G'-'Z' and several punctuation
+    // characters.
+    return isNumeric(data) || ('a' <= data && data <= 'f') || ('A' <= data && data <= 'F');
+  }
+
+  /**
+   * Parses a hexadecimal string.
+   * 
+   * @param pdf
+   *          The PDF data.
+   * @param index
+   *          The index.
+   * @return Returns the result of the parsing operation.
+   */
+  public static HexStringParseResult parseHexString(final byte[] pdf,
+      final int index)
+  {
+    final HexStringParseResult result = new HexStringParseResult();
+    result.start_index = index;
+
+    assert pdf[result.start_index] == PDFNames.DELIMITER_HEXSTRING_OPEN;
+
+    result.content_start_index = result.start_index + 1;
+
+    // advance over hexadecimal digits and whitespace until any other byte is
+    // found
+    int pos = result.content_start_index;
+    for (;;)
+    {
+      final byte current = pdf[pos];
+      if (!isHex(current) && !isWhitespace(current))
+      {
+        break;
+      }
+      pos++;
+    }
+
+    // the byte that ended the scan is expected to be the closing delimiter
+    result.content_end_index = pos;
+    assert pdf[result.content_end_index] == PDFNames.DELIMITER_HEXSTRING_CLOSE;
+
+    result.next_index = result.content_end_index + 1;
+
+    return result;
+  }
+
+  /**
+   * Parses an array at pdf+index.
+   *
+   * <p>
+   * The elements between the array delimiters are parsed one after another
+   * with parseUnknownObject and collected in the result's element list.
+   * </p>
+   *
+   * @param pdf
+   *          The PDF data.
+   * @param index
+   *          The index of the opening array delimiter.
+   * @return Returns the result of the parsing operation.
+   */
+  public static ArrayParseResult parseArray(final byte[] pdf, final int index)
+  {
+    final ArrayParseResult result = new ArrayParseResult();
+    result.start_index = index;
+    assert pdf[result.start_index] == PDFNames.DELIMITER_ARRAY_OPEN;
+
+    result.content_start_index = result.start_index + 1;
+    result.elements = new ArrayList();
+
+    // parse element by element until the closing delimiter is reached
+    int pos = skipWhitespace(pdf, result.content_start_index);
+    while (pdf[pos] != PDFNames.DELIMITER_ARRAY_CLOSE)
+    {
+      final ParseResult element = parseUnknownObject(pdf, pos);
+      result.elements.add(element);
+
+      pos = skipWhitespace(pdf, element.next_index);
+    }
+
+    result.content_end_index = pos;
+    assert pdf[result.content_end_index] == PDFNames.DELIMITER_ARRAY_CLOSE;
+
+    result.next_index = result.content_end_index + 1;
+    return result;
+  }
+
+  /**
+   * Parses a PDF Name.
+   * 
+   * @param pdf
+   *          The PDF data.
+   * @param index
+   *          The index.
+   * @return Returns the result of this parsing operation.
+   */
+  public static NameParseResult parseName(final byte[] pdf, final int index)
+  {
+    final NameParseResult result = new NameParseResult();
+    result.start_index = index;
+
+    assert pdf[result.start_index] == PDFNames.DELIMITER_NAME;
+
+    // the name itself starts right behind the name delimiter
+    result.name_start_index = result.start_index + 1;
+
+    // at least one regular character is expected
+    assert isRegular(pdf[result.name_start_index]);
+
+    // the name ends at the first byte that is not a regular character
+    for (int pos = result.name_start_index;; pos++)
+    {
+      if (!isRegular(pdf[pos]))
+      {
+        result.next_index = pos;
+        break;
+      }
+    }
+    assert !isRegular(pdf[result.next_index]);
+
+    return result;
+  }
+
+  /**
+   * Parses a dictionary at pdf+index.
+   *
+   * <p>
+   * The entries are read as pairs of a name and an object of any type. The
+   * i-th entry of the result's name list belongs to the i-th entry of its
+   * value list.
+   * </p>
+   *
+   * @param pdf
+   *          The PDF data.
+   * @param index
+   *          The index of the dictionary start marker.
+   * @return Returns the result of the parsing operation.
+   */
+  public static DictionaryParseResult parseDictionary(final byte[] pdf,
+      final int index)
+  {
+    final DictionaryParseResult result = new DictionaryParseResult();
+    result.start_index = index;
+
+    assert ByteArrayUtils.compareByteArrays(pdf, index, PDFNames.DICT_START_STR);
+
+    result.content_start_index = result.start_index + PDFNames.DICT_START_STR.length;
+
+    result.names = new ArrayList();
+    result.values = new ArrayList();
+
+    // read name/value pairs until the dictionary end marker is reached
+    int pos = skipWhitespace(pdf, result.content_start_index);
+    while (!ByteArrayUtils.compareByteArrays(pdf, pos, PDFNames.DICT_END_STR))
+    {
+      final NameParseResult key = parseName(pdf, pos);
+      result.names.add(key);
+
+      final ParseResult value = parseUnknownObject(pdf, skipWhitespace(pdf, key.next_index));
+      result.values.add(value);
+
+      pos = skipWhitespace(pdf, value.next_index);
+    }
+
+    result.content_end_index = pos;
+    assert ByteArrayUtils.compareByteArrays(pdf, result.content_end_index, PDFNames.DICT_END_STR);
+    result.next_index = result.content_end_index + PDFNames.DICT_END_STR.length;
+
+    return result;
+  }
+
+  /**
+   * Parses a stream.
+   * 
+   * @param pdf
+   *          The PDF data.
+   * @param index
+   *          The index.
+   * @param dpr
+   *          The DictionaryParseResult of the stream's dictionary. This
+   *          dictionary must precede the stream keyword. The length of the
+   *          stream content is taken from the dictionary's /Length entry.
+   * @return Returns the result of this parsing operation.
+   */
+  public static StreamParseResult parseStream(final byte[] pdf,
+      final int index, final DictionaryParseResult dpr)
+  {
+    StreamParseResult spr = new StreamParseResult();
+    spr.stream_dictionary = dpr;
+    spr.start_index = spr.stream_dictionary.start_index;
+    spr.stream_start_index = index;
+    assert ByteArrayUtils.compareByteArrays(pdf, index, PDFNames.STREAM_STR);
+
+    // assert that the provided dictionary really belongs to this stream
+    assert spr.stream_start_index == skipWhitespace(pdf, spr.stream_dictionary.next_index);
+
+    // see PDF Spec 1.4 chapter 3.2.7: the stream keyword has to be followed by
+    // either LF or CR LF
+    final int after_keyword_index = spr.stream_start_index + PDFNames.STREAM_STR.length;
+    assert pdf[after_keyword_index] == PDFNames.WHITESPACE_LF || (pdf[after_keyword_index] == PDFNames.WHITESPACE_CR && pdf[after_keyword_index + 1] == PDFNames.WHITESPACE_LF);
+    spr.content_start_index = skipNewline(pdf, after_keyword_index);
+
+    // look up the stream length in the stream dictionary
+    int length = -1;
+    for (int i = 0; i < spr.stream_dictionary.names.size(); i++)
+    {
+      NameParseResult name = (NameParseResult) spr.stream_dictionary.names.get(i);
+
+      // The name has to be exactly the length key. Comparing the bytes at
+      // name_start_index alone presumably also matches longer names that
+      // merely start with the key (e.g. /Length1 of an embedded font program),
+      // so the length of the name is checked as well.
+      final int name_length = name.next_index - name.name_start_index;
+      final boolean is_length_key = name_length == PDFNames.LENGTH_STR.length && ByteArrayUtils.compareByteArrays(pdf, name.name_start_index, PDFNames.LENGTH_STR);
+      if (!is_length_key)
+      {
+        continue;
+      }
+
+      ParseResult pr = (ParseResult) spr.stream_dictionary.values.get(i);
+      if (pr instanceof IndirectObjectReferenceParseResult)
+      {
+        // the length is stored in another object; the end of the stream
+        // cannot be determined here, which is signalled by the -1 indices
+        log.debug("An object stream with indirect length - cannot parse this instantly - parse later again.");
+        spr.content_end_index = -1;
+        spr.next_index = -1;
+        return spr;
+      }
+
+      length = ((NumberParseResult) pr).number;
+      break;
+    }
+    assert length >= 0;
+
+    spr.content_end_index = spr.content_start_index + length;
+
+    // a newline between the stream content and endstream is skipped
+    int endstr_index = spr.content_end_index;
+    if (isNewline(pdf, endstr_index))
+    {
+      endstr_index = skipWhitespace(pdf, endstr_index);
+    }
+    assert ByteArrayUtils.compareByteArrays(pdf, endstr_index, PDFNames.ENDSTREAM_STR);
+
+    spr.next_index = endstr_index + PDFNames.ENDSTREAM_STR.length;
+
+    return spr;
+  }
+
+  /**
+   * Parses the null object at pdf+index.
+   *
+   * @param pdf
+   *          The PDF data.
+   * @param index
+   *          The index of the null keyword.
+   * @return Returns the result of the parsing operation.
+   */
+  public static NullParseResult parseNull(final byte[] pdf, final int index)
+  {
+    final NullParseResult result = new NullParseResult();
+    result.start_index = index;
+
+    // the null keyword is expected right at the start index
+    assert ByteArrayUtils.compareByteArrays(pdf, result.start_index, PDFNames.NULL_STR);
+
+    result.next_index = result.start_index + PDFNames.NULL_STR.length;
+    return result;
+  }
+
+  public static int getObjectOffsetFromXRefByIndirectObjectReference(
+      XRefSectionParseResult xpr, IndirectObjectReference ior)
+  {
+    Iterator it = xpr.xref_subsections.iterator();
+    while (it.hasNext())
+    {
+      XRefSubSectionParseResult section = (XRefSubSectionParseResult) it.next();
+
+      for (int i = 0; i < section.xref_lines.size(); i++)
+      {
+        if (section.start_obj_number + i == ior.object_number)
+        {
+          XRefLineParseResult lpr = (XRefLineParseResult) section.xref_lines.get(i);
+          return lpr.object_offset;
+        }
+      }
+    }
+
+    return -1;
+  }
+
+  /**
+   * Parses the PDF header at pdf+index.
+   *
+   * <p>
+   * The header is expected to consist of a comment character, the PDF version
+   * string, the major version, the version separator, the minor version,
+   * whitespace and a second comment that runs up to the next newline.
+   * </p>
+   *
+   * @param pdf
+   *          The PDF data.
+   * @param index
+   *          The index of the header's comment character.
+   * @return Returns the result of the parsing operation.
+   */
+  public static HeaderParseResult parseHeader(final byte[] pdf, final int index)
+  {
+    final HeaderParseResult header = new HeaderParseResult();
+    header.start_index = index;
+
+    // comment character followed by the version string
+    assert pdf[header.start_index] == PDFNames.COMMENT;
+    final int versionStrIndex = header.start_index + 1;
+    assert ByteArrayUtils.compareByteArrays(pdf, versionStrIndex, PDFNames.PDF_VERSION_STR);
+
+    // major version
+    header.major_index = versionStrIndex + PDFNames.PDF_VERSION_STR.length;
+    final IntegerParseResult major = parseUnsignedInteger(pdf, header.major_index);
+    header.major = major.number;
+    assert header.major >= 1;
+
+    // separator between major and minor version
+    assert pdf[major.next_index] == PDFNames.PDF_VERSION_SEPARATOR;
+
+    // minor version
+    header.minor_index = major.next_index + 1;
+    final IntegerParseResult minor = parseUnsignedInteger(pdf, header.minor_index);
+    header.minor = minor.number;
+    assert header.minor >= 0;
+
+    // the second comment follows after whitespace
+    assert isWhitespace(pdf[minor.next_index]);
+    header.binary_characters_index = skipWhitespace(pdf, minor.next_index);
+    assert pdf[header.binary_characters_index] == PDFNames.COMMENT;
+
+    header.next_index = skipToNewline(pdf, header.binary_characters_index);
+
+    return header;
+  }
+
+  /**
+   * Parses a PDF footer.
+   * 
+   * <p>
+   * A PDF footer starts with the xref, followed by the trailer, the startxref
+   * and the EOF marker.
+   * </p>
+   * 
+   * @param pdf
+   *          The PDF data.
+   * @param index
+   *          The index.
+   * @return Returns the result of the parsing operation.
+   * 
+   * @see FooterParseResult
+   */
+  public static FooterParseResult parseFooter(final byte[] pdf, final int index)
+  {
+    final FooterParseResult footer = new FooterParseResult();
+    footer.start_index = index;
+
+    // The four parts of the footer follow each other directly: each part is
+    // parsed starting at the next index of its predecessor.
+    footer.xpr = PDFUtils.parseXRefSection(pdf, footer.start_index);
+    footer.tpr = PDFUtils.parseTrailer(pdf, footer.xpr.next_index);
+    footer.sxpr = PDFUtils.parseStartXRef(pdf, footer.tpr.next_index);
+    footer.eofpr = PDFUtils.parseEOF(pdf, footer.sxpr.next_index);
+
+    // the footer ends where the EOF marker ends
+    footer.next_index = footer.eofpr.next_index;
+
+    return footer;
+  }
+
+}
-- 
cgit v1.2.3