aboutsummaryrefslogtreecommitdiff
path: root/pdf-as-lib/src/main/java/at/knowcenter/wag/exactparser/parsing/PDFUtils.java
diff options
context:
space:
mode:
Diffstat (limited to 'pdf-as-lib/src/main/java/at/knowcenter/wag/exactparser/parsing/PDFUtils.java')
-rw-r--r--pdf-as-lib/src/main/java/at/knowcenter/wag/exactparser/parsing/PDFUtils.java1405
1 files changed, 1405 insertions, 0 deletions
diff --git a/pdf-as-lib/src/main/java/at/knowcenter/wag/exactparser/parsing/PDFUtils.java b/pdf-as-lib/src/main/java/at/knowcenter/wag/exactparser/parsing/PDFUtils.java
new file mode 100644
index 0000000..de356c9
--- /dev/null
+++ b/pdf-as-lib/src/main/java/at/knowcenter/wag/exactparser/parsing/PDFUtils.java
@@ -0,0 +1,1405 @@
+/**
+ * <copyright> Copyright 2006 by Know-Center, Graz, Austria </copyright>
+ * PDF-AS has been contracted by the E-Government Innovation Center EGIZ, a
+ * joint initiative of the Federal Chancellery Austria and Graz University of
+ * Technology.
+ *
+ * Licensed under the EUPL, Version 1.1 or - as soon they will be approved by
+ * the European Commission - subsequent versions of the EUPL (the "Licence");
+ * You may not use this work except in compliance with the Licence.
+ * You may obtain a copy of the Licence at:
+ * http://www.osor.eu/eupl/
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the Licence is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the Licence for the specific language governing permissions and
+ * limitations under the Licence.
+ *
+ * This product combines work with different licenses. See the "NOTICE" text
+ * file for details on the various modules and licenses.
+ * The "NOTICE" text file is part of the distribution. Any derivative works
+ * that you distribute must include a readable copy of the "NOTICE" text file.
+ *
+ * $Id: PDFUtils.java,v 1.1 2006/08/25 17:00:59 wprinz Exp $
+ */
+package at.knowcenter.wag.exactparser.parsing;
+
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+
+import org.apache.log4j.Logger;
+
+import at.knowcenter.wag.exactparser.ByteArrayUtils;
+import at.knowcenter.wag.exactparser.parsing.results.ArrayParseResult;
+import at.knowcenter.wag.exactparser.parsing.results.BooleanParseResult;
+import at.knowcenter.wag.exactparser.parsing.results.DictionaryParseResult;
+import at.knowcenter.wag.exactparser.parsing.results.EOFParseResult;
+import at.knowcenter.wag.exactparser.parsing.results.FooterParseResult;
+import at.knowcenter.wag.exactparser.parsing.results.HeaderParseResult;
+import at.knowcenter.wag.exactparser.parsing.results.HexStringParseResult;
+import at.knowcenter.wag.exactparser.parsing.results.IndirectObjectReferenceParseResult;
+import at.knowcenter.wag.exactparser.parsing.results.IntegerParseResult;
+import at.knowcenter.wag.exactparser.parsing.results.LiteralStringParseResult;
+import at.knowcenter.wag.exactparser.parsing.results.NameParseResult;
+import at.knowcenter.wag.exactparser.parsing.results.NullParseResult;
+import at.knowcenter.wag.exactparser.parsing.results.NumberParseResult;
+import at.knowcenter.wag.exactparser.parsing.results.ObjectHeaderParseResult;
+import at.knowcenter.wag.exactparser.parsing.results.ObjectParseResult;
+import at.knowcenter.wag.exactparser.parsing.results.ParseResult;
+import at.knowcenter.wag.exactparser.parsing.results.StartXRefParseResult;
+import at.knowcenter.wag.exactparser.parsing.results.StreamParseResult;
+import at.knowcenter.wag.exactparser.parsing.results.TrailerParseResult;
+import at.knowcenter.wag.exactparser.parsing.results.XRefLineParseResult;
+import at.knowcenter.wag.exactparser.parsing.results.XRefSectionParseResult;
+import at.knowcenter.wag.exactparser.parsing.results.XRefSubSectionParseResult;
+
+
+
+/**
+ * Abstract class that contains several static utility methods for parsing and
+ * analyzing PDF documents on the lowest level.
+ *
+ * <p>
+ * Most operations require random access to the PDF data (mostly to verify the
+ * synthax). So the whole PDF document has to be provided as a byte array. The
+ * term "pdf+index" states a specific position index within this byte array.
+ * </p>
+ *
+ * @author wprinz
+ *
+ */
+public abstract class PDFUtils
+{
+ private static Logger log = Logger.getLogger(PDFUtils.class);
+
+ public static boolean isWhitespace(final byte data)
+ {
+ return ByteArrayUtils.contains(PDFNames.WHITESPACE_CHARACTERS, data);
+ }
+
+ public static boolean isDelimiter(final byte data)
+ {
+ return ByteArrayUtils.contains(PDFNames.DELIMITER_CHARACTERS, data);
+ }
+
+ protected static boolean isRegular(final byte data)
+ {
+ return !(isWhitespace(data) || isDelimiter(data));
+ }
+
+ /**
+ * Skips whitespace.
+ *
+ * <p>
+ * Skips all whitespace, which may be none, one or multiple whitespace
+ * characters.
+ * </p>
+ * <p>
+ * Note that this also skips newline characters (which belong to whitespace as
+ * well).
+ * </p>
+ *
+ * @param data
+ * The PDF data.
+ * @param index
+ * The index.
+ * @return Returns the index of the first non whitespace character. This may
+ * be equal to index if no whitespaces were skipped at all.
+ */
+ public static int skipWhitespace(final byte[] data, final int index)
+ {
+ int non_whitespace_index = index;
+ while (isWhitespace(data[non_whitespace_index]))
+ {
+ non_whitespace_index++;
+ }
+ return non_whitespace_index;
+ }
+
+ /**
+ * Skips bytes until whitespace is reached.
+ *
+ * <p>
+ * Skips all non whitespace characters, which may be none at all.
+ * </p>
+ *
+ * @param data
+ * The PDF data.
+ * @param index
+ * The index.
+ * @return Returns the index of the first whitespace character. This may be
+ * equal to index if no non whitespaces were skipped at all.
+ */
+ public static int skipToWhitespace(final byte[] data, final int index)
+ {
+ int whitespace_index = index;
+ while (!isWhitespace(data[whitespace_index]))
+ {
+ whitespace_index++;
+ }
+ return whitespace_index;
+ }
+
+ protected static final byte[] LINE_TERMINATOR_CRLF = {
+ PDFNames.WHITESPACE_CR, PDFNames.WHITESPACE_LF };
+
+ protected static final byte[] LINE_TERMINATOR_CRALONE = { PDFNames.WHITESPACE_CR };
+
+ protected static final byte[] LINE_TERMINATOR_LF = { PDFNames.WHITESPACE_LF };
+
+ public static boolean isNewline(final byte[] data, final int index)
+ {
+ if (ByteArrayUtils.compareByteArrays(data, index, LINE_TERMINATOR_LF))
+ {
+ return true;
+ }
+ if (ByteArrayUtils.compareByteArrays(data, index, LINE_TERMINATOR_CRLF))
+ {
+ return true;
+ }
+ // although not specified by PDF, some applications use the CR alone as line
+ // terminator
+ if (ByteArrayUtils.compareByteArrays(data, index, LINE_TERMINATOR_CRALONE))
+ {
+ return true;
+ }
+ return false;
+ }
+
+ public static int skipNewline(final byte[] data, final int index)
+ {
+ if (ByteArrayUtils.compareByteArrays(data, index, LINE_TERMINATOR_LF))
+ {
+ return index + LINE_TERMINATOR_LF.length;
+ }
+ if (ByteArrayUtils.compareByteArrays(data, index, LINE_TERMINATOR_CRLF))
+ {
+ return index + LINE_TERMINATOR_CRLF.length;
+ }
+ // although not specified by PDF, some applications use the CR alone as line
+ // terminator
+ if (ByteArrayUtils.compareByteArrays(data, index, LINE_TERMINATOR_CRALONE))
+ {
+ return index + LINE_TERMINATOR_CRALONE.length;
+ }
+
+ assert false : "don't call this if you don't expect a newline - call skipWhitespace instead";
+ return index;
+ }
+
+ public static int skipToNewline(final byte[] data, final int index)
+ {
+ int current_index = index;
+ for (;;)
+ {
+ if (ByteArrayUtils.compareByteArrays(data, current_index, LINE_TERMINATOR_LF))
+ {
+ return current_index + LINE_TERMINATOR_LF.length;
+ }
+ if (ByteArrayUtils.compareByteArrays(data, index, LINE_TERMINATOR_CRLF))
+ {
+ return index + LINE_TERMINATOR_CRLF.length;
+ }
+ // although not specified by PDF, some applications use the CR alone as
+ // line terminator
+ if (ByteArrayUtils.compareByteArrays(data, index, LINE_TERMINATOR_CRALONE))
+ {
+ return index + LINE_TERMINATOR_CRALONE.length;
+ }
+ current_index++;
+ }
+ }
+
+ /**
+ * Parses a boolean value.
+ *
+ * @param pdf
+ * The PDF data.
+ * @param index
+ * The index.
+ * @return Returns the result of the parsing operation.
+ */
+ public static BooleanParseResult parseBoolean(final byte[] pdf,
+ final int index)
+ {
+ BooleanParseResult bpr = new BooleanParseResult();
+ bpr.start_index = index;
+
+ if (ByteArrayUtils.compareByteArrays(pdf, bpr.start_index, PDFNames.TRUE_STR))
+ {
+ bpr.value = true;
+ bpr.next_index = bpr.start_index + PDFNames.TRUE_STR.length;
+
+ return bpr;
+ }
+ if (ByteArrayUtils.compareByteArrays(pdf, bpr.start_index, PDFNames.FALSE_STR))
+ {
+ bpr.value = false;
+ bpr.next_index = bpr.start_index + PDFNames.FALSE_STR.length;
+
+ return bpr;
+ }
+
+ throw new RuntimeException("Boolean couldn't be parsed at index " + index);
+ }
+
+ public static boolean isSign(final byte data)
+ {
+ return data == '+' || data == '-';
+ }
+
+ public static boolean isNumeric(final byte data)
+ {
+ return '0' <= data && data <= '9';
+ }
+
+ /**
+ * Reads the (positive integer) number from the data. The number must be
+ * terminated by the end of line.
+ *
+ * @param data
+ * The data.
+ * @param index
+ * The index.
+ * @return Returns the read number.
+ */
+ public static int readNumberFromByteArray(final byte[] data, final int index)
+ {
+ NumberParseResult npr = parseNumberFromByteArray(data, index);
+
+ assert npr.number >= 0;
+ return npr.number;
+ }
+
+ /**
+ * Parses an unsigned integer.
+ *
+ * <p>
+ * The integer must be a block of successive number characters. It must not be
+ * preceded by a sign (not even '+').
+ * </p>
+ *
+ * @param pdf
+ * The PDF data.
+ * @param index
+ * The index.
+ * @return Returns the result of the parsing operation.
+ */
+ public static IntegerParseResult parseUnsignedInteger(final byte[] pdf,
+ final int index)
+ {
+ assert isNumeric(pdf[index]);
+
+ String number = "";
+
+ int cur_index = index;
+ while (isNumeric(pdf[cur_index]))
+ {
+
+ number += (char) pdf[cur_index];
+
+ cur_index++;
+ }
+
+ // TODO: make better
+ int int_value = Integer.parseInt(number);
+
+ assert int_value >= 0;
+
+ IntegerParseResult ipr = new IntegerParseResult();
+ ipr.start_index = index;
+ ipr.next_index = cur_index;
+ ipr.number = int_value;
+ return ipr;
+ }
+
+ /**
+ * Parses a (potentially) signed integer.
+ *
+ * <p>
+ * The integer must be a block of successive number characters. It may be
+ * preceded by a sign character ('+' or '-').
+ * </p>
+ *
+ * @param pdf
+ * The PDF data.
+ * @param index
+ * The index.
+ * @return Returns the result of the parsing operation.
+ */
+ public static IntegerParseResult parseInteger(final byte[] pdf,
+ final int index)
+ {
+ assert isSign(pdf[index]) || isNumeric(pdf[index]);
+
+ int sign = +1;
+ int number_start = index;
+ if (pdf[index] == '+')
+ {
+ sign = +1;
+ number_start++;
+ }
+ else
+ {
+ if (pdf[index] == '-')
+ {
+ sign = -1;
+ number_start++;
+ }
+ else
+ {
+ assert isNumeric(pdf[index]);
+ }
+ }
+
+ IntegerParseResult ipr = parseUnsignedInteger(pdf, number_start);
+ ipr.start_index = index;
+ ipr.number *= sign;
+ return ipr;
+ }
+
+ /**
+ * Parses an arbitrary number;
+ *
+ * @param pdf
+ * The PDF data.
+ * @param index
+ * The index.
+ * @return Returns the result of the parsing operation.
+ */
+ public static NumberParseResult parseNumberFromByteArray(final byte[] pdf,
+ int index)
+ {
+ String number = "";
+
+ assert isSign(pdf[index]) || isNumeric(pdf[index]);
+
+ int sign = +1;
+ if (pdf[index] == '+')
+ {
+ sign = +1;
+ index++;
+ }
+ else
+ {
+ if (pdf[index] == '-')
+ {
+ sign = -1;
+ index++;
+ }
+ else
+ {
+ assert isNumeric(pdf[index]);
+ }
+ }
+
+ while (isNumeric(pdf[index]) || pdf[index] == '.')
+ {
+
+ char digit = (char) pdf[index];
+ number += digit;
+
+ index++;
+ }
+
+ NumberParseResult npr = new NumberParseResult();
+ npr.next_index = index;
+ // TODO: make better
+ try
+ {
+ npr.number = Integer.parseInt(number) * sign;
+ }
+ catch (NumberFormatException e)
+ {
+ npr.floating = Float.parseFloat(number) * sign;
+ }
+
+ return npr;
+ }
+
+ /**
+ * Searches the last occurrence of the "startxref" entry ... in other words
+ * starts the search from the end of the document and works reversely.
+ *
+ * @param pdf
+ * The complete PDF file data.
+ * @return Returns the offset (byte index) of the "startxref" entry.
+ */
+ public static int findLastStartXRef(final byte[] pdf)
+ {
+ return ByteArrayUtils.lastIndexOf(pdf, PDFNames.STARTXREF_STR);
+ }
+
+ /**
+ * Parses the xref section at pdf+index.
+ *
+ * <p>
+ * An xref section starts with 'xref' and contains one or more xref
+ * sub-sections.
+ * </p>
+ *
+ * @param pdf
+ * The PDF data.
+ * @param index
+ * The start index of the xref table.
+ * @return Returns the result of the parsing operation.
+ */
+ public static XRefSectionParseResult parseXRefSection(final byte[] pdf,
+ final int index)
+ {
+ at.knowcenter.wag.exactparser.parsing.results.XRefSectionParseResult xpr = new XRefSectionParseResult();
+ xpr.start_index = index;
+
+ assert ByteArrayUtils.compareByteArrays(pdf, xpr.start_index, PDFNames.XREF_STR);
+ assert isNewline(pdf, xpr.start_index + PDFNames.XREF_STR.length);
+
+ int cur_index = skipWhitespace(pdf, xpr.start_index + PDFNames.XREF_STR.length);
+ // skipNewline(pdf, xpr.start_index + PDFNames.XREF_STR.length);
+
+ for (;;)
+ {
+ // trailer ends the xref section.
+ if (ByteArrayUtils.compareByteArrays(pdf, cur_index, PDFNames.TRAILER_STR))
+ {
+ break;
+ }
+
+ // no trailer ==> another xref section
+
+ XRefSubSectionParseResult sspr = parseXRefSubSection(pdf, cur_index);
+ xpr.appendXRefSubSection(sspr);
+
+ cur_index = sspr.next_index;
+ }
+
+ xpr.next_index = cur_index;
+ assert ByteArrayUtils.compareByteArrays(pdf, xpr.next_index, PDFNames.TRAILER_STR);
+
+ return xpr;
+ }
+
+ /**
+ * Parses a xref sub-section.
+ *
+ * @param pdf
+ * The PDF data.
+ * @param index
+ * The index.
+ * @return Returns the result of the parsing operation.
+ */
+ public static XRefSubSectionParseResult parseXRefSubSection(final byte[] pdf,
+ final int index)
+ {
+ XRefSubSectionParseResult sspr = new XRefSubSectionParseResult();
+ sspr.start_index = index;
+
+ NumberParseResult start_obj_num_npr = parseNumberFromByteArray(pdf, sspr.start_index);
+ sspr.start_obj_number = start_obj_num_npr.number;
+ assert sspr.start_obj_number >= 0;
+
+ assert isWhitespace(pdf[start_obj_num_npr.next_index]);
+ int num_obj_index = skipWhitespace(pdf, start_obj_num_npr.next_index);
+
+ NumberParseResult num_obj_npr = parseNumberFromByteArray(pdf, num_obj_index);
+ sspr.num_objects = num_obj_npr.number;
+
+ // assert isNewline(pdf, num_obj_npr.next_index);
+ assert isWhitespace(pdf[num_obj_npr.next_index]);
+ int start_of_line = skipWhitespace(pdf, num_obj_npr.next_index);
+ // skipNewline(pdf, num_obj_npr.next_index);
+
+ for (int i = 0; i < sspr.num_objects; i++)
+ {
+ final int cur_object_number = sspr.start_obj_number + i;
+
+ XRefLineParseResult lpr = parseXrefLine(pdf, start_of_line);
+ sspr.appendXRefLine(lpr);
+
+ // System.out.println("xref line of object " + (oc.start_obj_number + i) +
+ // " at " + lpr.start_index + ": " + lpr.object_offset + " " +
+ // lpr.generation_number + " " + (char) lpr.object_usage);
+
+ if (lpr.object_usage == 'n')
+ {
+ // check the line - this simple check may make problems with object
+ // streams and xref streams
+ ObjectHeaderParseResult ohpr = parseObjectHeader(pdf, lpr.object_offset);
+ assert ohpr.object_number == cur_object_number;
+ assert ohpr.generation_number == lpr.generation_number;
+ }
+
+ start_of_line = lpr.next_index;
+ }
+
+ sspr.next_index = start_of_line;
+ return sspr;
+ }
+
+ /**
+ * Parses a single 20 bytes xref line at pdf+index.
+ *
+ * @param pdf
+ * The PDF data.
+ * @param index
+ * The index.
+ * @return Returns the result of the parsing operation.
+ */
+ public static XRefLineParseResult parseXrefLine(final byte[] pdf,
+ final int index)
+ {
+ XRefLineParseResult lpr = new XRefLineParseResult();
+
+ lpr.start_index = index;
+
+ IntegerParseResult object_offset_ipr = parseUnsignedInteger(pdf, lpr.start_index);
+ lpr.object_offset = object_offset_ipr.number;
+ assert lpr.object_offset >= 0;
+ assert lpr.object_offset < pdf.length;
+ assert object_offset_ipr.next_index == lpr.start_index + 10;
+
+ assert pdf[object_offset_ipr.next_index] == PDFNames.WHITESPACE_SP; // Standard
+ // explicitely
+ // says 1
+ // single
+ // SPACE
+ int generation_number_index = object_offset_ipr.next_index + 1;
+
+ IntegerParseResult generation_number_ipr = parseUnsignedInteger(pdf, generation_number_index);
+ lpr.generation_number = generation_number_ipr.number;
+ assert generation_number_ipr.next_index == lpr.start_index + 16;
+
+ assert pdf[generation_number_ipr.next_index] == PDFNames.WHITESPACE_SP;
+ int usage_index = generation_number_ipr.next_index + 1;
+
+ lpr.object_usage = pdf[usage_index];
+ assert lpr.object_usage == 'n' || lpr.object_usage == 'f';
+
+ if (pdf[usage_index + 1] == PDFNames.WHITESPACE_SP)
+ {
+ assert pdf[usage_index + 2] == PDFNames.WHITESPACE_CR || pdf[usage_index + 2] == PDFNames.WHITESPACE_LF;
+ }
+ else
+ {
+ assert pdf[usage_index + 1] == PDFNames.WHITESPACE_CR;
+ assert pdf[usage_index + 2] == PDFNames.WHITESPACE_LF;
+ }
+
+ lpr.next_index = usage_index + 3;
+
+ assert lpr.next_index == lpr.start_index + 20;
+
+ return lpr;
+ }
+
+ public static int indexOfName(final byte[] pdf, List names,
+ byte[] sought)
+ {
+ for (int i = 0; i < names.size(); i++)
+ {
+ NameParseResult name = (NameParseResult) names.get(i);
+ if (ByteArrayUtils.compareByteArrays(pdf, name.name_start_index, sought))
+ {
+ return i;
+ }
+ }
+ return -1;
+ }
+
+ public static TrailerParseResult parseTrailer(final byte[] pdf,
+ final int index)
+ {
+ TrailerParseResult tpr = new TrailerParseResult();
+ tpr.start_index = index;
+ tpr.has_predecessor = false;
+
+ assert ByteArrayUtils.compareByteArrays(pdf, tpr.start_index, PDFNames.TRAILER_STR);
+
+ // assert isWhitespace(pdf[tpr.start_index + PDFNames.TRAILER_STR.length]);
+ tpr.contents_index = skipWhitespace(pdf, tpr.start_index + PDFNames.TRAILER_STR.length);
+
+ int trailer_dict_index = skipWhitespace(pdf, tpr.contents_index);
+
+ assert ByteArrayUtils.compareByteArrays(pdf, trailer_dict_index, PDFNames.DICT_START_STR);
+
+ tpr.dpr = parseDictionary(pdf, trailer_dict_index);
+
+ int cur_index = tpr.dpr.next_index;
+
+ int info_index = indexOfName(pdf, tpr.dpr.names, PDFNames.INFO_STR);
+ if (info_index >= 0)
+ {
+ tpr.info = (IndirectObjectReferenceParseResult) tpr.dpr.values.get(info_index);
+ }
+
+ int root_index = indexOfName(pdf, tpr.dpr.names, PDFNames.ROOT_STR);
+ if (root_index >= 0)
+ {
+ tpr.root = (IndirectObjectReferenceParseResult) tpr.dpr.values.get(root_index);
+ }
+
+ tpr.size = ((NumberParseResult) tpr.dpr.values.get(indexOfName(pdf, tpr.dpr.names, PDFNames.SIZE_STR))).number;
+
+ int prev_index = indexOfName(pdf, tpr.dpr.names, PDFNames.PREV_STR);
+ if (prev_index >= 0)
+ {
+ tpr.has_predecessor = true;
+ tpr.setPrev(((NumberParseResult) tpr.dpr.values.get(prev_index)).number);
+ }
+
+ //
+ // int cur_index = skipWhitespace(pdf, trailer_dict_index +
+ // PDFNames.DICT_START_STR.length);
+ // for (;;) {
+ // if (ByteArrayUtils.compareByteArrays(pdf, cur_index,
+ // PDFNames.DICT_END_STR)) {
+ // cur_index += PDFNames.DICT_END_STR.length;
+ // break;
+ // }
+ //
+ // assert pdf[cur_index] == PDFNames.DELIMITER_NAME;
+ // cur_index++;
+ //
+ // if (ByteArrayUtils.compareByteArrays(pdf, cur_index, PDFNames.INFO_STR))
+ // {
+ // assert isWhitespace(pdf[cur_index + PDFNames.INFO_STR.length]);
+ // int ir_index = skipWhitespace(pdf, cur_index + PDFNames.INFO_STR.length);
+ //
+ // IndirectObjectReferenceParseResult iorpr =
+ // parseIndirectObjectReference(pdf, ir_index);
+ // tpr.info = iorpr;
+ //
+ // cur_index = skipWhitespace(pdf, iorpr.next_index);
+ // continue;
+ // }
+ //
+ // if (ByteArrayUtils.compareByteArrays(pdf, cur_index, PDFNames.ROOT_STR))
+ // {
+ // assert isWhitespace(pdf[cur_index + PDFNames.ROOT_STR.length]);
+ // int ir_index = skipWhitespace(pdf, cur_index + PDFNames.ROOT_STR.length);
+ //
+ // IndirectObjectReferenceParseResult iorpr =
+ // parseIndirectObjectReference(pdf, ir_index);
+ // tpr.root = iorpr;
+ //
+ // cur_index = skipWhitespace(pdf, iorpr.next_index);
+ // continue;
+ // }
+ //
+ // if (ByteArrayUtils.compareByteArrays(pdf, cur_index, PDFNames.SIZE_STR))
+ // {
+ // assert isWhitespace(pdf[cur_index + PDFNames.SIZE_STR.length]);
+ // int size_index = skipWhitespace(pdf, cur_index +
+ // PDFNames.SIZE_STR.length);
+ //
+ // NumberParseResult npr = parseNumberFromByteArray(pdf, size_index);
+ // tpr.size = npr.number;
+ // assert tpr.size > 0;
+ //
+ // cur_index = skipWhitespace(pdf, npr.next_index);
+ // continue;
+ // }
+ //
+ // if (ByteArrayUtils.compareByteArrays(pdf, cur_index, PDFNames.PREV_STR))
+ // {
+ // assert isWhitespace(pdf[cur_index + PDFNames.PREV_STR.length]);
+ // int prev_index = skipWhitespace(pdf, cur_index +
+ // PDFNames.PREV_STR.length);
+ //
+ // NumberParseResult npr = parseNumberFromByteArray(pdf, prev_index);
+ // tpr.has_predecessor = true;
+ // tpr.setPrev(npr.number);
+ // assert tpr.getPrev() >= 0;
+ // assert tpr.getPrev() < pdf.length;
+ //
+ // assert ByteArrayUtils.compareByteArrays(pdf, tpr.getPrev(),
+ // PDFNames.XREF_STR);
+ //
+ // cur_index = skipWhitespace(pdf, npr.next_index);
+ // continue;
+ // }
+ //
+ // // unrecognized type
+ // // skip to next delimiter
+ // // TODO: this will not work with nested dicts. - already deprecated
+ // while (pdf[cur_index] != PDFNames.DELIMITER_NAME) {
+ // cur_index++;
+ // }
+ // }
+
+ tpr.contents_end_index = cur_index;
+ tpr.next_index = skipWhitespace(pdf, tpr.contents_end_index);
+
+ assert ByteArrayUtils.compareByteArrays(pdf, tpr.next_index, PDFNames.STARTXREF_STR);
+ return tpr;
+ }
+
+ /**
+ * Parses the startxref section at pdf+index.
+ *
+ * @param pdf
+ * The complete PDF file data.
+ * @param index
+ * The index of the startxref section.
+ * @return Returns the retsult of the parsing operation.
+ */
+ public static StartXRefParseResult parseStartXRef(final byte[] pdf,
+ final int index)
+ {
+ StartXRefParseResult spr = new StartXRefParseResult();
+ spr.next_index = index;
+
+ assert ByteArrayUtils.compareByteArrays(pdf, index, PDFNames.STARTXREF_STR);
+ assert isNewline(pdf, index + PDFNames.STARTXREF_STR.length);
+
+ int index_of_number = skipWhitespace(pdf, index + PDFNames.STARTXREF_STR.length);
+ // skipNewline(pdf, index + PDFNames.STARTXREF_STR.length);
+ NumberParseResult npr = parseNumberFromByteArray(pdf, index_of_number);
+ spr.xref_index = npr.number;
+
+ assert isNewline(pdf, npr.next_index);
+ spr.next_index = skipWhitespace(pdf, npr.next_index);
+ // skipNewline(pdf, npr.next_index);
+
+ assert ByteArrayUtils.compareByteArrays(pdf, spr.next_index, PDFNames.EOF_STR);
+
+ assert spr.xref_index >= 0;
+ assert spr.xref_index < pdf.length;
+
+ // A linearized document sets the startxref value of the first page's footer
+ // to 0.
+ if (spr.xref_index != 0)
+ {
+ assert ByteArrayUtils.compareByteArrays(pdf, spr.xref_index, PDFNames.XREF_STR);
+ }
+
+ return spr;
+ }
+
+ /**
+ * Parses the End Of File (EOF) marker at pdf+index.
+ *
+ * @param pdf
+ * The PDF data.
+ * @param index
+ * The index where to start the parsing.
+ * @return Returns the result of the parsing operation.
+ */
+ public static EOFParseResult parseEOF(final byte[] pdf, final int index)
+ {
+ EOFParseResult eofpr = new EOFParseResult();
+ eofpr.start_index = index;
+
+ assert ByteArrayUtils.compareByteArrays(pdf, eofpr.start_index, PDFNames.EOF_STR);
+
+ eofpr.eof_end_index = eofpr.start_index + PDFNames.EOF_STR.length;
+
+ // Note: The EOF marker is not necessarily terminated with a
+ // newline.
+
+ // perhaps explicitely determine a newline.
+
+ eofpr.next_index = eofpr.eof_end_index;
+
+ return eofpr;
+ }
+
+ public static boolean isIndirectObjectReference(final byte[] pdf,
+ final int index)
+ {
+ IndirectObjectReferenceParseResult iorpr = new IndirectObjectReferenceParseResult();
+ iorpr.ior = new IndirectObjectReference();
+ iorpr.start_index = index;
+
+ if (!PDFUtils.isNumeric(pdf[iorpr.start_index]))
+ {
+ return false;
+ }
+ NumberParseResult object_number_npr = parseNumberFromByteArray(pdf, iorpr.start_index);
+ iorpr.ior.object_number = object_number_npr.number;
+ if (iorpr.ior.object_number <= 0)
+ {
+ return false;
+ }
+
+ if (!isWhitespace(pdf[object_number_npr.next_index]))
+ {
+ return false;
+ }
+ int generation_number_index = skipWhitespace(pdf, object_number_npr.next_index);
+
+ if (!PDFUtils.isNumeric(pdf[generation_number_index]))
+ {
+ return false;
+ }
+ NumberParseResult generation_number_npr = parseNumberFromByteArray(pdf, generation_number_index);
+ iorpr.ior.generation_number = generation_number_npr.number;
+ if (iorpr.ior.generation_number < 0)
+ {
+ return false;
+ }
+
+ if (!isWhitespace(pdf[generation_number_npr.next_index]))
+ {
+ return false;
+ }
+ int R_index = skipWhitespace(pdf, generation_number_npr.next_index);
+
+ if (!ByteArrayUtils.compareByteArrays(pdf, R_index, PDFNames.REFERENCE_STR))
+ {
+ return false;
+ }
+
+ iorpr.next_index = R_index + PDFNames.REFERENCE_STR.length;
+
+ return true;
+ }
+
+ /**
+ * Parses an indirect object reference.
+ *
+ * @param pdf
+ * The PDF data.
+ * @param index
+ * The index.
+ * @return Returns the result of the parsing operation.
+ */
+ public static IndirectObjectReferenceParseResult parseIndirectObjectReference(
+ final byte[] pdf, final int index)
+ {
+
+ assert isIndirectObjectReference(pdf, index);
+
+ IndirectObjectReferenceParseResult iorpr = new IndirectObjectReferenceParseResult();
+ iorpr.ior = new IndirectObjectReference();
+ iorpr.start_index = index;
+
+ NumberParseResult object_number_npr = parseNumberFromByteArray(pdf, iorpr.start_index);
+ iorpr.ior.object_number = object_number_npr.number;
+ assert iorpr.ior.object_number > 0;
+
+ assert isWhitespace(pdf[object_number_npr.next_index]);
+ int generation_number_index = skipWhitespace(pdf, object_number_npr.next_index);
+
+ NumberParseResult generation_number_npr = parseNumberFromByteArray(pdf, generation_number_index);
+ iorpr.ior.generation_number = generation_number_npr.number;
+ assert iorpr.ior.generation_number >= 0;
+
+ assert isWhitespace(pdf[generation_number_npr.next_index]);
+ int R_index = skipWhitespace(pdf, generation_number_npr.next_index);
+
+ assert ByteArrayUtils.compareByteArrays(pdf, R_index, PDFNames.REFERENCE_STR);
+
+ iorpr.next_index = R_index + PDFNames.REFERENCE_STR.length;
+
+ return iorpr;
+ }
+
+ /**
+ * Parses the object header at pdf+index.
+ *
+ * @param pdf
+ * The PDF data.
+ * @param index
+ * The index.
+ * @return Returns the result of the parsing operation.
+ */
+ public static ObjectHeaderParseResult parseObjectHeader(final byte[] pdf,
+ final int index)
+ {
+ ObjectHeaderParseResult ohpr = new ObjectHeaderParseResult();
+
+ ohpr.start_index = index;
+
+ NumberParseResult object_number_npr = parseNumberFromByteArray(pdf, ohpr.start_index);
+ ohpr.object_number = object_number_npr.number;
+ assert ohpr.object_number > 0;
+
+ assert isWhitespace(pdf[object_number_npr.next_index]);
+ int generation_number_index = skipWhitespace(pdf, object_number_npr.next_index);
+
+ NumberParseResult generation_number_npr = parseNumberFromByteArray(pdf, generation_number_index);
+ ohpr.generation_number = generation_number_npr.number;
+ assert ohpr.generation_number >= 0;
+
+ assert isWhitespace(pdf[generation_number_npr.next_index]);
+ int obj_index = skipWhitespace(pdf, generation_number_npr.next_index);
+
+ assert ByteArrayUtils.compareByteArrays(pdf, obj_index, PDFNames.OBJ_STR);
+
+ // not all pdfwriters make a newline after obj...
+ // assert isNewline(pdf, obj_index + PDFNames.OBJ_STR.length);
+ // ohpr.next_index = skipNewline(pdf, obj_index + PDFNames.OBJ_STR.length);
+ ohpr.next_index = skipWhitespace(pdf, obj_index + PDFNames.OBJ_STR.length);
+
+ return ohpr;
+ }
+
+ public static ObjectParseResult parseObject(final byte[] pdf, final int index)
+ {
+ ObjectParseResult opr = new ObjectParseResult();
+ opr.start_index = index;
+
+ opr.header = parseObjectHeader(pdf, opr.start_index);
+ opr.content_index = opr.header.next_index;
+
+ int cur_index = skipWhitespace(pdf, opr.content_index);
+
+ opr.object = parseUnknownObject(pdf, cur_index);
+
+ cur_index = skipWhitespace(pdf, opr.object.next_index);
+
+ opr.end_of_content_index = cur_index;
+ assert ByteArrayUtils.compareByteArrays(pdf, opr.end_of_content_index, PDFNames.ENDOBJ_STR);
+
+ cur_index = opr.end_of_content_index + PDFNames.ENDOBJ_STR.length;
+
+ opr.next_index = cur_index;
+ //assert isNewline(pdf, cur_index);
+ //opr.next_index = skipNewline(pdf, cur_index);
+
+ return opr;
+ }
+
+ public static ParseResult parseUnknownObject(final byte[] pdf, final int index)
+ {
+ if (ByteArrayUtils.compareByteArrays(pdf, index, PDFNames.DICT_START_STR))
+ {
+ DictionaryParseResult dpr = parseDictionary(pdf, index);
+
+ int possible_stream_index = skipWhitespace(pdf, dpr.next_index);
+ if (ByteArrayUtils.compareByteArrays(pdf, possible_stream_index, PDFNames.STREAM_STR))
+ {
+ return parseStream(pdf, possible_stream_index, dpr);
+ }
+
+ return dpr;
+ }
+
+ if (ByteArrayUtils.compareByteArrays(pdf, index, PDFNames.NULL_STR))
+ {
+ return parseNull(pdf, index);
+ }
+
+ if (ByteArrayUtils.compareByteArrays(pdf, index, PDFNames.TRUE_STR) || ByteArrayUtils.compareByteArrays(pdf, index, PDFNames.FALSE_STR))
+ {
+ return parseBoolean(pdf, index);
+ }
+
+ final byte first_byte = pdf[index];
+
+ if (isNumeric(first_byte) || isSign(first_byte))
+ {
+
+ // try to parse a Indirect reference first - if this fails, parse a number
+ if (isIndirectObjectReference(pdf, index))
+ {
+ return parseIndirectObjectReference(pdf, index);
+ }
+
+ return parseNumberFromByteArray(pdf, index);
+ }
+
+ ParseResult pr = null;
+
+ switch (first_byte)
+ {
+ case PDFNames.DELIMITER_STRING_OPEN:
+ pr = parseLiteralString(pdf, index);
+ break;
+ case PDFNames.DELIMITER_HEXSTRING_OPEN:
+ pr = parseHexString(pdf, index);
+ break;
+ case PDFNames.DELIMITER_ARRAY_OPEN:
+ pr = parseArray(pdf, index);
+ break;
+ case PDFNames.DELIMITER_NAME:
+ pr = parseName(pdf, index);
+ break;
+ default:
+ throw new RuntimeException("Unknown first_byte " + first_byte + "' when parsing an unknown object at index=" + index + ".");
+ // assert false : "nyi or invalid char";
+ }
+ assert pr != null;
+
+ return pr;
+ }
+
+ /**
+ * Parses a literal string.
+ *
+ * <p>
+ * A literal string is a string of ASCII characters enclosed by '(' and ')'.
+ * Balanced pairs of '(' and ')' are allowed within the string. Unbalanced '('
+ * or ')' must be escaped as '\(' or '\)'.
+ * </p>
+ *
+ * @param pdf
+ * The PDF data.
+ * @param index
+ * The index.
+ * @return Returns the result of the parsing operation.
+ */
+ public static LiteralStringParseResult parseLiteralString(final byte[] pdf,
+ final int index)
+ {
+ LiteralStringParseResult lspr = new LiteralStringParseResult();
+ lspr.start_index = index;
+
+ assert pdf[lspr.start_index] == PDFNames.DELIMITER_STRING_OPEN;
+
+ lspr.content_start_index = lspr.start_index + 1;
+
+ int cur_index = lspr.content_start_index;
+ int parenthesis_stack = 0;
+ for (;;)
+ {
+ if (pdf[cur_index] == '\\' && (pdf[cur_index + 1] == PDFNames.DELIMITER_STRING_CLOSE || pdf[cur_index + 1] == PDFNames.DELIMITER_STRING_OPEN))
+ {
+ cur_index += 2;
+ continue;
+ }
+ if (pdf[cur_index] == PDFNames.DELIMITER_STRING_OPEN)
+ {
+ parenthesis_stack++;
+ }
+ if (pdf[cur_index] == PDFNames.DELIMITER_STRING_CLOSE)
+ {
+ assert parenthesis_stack >= 0;
+
+ if (parenthesis_stack == 0)
+ {
+ break;
+ }
+
+ assert parenthesis_stack > 0;
+ parenthesis_stack--;
+
+ }
+
+ cur_index++;
+ }
+
+ lspr.content_end_index = cur_index;
+ assert pdf[lspr.content_end_index] == PDFNames.DELIMITER_STRING_CLOSE;
+
+ lspr.next_index = lspr.content_end_index + 1;
+
+ return lspr;
+ }
+
+ protected static boolean isHex(final byte data)
+ {
+ return isNumeric(data) || ('a' <= data && data <= 'f') || ('A' <= data && data <= 'f');
+ }
+
+ /**
+ * Parses a hexadecimal string.
+ *
+ * @param pdf
+ * The PDF data.
+ * @param index
+ * The index.
+ * @return Returns the result of the parsing operation.
+ */
+ public static HexStringParseResult parseHexString(final byte[] pdf,
+ final int index)
+ {
+ HexStringParseResult hspr = new HexStringParseResult();
+ hspr.start_index = index;
+
+ assert pdf[hspr.start_index] == PDFNames.DELIMITER_HEXSTRING_OPEN;
+
+ hspr.content_start_index = hspr.start_index + 1;
+
+ int cur_index = hspr.content_start_index;
+ while (isHex(pdf[cur_index]) || isWhitespace(pdf[cur_index]))
+ {
+ cur_index++;
+ }
+
+ hspr.content_end_index = cur_index;
+ assert pdf[hspr.content_end_index] == PDFNames.DELIMITER_HEXSTRING_CLOSE;
+
+ hspr.next_index = hspr.content_end_index + 1;
+
+ return hspr;
+ }
+
+ public static ArrayParseResult parseArray(final byte[] pdf, final int index)
+ {
+ ArrayParseResult apr = new ArrayParseResult();
+ apr.start_index = index;
+ assert pdf[apr.start_index] == PDFNames.DELIMITER_ARRAY_OPEN;
+
+ apr.content_start_index = apr.start_index + 1;
+
+ apr.elements = new ArrayList();
+
+ int cur_index = skipWhitespace(pdf, apr.content_start_index);
+ for (;;)
+ {
+ if (pdf[cur_index] == PDFNames.DELIMITER_ARRAY_CLOSE)
+ {
+ break;
+ }
+
+ ParseResult pr = parseUnknownObject(pdf, cur_index);
+ apr.elements.add(pr);
+
+ cur_index = skipWhitespace(pdf, pr.next_index);
+ }
+ assert pdf[cur_index] == PDFNames.DELIMITER_ARRAY_CLOSE;
+
+ apr.content_end_index = cur_index;
+ assert pdf[apr.content_end_index] == PDFNames.DELIMITER_ARRAY_CLOSE;
+
+ apr.next_index = apr.content_end_index + 1;
+ return apr;
+ }
+
+ /**
+ * Parses a PDF Name.
+ *
+ * @param pdf
+ * The PDF data.
+ * @param index
+ * The index.
+ * @return Returns the result of this parsing operation.
+ */
+ public static NameParseResult parseName(final byte[] pdf, final int index)
+ {
+ NameParseResult npr = new NameParseResult();
+ npr.start_index = index;
+
+ assert pdf[npr.start_index] == PDFNames.DELIMITER_NAME;
+
+ npr.name_start_index = npr.start_index + 1;
+
+ assert isRegular(pdf[npr.name_start_index]);
+
+ int cur_index = npr.name_start_index;
+ while (isRegular(pdf[cur_index]))
+ {
+ cur_index++;
+ }
+ assert !isRegular(pdf[cur_index]);
+
+ npr.next_index = cur_index;
+
+ return npr;
+ }
+
+ public static DictionaryParseResult parseDictionary(final byte[] pdf,
+ final int index)
+ {
+ DictionaryParseResult dpr = new DictionaryParseResult();
+ dpr.start_index = index;
+
+ assert ByteArrayUtils.compareByteArrays(pdf, index, PDFNames.DICT_START_STR);
+
+ dpr.content_start_index = dpr.start_index + PDFNames.DICT_START_STR.length;
+
+ dpr.names = new ArrayList();
+ dpr.values = new ArrayList();
+
+ int cur_index = skipWhitespace(pdf, dpr.content_start_index);
+ for (;;)
+ {
+ if (ByteArrayUtils.compareByteArrays(pdf, cur_index, PDFNames.DICT_END_STR))
+ {
+ break;
+ }
+
+ NameParseResult npr = parseName(pdf, cur_index);
+ dpr.names.add(npr);
+
+ cur_index = npr.next_index;
+ cur_index = skipWhitespace(pdf, cur_index);
+
+ ParseResult pr = parseUnknownObject(pdf, cur_index);
+ dpr.values.add(pr);
+
+ cur_index = pr.next_index;
+ cur_index = skipWhitespace(pdf, cur_index);
+ }
+
+ dpr.content_end_index = cur_index;
+ assert ByteArrayUtils.compareByteArrays(pdf, dpr.content_end_index, PDFNames.DICT_END_STR);
+ dpr.next_index = dpr.content_end_index + PDFNames.DICT_END_STR.length;
+
+ return dpr;
+ }
+
+ /**
+ * Parses a stream.
+ *
+ * @param pdf
+ * The PDF data.
+ * @param index
+ * The index.
+ * @param dpr
+ * The DictionaryParseResult of the stream's dictionary. This
+ * dictionary must precede the stream keyword. Usually this is
+ * provided in the stream object's dictionary via the /Length field.
+ * @return Returns the result of this parsing operation.
+ */
+ public static StreamParseResult parseStream(final byte[] pdf,
+ final int index, final DictionaryParseResult dpr)
+ {
+ StreamParseResult spr = new StreamParseResult();
+ spr.stream_dictionary = dpr;
+ spr.start_index = spr.stream_dictionary.start_index;
+ spr.stream_start_index = index;
+ assert ByteArrayUtils.compareByteArrays(pdf, index, PDFNames.STREAM_STR);
+
+ // assert that the provided dictionary really belongs to this stream
+ assert spr.stream_start_index == skipWhitespace(pdf, spr.stream_dictionary.next_index);
+
+ // see PDF Spec 1.4 chapter 3.2.7
+ assert pdf[spr.stream_start_index + PDFNames.STREAM_STR.length] == PDFNames.WHITESPACE_LF || (pdf[spr.stream_start_index + PDFNames.STREAM_STR.length] == PDFNames.WHITESPACE_CR && pdf[spr.stream_start_index + PDFNames.STREAM_STR.length + 1] == PDFNames.WHITESPACE_LF);
+ spr.content_start_index = skipNewline(pdf, spr.stream_start_index + PDFNames.STREAM_STR.length);
+
+ int length = -1;
+ for (int i = 0; i < spr.stream_dictionary.names.size(); i++)
+ {
+ NameParseResult name = (NameParseResult) spr.stream_dictionary.names.get(i);
+ if (ByteArrayUtils.compareByteArrays(pdf, name.name_start_index, PDFNames.LENGTH_STR))
+ {
+ ParseResult pr = (ParseResult) spr.stream_dictionary.values.get(i);
+ NumberParseResult npr = null;
+ if (pr instanceof IndirectObjectReferenceParseResult)
+ {
+ log.debug("An object stream with indirect length - cannot parse this instantly - parse later again.");
+ spr.content_end_index = -1;
+ spr.next_index = -1;
+ return spr;
+
+ }
+ else
+ {
+ npr = (NumberParseResult) pr;
+ }
+ assert npr != null;
+
+ length = npr.number;
+ break;
+ }
+
+ }
+ assert length >= 0;
+
+ spr.content_end_index = spr.content_start_index + length;
+
+ int endstr_index = spr.content_end_index;
+ if (isNewline(pdf, endstr_index))
+ {
+ endstr_index = skipWhitespace(pdf, endstr_index);
+ }
+ assert ByteArrayUtils.compareByteArrays(pdf, endstr_index, PDFNames.ENDSTREAM_STR);
+
+ spr.next_index = endstr_index + PDFNames.ENDSTREAM_STR.length;
+
+ return spr;
+ }
+
+ public static NullParseResult parseNull(final byte[] pdf, final int index)
+ {
+ NullParseResult npr = new NullParseResult();
+ npr.start_index = index;
+
+ assert ByteArrayUtils.compareByteArrays(pdf, npr.start_index, PDFNames.NULL_STR);
+
+ npr.next_index = npr.start_index + PDFNames.NULL_STR.length;
+
+ return npr;
+ }
+
+ public static int getObjectOffsetFromXRefByIndirectObjectReference(
+ XRefSectionParseResult xpr, IndirectObjectReference ior)
+ {
+ Iterator it = xpr.xref_subsections.iterator();
+ while (it.hasNext())
+ {
+ XRefSubSectionParseResult section = (XRefSubSectionParseResult) it.next();
+
+ for (int i = 0; i < section.xref_lines.size(); i++)
+ {
+ if (section.start_obj_number + i == ior.object_number)
+ {
+ XRefLineParseResult lpr = (XRefLineParseResult) section.xref_lines.get(i);
+ return lpr.object_offset;
+ }
+ }
+ }
+
+ return -1;
+ }
+
+ public static HeaderParseResult parseHeader(final byte[] pdf, final int index)
+ {
+ HeaderParseResult hpr = new HeaderParseResult();
+ hpr.start_index = index;
+
+ assert pdf[hpr.start_index] == PDFNames.COMMENT;
+
+ assert ByteArrayUtils.compareByteArrays(pdf, hpr.start_index + 1, PDFNames.PDF_VERSION_STR);
+
+ hpr.major_index = hpr.start_index + 1 + PDFNames.PDF_VERSION_STR.length;
+
+ IntegerParseResult major_ipr = parseUnsignedInteger(pdf, hpr.major_index);
+ hpr.major = major_ipr.number;
+ assert hpr.major >= 1;
+
+ assert pdf[major_ipr.next_index] == PDFNames.PDF_VERSION_SEPARATOR;
+
+ hpr.minor_index = major_ipr.next_index + 1;
+
+ IntegerParseResult minor_ipr = parseUnsignedInteger(pdf, hpr.minor_index);
+ hpr.minor = minor_ipr.number;
+ assert hpr.minor >= 0;
+
+ assert isWhitespace(pdf[minor_ipr.next_index]);
+ hpr.binary_characters_index = skipWhitespace(pdf, minor_ipr.next_index);
+
+ assert pdf[hpr.binary_characters_index] == PDFNames.COMMENT;
+
+ hpr.next_index = skipToNewline(pdf, hpr.binary_characters_index);
+ return hpr;
+ }
+
+ /**
+ * Parses a PDF footer.
+ *
+ * <p>
+ * A PDF footer starts with the xref, followed by the trailer, the startxref
+ * and the EOF marker.
+ * </p>
+ *
+ * @param pdf
+ * The PDF data.
+ * @param index
+ * The index.
+ * @return Returns the result of the parsing operation.
+ *
+ * @see FooterParseResult
+ */
+ public static FooterParseResult parseFooter(final byte[] pdf, final int index)
+ {
+ FooterParseResult fpr = new FooterParseResult();
+ fpr.start_index = index;
+
+ fpr.xpr = PDFUtils.parseXRefSection(pdf, fpr.start_index);
+
+ fpr.tpr = PDFUtils.parseTrailer(pdf, fpr.xpr.next_index);
+
+ fpr.sxpr = PDFUtils.parseStartXRef(pdf, fpr.tpr.next_index);
+
+ fpr.eofpr = PDFUtils.parseEOF(pdf, fpr.sxpr.next_index);
+
+ fpr.next_index = fpr.eofpr.next_index;
+ return fpr;
+ }
+
+}