aboutsummaryrefslogtreecommitdiff
path: root/pdf-as-lib/src/main/java/at/knowcenter/wag/exactparser
diff options
context:
space:
mode:
Diffstat (limited to 'pdf-as-lib/src/main/java/at/knowcenter/wag/exactparser')
-rw-r--r--pdf-as-lib/src/main/java/at/knowcenter/wag/exactparser/ByteArrayUtils.java148
-rw-r--r--pdf-as-lib/src/main/java/at/knowcenter/wag/exactparser/ParseDocument.java272
-rw-r--r--pdf-as-lib/src/main/java/at/knowcenter/wag/exactparser/parsing/IndirectObjectReference.java57
-rw-r--r--pdf-as-lib/src/main/java/at/knowcenter/wag/exactparser/parsing/PDFNames.java184
-rw-r--r--pdf-as-lib/src/main/java/at/knowcenter/wag/exactparser/parsing/PDFUtils.java1405
-rw-r--r--pdf-as-lib/src/main/java/at/knowcenter/wag/exactparser/parsing/results/ArrayParseResult.java42
-rw-r--r--pdf-as-lib/src/main/java/at/knowcenter/wag/exactparser/parsing/results/BooleanParseResult.java38
-rw-r--r--pdf-as-lib/src/main/java/at/knowcenter/wag/exactparser/parsing/results/ContainerParseResult.java45
-rw-r--r--pdf-as-lib/src/main/java/at/knowcenter/wag/exactparser/parsing/results/DictionaryParseResult.java41
-rw-r--r--pdf-as-lib/src/main/java/at/knowcenter/wag/exactparser/parsing/results/EOFParseResult.java47
-rw-r--r--pdf-as-lib/src/main/java/at/knowcenter/wag/exactparser/parsing/results/FooterParseResult.java53
-rw-r--r--pdf-as-lib/src/main/java/at/knowcenter/wag/exactparser/parsing/results/HeaderParseResult.java48
-rw-r--r--pdf-as-lib/src/main/java/at/knowcenter/wag/exactparser/parsing/results/HexStringParseResult.java36
-rw-r--r--pdf-as-lib/src/main/java/at/knowcenter/wag/exactparser/parsing/results/IndirectObjectReferenceParseResult.java44
-rw-r--r--pdf-as-lib/src/main/java/at/knowcenter/wag/exactparser/parsing/results/IntegerParseResult.java36
-rw-r--r--pdf-as-lib/src/main/java/at/knowcenter/wag/exactparser/parsing/results/LiteralStringParseResult.java37
-rw-r--r--pdf-as-lib/src/main/java/at/knowcenter/wag/exactparser/parsing/results/NameParseResult.java35
-rw-r--r--pdf-as-lib/src/main/java/at/knowcenter/wag/exactparser/parsing/results/NullParseResult.java34
-rw-r--r--pdf-as-lib/src/main/java/at/knowcenter/wag/exactparser/parsing/results/NumberParseResult.java41
-rw-r--r--pdf-as-lib/src/main/java/at/knowcenter/wag/exactparser/parsing/results/ObjectHeaderParseResult.java51
-rw-r--r--pdf-as-lib/src/main/java/at/knowcenter/wag/exactparser/parsing/results/ObjectParseResult.java50
-rw-r--r--pdf-as-lib/src/main/java/at/knowcenter/wag/exactparser/parsing/results/ParseResult.java50
-rw-r--r--pdf-as-lib/src/main/java/at/knowcenter/wag/exactparser/parsing/results/StartXRefParseResult.java36
-rw-r--r--pdf-as-lib/src/main/java/at/knowcenter/wag/exactparser/parsing/results/StreamParseResult.java41
-rw-r--r--pdf-as-lib/src/main/java/at/knowcenter/wag/exactparser/parsing/results/TrailerParseResult.java84
-rw-r--r--pdf-as-lib/src/main/java/at/knowcenter/wag/exactparser/parsing/results/XRefLineParseResult.java40
-rw-r--r--pdf-as-lib/src/main/java/at/knowcenter/wag/exactparser/parsing/results/XRefSectionParseResult.java66
-rw-r--r--pdf-as-lib/src/main/java/at/knowcenter/wag/exactparser/parsing/results/XRefSubSectionParseResult.java59
28 files changed, 3120 insertions, 0 deletions
diff --git a/pdf-as-lib/src/main/java/at/knowcenter/wag/exactparser/ByteArrayUtils.java b/pdf-as-lib/src/main/java/at/knowcenter/wag/exactparser/ByteArrayUtils.java
new file mode 100644
index 0000000..4442650
--- /dev/null
+++ b/pdf-as-lib/src/main/java/at/knowcenter/wag/exactparser/ByteArrayUtils.java
@@ -0,0 +1,148 @@
+/**
+ * <copyright> Copyright 2006 by Know-Center, Graz, Austria </copyright>
+ * PDF-AS has been contracted by the E-Government Innovation Center EGIZ, a
+ * joint initiative of the Federal Chancellery Austria and Graz University of
+ * Technology.
+ *
+ * Licensed under the EUPL, Version 1.1 or - as soon they will be approved by
+ * the European Commission - subsequent versions of the EUPL (the "Licence");
+ * You may not use this work except in compliance with the Licence.
+ * You may obtain a copy of the Licence at:
+ * http://www.osor.eu/eupl/
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the Licence is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the Licence for the specific language governing permissions and
+ * limitations under the Licence.
+ *
+ * This product combines work with different licenses. See the "NOTICE" text
+ * file for details on the various modules and licenses.
+ * The "NOTICE" text file is part of the distribution. Any derivative works
+ * that you distribute must include a readable copy of the "NOTICE" text file.
+ *
+ * $Id: ByteArrayUtils.java,v 1.1 2006/08/25 17:00:59 wprinz Exp $
+ */
+package at.knowcenter.wag.exactparser;
+
+import java.io.UnsupportedEncodingException;
+
+/**
+ * Static helper methods for searching and comparing raw byte arrays.
+ *
+ * <p>
+ * The class is abstract and offers static methods only; it is not meant to be
+ * instantiated.
+ * </p>
+ *
+ * @author wprinz
+ */
+public abstract class ByteArrayUtils {
+
+  /**
+   * Name of the charset used when byte arrays are converted to Strings.
+   */
+  public static final String BYTE_ARRAY_ENCODING = "ISO-8859-1";
+
+  /**
+   * Converts the byte array to a String using {@link #BYTE_ARRAY_ENCODING}.
+   *
+   * @param data
+   *          The byte array.
+   * @return Returns the String.
+   * @throws UnsupportedEncodingException
+   *           Forwarded exception
+   */
+  public static String convertByteArrayToString(final byte[] data) throws UnsupportedEncodingException {
+    return new String(data, BYTE_ARRAY_ENCODING);
+  }
+
+  /**
+   * Finds the first occurrence of search in data, starting the search at the
+   * given index.
+   *
+   * <p>
+   * An empty search array matches at every position from 0 to data.length
+   * (inclusive), so the start index itself is returned in that case.
+   * </p>
+   *
+   * @param data
+   *          The big array.
+   * @param index
+   *          The index to start searching from.
+   * @param search
+   *          The sought array.
+   * @return Returns the index of the found occurrence or -1 if nothing was
+   *         found.
+   * @throws IndexOutOfBoundsException
+   *           If index is negative and at least one position has to be
+   *           compared.
+   */
+  public static int indexOf(final byte[] data, final int index, final byte[] search) {
+    if (search.length == 0 && index == data.length) {
+      // The empty pattern also matches directly behind the last byte.
+      // compareByteArrays rejects this position as out of bounds, so it is
+      // answered here.
+      return index;
+    }
+
+    final int last_start = data.length - search.length;
+    for (int start = index; start <= last_start; start++) {
+      if (compareByteArrays(data, start, search)) {
+        return start;
+      }
+    }
+    return -1;
+  }
+
+  /**
+   * Finds the last occurrence of the array.
+   *
+   * <p>
+   * An empty search array matches directly behind the last byte, so
+   * data.length is returned in that case.
+   * </p>
+   *
+   * @param data
+   *          The source array to be searched.
+   * @param search
+   *          The sought array.
+   * @return Returns the index of the last occurrence - or -1 if nothing was
+   *         found.
+   */
+  public static int lastIndexOf(final byte[] data, byte[] search) {
+    if (search.length == 0) {
+      // Without this guard the first comparison would start at data.length,
+      // which compareByteArrays rejects with an IndexOutOfBoundsException.
+      return data.length;
+    }
+
+    for (int start = data.length - search.length; start >= 0; start--) {
+      if (compareByteArrays(data, start, search)) {
+        return start;
+      }
+    }
+    return -1;
+  }
+
+  /**
+   * Checks whether search occurs in data exactly at the given index.
+   *
+   * @param data
+   *          The source array.
+   * @param index
+   *          An index into the source array marking where the comparison
+   *          should start.
+   * @param search
+   *          The sought array.
+   * @return Returns true if the first search.length bytes of data+index and
+   *         search match exactly. Returns false otherwise, in particular if
+   *         fewer than search.length bytes remain behind index.
+   * @throws IndexOutOfBoundsException
+   *           If index is negative or not smaller than data.length.
+   */
+  public static boolean compareByteArrays(final byte[] data, final int index, byte[] search) {
+    if (index < 0 || index >= data.length) {
+      throw new IndexOutOfBoundsException("The index " + index + " is out of bounds");
+    }
+
+    // Not enough bytes left behind index. This also covers the case that
+    // search is longer than the whole data array.
+    if (search.length > data.length - index) {
+      return false;
+    }
+
+    for (int i = 0; i < search.length; i++) {
+      if (data[index + i] != search[i]) {
+        return false;
+      }
+    }
+
+    return true;
+  }
+
+  /**
+   * Checks, if the sought data byte is contained within the byte array.
+   *
+   * @param byte_array
+   *          The byte array.
+   * @param data
+   *          A data byte sought within the byte array.
+   * @return Returns true, if the data byte was found (at least once) in the
+   *         byte array, false otherwise.
+   */
+  public static boolean contains(final byte[] byte_array, final byte data) {
+    for (int i = 0; i < byte_array.length; i++) {
+      if (byte_array[i] == data) {
+        return true;
+      }
+    }
+    return false;
+  }
+
+}
diff --git a/pdf-as-lib/src/main/java/at/knowcenter/wag/exactparser/ParseDocument.java b/pdf-as-lib/src/main/java/at/knowcenter/wag/exactparser/ParseDocument.java
new file mode 100644
index 0000000..fbaa4de
--- /dev/null
+++ b/pdf-as-lib/src/main/java/at/knowcenter/wag/exactparser/ParseDocument.java
@@ -0,0 +1,272 @@
+/**
+ * <copyright> Copyright 2006 by Know-Center, Graz, Austria </copyright>
+ * PDF-AS has been contracted by the E-Government Innovation Center EGIZ, a
+ * joint initiative of the Federal Chancellery Austria and Graz University of
+ * Technology.
+ *
+ * Licensed under the EUPL, Version 1.1 or - as soon they will be approved by
+ * the European Commission - subsequent versions of the EUPL (the "Licence");
+ * You may not use this work except in compliance with the Licence.
+ * You may obtain a copy of the Licence at:
+ * http://www.osor.eu/eupl/
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the Licence is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the Licence for the specific language governing permissions and
+ * limitations under the Licence.
+ *
+ * This product combines work with different licenses. See the "NOTICE" text
+ * file for details on the various modules and licenses.
+ * The "NOTICE" text file is part of the distribution. Any derivative works
+ * that you distribute must include a readable copy of the "NOTICE" text file.
+ *
+ * $Id: ParseDocument.java,v 1.1 2006/08/25 17:00:59 wprinz Exp $
+ */
+package at.knowcenter.wag.exactparser;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+
+import at.knowcenter.wag.exactparser.parsing.PDFUtils;
+import at.knowcenter.wag.exactparser.parsing.results.DictionaryParseResult;
+import at.knowcenter.wag.exactparser.parsing.results.FooterParseResult;
+import at.knowcenter.wag.exactparser.parsing.results.IndirectObjectReferenceParseResult;
+import at.knowcenter.wag.exactparser.parsing.results.NameParseResult;
+import at.knowcenter.wag.exactparser.parsing.results.NumberParseResult;
+import at.knowcenter.wag.exactparser.parsing.results.ObjectParseResult;
+import at.knowcenter.wag.exactparser.parsing.results.StartXRefParseResult;
+import at.knowcenter.wag.exactparser.parsing.results.TrailerParseResult;
+import at.knowcenter.wag.exactparser.parsing.results.XRefSectionParseResult;
+
+
+/**
+ * Test class that walks the footers (cross reference section plus trailer) of a
+ * PDF file and looks for the EGIZ signature dictionary.
+ *
+ * @author wprinz
+ */
+public class ParseDocument
+{
+
+  /**
+   * Document parsed by {@link #main(String[])} when no path is passed on the
+   * command line.
+   */
+  public static final String DOCUMENT = "C:/wprinz/temp.pdf";
+
+  /**
+   * Name looked up in the root dictionary to detect an EGIZ signature
+   * dictionary ("EGIZSigDict").
+   */
+  public static final byte[] EGIZ_DICT_NAME = { 'E', 'G', 'I', 'Z', 'S', 'i',
+      'g', 'D', 'i', 'c', 't' };
+
+  /**
+   * Name of the entry in the EGIZ dictionary that holds the original document
+   * size ("ODS").
+   */
+  public static final byte[] EGIZ_ODS_NAME = { 'O', 'D', 'S' };
+
+  /**
+   * The name "SigXObject". It is not referenced within this class.
+   */
+  public static final byte[] EGIZ_XOBJ_NAME = { 'S', 'i', 'g', 'X', 'O', 'b',
+      'j', 'e', 'c', 't' };
+
+  /**
+   * Parses a PDF file and prints one line per footer block, marking the blocks
+   * whose root dictionary contains the EGIZ dictionary name.
+   *
+   * @param args
+   *          Optional: args[0] is the path of the PDF file. Without arguments
+   *          {@link #DOCUMENT} is used.
+   */
+  public static void main(String[] args)
+  {
+    final String path = (args != null && args.length > 0) ? args[0] : DOCUMENT;
+
+    try
+    {
+      byte[] pdf = readFile(new File(path));
+
+      List blocks = parseDocument(pdf);
+
+      for (Iterator it = blocks.iterator(); it.hasNext();)
+      {
+        FooterParseResult bpr = (FooterParseResult) it.next();
+
+        System.out.print("block from " + bpr.start_index + " to " + bpr.next_index);
+
+        if (bpr.tpr.root != null)
+        {
+          int root_index = PDFUtils.getObjectOffsetFromXRefByIndirectObjectReference(bpr.xpr, bpr.tpr.root.ior);
+          ObjectParseResult root_opr = PDFUtils.parseObject(pdf, root_index);
+          DictionaryParseResult root_dpr = (DictionaryParseResult) root_opr.object;
+
+          int egiz_index = PDFUtils.indexOfName(pdf, root_dpr.names, EGIZ_DICT_NAME);
+          if (egiz_index >= 0)
+          {
+            System.out.print(" == EGIZDict");
+          }
+        }
+
+        System.out.println();
+      }
+
+    }
+    catch (IOException e)
+    {
+      e.printStackTrace();
+    }
+  }
+
+  /**
+   * Parses all footers of the document, starting at the last startxref and
+   * following the trailers' predecessor links.
+   *
+   * @param pdf
+   *          The complete PDF file.
+   * @return Returns the list of {@link FooterParseResult} objects. Each parsed
+   *         footer is inserted at the front, so the footer parsed last (the
+   *         one without predecessor) comes first.
+   * @throws IOException
+   *           If the predecessor links form a cycle.
+   */
+  public static List parseDocument(final byte[] pdf) throws IOException
+  {
+    List blocks = new ArrayList();
+
+    // Offsets of the cross reference sections parsed so far. A trailer that
+    // points back to one of them would otherwise keep this loop running until
+    // the memory is exhausted.
+    List visited_xref_indices = new ArrayList();
+
+    int last_start_xref = PDFUtils.findLastStartXRef(pdf);
+    StartXRefParseResult last_sxpr = PDFUtils.parseStartXRef(pdf, last_start_xref);
+    int xref_index = last_sxpr.xref_index;
+
+    for (;;)
+    {
+      // new Integer(..) is used because the visible code sticks to
+      // pre-Java-5 constructs (raw collections, no annotations).
+      Integer visited_key = new Integer(xref_index);
+      if (visited_xref_indices.contains(visited_key))
+      {
+        throw new IOException("Cyclic Prev chain: the cross reference section at offset " + xref_index + " was already parsed.");
+      }
+      visited_xref_indices.add(visited_key);
+
+      FooterParseResult fpr = PDFUtils.parseFooter(pdf, xref_index);
+      blocks.add(0, fpr);
+
+      if (!fpr.tpr.has_predecessor)
+      {
+        // eventually parse the PDF header here.
+        break;
+      }
+
+      xref_index = fpr.tpr.getPrev();
+    }
+
+    return blocks;
+  }
+
+  /**
+   * Reads the file and, if its last trailer's root dictionary contains the EGIZ
+   * dictionary, cuts the file down to the original document size stored there.
+   *
+   * <p>
+   * Diagnostic information is printed to System.out.
+   * </p>
+   *
+   * @param file_name
+   *          The PDF file.
+   * @return Returns the first ODS bytes of the file if the EGIZ dictionary was
+   *         found, the complete file content otherwise.
+   * @throws IOException
+   *           If the file cannot be read completely, or if the EGIZ dictionary
+   *           has no ODS entry or an ODS value outside of the file.
+   */
+  public static byte[] getOriginalDocument(final File file_name) throws IOException
+  {
+    byte[] pdf = readFile(file_name);
+
+    int last_start_xref = PDFUtils.findLastStartXRef(pdf);
+
+    StartXRefParseResult sxpr = PDFUtils.parseStartXRef(pdf, last_start_xref);
+
+    XRefSectionParseResult xpr = PDFUtils.parseXRefSection(pdf, sxpr.xref_index);
+
+    TrailerParseResult tpr = PDFUtils.parseTrailer(pdf, xpr.next_index);
+
+    System.out.println("tpr.info = " + tpr.info);
+    System.out.println("tpr.root = " + tpr.root);
+    System.out.println("tpr.size = " + tpr.size);
+
+    System.out.println("tpr.has_predecessor = " + tpr.has_predecessor);
+    if (tpr.has_predecessor)
+    {
+      System.out.println("tpr.prev = " + tpr.getPrev());
+    }
+
+    // A trailer without root reference cannot lead to an EGIZ dictionary. It is
+    // handled like a document without EGIZ block, as in main.
+    DictionaryParseResult root_dpr = null;
+    int egiz_index = -1;
+    if (tpr.root != null)
+    {
+      int root_index = PDFUtils.getObjectOffsetFromXRefByIndirectObjectReference(xpr, tpr.root.ior);
+      ObjectParseResult root_opr = PDFUtils.parseObject(pdf, root_index);
+      root_dpr = (DictionaryParseResult) root_opr.object;
+      egiz_index = PDFUtils.indexOfName(pdf, root_dpr.names, EGIZ_DICT_NAME);
+    }
+
+    if (egiz_index < 0)
+    {
+      System.out.println("No EGIZ block found. ==> the whold document is the original document");
+      return pdf;
+    }
+
+    System.out.println("The document is EGIZ-signed. ==> extract original document");
+
+    IndirectObjectReferenceParseResult egiz_iorpr = (IndirectObjectReferenceParseResult) root_dpr.values.get(egiz_index);
+    System.out.println("EGIZ signature info at = " + egiz_iorpr);
+
+    int egiz_dict_index = PDFUtils.getObjectOffsetFromXRefByIndirectObjectReference(xpr, egiz_iorpr.ior);
+    ObjectParseResult opr = PDFUtils.parseObject(pdf, egiz_dict_index);
+    DictionaryParseResult egiz_dict = (DictionaryParseResult) opr.object;
+
+    for (int i = 0; i < egiz_dict.names.size(); i++)
+    {
+      NameParseResult npr = (NameParseResult) egiz_dict.names.get(i);
+      int len = npr.next_index - npr.name_start_index;
+      byte[] name = new byte[len];
+      System.arraycopy(pdf, npr.name_start_index, name, 0, len);
+      System.out.print("  " + new String(name, "US-ASCII") + " = ");
+
+      System.out.println(egiz_dict.values.get(i));
+    }
+
+    // Original document size
+    int key = PDFUtils.indexOfName(pdf, egiz_dict.names, EGIZ_ODS_NAME);
+    if (key < 0)
+    {
+      throw new IOException("The EGIZ dictionary contains no ODS (original document size) entry.");
+    }
+    NumberParseResult ods = (NumberParseResult) egiz_dict.values.get(key);
+
+    int original_document_size = ods.number;
+    System.out.println("Original Document Size = " + original_document_size);
+
+    if (original_document_size < 0 || original_document_size > pdf.length)
+    {
+      throw new IOException("The original document size " + original_document_size + " lies outside of the file (" + pdf.length + " bytes).");
+    }
+
+    byte[] original = new byte[original_document_size];
+    System.arraycopy(pdf, 0, original, 0, original_document_size);
+
+    return original;
+  }
+
+  /**
+   * Reads the complete content of the file into a byte array and closes the
+   * stream in every case.
+   *
+   * @param file
+   *          The file to be read.
+   * @return Returns the file content.
+   * @throws IOException
+   *           If the file is larger than a byte array can hold, ends before
+   *           file.length() bytes were read, or cannot be read at all.
+   */
+  private static byte[] readFile(final File file) throws IOException
+  {
+    final long length = file.length();
+    if (length > Integer.MAX_VALUE)
+    {
+      throw new IOException("The file is too large to be read into memory: " + file);
+    }
+
+    final byte[] content = new byte[(int) length];
+    final FileInputStream fis = new FileInputStream(file);
+    try
+    {
+      // A single read() may deliver fewer bytes than requested, so keep
+      // reading until the buffer is full.
+      int offset = 0;
+      while (offset < content.length)
+      {
+        int read = fis.read(content, offset, content.length - offset);
+        if (read < 0)
+        {
+          throw new IOException("Unexpected end of file after " + offset + " of " + content.length + " bytes: " + file);
+        }
+        offset += read;
+      }
+    }
+    finally
+    {
+      fis.close();
+    }
+    return content;
+  }
+
+}
diff --git a/pdf-as-lib/src/main/java/at/knowcenter/wag/exactparser/parsing/IndirectObjectReference.java b/pdf-as-lib/src/main/java/at/knowcenter/wag/exactparser/parsing/IndirectObjectReference.java
new file mode 100644
index 0000000..2bfdf56
--- /dev/null
+++ b/pdf-as-lib/src/main/java/at/knowcenter/wag/exactparser/parsing/IndirectObjectReference.java
@@ -0,0 +1,57 @@
+/**
+ * <copyright> Copyright 2006 by Know-Center, Graz, Austria </copyright>
+ * PDF-AS has been contracted by the E-Government Innovation Center EGIZ, a
+ * joint initiative of the Federal Chancellery Austria and Graz University of
+ * Technology.
+ *
+ * Licensed under the EUPL, Version 1.1 or - as soon they will be approved by
+ * the European Commission - subsequent versions of the EUPL (the "Licence");
+ * You may not use this work except in compliance with the Licence.
+ * You may obtain a copy of the Licence at:
+ * http://www.osor.eu/eupl/
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the Licence is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the Licence for the specific language governing permissions and
+ * limitations under the Licence.
+ *
+ * This product combines work with different licenses. See the "NOTICE" text
+ * file for details on the various modules and licenses.
+ * The "NOTICE" text file is part of the distribution. Any derivative works
+ * that you distribute must include a readable copy of the "NOTICE" text file.
+ *
+ * $Id: IndirectObjectReference.java,v 1.1 2006/08/25 17:00:59 wprinz Exp $
+ */
+package at.knowcenter.wag.exactparser.parsing;
+
+/**
+ * Holds the object identifier of an indirect object, i.e. the pair of object
+ * number and generation number.
+ *
+ * <p>
+ * A direct object is structurally part of another object, for example a String
+ * that is the value of some key in a dictionary. An indirect object is not
+ * contained within another object.
+ * </p>
+ * <p>
+ * The object number together with the generation number uniquely identifies
+ * one specific indirect object. In PDF such an identifier may be used to refer
+ * to that object.
+ * </p>
+ *
+ * @author wprinz
+ */
+public class IndirectObjectReference {
+
+  /**
+   * The object number part of the identifier.
+   */
+  public int object_number;
+
+  /**
+   * The generation number part of the identifier.
+   */
+  public int generation_number;
+
+  /**
+   * Renders the identifier as object number and generation number separated by
+   * a single space.
+   *
+   * @return Returns the textual form, e.g. "12 0".
+   */
+  //@Override
+  public String toString() {
+    final StringBuffer text = new StringBuffer();
+    text.append(object_number);
+    text.append(" ");
+    text.append(generation_number);
+    return text.toString();
+  }
+
+}
diff --git a/pdf-as-lib/src/main/java/at/knowcenter/wag/exactparser/parsing/PDFNames.java b/pdf-as-lib/src/main/java/at/knowcenter/wag/exactparser/parsing/PDFNames.java
new file mode 100644
index 0000000..0ee5863
--- /dev/null
+++ b/pdf-as-lib/src/main/java/at/knowcenter/wag/exactparser/parsing/PDFNames.java
@@ -0,0 +1,184 @@
+/**
+ * <copyright> Copyright 2006 by Know-Center, Graz, Austria </copyright>
+ * PDF-AS has been contracted by the E-Government Innovation Center EGIZ, a
+ * joint initiative of the Federal Chancellery Austria and Graz University of
+ * Technology.
+ *
+ * Licensed under the EUPL, Version 1.1 or - as soon they will be approved by
+ * the European Commission - subsequent versions of the EUPL (the "Licence");
+ * You may not use this work except in compliance with the Licence.
+ * You may obtain a copy of the Licence at:
+ * http://www.osor.eu/eupl/
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the Licence is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the Licence for the specific language governing permissions and
+ * limitations under the Licence.
+ *
+ * This product combines work with different licenses. See the "NOTICE" text
+ * file for details on the various modules and licenses.
+ * The "NOTICE" text file is part of the distribution. Any derivative works
+ * that you distribute must include a readable copy of the "NOTICE" text file.
+ *
+ * $Id: PDFNames.java,v 1.1 2006/08/25 17:00:59 wprinz Exp $
+ */
+package at.knowcenter.wag.exactparser.parsing;
+
+/**
+ * Abstract class that collects frequently used PDF byte constants.
+ *
+ * <p>
+ * The PDF specification partitions the character set (ASCII) into three groups:
+ * </p>
+ * <ul>
+ * <li>Whitespace characters (space, tab, etc., but also newline and carriage
+ * return) separate tokens. Unless otherwise specified, a group of consecutive
+ * whitespace characters behaves like a single whitespace character.</li>
+ * <li>Delimiter characters ('(', '&lt;', etc., but also '/', which precedes the
+ * PDF key names in dictionaries) enclose semantic groups.</li>
+ * <li>Regular characters are by definition all remaining characters, i.e.
+ * those that are neither whitespace nor delimiters.</li>
+ * </ul>
+ * <p>
+ * By default a newline consists of CR and LF, but LF alone and even CR alone
+ * are allowed as well. It seems that all variations of newlines may exist
+ * within a single document.
+ * </p>
+ *
+ * @author wprinz
+ */
+public abstract class PDFNames
+{
+
+  /**
+   * The standard encoding of PDF tokens and names.
+   *
+   * <p>
+   * PDF is usually an 8 bit format. Binary data etc. can be saved just as it
+   * is. Nevertheless all PDF tokens ('xref', 'obj', etc.) and PDF names
+   * ('/Size', '/Pages', '/Type', etc.) must be in 7 bit US-ASCII encoding.
+   * </p>
+   * <p>
+   * Therefore, use this encoding constant whenever Java Strings are converted
+   * to such PDF tokens (e.g. numbers).
+   * </p>
+   * <p>
+   * The same applies to PDF token/name byte arrays that are transformed back
+   * into Java Strings.
+   * </p>
+   */
+  public static final String PDF_STANDARD_ENCODING = "US-ASCII";
+
+  // Whitespace characters
+
+  // TABLE 3.1 White-space characters
+  // DECIMAL HEXADECIMAL OCTAL NAME
+  // 0 00 000 Null (NUL)
+  // 9 09 011 Tab (HT)
+  // 10 0A 012 Line feed (LF)
+  // 12 0C 014 Form feed (FF)
+  // 13 0D 015 Carriage return
+  // 32 20 040 Space (SP)
+
+  /** Null (NUL), decimal 0. */
+  public static final byte WHITESPACE_NUL = '\0';
+
+  /** Tab (HT), decimal 9. */
+  public static final byte WHITESPACE_HT = '\t';
+
+  /** Line feed (LF), decimal 10. */
+  public static final byte WHITESPACE_LF = '\n';
+
+  /** Form feed (FF), decimal 12. */
+  public static final byte WHITESPACE_FF = '\f';
+
+  /** Carriage return (CR), decimal 13. */
+  public static final byte WHITESPACE_CR = '\r';
+
+  /** Space (SP), decimal 32. */
+  public static final byte WHITESPACE_SP = ' ';
+
+  /** All whitespace characters: NUL, HT, LF, FF, CR, SP (in this order). */
+  public static final byte[] WHITESPACE_CHARACTERS = {
+      WHITESPACE_NUL,
+      WHITESPACE_HT,
+      WHITESPACE_LF,
+      WHITESPACE_FF,
+      WHITESPACE_CR,
+      WHITESPACE_SP };
+
+  // comment character
+
+  /** The comment character '%'. */
+  public static final byte COMMENT = '%';
+
+  // PDF-version
+
+  /** The byte sequence "PDF-". */
+  public static final byte[] PDF_VERSION_STR = { 'P', 'D', 'F', '-' };
+
+  /** The version separator '.'. */
+  public static final byte PDF_VERSION_SEPARATOR = '.';
+
+  // delimiter characters
+
+  /** The delimiter '('. */
+  public static final byte DELIMITER_STRING_OPEN = '(';
+
+  /** The delimiter ')'. */
+  public static final byte DELIMITER_STRING_CLOSE = ')';
+
+  /** The delimiter '&lt;'. */
+  public static final byte DELIMITER_HEXSTRING_OPEN = '<';
+
+  /** The delimiter '&gt;'. */
+  public static final byte DELIMITER_HEXSTRING_CLOSE = '>';
+
+  /** The delimiter '['. */
+  public static final byte DELIMITER_ARRAY_OPEN = '[';
+
+  /** The delimiter ']'. */
+  public static final byte DELIMITER_ARRAY_CLOSE = ']';
+
+  /** The delimiter '{'. */
+  public static final byte DELIMITER_CURLY_OPEN = '{';
+
+  /** The delimiter '}'. */
+  public static final byte DELIMITER_CURLY_CLOSE = '}';
+
+  /** The delimiter '/', which precedes names. */
+  public static final byte DELIMITER_NAME = '/';
+
+  /**
+   * The delimiter characters listed above: ( ) &lt; &gt; [ ] { } / (in this
+   * order).
+   */
+  public static final byte[] DELIMITER_CHARACTERS = {
+      DELIMITER_STRING_OPEN,
+      DELIMITER_STRING_CLOSE,
+      DELIMITER_HEXSTRING_OPEN,
+      DELIMITER_HEXSTRING_CLOSE,
+      DELIMITER_ARRAY_OPEN,
+      DELIMITER_ARRAY_CLOSE,
+      DELIMITER_CURLY_OPEN,
+      DELIMITER_CURLY_CLOSE,
+      DELIMITER_NAME };
+
+  // Footer
+
+  /** The token "xref". */
+  public static final byte[] XREF_STR = { 'x', 'r', 'e', 'f' };
+
+  /** The token "trailer". */
+  public static final byte[] TRAILER_STR = { 't', 'r', 'a', 'i', 'l', 'e', 'r' };
+
+  /** The token "startxref". */
+  public static final byte[] STARTXREF_STR = { 's', 't', 'a', 'r', 't', 'x', 'r', 'e', 'f' };
+
+  /** The marker "%%EOF". */
+  public static final byte[] EOF_STR = { '%', '%', 'E', 'O', 'F' };
+
+  // objects
+
+  /** The token "obj". */
+  public static final byte[] OBJ_STR = { 'o', 'b', 'j' };
+
+  /** The token "endobj". */
+  public static final byte[] ENDOBJ_STR = { 'e', 'n', 'd', 'o', 'b', 'j' };
+
+  /** The dictionary start sequence "&lt;&lt;". */
+  public static final byte[] DICT_START_STR = {
+      DELIMITER_HEXSTRING_OPEN,
+      DELIMITER_HEXSTRING_OPEN };
+
+  /** The dictionary end sequence "&gt;&gt;". */
+  public static final byte[] DICT_END_STR = {
+      DELIMITER_HEXSTRING_CLOSE,
+      DELIMITER_HEXSTRING_CLOSE };
+
+  /** The token "stream". */
+  public static final byte[] STREAM_STR = { 's', 't', 'r', 'e', 'a', 'm' };
+
+  /** The token "endstream". */
+  public static final byte[] ENDSTREAM_STR = { 'e', 'n', 'd', 's', 't', 'r', 'e', 'a', 'm' };
+
+  /** The token "null". */
+  public static final byte[] NULL_STR = { 'n', 'u', 'l', 'l' };
+
+  /** The token "true". */
+  public static final byte[] TRUE_STR = { 't', 'r', 'u', 'e' };
+
+  /** The token "false". */
+  public static final byte[] FALSE_STR = { 'f', 'a', 'l', 's', 'e' };
+
+  // indirect object references
+
+  /** The token "R". */
+  public static final byte[] REFERENCE_STR = { 'R' };
+
+  // Dictionary keys
+
+  /** The dictionary key "Size" (without the leading '/'). */
+  public static final byte[] SIZE_STR = { 'S', 'i', 'z', 'e' };
+
+  /** The dictionary key "Prev" (without the leading '/'). */
+  public static final byte[] PREV_STR = { 'P', 'r', 'e', 'v' };
+
+  /** The dictionary key "Root" (without the leading '/'). */
+  public static final byte[] ROOT_STR = { 'R', 'o', 'o', 't' };
+
+  /** The dictionary key "Info" (without the leading '/'). */
+  public static final byte[] INFO_STR = { 'I', 'n', 'f', 'o' };
+
+  /** The dictionary key "Length" (without the leading '/'). */
+  public static final byte[] LENGTH_STR = { 'L', 'e', 'n', 'g', 't', 'h' };
+
+}
diff --git a/pdf-as-lib/src/main/java/at/knowcenter/wag/exactparser/parsing/PDFUtils.java b/pdf-as-lib/src/main/java/at/knowcenter/wag/exactparser/parsing/PDFUtils.java
new file mode 100644
index 0000000..de356c9
--- /dev/null
+++ b/pdf-as-lib/src/main/java/at/knowcenter/wag/exactparser/parsing/PDFUtils.java
@@ -0,0 +1,1405 @@
+/**
+ * <copyright> Copyright 2006 by Know-Center, Graz, Austria </copyright>
+ * PDF-AS has been contracted by the E-Government Innovation Center EGIZ, a
+ * joint initiative of the Federal Chancellery Austria and Graz University of
+ * Technology.
+ *
+ * Licensed under the EUPL, Version 1.1 or - as soon they will be approved by
+ * the European Commission - subsequent versions of the EUPL (the "Licence");
+ * You may not use this work except in compliance with the Licence.
+ * You may obtain a copy of the Licence at:
+ * http://www.osor.eu/eupl/
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the Licence is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the Licence for the specific language governing permissions and
+ * limitations under the Licence.
+ *
+ * This product combines work with different licenses. See the "NOTICE" text
+ * file for details on the various modules and licenses.
+ * The "NOTICE" text file is part of the distribution. Any derivative works
+ * that you distribute must include a readable copy of the "NOTICE" text file.
+ *
+ * $Id: PDFUtils.java,v 1.1 2006/08/25 17:00:59 wprinz Exp $
+ */
+package at.knowcenter.wag.exactparser.parsing;
+
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+
+import org.apache.log4j.Logger;
+
+import at.knowcenter.wag.exactparser.ByteArrayUtils;
+import at.knowcenter.wag.exactparser.parsing.results.ArrayParseResult;
+import at.knowcenter.wag.exactparser.parsing.results.BooleanParseResult;
+import at.knowcenter.wag.exactparser.parsing.results.DictionaryParseResult;
+import at.knowcenter.wag.exactparser.parsing.results.EOFParseResult;
+import at.knowcenter.wag.exactparser.parsing.results.FooterParseResult;
+import at.knowcenter.wag.exactparser.parsing.results.HeaderParseResult;
+import at.knowcenter.wag.exactparser.parsing.results.HexStringParseResult;
+import at.knowcenter.wag.exactparser.parsing.results.IndirectObjectReferenceParseResult;
+import at.knowcenter.wag.exactparser.parsing.results.IntegerParseResult;
+import at.knowcenter.wag.exactparser.parsing.results.LiteralStringParseResult;
+import at.knowcenter.wag.exactparser.parsing.results.NameParseResult;
+import at.knowcenter.wag.exactparser.parsing.results.NullParseResult;
+import at.knowcenter.wag.exactparser.parsing.results.NumberParseResult;
+import at.knowcenter.wag.exactparser.parsing.results.ObjectHeaderParseResult;
+import at.knowcenter.wag.exactparser.parsing.results.ObjectParseResult;
+import at.knowcenter.wag.exactparser.parsing.results.ParseResult;
+import at.knowcenter.wag.exactparser.parsing.results.StartXRefParseResult;
+import at.knowcenter.wag.exactparser.parsing.results.StreamParseResult;
+import at.knowcenter.wag.exactparser.parsing.results.TrailerParseResult;
+import at.knowcenter.wag.exactparser.parsing.results.XRefLineParseResult;
+import at.knowcenter.wag.exactparser.parsing.results.XRefSectionParseResult;
+import at.knowcenter.wag.exactparser.parsing.results.XRefSubSectionParseResult;
+
+
+
+/**
+ * Abstract class that contains several static utility methods for parsing and
+ * analyzing PDF documents on the lowest level.
+ *
+ * <p>
+ * Most operations require random access to the PDF data (mostly to verify the
+ * syntax). So the whole PDF document has to be provided as a byte array. The
+ * term "pdf+index" states a specific position index within this byte array.
+ * </p>
+ *
+ * @author wprinz
+ *
+ */
+public abstract class PDFUtils
+{
+ private static Logger log = Logger.getLogger(PDFUtils.class);
+
+ /**
+  * Tells whether a byte is one of the PDF whitespace characters.
+  *
+  * @param data
+  *          The byte to classify.
+  * @return Returns true if the byte is contained in
+  *         PDFNames.WHITESPACE_CHARACTERS.
+  */
+ public static boolean isWhitespace(final byte data)
+ {
+   final boolean found = ByteArrayUtils.contains(PDFNames.WHITESPACE_CHARACTERS, data);
+   return found;
+ }
+
+ /**
+  * Tells whether a byte is one of the PDF delimiter characters.
+  *
+  * @param data
+  *          The byte to classify.
+  * @return Returns true if the byte is contained in
+  *         PDFNames.DELIMITER_CHARACTERS.
+  */
+ public static boolean isDelimiter(final byte data)
+ {
+   final boolean found = ByteArrayUtils.contains(PDFNames.DELIMITER_CHARACTERS, data);
+   return found;
+ }
+
+ /**
+  * Tells whether a byte is a regular character, i.e. neither whitespace nor a
+  * delimiter.
+  *
+  * @param data
+  *          The byte to classify.
+  * @return Returns true if the byte is neither whitespace nor a delimiter.
+  */
+ protected static boolean isRegular(final byte data)
+ {
+   if (isWhitespace(data))
+   {
+     return false;
+   }
+   return !isDelimiter(data);
+ }
+
+ /**
+  * Skips whitespace.
+  *
+  * <p>
+  * Skips all whitespace, which may be none, one or multiple whitespace
+  * characters.
+  * </p>
+  * <p>
+  * Note that this also skips newline characters (which belong to whitespace as
+  * well).
+  * </p>
+  *
+  * @param data
+  *          The PDF data.
+  * @param index
+  *          The index.
+  * @return Returns the index of the first non whitespace character. This may
+  *         be equal to index if no whitespaces were skipped at all. If only
+  *         whitespace follows up to the end of the data, data.length is
+  *         returned.
+  */
+ public static int skipWhitespace(final byte[] data, final int index)
+ {
+   int non_whitespace_index = index;
+   // Stop at the end of the data instead of reading past the array, e.g. when
+   // the document ends with trailing whitespace.
+   while (non_whitespace_index < data.length && isWhitespace(data[non_whitespace_index]))
+   {
+     non_whitespace_index++;
+   }
+   return non_whitespace_index;
+ }
+
+ /**
+  * Skips bytes until whitespace is reached.
+  *
+  * <p>
+  * Skips all non whitespace characters, which may be none at all.
+  * </p>
+  *
+  * @param data
+  *          The PDF data.
+  * @param index
+  *          The index.
+  * @return Returns the index of the first whitespace character. This may be
+  *         equal to index if no non whitespaces were skipped at all. If no
+  *         whitespace follows up to the end of the data, data.length is
+  *         returned.
+  */
+ public static int skipToWhitespace(final byte[] data, final int index)
+ {
+   int whitespace_index = index;
+   // Stop at the end of the data instead of reading past the array when the
+   // last token is not followed by whitespace.
+   while (whitespace_index < data.length && !isWhitespace(data[whitespace_index]))
+   {
+     whitespace_index++;
+   }
+   return whitespace_index;
+ }
+
+ // Line terminator: carriage return followed by line feed.
+ protected static final byte[] LINE_TERMINATOR_CRLF = {
+ PDFNames.WHITESPACE_CR, PDFNames.WHITESPACE_LF };
+
+ // Line terminator: a carriage return on its own. The newline methods below
+ // accept it because some applications write it, although it is not specified.
+ protected static final byte[] LINE_TERMINATOR_CRALONE = { PDFNames.WHITESPACE_CR };
+
+ // Line terminator: a single line feed.
+ protected static final byte[] LINE_TERMINATOR_LF = { PDFNames.WHITESPACE_LF };
+
+ /**
+  * Tells whether a line terminator starts at the given index.
+  *
+  * <p>
+  * LF and CR LF are accepted. Although not specified by PDF, some applications
+  * use the CR alone as line terminator, so this is accepted as well.
+  * </p>
+  *
+  * @param data
+  *          The PDF data.
+  * @param index
+  *          The index.
+  * @return Returns true if one of the line terminators starts at data+index.
+  */
+ public static boolean isNewline(final byte[] data, final int index)
+ {
+   return ByteArrayUtils.compareByteArrays(data, index, LINE_TERMINATOR_LF)
+       || ByteArrayUtils.compareByteArrays(data, index, LINE_TERMINATOR_CRLF)
+       || ByteArrayUtils.compareByteArrays(data, index, LINE_TERMINATOR_CRALONE);
+ }
+
+ /**
+  * Skips the single line terminator that starts at the given index.
+  *
+  * <p>
+  * The terminators are tried in the order LF, CR LF, CR alone. The CR alone is
+  * not specified by PDF, but some applications use it as line terminator.
+  * </p>
+  *
+  * @param data
+  *          The PDF data.
+  * @param index
+  *          The index of the expected line terminator.
+  * @return Returns the index of the first byte after the line terminator. If
+  *         there is no line terminator at index (and assertions are
+  *         disabled), index is returned unchanged.
+  */
+ public static int skipNewline(final byte[] data, final int index)
+ {
+   final byte[][] terminators = { LINE_TERMINATOR_LF, LINE_TERMINATOR_CRLF,
+       LINE_TERMINATOR_CRALONE };
+
+   for (int i = 0; i < terminators.length; i++)
+   {
+     if (ByteArrayUtils.compareByteArrays(data, index, terminators[i]))
+     {
+       return index + terminators[i].length;
+     }
+   }
+
+   assert false : "don't call this if you don't expect a newline - call skipWhitespace instead";
+   return index;
+ }
+
+ /**
+  * Skips all bytes up to and including the next line terminator.
+  *
+  * <p>
+  * The search starts at index and accepts LF, CR LF and - although not
+  * specified by PDF - the CR alone as line terminator.
+  * </p>
+  *
+  * @param data
+  *          The PDF data.
+  * @param index
+  *          The index where the search starts.
+  * @return Returns the index of the first byte after the next line terminator.
+  *         If no line terminator is found, the end of the data is returned.
+  */
+ public static int skipToNewline(final byte[] data, final int index)
+ {
+   for (int current_index = index; current_index < data.length; current_index++)
+   {
+     // All three checks have to look at the advancing position; checking the
+     // fixed start index would miss every CR based terminator that is not
+     // located directly at the start.
+     if (ByteArrayUtils.compareByteArrays(data, current_index, LINE_TERMINATOR_LF))
+     {
+       return current_index + LINE_TERMINATOR_LF.length;
+     }
+     if (ByteArrayUtils.compareByteArrays(data, current_index, LINE_TERMINATOR_CRLF))
+     {
+       return current_index + LINE_TERMINATOR_CRLF.length;
+     }
+     // although not specified by PDF, some applications use the CR alone as
+     // line terminator
+     if (ByteArrayUtils.compareByteArrays(data, current_index, LINE_TERMINATOR_CRALONE))
+     {
+       return current_index + LINE_TERMINATOR_CRALONE.length;
+     }
+   }
+
+   // No line terminator up to the end of the data.
+   return Math.max(index, data.length);
+ }
+
+ /**
+  * Parses a boolean value ("true" or "false") at pdf+index.
+  *
+  * @param pdf
+  *          The PDF data.
+  * @param index
+  *          The index.
+  * @return Returns the result of the parsing operation.
+  * @throws RuntimeException
+  *           Thrown if neither "true" nor "false" starts at index.
+  */
+ public static BooleanParseResult parseBoolean(final byte[] pdf,
+     final int index)
+ {
+   final boolean is_true = ByteArrayUtils.compareByteArrays(pdf, index, PDFNames.TRUE_STR);
+   if (!is_true && !ByteArrayUtils.compareByteArrays(pdf, index, PDFNames.FALSE_STR))
+   {
+     throw new RuntimeException("Boolean couldn't be parsed at index " + index);
+   }
+
+   final byte[] keyword = is_true ? PDFNames.TRUE_STR : PDFNames.FALSE_STR;
+
+   final BooleanParseResult bpr = new BooleanParseResult();
+   bpr.start_index = index;
+   bpr.value = is_true;
+   bpr.next_index = index + keyword.length;
+   return bpr;
+ }
+
+ public static boolean isSign(final byte data)
+ {
+ return data == '+' || data == '-';
+ }
+
+ /**
+  * Tells whether a byte is a decimal digit.
+  *
+  * @param data
+  *          The byte to classify.
+  * @return Returns true if the byte is in the range '0' to '9'.
+  */
+ public static boolean isNumeric(final byte data)
+ {
+   final int digit = data - '0';
+   return 0 <= digit && digit <= 9;
+ }
+
+ /**
+  * Reads the integer number found at data+index.
+  *
+  * <p>
+  * This is a shortcut for parseNumberFromByteArray that only returns the
+  * integer value. The number is expected to be non-negative, which is checked
+  * by an assertion only.
+  * </p>
+  *
+  * @param data
+  *          The data.
+  * @param index
+  *          The index.
+  * @return Returns the read number.
+  */
+ public static int readNumberFromByteArray(final byte[] data, final int index)
+ {
+   final int value = parseNumberFromByteArray(data, index).number;
+
+   assert value >= 0;
+   return value;
+ }
+
+ /**
+  * Parses an unsigned integer.
+  *
+  * <p>
+  * The integer must be a block of successive number characters. It must not be
+  * preceded by a sign (not even '+'). The block ends at the first byte that is
+  * not a digit or at the end of the data.
+  * </p>
+  *
+  * @param pdf
+  *          The PDF data.
+  * @param index
+  *          The index.
+  * @return Returns the result of the parsing operation.
+  * @throws NumberFormatException
+  *           Thrown if there is no digit at index or if the value does not
+  *           fit into an int.
+  */
+ public static IntegerParseResult parseUnsignedInteger(final byte[] pdf,
+     final int index)
+ {
+   assert isNumeric(pdf[index]);
+
+   int int_value = 0;
+   int cur_index = index;
+   // Accumulate the value digit by digit. The bounds check lets a number end
+   // directly at the end of the data.
+   while (cur_index < pdf.length && isNumeric(pdf[cur_index]))
+   {
+     final int digit = pdf[cur_index] - '0';
+     // int_value * 10 + digit would exceed Integer.MAX_VALUE
+     if (int_value > (Integer.MAX_VALUE - digit) / 10)
+     {
+       throw new NumberFormatException("Unsigned integer at index " + index + " is too large.");
+     }
+     int_value = int_value * 10 + digit;
+
+     cur_index++;
+   }
+
+   if (cur_index == index)
+   {
+     throw new NumberFormatException("No digits found at index " + index + ".");
+   }
+
+   assert int_value >= 0;
+
+   IntegerParseResult ipr = new IntegerParseResult();
+   ipr.start_index = index;
+   ipr.next_index = cur_index;
+   ipr.number = int_value;
+   return ipr;
+ }
+
+ /**
+  * Parses a (potentially) signed integer.
+  *
+  * <p>
+  * The integer must be a block of successive number characters. It may be
+  * preceded by a sign character ('+' or '-'). The digits are parsed by
+  * parseUnsignedInteger.
+  * </p>
+  *
+  * @param pdf
+  *          The PDF data.
+  * @param index
+  *          The index.
+  * @return Returns the result of the parsing operation. The start index of
+  *         the result includes the sign character.
+  */
+ public static IntegerParseResult parseInteger(final byte[] pdf,
+     final int index)
+ {
+   assert isSign(pdf[index]) || isNumeric(pdf[index]);
+
+   final byte first = pdf[index];
+   final boolean negative = first == '-';
+   final boolean has_sign = negative || first == '+';
+   if (!has_sign)
+   {
+     assert isNumeric(first);
+   }
+
+   // the digits start right after the sign character - if there is one
+   final int number_start = has_sign ? index + 1 : index;
+
+   final IntegerParseResult ipr = parseUnsignedInteger(pdf, number_start);
+   ipr.start_index = index;
+   ipr.number *= negative ? -1 : +1;
+   return ipr;
+ }
+
+ /**
+  * Parses an arbitrary number.
+  *
+  * <p>
+  * The number may be preceded by a sign character ('+' or '-') and consists of
+  * a block of digits and '.' characters. The block ends at the first other
+  * byte or at the end of the data. If the block is a valid integer, the
+  * integer value of the result is set. Otherwise the block is parsed as a
+  * floating point number and the floating value of the result is set.
+  * </p>
+  *
+  * @param pdf
+  *          The PDF data.
+  * @param index
+  *          The index.
+  * @return Returns the result of the parsing operation.
+  * @throws NumberFormatException
+  *           Thrown if the block is neither a valid integer nor a valid
+  *           floating point number.
+  */
+ public static NumberParseResult parseNumberFromByteArray(final byte[] pdf,
+     int index)
+ {
+   assert isSign(pdf[index]) || isNumeric(pdf[index]);
+
+   int sign = +1;
+   int cur_index = index;
+   if (pdf[cur_index] == '+')
+   {
+     cur_index++;
+   }
+   else if (pdf[cur_index] == '-')
+   {
+     sign = -1;
+     cur_index++;
+   }
+   else
+   {
+     assert isNumeric(pdf[cur_index]);
+   }
+
+   // Collect the characters in a buffer instead of concatenating Strings. The
+   // bounds check lets a number end directly at the end of the data.
+   final StringBuffer number = new StringBuffer();
+   while (cur_index < pdf.length && (isNumeric(pdf[cur_index]) || pdf[cur_index] == '.'))
+   {
+     number.append((char) pdf[cur_index]);
+     cur_index++;
+   }
+   final String number_text = number.toString();
+
+   NumberParseResult npr = new NumberParseResult();
+   npr.next_index = cur_index;
+   try
+   {
+     npr.number = Integer.parseInt(number_text) * sign;
+   }
+   catch (NumberFormatException e)
+   {
+     // not an integer ==> it has to be a floating point number
+     npr.floating = Float.parseFloat(number_text) * sign;
+   }
+
+   return npr;
+ }
+
+ /**
+  * Finds the last occurrence of the "startxref" keyword in the document.
+  *
+  * <p>
+  * The lookup is delegated to ByteArrayUtils.lastIndexOf.
+  * </p>
+  *
+  * @param pdf
+  *          The complete PDF file data.
+  * @return Returns the offset (byte index) of the "startxref" entry.
+  */
+ public static int findLastStartXRef(final byte[] pdf)
+ {
+   final int startxref_offset = ByteArrayUtils.lastIndexOf(pdf, PDFNames.STARTXREF_STR);
+   return startxref_offset;
+ }
+
+ /**
+  * Parses the xref section at pdf+index.
+  *
+  * <p>
+  * An xref section starts with 'xref' and contains one or more xref
+  * sub-sections. The sub-sections are parsed one after another until the
+  * 'trailer' keyword is reached.
+  * </p>
+  *
+  * @param pdf
+  *          The PDF data.
+  * @param index
+  *          The start index of the xref table.
+  * @return Returns the result of the parsing operation. The next index of the
+  *         result is the index of the 'trailer' keyword.
+  */
+ public static XRefSectionParseResult parseXRefSection(final byte[] pdf,
+     final int index)
+ {
+   final XRefSectionParseResult xpr = new XRefSectionParseResult();
+   xpr.start_index = index;
+
+   final int after_keyword = index + PDFNames.XREF_STR.length;
+   assert ByteArrayUtils.compareByteArrays(pdf, index, PDFNames.XREF_STR);
+   assert isNewline(pdf, after_keyword);
+
+   int cur_index = skipWhitespace(pdf, after_keyword);
+
+   // As long as the trailer is not reached, another sub-section follows.
+   while (!ByteArrayUtils.compareByteArrays(pdf, cur_index, PDFNames.TRAILER_STR))
+   {
+     final XRefSubSectionParseResult sub_section = parseXRefSubSection(pdf, cur_index);
+     xpr.appendXRefSubSection(sub_section);
+
+     cur_index = sub_section.next_index;
+   }
+
+   xpr.next_index = cur_index;
+   assert ByteArrayUtils.compareByteArrays(pdf, xpr.next_index, PDFNames.TRAILER_STR);
+
+   return xpr;
+ }
+
+ /**
+  * Parses a xref sub-section.
+  *
+  * <p>
+  * A sub-section consists of a header with two numbers - the number of the
+  * first object and the number of objects - followed by one xref line per
+  * object. For lines of objects in use ('n') the object header at the given
+  * offset is parsed and checked against the line by assertions.
+  * </p>
+  *
+  * @param pdf
+  *          The PDF data.
+  * @param index
+  *          The index.
+  * @return Returns the result of the parsing operation.
+  */
+ public static XRefSubSectionParseResult parseXRefSubSection(final byte[] pdf,
+     final int index)
+ {
+   final XRefSubSectionParseResult sspr = new XRefSubSectionParseResult();
+   sspr.start_index = index;
+
+   // header, first number: object number of the first line
+   final NumberParseResult first_number = parseNumberFromByteArray(pdf, index);
+   sspr.start_obj_number = first_number.number;
+   assert sspr.start_obj_number >= 0;
+
+   // header, second number: number of lines
+   assert isWhitespace(pdf[first_number.next_index]);
+   final NumberParseResult count_number = parseNumberFromByteArray(pdf, skipWhitespace(pdf, first_number.next_index));
+   sspr.num_objects = count_number.number;
+
+   assert isWhitespace(pdf[count_number.next_index]);
+   int line_start = skipWhitespace(pdf, count_number.next_index);
+
+   int line_no = 0;
+   while (line_no < sspr.num_objects)
+   {
+     final XRefLineParseResult lpr = parseXrefLine(pdf, line_start);
+     sspr.appendXRefLine(lpr);
+
+     if (lpr.object_usage == 'n')
+     {
+       // check the line - this simple check may make problems with object
+       // streams and xref streams
+       final ObjectHeaderParseResult ohpr = parseObjectHeader(pdf, lpr.object_offset);
+       assert ohpr.object_number == sspr.start_obj_number + line_no;
+       assert ohpr.generation_number == lpr.generation_number;
+     }
+
+     line_start = lpr.next_index;
+     line_no++;
+   }
+
+   sspr.next_index = line_start;
+   return sspr;
+ }
+
+ /**
+  * Parses a single 20 bytes xref line at pdf+index.
+  *
+  * <p>
+  * The line consists of the 10 digit object offset, a single space, the 5
+  * digit generation number, a single space, the usage flag ('n' or 'f') and a
+  * 2 byte end of line sequence. The layout is checked by assertions only.
+  * </p>
+  *
+  * @param pdf
+  *          The PDF data.
+  * @param index
+  *          The index.
+  * @return Returns the result of the parsing operation.
+  */
+ public static XRefLineParseResult parseXrefLine(final byte[] pdf,
+     final int index)
+ {
+   final XRefLineParseResult lpr = new XRefLineParseResult();
+   lpr.start_index = index;
+
+   final IntegerParseResult offset_field = parseUnsignedInteger(pdf, index);
+   lpr.object_offset = offset_field.number;
+   assert lpr.object_offset >= 0;
+   assert lpr.object_offset < pdf.length;
+   assert offset_field.next_index == index + 10;
+
+   // The standard explicitly says 1 single SPACE.
+   assert pdf[offset_field.next_index] == PDFNames.WHITESPACE_SP;
+
+   final IntegerParseResult generation_field = parseUnsignedInteger(pdf, offset_field.next_index + 1);
+   lpr.generation_number = generation_field.number;
+   assert generation_field.next_index == index + 16;
+
+   assert pdf[generation_field.next_index] == PDFNames.WHITESPACE_SP;
+   final int usage_index = generation_field.next_index + 1;
+
+   lpr.object_usage = pdf[usage_index];
+   assert lpr.object_usage == 'n' || lpr.object_usage == 'f';
+
+   // The end of line sequence is either SP + CR, SP + LF or CR + LF.
+   final byte eol_first = pdf[usage_index + 1];
+   if (eol_first != PDFNames.WHITESPACE_SP)
+   {
+     assert eol_first == PDFNames.WHITESPACE_CR;
+     assert pdf[usage_index + 2] == PDFNames.WHITESPACE_LF;
+   }
+   else
+   {
+     assert pdf[usage_index + 2] == PDFNames.WHITESPACE_CR || pdf[usage_index + 2] == PDFNames.WHITESPACE_LF;
+   }
+
+   lpr.next_index = usage_index + 3;
+
+   assert lpr.next_index == index + 20;
+
+   return lpr;
+ }
+
+ /**
+  * Searches a list of parsed names for the given name.
+  *
+  * <p>
+  * A name matches only if it has the same length as the sought name and the
+  * same bytes. Without the length check a longer name that merely starts with
+  * the sought bytes (e.g. "Length1" when "Length" is sought) would match as
+  * well.
+  * </p>
+  *
+  * @param pdf
+  *          The PDF data the names were parsed from.
+  * @param names
+  *          The list of NameParseResult objects.
+  * @param sought
+  *          The sought name without the leading name delimiter.
+  * @return Returns the list index of the first matching name or -1 if the
+  *         name was not found.
+  */
+ public static int indexOfName(final byte[] pdf, List names,
+     byte[] sought)
+ {
+   for (int i = 0; i < names.size(); i++)
+   {
+     NameParseResult name = (NameParseResult) names.get(i);
+     // next_index is the index of the first byte after the name
+     final int name_length = name.next_index - name.name_start_index;
+     if (name_length == sought.length && ByteArrayUtils.compareByteArrays(pdf, name.name_start_index, sought))
+     {
+       return i;
+     }
+   }
+   return -1;
+ }
+
+ /**
+  * Parses the trailer at pdf+index.
+  *
+  * <p>
+  * The trailer consists of the 'trailer' keyword followed by the trailer
+  * dictionary. The entries Info, Root, Size and Prev are read from the
+  * dictionary. Info, Root and Prev are optional, Size is required.
+  * </p>
+  *
+  * @param pdf
+  *          The PDF data.
+  * @param index
+  *          The index of the 'trailer' keyword.
+  * @return Returns the result of the parsing operation. The next index of the
+  *         result is the index of the first non whitespace byte after the
+  *         trailer dictionary.
+  * @throws RuntimeException
+  *           Thrown if the trailer dictionary has no Size entry.
+  */
+ public static TrailerParseResult parseTrailer(final byte[] pdf,
+     final int index)
+ {
+   TrailerParseResult tpr = new TrailerParseResult();
+   tpr.start_index = index;
+   tpr.has_predecessor = false;
+
+   assert ByteArrayUtils.compareByteArrays(pdf, tpr.start_index, PDFNames.TRAILER_STR);
+
+   // The whitespace after the keyword is skipped here, so the contents index
+   // is the index of the trailer dictionary.
+   tpr.contents_index = skipWhitespace(pdf, tpr.start_index + PDFNames.TRAILER_STR.length);
+   final int trailer_dict_index = tpr.contents_index;
+
+   assert ByteArrayUtils.compareByteArrays(pdf, trailer_dict_index, PDFNames.DICT_START_STR);
+
+   tpr.dpr = parseDictionary(pdf, trailer_dict_index);
+
+   int info_index = indexOfName(pdf, tpr.dpr.names, PDFNames.INFO_STR);
+   if (info_index >= 0)
+   {
+     tpr.info = (IndirectObjectReferenceParseResult) tpr.dpr.values.get(info_index);
+   }
+
+   int root_index = indexOfName(pdf, tpr.dpr.names, PDFNames.ROOT_STR);
+   if (root_index >= 0)
+   {
+     tpr.root = (IndirectObjectReferenceParseResult) tpr.dpr.values.get(root_index);
+   }
+
+   // Fail with a clear message instead of an index lookup error when the
+   // Size entry is missing.
+   int size_index = indexOfName(pdf, tpr.dpr.names, PDFNames.SIZE_STR);
+   if (size_index < 0)
+   {
+     throw new RuntimeException("The trailer dictionary at index " + trailer_dict_index + " has no Size entry.");
+   }
+   tpr.size = ((NumberParseResult) tpr.dpr.values.get(size_index)).number;
+
+   int prev_index = indexOfName(pdf, tpr.dpr.names, PDFNames.PREV_STR);
+   if (prev_index >= 0)
+   {
+     tpr.has_predecessor = true;
+     tpr.setPrev(((NumberParseResult) tpr.dpr.values.get(prev_index)).number);
+   }
+
+   tpr.contents_end_index = tpr.dpr.next_index;
+   tpr.next_index = skipWhitespace(pdf, tpr.contents_end_index);
+
+   assert ByteArrayUtils.compareByteArrays(pdf, tpr.next_index, PDFNames.STARTXREF_STR);
+   return tpr;
+ }
+
+ /**
+  * Parses the startxref section at pdf+index.
+  *
+  * <p>
+  * The section consists of the 'startxref' keyword, a newline, the byte
+  * offset of the xref section and another newline. It is expected to be
+  * followed by the EOF marker, which is checked by an assertion only.
+  * </p>
+  *
+  * @param pdf
+  *          The complete PDF file data.
+  * @param index
+  *          The index of the startxref section.
+  * @return Returns the result of the parsing operation. The next index of the
+  *         result is the index of the first non whitespace byte after the
+  *         offset.
+  */
+ public static StartXRefParseResult parseStartXRef(final byte[] pdf,
+     final int index)
+ {
+   final StartXRefParseResult spr = new StartXRefParseResult();
+   // NOTE(review): all other parse methods store the start position in
+   // start_index - this assignment to next_index looks like a slip; confirm.
+   spr.next_index = index;
+
+   final int after_keyword = index + PDFNames.STARTXREF_STR.length;
+   assert ByteArrayUtils.compareByteArrays(pdf, index, PDFNames.STARTXREF_STR);
+   assert isNewline(pdf, after_keyword);
+
+   final NumberParseResult offset_number = parseNumberFromByteArray(pdf, skipWhitespace(pdf, after_keyword));
+   spr.xref_index = offset_number.number;
+
+   assert isNewline(pdf, offset_number.next_index);
+   spr.next_index = skipWhitespace(pdf, offset_number.next_index);
+
+   assert ByteArrayUtils.compareByteArrays(pdf, spr.next_index, PDFNames.EOF_STR);
+
+   assert spr.xref_index >= 0;
+   assert spr.xref_index < pdf.length;
+
+   // A linearized document sets the startxref value of the first page's footer
+   // to 0. Any other value has to point to an 'xref' keyword.
+   assert spr.xref_index == 0 || ByteArrayUtils.compareByteArrays(pdf, spr.xref_index, PDFNames.XREF_STR);
+
+   return spr;
+ }
+
+ /**
+  * Parses the End Of File (EOF) marker at pdf+index.
+  *
+  * <p>
+  * The EOF marker is not necessarily terminated with a newline, so no
+  * newline is skipped: the next index of the result is the index directly
+  * after the marker.
+  * </p>
+  *
+  * @param pdf
+  *          The PDF data.
+  * @param index
+  *          The index where to start the parsing.
+  * @return Returns the result of the parsing operation.
+  */
+ public static EOFParseResult parseEOF(final byte[] pdf, final int index)
+ {
+   assert ByteArrayUtils.compareByteArrays(pdf, index, PDFNames.EOF_STR);
+
+   final int marker_end = index + PDFNames.EOF_STR.length;
+
+   final EOFParseResult eofpr = new EOFParseResult();
+   eofpr.start_index = index;
+   eofpr.eof_end_index = marker_end;
+   eofpr.next_index = marker_end;
+   return eofpr;
+ }
+
+ /**
+  * Tells whether an indirect object reference starts at pdf+index.
+  *
+  * <p>
+  * An indirect object reference consists of a positive object number, a
+  * non-negative generation number and the 'R' keyword, separated by
+  * whitespace. This method only checks the data; it does not build a parse
+  * result.
+  * </p>
+  *
+  * @param pdf
+  *          The PDF data.
+  * @param index
+  *          The index.
+  * @return Returns true if an indirect object reference starts at index.
+  */
+ public static boolean isIndirectObjectReference(final byte[] pdf,
+     final int index)
+ {
+   if (!PDFUtils.isNumeric(pdf[index]))
+   {
+     return false;
+   }
+   NumberParseResult object_number_npr = parseNumberFromByteArray(pdf, index);
+   if (object_number_npr.number <= 0)
+   {
+     return false;
+   }
+
+   if (!isWhitespace(pdf[object_number_npr.next_index]))
+   {
+     return false;
+   }
+   int generation_number_index = skipWhitespace(pdf, object_number_npr.next_index);
+
+   if (!PDFUtils.isNumeric(pdf[generation_number_index]))
+   {
+     return false;
+   }
+   NumberParseResult generation_number_npr = parseNumberFromByteArray(pdf, generation_number_index);
+   if (generation_number_npr.number < 0)
+   {
+     return false;
+   }
+
+   if (!isWhitespace(pdf[generation_number_npr.next_index]))
+   {
+     return false;
+   }
+   int R_index = skipWhitespace(pdf, generation_number_npr.next_index);
+
+   return ByteArrayUtils.compareByteArrays(pdf, R_index, PDFNames.REFERENCE_STR);
+ }
+
+ /**
+  * Parses an indirect object reference of the form
+  * "&lt;object number&gt; &lt;generation number&gt; R".
+  *
+  * <p>
+  * The syntax is checked by assertions only - use isIndirectObjectReference
+  * to find out if there is a reference at the index.
+  * </p>
+  *
+  * @param pdf
+  *          The PDF data.
+  * @param index
+  *          The index.
+  * @return Returns the result of the parsing operation.
+  */
+ public static IndirectObjectReferenceParseResult parseIndirectObjectReference(
+     final byte[] pdf, final int index)
+ {
+   assert isIndirectObjectReference(pdf, index);
+
+   final IndirectObjectReferenceParseResult iorpr = new IndirectObjectReferenceParseResult();
+   iorpr.ior = new IndirectObjectReference();
+   iorpr.start_index = index;
+
+   final NumberParseResult object_number = parseNumberFromByteArray(pdf, index);
+   iorpr.ior.object_number = object_number.number;
+   assert iorpr.ior.object_number > 0;
+
+   assert isWhitespace(pdf[object_number.next_index]);
+   final NumberParseResult generation_number = parseNumberFromByteArray(pdf, skipWhitespace(pdf, object_number.next_index));
+   iorpr.ior.generation_number = generation_number.number;
+   assert iorpr.ior.generation_number >= 0;
+
+   assert isWhitespace(pdf[generation_number.next_index]);
+   final int keyword_index = skipWhitespace(pdf, generation_number.next_index);
+   assert ByteArrayUtils.compareByteArrays(pdf, keyword_index, PDFNames.REFERENCE_STR);
+
+   iorpr.next_index = keyword_index + PDFNames.REFERENCE_STR.length;
+   return iorpr;
+ }
+
+ /**
+  * Parses the object header at pdf+index.
+  *
+  * <p>
+  * The object header has the form "&lt;object number&gt; &lt;generation
+  * number&gt; obj". The syntax is checked by assertions only.
+  * </p>
+  *
+  * @param pdf
+  *          The PDF data.
+  * @param index
+  *          The index.
+  * @return Returns the result of the parsing operation. The next index of the
+  *         result is the index of the first non whitespace byte after the
+  *         'obj' keyword.
+  */
+ public static ObjectHeaderParseResult parseObjectHeader(final byte[] pdf,
+     final int index)
+ {
+   final ObjectHeaderParseResult ohpr = new ObjectHeaderParseResult();
+   ohpr.start_index = index;
+
+   final NumberParseResult object_number = parseNumberFromByteArray(pdf, index);
+   ohpr.object_number = object_number.number;
+   assert ohpr.object_number > 0;
+
+   assert isWhitespace(pdf[object_number.next_index]);
+   final NumberParseResult generation_number = parseNumberFromByteArray(pdf, skipWhitespace(pdf, object_number.next_index));
+   ohpr.generation_number = generation_number.number;
+   assert ohpr.generation_number >= 0;
+
+   assert isWhitespace(pdf[generation_number.next_index]);
+   final int keyword_index = skipWhitespace(pdf, generation_number.next_index);
+   assert ByteArrayUtils.compareByteArrays(pdf, keyword_index, PDFNames.OBJ_STR);
+
+   // Not all pdf writers make a newline after obj, so whitespace in general is
+   // skipped instead of a newline.
+   ohpr.next_index = skipWhitespace(pdf, keyword_index + PDFNames.OBJ_STR.length);
+
+   return ohpr;
+ }
+
+ /**
+  * Parses a complete object at pdf+index.
+  *
+  * <p>
+  * The object consists of the object header, the content (any type of object)
+  * and the 'endobj' keyword. The keyword is checked by an assertion only.
+  * </p>
+  *
+  * @param pdf
+  *          The PDF data.
+  * @param index
+  *          The index of the object header.
+  * @return Returns the result of the parsing operation. The next index of the
+  *         result is the index directly after the 'endobj' keyword.
+  */
+ public static ObjectParseResult parseObject(final byte[] pdf, final int index)
+ {
+   final ObjectParseResult opr = new ObjectParseResult();
+   opr.start_index = index;
+
+   opr.header = parseObjectHeader(pdf, index);
+   opr.content_index = opr.header.next_index;
+
+   opr.object = parseUnknownObject(pdf, skipWhitespace(pdf, opr.content_index));
+
+   opr.end_of_content_index = skipWhitespace(pdf, opr.object.next_index);
+   assert ByteArrayUtils.compareByteArrays(pdf, opr.end_of_content_index, PDFNames.ENDOBJ_STR);
+
+   // A newline after endobj is neither checked nor skipped.
+   opr.next_index = opr.end_of_content_index + PDFNames.ENDOBJ_STR.length;
+
+   return opr;
+ }
+
+ /**
+  * Parses the object at pdf+index whose type is not known in advance.
+  *
+  * <p>
+  * The type is determined from the first bytes in this order: dictionary
+  * (which becomes a stream if the 'stream' keyword follows), null, boolean,
+  * indirect object reference or number, literal string, hex string, array and
+  * name.
+  * </p>
+  *
+  * @param pdf
+  *          The PDF data.
+  * @param index
+  *          The index.
+  * @return Returns the result of the parsing operation.
+  * @throws RuntimeException
+  *           Thrown if the first byte does not start any of the known types.
+  */
+ public static ParseResult parseUnknownObject(final byte[] pdf, final int index)
+ {
+   if (ByteArrayUtils.compareByteArrays(pdf, index, PDFNames.DICT_START_STR))
+   {
+     final DictionaryParseResult dictionary = parseDictionary(pdf, index);
+
+     // a dictionary followed by the stream keyword is a stream
+     final int after_dictionary = skipWhitespace(pdf, dictionary.next_index);
+     if (!ByteArrayUtils.compareByteArrays(pdf, after_dictionary, PDFNames.STREAM_STR))
+     {
+       return dictionary;
+     }
+     return parseStream(pdf, after_dictionary, dictionary);
+   }
+
+   if (ByteArrayUtils.compareByteArrays(pdf, index, PDFNames.NULL_STR))
+   {
+     return parseNull(pdf, index);
+   }
+
+   if (ByteArrayUtils.compareByteArrays(pdf, index, PDFNames.TRUE_STR) || ByteArrayUtils.compareByteArrays(pdf, index, PDFNames.FALSE_STR))
+   {
+     return parseBoolean(pdf, index);
+   }
+
+   final byte first_byte = pdf[index];
+
+   if (isNumeric(first_byte) || isSign(first_byte))
+   {
+     // An indirect reference starts with a number as well, so it has to be
+     // tried first.
+     if (!isIndirectObjectReference(pdf, index))
+     {
+       return parseNumberFromByteArray(pdf, index);
+     }
+     return parseIndirectObjectReference(pdf, index);
+   }
+
+   switch (first_byte)
+   {
+     case PDFNames.DELIMITER_STRING_OPEN:
+       return parseLiteralString(pdf, index);
+     case PDFNames.DELIMITER_HEXSTRING_OPEN:
+       return parseHexString(pdf, index);
+     case PDFNames.DELIMITER_ARRAY_OPEN:
+       return parseArray(pdf, index);
+     case PDFNames.DELIMITER_NAME:
+       return parseName(pdf, index);
+     default:
+       throw new RuntimeException("Unknown first_byte " + first_byte + "' when parsing an unknown object at index=" + index + ".");
+   }
+ }
+
+ /**
+  * Parses a literal string.
+  *
+  * <p>
+  * A literal string is a string of ASCII characters enclosed by '(' and ')'.
+  * Balanced pairs of '(' and ')' are allowed within the string. Unbalanced '('
+  * or ')' must be escaped as '\(' or '\)'.
+  * </p>
+  * <p>
+  * A backslash always escapes the byte that follows it. This is important for
+  * the escaped backslash: in a string that ends with '\\' the closing ')' is
+  * not escaped.
+  * </p>
+  *
+  * @param pdf
+  *          The PDF data.
+  * @param index
+  *          The index.
+  * @return Returns the result of the parsing operation.
+  */
+ public static LiteralStringParseResult parseLiteralString(final byte[] pdf,
+     final int index)
+ {
+   LiteralStringParseResult lspr = new LiteralStringParseResult();
+   lspr.start_index = index;
+
+   assert pdf[lspr.start_index] == PDFNames.DELIMITER_STRING_OPEN;
+
+   lspr.content_start_index = lspr.start_index + 1;
+
+   int cur_index = lspr.content_start_index;
+   int parenthesis_stack = 0;
+   for (;;)
+   {
+     final byte current = pdf[cur_index];
+
+     if (current == '\\')
+     {
+       // Skip the backslash together with the escaped byte so that neither
+       // an escaped parenthesis nor an escaped backslash is interpreted.
+       cur_index += 2;
+       continue;
+     }
+
+     if (current == PDFNames.DELIMITER_STRING_OPEN)
+     {
+       parenthesis_stack++;
+     }
+     else if (current == PDFNames.DELIMITER_STRING_CLOSE)
+     {
+       // the closing parenthesis of the string itself
+       if (parenthesis_stack == 0)
+       {
+         break;
+       }
+       parenthesis_stack--;
+     }
+
+     cur_index++;
+   }
+
+   lspr.content_end_index = cur_index;
+   assert pdf[lspr.content_end_index] == PDFNames.DELIMITER_STRING_CLOSE;
+
+   lspr.next_index = lspr.content_end_index + 1;
+
+   return lspr;
+ }
+
+ /**
+  * Tells whether a byte is a hexadecimal digit.
+  *
+  * @param data
+  *          The byte to classify.
+  * @return Returns true if the byte is in one of the ranges '0' to '9', 'a'
+  *         to 'f' or 'A' to 'F'.
+  */
+ protected static boolean isHex(final byte data)
+ {
+   // The upper case range ends at 'F'. An upper bound of 'f' would accept all
+   // characters between 'G' and the lower case letters as well.
+   return isNumeric(data) || ('a' <= data && data <= 'f') || ('A' <= data && data <= 'F');
+ }
+
+ /**
+  * Parses a hexadecimal string.
+  *
+  * <p>
+  * The content between the opening and the closing delimiter may consist of
+  * hexadecimal digits and whitespace. The delimiters are checked by
+  * assertions only.
+  * </p>
+  *
+  * @param pdf
+  *          The PDF data.
+  * @param index
+  *          The index of the opening delimiter.
+  * @return Returns the result of the parsing operation.
+  */
+ public static HexStringParseResult parseHexString(final byte[] pdf,
+     final int index)
+ {
+   final HexStringParseResult hspr = new HexStringParseResult();
+   hspr.start_index = index;
+
+   assert pdf[index] == PDFNames.DELIMITER_HEXSTRING_OPEN;
+
+   hspr.content_start_index = index + 1;
+
+   // advance to the first byte that is neither a hex digit nor whitespace
+   int cur_index = hspr.content_start_index;
+   for (;;)
+   {
+     final byte current = pdf[cur_index];
+     if (!isHex(current) && !isWhitespace(current))
+     {
+       break;
+     }
+     cur_index++;
+   }
+
+   hspr.content_end_index = cur_index;
+   assert pdf[hspr.content_end_index] == PDFNames.DELIMITER_HEXSTRING_CLOSE;
+
+   hspr.next_index = cur_index + 1;
+
+   return hspr;
+ }
+
+ /**
+  * Parses an array object.
+  *
+  * <p>
+  * The elements between the array delimiters are parsed one after another
+  * with parseUnknownObject and collected in the result's element list.
+  * Whitespace between the elements is skipped.
+  * </p>
+  *
+  * @param pdf
+  *          The PDF data.
+  * @param index
+  *          The index of the opening delimiter of the array.
+  * @return Returns the result of the parsing operation.
+  */
+ public static ArrayParseResult parseArray(final byte[] pdf, final int index)
+ {
+   final ArrayParseResult result = new ArrayParseResult();
+   result.start_index = index;
+   assert pdf[index] == PDFNames.DELIMITER_ARRAY_OPEN;
+
+   result.content_start_index = index + 1;
+   result.elements = new ArrayList();
+
+   int pos = skipWhitespace(pdf, result.content_start_index);
+   while (pdf[pos] != PDFNames.DELIMITER_ARRAY_CLOSE)
+   {
+     final ParseResult element = parseUnknownObject(pdf, pos);
+     result.elements.add(element);
+
+     pos = skipWhitespace(pdf, element.next_index);
+   }
+
+   // The loop only ends when pos rests on the closing delimiter, so no
+   // further check of pdf[pos] is needed here.
+   result.content_end_index = pos;
+   result.next_index = pos + 1;
+   return result;
+ }
+
+ /**
+  * Parses a PDF name object.
+  *
+  * <p>
+  * The name starts directly after the name delimiter and extends over all
+  * following bytes for which isRegular holds.
+  * </p>
+  *
+  * @param pdf
+  *          The PDF data.
+  * @param index
+  *          The index of the name delimiter.
+  * @return Returns the result of this parsing operation.
+  */
+ public static NameParseResult parseName(final byte[] pdf, final int index)
+ {
+   final NameParseResult result = new NameParseResult();
+   result.start_index = index;
+
+   assert pdf[index] == PDFNames.DELIMITER_NAME;
+
+   final int name_start = index + 1;
+   result.name_start_index = name_start;
+
+   // A name is expected to consist of at least one regular character.
+   assert isRegular(pdf[name_start]);
+
+   int pos;
+   for (pos = name_start; isRegular(pdf[pos]); pos++)
+   {
+     // nothing to do - just advance to the first non-regular byte
+   }
+   assert !isRegular(pdf[pos]);
+
+   result.next_index = pos;
+   return result;
+ }
+
+ /**
+  * Parses a dictionary object.
+  *
+  * <p>
+  * Between the dictionary start and end markers, name/value pairs are read
+  * alternately: the key with parseName, the value with parseUnknownObject.
+  * Keys and values are stored in two parallel lists of the result.
+  * </p>
+  *
+  * @param pdf
+  *          The PDF data.
+  * @param index
+  *          The index of the dictionary start marker.
+  * @return Returns the result of the parsing operation.
+  */
+ public static DictionaryParseResult parseDictionary(final byte[] pdf,
+     final int index)
+ {
+   final DictionaryParseResult result = new DictionaryParseResult();
+   result.start_index = index;
+
+   assert ByteArrayUtils.compareByteArrays(pdf, index, PDFNames.DICT_START_STR);
+
+   result.content_start_index = index + PDFNames.DICT_START_STR.length;
+
+   result.names = new ArrayList();
+   result.values = new ArrayList();
+
+   int pos = skipWhitespace(pdf, result.content_start_index);
+   while (!ByteArrayUtils.compareByteArrays(pdf, pos, PDFNames.DICT_END_STR))
+   {
+     final NameParseResult key = parseName(pdf, pos);
+     result.names.add(key);
+
+     final int value_index = skipWhitespace(pdf, key.next_index);
+     final ParseResult value = parseUnknownObject(pdf, value_index);
+     result.values.add(value);
+
+     pos = skipWhitespace(pdf, value.next_index);
+   }
+
+   result.content_end_index = pos;
+   assert ByteArrayUtils.compareByteArrays(pdf, result.content_end_index, PDFNames.DICT_END_STR);
+   result.next_index = pos + PDFNames.DICT_END_STR.length;
+
+   return result;
+ }
+
+ /**
+  * Parses a stream.
+  *
+  * <p>
+  * The number of content bytes is taken from the length entry
+  * (PDFNames.LENGTH_STR) of the stream's dictionary. If that entry is an
+  * indirect object reference the length is not known yet; in this case the
+  * returned result has content_end_index and next_index set to -1 and the
+  * stream has to be parsed again later.
+  * </p>
+  *
+  * @param pdf
+  *          The PDF data.
+  * @param index
+  *          The index of the stream keyword.
+  * @param dpr
+  *          The DictionaryParseResult of the stream's dictionary. This
+  *          dictionary must directly precede the stream keyword (separated
+  *          by whitespace only) and provides the stream's length entry.
+  * @return Returns the result of this parsing operation.
+  */
+ public static StreamParseResult parseStream(final byte[] pdf,
+     final int index, final DictionaryParseResult dpr)
+ {
+   StreamParseResult spr = new StreamParseResult();
+   spr.stream_dictionary = dpr;
+   spr.start_index = spr.stream_dictionary.start_index;
+   spr.stream_start_index = index;
+   assert ByteArrayUtils.compareByteArrays(pdf, index, PDFNames.STREAM_STR);
+
+   // assert that the provided dictionary really belongs to this stream
+   assert spr.stream_start_index == skipWhitespace(pdf, spr.stream_dictionary.next_index);
+
+   // see PDF Spec 1.4 chapter 3.2.7
+   assert pdf[spr.stream_start_index + PDFNames.STREAM_STR.length] == PDFNames.WHITESPACE_LF || (pdf[spr.stream_start_index + PDFNames.STREAM_STR.length] == PDFNames.WHITESPACE_CR && pdf[spr.stream_start_index + PDFNames.STREAM_STR.length + 1] == PDFNames.WHITESPACE_LF);
+   spr.content_start_index = skipNewline(pdf, spr.stream_start_index + PDFNames.STREAM_STR.length);
+
+   int length = -1;
+   for (int i = 0; i < spr.stream_dictionary.names.size(); i++)
+   {
+     NameParseResult name = (NameParseResult) spr.stream_dictionary.names.get(i);
+
+     // compareByteArrays only compares the bytes starting at the given offset,
+     // so on its own it would also accept longer keys that merely start with
+     // the length key (e.g. /Length1 of embedded font programs). Therefore the
+     // key's byte count has to match as well.
+     boolean is_length_key = (name.next_index - name.name_start_index) == PDFNames.LENGTH_STR.length
+         && ByteArrayUtils.compareByteArrays(pdf, name.name_start_index, PDFNames.LENGTH_STR);
+     if (!is_length_key)
+     {
+       continue;
+     }
+
+     ParseResult pr = (ParseResult) spr.stream_dictionary.values.get(i);
+     if (pr instanceof IndirectObjectReferenceParseResult)
+     {
+       log.debug("An object stream with indirect length - cannot parse this instantly - parse later again.");
+       spr.content_end_index = -1;
+       spr.next_index = -1;
+       return spr;
+     }
+
+     NumberParseResult npr = (NumberParseResult) pr;
+     assert npr != null;
+
+     length = npr.number;
+     break;
+   }
+   assert length >= 0;
+
+   spr.content_end_index = spr.content_start_index + length;
+
+   // The endstream keyword may be separated from the content by a newline.
+   int endstr_index = spr.content_end_index;
+   if (isNewline(pdf, endstr_index))
+   {
+     endstr_index = skipWhitespace(pdf, endstr_index);
+   }
+   assert ByteArrayUtils.compareByteArrays(pdf, endstr_index, PDFNames.ENDSTREAM_STR);
+
+   spr.next_index = endstr_index + PDFNames.ENDSTREAM_STR.length;
+
+   return spr;
+ }
+
+ /**
+  * Parses the null object.
+  *
+  * @param pdf
+  *          The PDF data.
+  * @param index
+  *          The index of the first byte of the null keyword.
+  * @return Returns the result of the parsing operation. Its next_index is the
+  *         index directly after the keyword.
+  */
+ public static NullParseResult parseNull(final byte[] pdf, final int index)
+ {
+   final NullParseResult result = new NullParseResult();
+   result.start_index = index;
+
+   assert ByteArrayUtils.compareByteArrays(pdf, index, PDFNames.NULL_STR);
+
+   final int keyword_length = PDFNames.NULL_STR.length;
+   result.next_index = index + keyword_length;
+
+   return result;
+ }
+
+ public static int getObjectOffsetFromXRefByIndirectObjectReference(
+ XRefSectionParseResult xpr, IndirectObjectReference ior)
+ {
+ Iterator it = xpr.xref_subsections.iterator();
+ while (it.hasNext())
+ {
+ XRefSubSectionParseResult section = (XRefSubSectionParseResult) it.next();
+
+ for (int i = 0; i < section.xref_lines.size(); i++)
+ {
+ if (section.start_obj_number + i == ior.object_number)
+ {
+ XRefLineParseResult lpr = (XRefLineParseResult) section.xref_lines.get(i);
+ return lpr.object_offset;
+ }
+ }
+ }
+
+ return -1;
+ }
+
+ /**
+  * Parses the PDF header.
+  *
+  * <p>
+  * The header is expected to be a comment consisting of the version string
+  * followed by the major version, the version separator and the minor
+  * version. After whitespace a second comment (the binary characters) is
+  * expected. The result's next_index is determined by skipToNewline starting
+  * at that second comment.
+  * </p>
+  *
+  * @param pdf
+  *          The PDF data.
+  * @param index
+  *          The index of the comment character that starts the header.
+  * @return Returns the result of the parsing operation.
+  */
+ public static HeaderParseResult parseHeader(final byte[] pdf, final int index)
+ {
+   final HeaderParseResult header = new HeaderParseResult();
+   header.start_index = index;
+
+   assert pdf[index] == PDFNames.COMMENT;
+
+   final int version_str_index = index + 1;
+   assert ByteArrayUtils.compareByteArrays(pdf, version_str_index, PDFNames.PDF_VERSION_STR);
+
+   // major version
+   header.major_index = version_str_index + PDFNames.PDF_VERSION_STR.length;
+   final IntegerParseResult major = parseUnsignedInteger(pdf, header.major_index);
+   header.major = major.number;
+   assert header.major >= 1;
+
+   final int separator_index = major.next_index;
+   assert pdf[separator_index] == PDFNames.PDF_VERSION_SEPARATOR;
+
+   // minor version
+   header.minor_index = separator_index + 1;
+   final IntegerParseResult minor = parseUnsignedInteger(pdf, header.minor_index);
+   header.minor = minor.number;
+   assert header.minor >= 0;
+
+   // binary characters comment
+   final int after_version_index = minor.next_index;
+   assert isWhitespace(pdf[after_version_index]);
+   header.binary_characters_index = skipWhitespace(pdf, after_version_index);
+
+   assert pdf[header.binary_characters_index] == PDFNames.COMMENT;
+
+   header.next_index = skipToNewline(pdf, header.binary_characters_index);
+   return header;
+ }
+
+ /**
+ * Parses a PDF footer.
+ *
+ * <p>
+ * A PDF footer starts with the xref, followed by the trailer, the startxref
+ * and the EOF marker.
+ * </p>
+ *
+ * @param pdf
+ * The PDF data.
+ * @param index
+ * The index.
+ * @return Returns the result of the parsing operation.
+ *
+ * @see FooterParseResult
+ */
+ public static FooterParseResult parseFooter(final byte[] pdf, final int index)
+ {
+ FooterParseResult fpr = new FooterParseResult();
+ fpr.start_index = index;
+
+ fpr.xpr = PDFUtils.parseXRefSection(pdf, fpr.start_index);
+
+ fpr.tpr = PDFUtils.parseTrailer(pdf, fpr.xpr.next_index);
+
+ fpr.sxpr = PDFUtils.parseStartXRef(pdf, fpr.tpr.next_index);
+
+ fpr.eofpr = PDFUtils.parseEOF(pdf, fpr.sxpr.next_index);
+
+ fpr.next_index = fpr.eofpr.next_index;
+ return fpr;
+ }
+
+}
diff --git a/pdf-as-lib/src/main/java/at/knowcenter/wag/exactparser/parsing/results/ArrayParseResult.java b/pdf-as-lib/src/main/java/at/knowcenter/wag/exactparser/parsing/results/ArrayParseResult.java
new file mode 100644
index 0000000..9d0a745
--- /dev/null
+++ b/pdf-as-lib/src/main/java/at/knowcenter/wag/exactparser/parsing/results/ArrayParseResult.java
@@ -0,0 +1,42 @@
+/**
+ * <copyright> Copyright 2006 by Know-Center, Graz, Austria </copyright>
+ * PDF-AS has been contracted by the E-Government Innovation Center EGIZ, a
+ * joint initiative of the Federal Chancellery Austria and Graz University of
+ * Technology.
+ *
+ * Licensed under the EUPL, Version 1.1 or - as soon they will be approved by
+ * the European Commission - subsequent versions of the EUPL (the "Licence");
+ * You may not use this work except in compliance with the Licence.
+ * You may obtain a copy of the Licence at:
+ * http://www.osor.eu/eupl/
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the Licence is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the Licence for the specific language governing permissions and
+ * limitations under the Licence.
+ *
+ * This product combines work with different licenses. See the "NOTICE" text
+ * file for details on the various modules and licenses.
+ * The "NOTICE" text file is part of the distribution. Any derivative works
+ * that you distribute must include a readable copy of the "NOTICE" text file.
+ *
+ * $Id: ArrayParseResult.java,v 1.1 2006/08/25 17:00:59 wprinz Exp $
+ */
+package at.knowcenter.wag.exactparser.parsing.results;
+
+import java.util.List;
+
+/**
+ * The result of parsing an array.
+ *
+ * @author wprinz
+ */
+public class ArrayParseResult extends ContainerParseResult {
+
+ /**
+ * The parse results of the array's elements in the order in which they were
+ * parsed. This is null until a parser assigns a list.
+ */
+ public List elements = null;
+
+
+}
diff --git a/pdf-as-lib/src/main/java/at/knowcenter/wag/exactparser/parsing/results/BooleanParseResult.java b/pdf-as-lib/src/main/java/at/knowcenter/wag/exactparser/parsing/results/BooleanParseResult.java
new file mode 100644
index 0000000..e0bc276
--- /dev/null
+++ b/pdf-as-lib/src/main/java/at/knowcenter/wag/exactparser/parsing/results/BooleanParseResult.java
@@ -0,0 +1,38 @@
+/**
+ * <copyright> Copyright 2006 by Know-Center, Graz, Austria </copyright>
+ * PDF-AS has been contracted by the E-Government Innovation Center EGIZ, a
+ * joint initiative of the Federal Chancellery Austria and Graz University of
+ * Technology.
+ *
+ * Licensed under the EUPL, Version 1.1 or - as soon they will be approved by
+ * the European Commission - subsequent versions of the EUPL (the "Licence");
+ * You may not use this work except in compliance with the Licence.
+ * You may obtain a copy of the Licence at:
+ * http://www.osor.eu/eupl/
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the Licence is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the Licence for the specific language governing permissions and
+ * limitations under the Licence.
+ *
+ * This product combines work with different licenses. See the "NOTICE" text
+ * file for details on the various modules and licenses.
+ * The "NOTICE" text file is part of the distribution. Any derivative works
+ * that you distribute must include a readable copy of the "NOTICE" text file.
+ *
+ * $Id: BooleanParseResult.java,v 1.1 2006/08/25 17:00:59 wprinz Exp $
+ */
+package at.knowcenter.wag.exactparser.parsing.results;
+
+/**
+ * The result of parsing a boolean value.
+ *
+ * @author wprinz
+ */
+public class BooleanParseResult extends ParseResult
+{
+
+ /**
+ * The parsed boolean value. Defaults to false.
+ */
+ public boolean value = false;
+
+}
diff --git a/pdf-as-lib/src/main/java/at/knowcenter/wag/exactparser/parsing/results/ContainerParseResult.java b/pdf-as-lib/src/main/java/at/knowcenter/wag/exactparser/parsing/results/ContainerParseResult.java
new file mode 100644
index 0000000..1974ade
--- /dev/null
+++ b/pdf-as-lib/src/main/java/at/knowcenter/wag/exactparser/parsing/results/ContainerParseResult.java
@@ -0,0 +1,45 @@
+/**
+ * <copyright> Copyright 2006 by Know-Center, Graz, Austria </copyright>
+ * PDF-AS has been contracted by the E-Government Innovation Center EGIZ, a
+ * joint initiative of the Federal Chancellery Austria and Graz University of
+ * Technology.
+ *
+ * Licensed under the EUPL, Version 1.1 or - as soon they will be approved by
+ * the European Commission - subsequent versions of the EUPL (the "Licence");
+ * You may not use this work except in compliance with the Licence.
+ * You may obtain a copy of the Licence at:
+ * http://www.osor.eu/eupl/
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the Licence is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the Licence for the specific language governing permissions and
+ * limitations under the Licence.
+ *
+ * This product combines work with different licenses. See the "NOTICE" text
+ * file for details on the various modules and licenses.
+ * The "NOTICE" text file is part of the distribution. Any derivative works
+ * that you distribute must include a readable copy of the "NOTICE" text file.
+ *
+ * $Id: ContainerParseResult.java,v 1.1 2006/08/25 17:00:59 wprinz Exp $
+ */
+package at.knowcenter.wag.exactparser.parsing.results;
+
+/**
+ * Base class of container parse results.
+ *
+ * <p>
+ * Containers are types that include some content.
+ * E.g. literal strings include string data as content,
+ * arrays include elements as content etc.
+ * </p>
+ *
+ * @author wprinz
+ */
+public class ContainerParseResult extends ParseResult {
+
+ /**
+ * The index of the first content byte, i.e. the byte directly after the
+ * opening delimiter. -1 means that the index has not been set.
+ */
+ public int content_start_index = -1;
+ /**
+ * The index directly after the content. For strings, arrays and dictionaries
+ * this is the index where the closing delimiter starts. -1 means that the
+ * index has not been set.
+ */
+ public int content_end_index = -1;
+
+
+}
diff --git a/pdf-as-lib/src/main/java/at/knowcenter/wag/exactparser/parsing/results/DictionaryParseResult.java b/pdf-as-lib/src/main/java/at/knowcenter/wag/exactparser/parsing/results/DictionaryParseResult.java
new file mode 100644
index 0000000..47101e0
--- /dev/null
+++ b/pdf-as-lib/src/main/java/at/knowcenter/wag/exactparser/parsing/results/DictionaryParseResult.java
@@ -0,0 +1,41 @@
+/**
+ * <copyright> Copyright 2006 by Know-Center, Graz, Austria </copyright>
+ * PDF-AS has been contracted by the E-Government Innovation Center EGIZ, a
+ * joint initiative of the Federal Chancellery Austria and Graz University of
+ * Technology.
+ *
+ * Licensed under the EUPL, Version 1.1 or - as soon they will be approved by
+ * the European Commission - subsequent versions of the EUPL (the "Licence");
+ * You may not use this work except in compliance with the Licence.
+ * You may obtain a copy of the Licence at:
+ * http://www.osor.eu/eupl/
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the Licence is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the Licence for the specific language governing permissions and
+ * limitations under the Licence.
+ *
+ * This product combines work with different licenses. See the "NOTICE" text
+ * file for details on the various modules and licenses.
+ * The "NOTICE" text file is part of the distribution. Any derivative works
+ * that you distribute must include a readable copy of the "NOTICE" text file.
+ *
+ * $Id: DictionaryParseResult.java,v 1.1 2006/08/25 17:00:59 wprinz Exp $
+ */
+package at.knowcenter.wag.exactparser.parsing.results;
+
+import java.util.List;
+
+/**
+ * The result of parsing a dictionary.
+ *
+ * <p>
+ * Keys and values are kept in two parallel lists: the value at a given
+ * position of values belongs to the key at the same position of names.
+ * </p>
+ *
+ * @author wprinz
+ */
+public class DictionaryParseResult extends ContainerParseResult
+{
+
+ /**
+ * The keys of the dictionary as NameParseResult objects in the order in which
+ * they were parsed. This is null until a parser assigns a list.
+ */
+ public List names = null;
+
+ /**
+ * The values of the dictionary as ParseResult objects in the order in which
+ * they were parsed. This is null until a parser assigns a list.
+ */
+ public List values = null;
+}
diff --git a/pdf-as-lib/src/main/java/at/knowcenter/wag/exactparser/parsing/results/EOFParseResult.java b/pdf-as-lib/src/main/java/at/knowcenter/wag/exactparser/parsing/results/EOFParseResult.java
new file mode 100644
index 0000000..dea1d22
--- /dev/null
+++ b/pdf-as-lib/src/main/java/at/knowcenter/wag/exactparser/parsing/results/EOFParseResult.java
@@ -0,0 +1,47 @@
+/**
+ * <copyright> Copyright 2006 by Know-Center, Graz, Austria </copyright>
+ * PDF-AS has been contracted by the E-Government Innovation Center EGIZ, a
+ * joint initiative of the Federal Chancellery Austria and Graz University of
+ * Technology.
+ *
+ * Licensed under the EUPL, Version 1.1 or - as soon they will be approved by
+ * the European Commission - subsequent versions of the EUPL (the "Licence");
+ * You may not use this work except in compliance with the Licence.
+ * You may obtain a copy of the Licence at:
+ * http://www.osor.eu/eupl/
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the Licence is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the Licence for the specific language governing permissions and
+ * limitations under the Licence.
+ *
+ * This product combines work with different licenses. See the "NOTICE" text
+ * file for details on the various modules and licenses.
+ * The "NOTICE" text file is part of the distribution. Any derivative works
+ * that you distribute must include a readable copy of the "NOTICE" text file.
+ *
+ * $Id: EOFParseResult.java,v 1.1 2006/08/25 17:00:59 wprinz Exp $
+ */
+package at.knowcenter.wag.exactparser.parsing.results;
+
+/**
+ * The result of parsing the End Of File marker.
+ *
+ * @author wprinz
+ */
+public class EOFParseResult extends ParseResult
+{
+
+ /**
+ * The index of the byte after the EOF marker.
+ *
+ * <p>
+ * A newline is not required after the EOF marker, but if one is present it is
+ * considered to be part of the marker.
+ * So eof_end_index marks this newline.
+ * If eof_end_index == next_index, then no newline is present.
+ * </p>
+ */
+ public int eof_end_index = -1;
+}
diff --git a/pdf-as-lib/src/main/java/at/knowcenter/wag/exactparser/parsing/results/FooterParseResult.java b/pdf-as-lib/src/main/java/at/knowcenter/wag/exactparser/parsing/results/FooterParseResult.java
new file mode 100644
index 0000000..2a52aa6
--- /dev/null
+++ b/pdf-as-lib/src/main/java/at/knowcenter/wag/exactparser/parsing/results/FooterParseResult.java
@@ -0,0 +1,53 @@
+/**
+ * <copyright> Copyright 2006 by Know-Center, Graz, Austria </copyright>
+ * PDF-AS has been contracted by the E-Government Innovation Center EGIZ, a
+ * joint initiative of the Federal Chancellery Austria and Graz University of
+ * Technology.
+ *
+ * Licensed under the EUPL, Version 1.1 or - as soon they will be approved by
+ * the European Commission - subsequent versions of the EUPL (the "Licence");
+ * You may not use this work except in compliance with the Licence.
+ * You may obtain a copy of the Licence at:
+ * http://www.osor.eu/eupl/
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the Licence is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the Licence for the specific language governing permissions and
+ * limitations under the Licence.
+ *
+ * This product combines work with different licenses. See the "NOTICE" text
+ * file for details on the various modules and licenses.
+ * The "NOTICE" text file is part of the distribution. Any derivative works
+ * that you distribute must include a readable copy of the "NOTICE" text file.
+ *
+ * $Id: FooterParseResult.java,v 1.1 2006/08/25 17:00:59 wprinz Exp $
+ */
+package at.knowcenter.wag.exactparser.parsing.results;
+
+
+/**
+ * The result of parsing a PDF footer block.
+ *
+ * <p>
+ * A PDF footer block starts with the xref table followed by the trailer, the
+ * startxref and finally the EOF marker. Usually the footer should be at the end
+ * of the file. All object offsets in the footer's xref table should be before
+ * the footer itself. Nevertheless, there are PDF Writers (e.g. Microsoft Word)
+ * that put the footer at the beginning of the document so that all indirect
+ * objects are after the EOF marker.
+ * </p>
+ *
+ * @author wprinz
+ */
+public class FooterParseResult extends ParseResult
+{
+
+ /**
+ * The result of parsing the startxref, which follows the trailer.
+ */
+ public StartXRefParseResult sxpr = null;
+
+ /**
+ * The result of parsing the EOF marker, which follows the startxref.
+ */
+ public EOFParseResult eofpr = null;
+
+ /**
+ * The result of parsing the xref section, which is the first part of the footer.
+ */
+ public XRefSectionParseResult xpr = null;
+
+ /**
+ * The result of parsing the trailer, which follows the xref section.
+ */
+ public TrailerParseResult tpr = null;
+}
diff --git a/pdf-as-lib/src/main/java/at/knowcenter/wag/exactparser/parsing/results/HeaderParseResult.java b/pdf-as-lib/src/main/java/at/knowcenter/wag/exactparser/parsing/results/HeaderParseResult.java
new file mode 100644
index 0000000..3befda3
--- /dev/null
+++ b/pdf-as-lib/src/main/java/at/knowcenter/wag/exactparser/parsing/results/HeaderParseResult.java
@@ -0,0 +1,48 @@
+/**
+ * <copyright> Copyright 2006 by Know-Center, Graz, Austria </copyright>
+ * PDF-AS has been contracted by the E-Government Innovation Center EGIZ, a
+ * joint initiative of the Federal Chancellery Austria and Graz University of
+ * Technology.
+ *
+ * Licensed under the EUPL, Version 1.1 or - as soon they will be approved by
+ * the European Commission - subsequent versions of the EUPL (the "Licence");
+ * You may not use this work except in compliance with the Licence.
+ * You may obtain a copy of the Licence at:
+ * http://www.osor.eu/eupl/
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the Licence is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the Licence for the specific language governing permissions and
+ * limitations under the Licence.
+ *
+ * This product combines work with different licenses. See the "NOTICE" text
+ * file for details on the various modules and licenses.
+ * The "NOTICE" text file is part of the distribution. Any derivative works
+ * that you distribute must include a readable copy of the "NOTICE" text file.
+ *
+ * $Id: HeaderParseResult.java,v 1.1 2006/08/25 17:00:59 wprinz Exp $
+ */
+package at.knowcenter.wag.exactparser.parsing.results;
+
+/**
+ * The result of parsing the PDF header.
+ *
+ * <p>
+ * The header contains the PDF version and is usually followed by some binary
+ * characters.
+ * </p>
+ *
+ * @author wprinz
+ */
+public class HeaderParseResult extends ParseResult
+{
+ // The index where the major version number starts; -1 if not set.
+ public int major_index = -1;
+ // The index where the minor version number starts (directly after the
+ // version separator); -1 if not set.
+ public int minor_index = -1;
+
+ // The parsed major version number; -1 if not set.
+ public int major = -1;
+ // The parsed minor version number; -1 if not set.
+ public int minor = -1;
+
+ // The index of the first non-whitespace byte after the minor version, where
+ // the comment with the binary characters is expected; -1 if not set.
+ public int binary_characters_index = -1;
+
+}
diff --git a/pdf-as-lib/src/main/java/at/knowcenter/wag/exactparser/parsing/results/HexStringParseResult.java b/pdf-as-lib/src/main/java/at/knowcenter/wag/exactparser/parsing/results/HexStringParseResult.java
new file mode 100644
index 0000000..27dbf70
--- /dev/null
+++ b/pdf-as-lib/src/main/java/at/knowcenter/wag/exactparser/parsing/results/HexStringParseResult.java
@@ -0,0 +1,36 @@
+/**
+ * <copyright> Copyright 2006 by Know-Center, Graz, Austria </copyright>
+ * PDF-AS has been contracted by the E-Government Innovation Center EGIZ, a
+ * joint initiative of the Federal Chancellery Austria and Graz University of
+ * Technology.
+ *
+ * Licensed under the EUPL, Version 1.1 or - as soon they will be approved by
+ * the European Commission - subsequent versions of the EUPL (the "Licence");
+ * You may not use this work except in compliance with the Licence.
+ * You may obtain a copy of the Licence at:
+ * http://www.osor.eu/eupl/
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the Licence is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the Licence for the specific language governing permissions and
+ * limitations under the Licence.
+ *
+ * This product combines work with different licenses. See the "NOTICE" text
+ * file for details on the various modules and licenses.
+ * The "NOTICE" text file is part of the distribution. Any derivative works
+ * that you distribute must include a readable copy of the "NOTICE" text file.
+ *
+ * $Id: HexStringParseResult.java,v 1.1 2006/08/25 17:00:59 wprinz Exp $
+ */
+package at.knowcenter.wag.exactparser.parsing.results;
+
+/**
+ * The result of parsing a hex string.
+ *
+ * <p>
+ * The inherited content_start_index is the index directly after the opening
+ * delimiter, content_end_index is the index of the closing delimiter.
+ * </p>
+ *
+ * @see at.knowcenter.wag.exactparser.parsing.results.LiteralStringParseResult
+ *
+ * @author wprinz
+ */
+public class HexStringParseResult extends ContainerParseResult {
+}
diff --git a/pdf-as-lib/src/main/java/at/knowcenter/wag/exactparser/parsing/results/IndirectObjectReferenceParseResult.java b/pdf-as-lib/src/main/java/at/knowcenter/wag/exactparser/parsing/results/IndirectObjectReferenceParseResult.java
new file mode 100644
index 0000000..797678e
--- /dev/null
+++ b/pdf-as-lib/src/main/java/at/knowcenter/wag/exactparser/parsing/results/IndirectObjectReferenceParseResult.java
@@ -0,0 +1,44 @@
+/**
+ * <copyright> Copyright 2006 by Know-Center, Graz, Austria </copyright>
+ * PDF-AS has been contracted by the E-Government Innovation Center EGIZ, a
+ * joint initiative of the Federal Chancellery Austria and Graz University of
+ * Technology.
+ *
+ * Licensed under the EUPL, Version 1.1 or - as soon they will be approved by
+ * the European Commission - subsequent versions of the EUPL (the "Licence");
+ * You may not use this work except in compliance with the Licence.
+ * You may obtain a copy of the Licence at:
+ * http://www.osor.eu/eupl/
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the Licence is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the Licence for the specific language governing permissions and
+ * limitations under the Licence.
+ *
+ * This product combines work with different licenses. See the "NOTICE" text
+ * file for details on the various modules and licenses.
+ * The "NOTICE" text file is part of the distribution. Any derivative works
+ * that you distribute must include a readable copy of the "NOTICE" text file.
+ *
+ * $Id: IndirectObjectReferenceParseResult.java,v 1.1 2006/08/25 17:00:59 wprinz Exp $
+ */
+package at.knowcenter.wag.exactparser.parsing.results;
+
+import at.knowcenter.wag.exactparser.parsing.IndirectObjectReference;
+
+/**
+ * The ParseResult of parsing an indirect object reference.
+ *
+ * @author wprinz
+ */
+public class IndirectObjectReferenceParseResult extends ParseResult {
+
+ /**
+ * The parsed indirect object reference. It is not initialized here, so it is
+ * null until it is assigned.
+ */
+ public IndirectObjectReference ior;
+
+ /**
+ * Returns the string representation of the reference followed by " R".
+ *
+ * <p>
+ * Note that this throws a NullPointerException if ior has not been assigned.
+ * </p>
+ */
+ //@Override
+ public String toString()
+ {
+ return ior.toString() + " R";
+ }
+}
diff --git a/pdf-as-lib/src/main/java/at/knowcenter/wag/exactparser/parsing/results/IntegerParseResult.java b/pdf-as-lib/src/main/java/at/knowcenter/wag/exactparser/parsing/results/IntegerParseResult.java
new file mode 100644
index 0000000..48ea7d2
--- /dev/null
+++ b/pdf-as-lib/src/main/java/at/knowcenter/wag/exactparser/parsing/results/IntegerParseResult.java
@@ -0,0 +1,36 @@
+/**
+ * <copyright> Copyright 2006 by Know-Center, Graz, Austria </copyright>
+ * PDF-AS has been contracted by the E-Government Innovation Center EGIZ, a
+ * joint initiative of the Federal Chancellery Austria and Graz University of
+ * Technology.
+ *
+ * Licensed under the EUPL, Version 1.1 or - as soon they will be approved by
+ * the European Commission - subsequent versions of the EUPL (the "Licence");
+ * You may not use this work except in compliance with the Licence.
+ * You may obtain a copy of the Licence at:
+ * http://www.osor.eu/eupl/
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the Licence is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the Licence for the specific language governing permissions and
+ * limitations under the Licence.
+ *
+ * This product combines work with different licenses. See the "NOTICE" text
+ * file for details on the various modules and licenses.
+ * The "NOTICE" text file is part of the distribution. Any derivative works
+ * that you distribute must include a readable copy of the "NOTICE" text file.
+ *
+ * $Id: IntegerParseResult.java,v 1.1 2006/08/25 17:00:59 wprinz Exp $
+ */
+package at.knowcenter.wag.exactparser.parsing.results;
+
+/**
+ * The result of parsing an integer.
+ *
+ * @author wprinz
+ */
+public class IntegerParseResult extends ParseResult
+{
+
+ /**
+ * The parsed integer value.
+ */
+ public int number;
+
+}
diff --git a/pdf-as-lib/src/main/java/at/knowcenter/wag/exactparser/parsing/results/LiteralStringParseResult.java b/pdf-as-lib/src/main/java/at/knowcenter/wag/exactparser/parsing/results/LiteralStringParseResult.java
new file mode 100644
index 0000000..60fc277
--- /dev/null
+++ b/pdf-as-lib/src/main/java/at/knowcenter/wag/exactparser/parsing/results/LiteralStringParseResult.java
@@ -0,0 +1,37 @@
+/**
+ * <copyright> Copyright 2006 by Know-Center, Graz, Austria </copyright>
+ * PDF-AS has been contracted by the E-Government Innovation Center EGIZ, a
+ * joint initiative of the Federal Chancellery Austria and Graz University of
+ * Technology.
+ *
+ * Licensed under the EUPL, Version 1.1 or - as soon they will be approved by
+ * the European Commission - subsequent versions of the EUPL (the "Licence");
+ * You may not use this work except in compliance with the Licence.
+ * You may obtain a copy of the Licence at:
+ * http://www.osor.eu/eupl/
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the Licence is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the Licence for the specific language governing permissions and
+ * limitations under the Licence.
+ *
+ * This product combines work with different licenses. See the "NOTICE" text
+ * file for details on the various modules and licenses.
+ * The "NOTICE" text file is part of the distribution. Any derivative works
+ * that you distribute must include a readable copy of the "NOTICE" text file.
+ *
+ * $Id: LiteralStringParseResult.java,v 1.1 2006/08/25 17:00:59 wprinz Exp $
+ */
+package at.knowcenter.wag.exactparser.parsing.results;
+
+/**
+ * The result of parsing a literal string (a string enclosed by parentheses).
+ *
+ * <p>
+ * The inherited content_start_index is the index directly after the opening
+ * parenthesis, content_end_index is the index of the closing parenthesis.
+ * </p>
+ *
+ * @see at.knowcenter.wag.exactparser.parsing.results.HexStringParseResult
+ *
+ * @author wprinz
+ */
+public class LiteralStringParseResult extends ContainerParseResult {
+
+}
diff --git a/pdf-as-lib/src/main/java/at/knowcenter/wag/exactparser/parsing/results/NameParseResult.java b/pdf-as-lib/src/main/java/at/knowcenter/wag/exactparser/parsing/results/NameParseResult.java
new file mode 100644
index 0000000..e564285
--- /dev/null
+++ b/pdf-as-lib/src/main/java/at/knowcenter/wag/exactparser/parsing/results/NameParseResult.java
@@ -0,0 +1,35 @@
+/**
+ * <copyright> Copyright 2006 by Know-Center, Graz, Austria </copyright>
+ * PDF-AS has been contracted by the E-Government Innovation Center EGIZ, a
+ * joint initiative of the Federal Chancellery Austria and Graz University of
+ * Technology.
+ *
+ * Licensed under the EUPL, Version 1.1 or - as soon they will be approved by
+ * the European Commission - subsequent versions of the EUPL (the "Licence");
+ * You may not use this work except in compliance with the Licence.
+ * You may obtain a copy of the Licence at:
+ * http://www.osor.eu/eupl/
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the Licence is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the Licence for the specific language governing permissions and
+ * limitations under the Licence.
+ *
+ * This product combines work with different licenses. See the "NOTICE" text
+ * file for details on the various modules and licenses.
+ * The "NOTICE" text file is part of the distribution. Any derivative works
+ * that you distribute must include a readable copy of the "NOTICE" text file.
+ *
+ * $Id: NameParseResult.java,v 1.1 2006/08/25 17:00:59 wprinz Exp $
+ */
+package at.knowcenter.wag.exactparser.parsing.results;
+
+/**
+ * The result of parsing a PDF name.
+ *
+ * @author wprinz
+ */
+public class NameParseResult extends ParseResult {
+
+ /**
+ * The index of the first byte of the name itself, i.e. the byte directly
+ * after the name delimiter. -1 means that the index has not been set.
+ */
+ public int name_start_index = -1;
+
+}
diff --git a/pdf-as-lib/src/main/java/at/knowcenter/wag/exactparser/parsing/results/NullParseResult.java b/pdf-as-lib/src/main/java/at/knowcenter/wag/exactparser/parsing/results/NullParseResult.java
new file mode 100644
index 0000000..49d9dfb
--- /dev/null
+++ b/pdf-as-lib/src/main/java/at/knowcenter/wag/exactparser/parsing/results/NullParseResult.java
@@ -0,0 +1,34 @@
+/**
+ * <copyright> Copyright 2006 by Know-Center, Graz, Austria </copyright>
+ * PDF-AS has been contracted by the E-Government Innovation Center EGIZ, a
+ * joint initiative of the Federal Chancellery Austria and Graz University of
+ * Technology.
+ *
+ * Licensed under the EUPL, Version 1.1 or - as soon they will be approved by
+ * the European Commission - subsequent versions of the EUPL (the "Licence");
+ * You may not use this work except in compliance with the Licence.
+ * You may obtain a copy of the Licence at:
+ * http://www.osor.eu/eupl/
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the Licence is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the Licence for the specific language governing permissions and
+ * limitations under the Licence.
+ *
+ * This product combines work with different licenses. See the "NOTICE" text
+ * file for details on the various modules and licenses.
+ * The "NOTICE" text file is part of the distribution. Any derivative works
+ * that you distribute must include a readable copy of the "NOTICE" text file.
+ *
+ * $Id: NullParseResult.java,v 1.1 2006/08/25 17:00:59 wprinz Exp $
+ */
+package at.knowcenter.wag.exactparser.parsing.results;
+
+/**
+ * The result of parsing a "null".
+ *
+ * @author wprinz
+ */
+public class NullParseResult extends ParseResult {
+}
diff --git a/pdf-as-lib/src/main/java/at/knowcenter/wag/exactparser/parsing/results/NumberParseResult.java b/pdf-as-lib/src/main/java/at/knowcenter/wag/exactparser/parsing/results/NumberParseResult.java
new file mode 100644
index 0000000..e88596c
--- /dev/null
+++ b/pdf-as-lib/src/main/java/at/knowcenter/wag/exactparser/parsing/results/NumberParseResult.java
@@ -0,0 +1,41 @@
+/**
+ * <copyright> Copyright 2006 by Know-Center, Graz, Austria </copyright>
+ * PDF-AS has been contracted by the E-Government Innovation Center EGIZ, a
+ * joint initiative of the Federal Chancellery Austria and Graz University of
+ * Technology.
+ *
+ * Licensed under the EUPL, Version 1.1 or - as soon they will be approved by
+ * the European Commission - subsequent versions of the EUPL (the "Licence");
+ * You may not use this work except in compliance with the Licence.
+ * You may obtain a copy of the Licence at:
+ * http://www.osor.eu/eupl/
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the Licence is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the Licence for the specific language governing permissions and
+ * limitations under the Licence.
+ *
+ * This product combines work with different licenses. See the "NOTICE" text
+ * file for details on the various modules and licenses.
+ * The "NOTICE" text file is part of the distribution. Any derivative works
+ * that you distribute must include a readable copy of the "NOTICE" text file.
+ *
+ * $Id: NumberParseResult.java,v 1.1 2006/08/25 17:00:59 wprinz Exp $
+ */
+package at.knowcenter.wag.exactparser.parsing.results;
+
+/**
+ * The ParseResult of parsing a number (integer or floating point).
+ *
+ * @author wprinz
+ */
+public class NumberParseResult extends ParseResult {
+ /**
+ * The (signed) integer number.
+ */
+ public int number;
+
+ // TODO: make better
+ public float floating;
+} \ No newline at end of file
diff --git a/pdf-as-lib/src/main/java/at/knowcenter/wag/exactparser/parsing/results/ObjectHeaderParseResult.java b/pdf-as-lib/src/main/java/at/knowcenter/wag/exactparser/parsing/results/ObjectHeaderParseResult.java
new file mode 100644
index 0000000..0729108
--- /dev/null
+++ b/pdf-as-lib/src/main/java/at/knowcenter/wag/exactparser/parsing/results/ObjectHeaderParseResult.java
@@ -0,0 +1,51 @@
+/**
+ * <copyright> Copyright 2006 by Know-Center, Graz, Austria </copyright>
+ * PDF-AS has been contracted by the E-Government Innovation Center EGIZ, a
+ * joint initiative of the Federal Chancellery Austria and Graz University of
+ * Technology.
+ *
+ * Licensed under the EUPL, Version 1.1 or - as soon they will be approved by
+ * the European Commission - subsequent versions of the EUPL (the "Licence");
+ * You may not use this work except in compliance with the Licence.
+ * You may obtain a copy of the Licence at:
+ * http://www.osor.eu/eupl/
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the Licence is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the Licence for the specific language governing permissions and
+ * limitations under the Licence.
+ *
+ * This product combines work with different licenses. See the "NOTICE" text
+ * file for details on the various modules and licenses.
+ * The "NOTICE" text file is part of the distribution. Any derivative works
+ * that you distribute must include a readable copy of the "NOTICE" text file.
+ *
+ * $Id: ObjectHeaderParseResult.java,v 1.1 2006/08/25 17:00:59 wprinz Exp $
+ */
+package at.knowcenter.wag.exactparser.parsing.results;
+
+/**
+ * The ParseResult of parsing an object header.
+ *
+ * <p>
+ * Note that this information regards only the object header and not the
+ * contents of the object itself (meaning: next_index points to the contents
+ * and not to the end of the whole object).
+ * </p>
+ *
+ * @author Administrator
+ */
+public class ObjectHeaderParseResult extends ParseResult {
+
+ /**
+ * The object's object number.
+ */
+ public int object_number = -1;
+
+ /**
+ * The object's generation number.
+ */
+ public int generation_number = -1;
+
+}
diff --git a/pdf-as-lib/src/main/java/at/knowcenter/wag/exactparser/parsing/results/ObjectParseResult.java b/pdf-as-lib/src/main/java/at/knowcenter/wag/exactparser/parsing/results/ObjectParseResult.java
new file mode 100644
index 0000000..2fdde34
--- /dev/null
+++ b/pdf-as-lib/src/main/java/at/knowcenter/wag/exactparser/parsing/results/ObjectParseResult.java
@@ -0,0 +1,50 @@
+/**
+ * <copyright> Copyright 2006 by Know-Center, Graz, Austria </copyright>
+ * PDF-AS has been contracted by the E-Government Innovation Center EGIZ, a
+ * joint initiative of the Federal Chancellery Austria and Graz University of
+ * Technology.
+ *
+ * Licensed under the EUPL, Version 1.1 or - as soon they will be approved by
+ * the European Commission - subsequent versions of the EUPL (the "Licence");
+ * You may not use this work except in compliance with the Licence.
+ * You may obtain a copy of the Licence at:
+ * http://www.osor.eu/eupl/
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the Licence is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the Licence for the specific language governing permissions and
+ * limitations under the Licence.
+ *
+ * This product combines work with different licenses. See the "NOTICE" text
+ * file for details on the various modules and licenses.
+ * The "NOTICE" text file is part of the distribution. Any derivative works
+ * that you distribute must include a readable copy of the "NOTICE" text file.
+ *
+ * $Id: ObjectParseResult.java,v 1.1 2006/08/25 17:00:59 wprinz Exp $
+ */
+package at.knowcenter.wag.exactparser.parsing.results;
+
+
+/**
+ * The ParseResult of parsing an indirect object.
+ * @author wprinz
+ */
+public class ObjectParseResult extends ParseResult {
+
+ public int content_index = -1;
+ public int end_of_content_index = -1;
+
+ public ObjectHeaderParseResult header = null;
+
+/* enum ObjectType
+ {
+ UNKNOWN_TO_PARSER,
+ OBJ_DICTIONARY
+ };
+
+ public ObjectType object_type = ObjectType.UNKNOWN_TO_PARSER;
+ */
+ public ParseResult object = null;
+
+}
diff --git a/pdf-as-lib/src/main/java/at/knowcenter/wag/exactparser/parsing/results/ParseResult.java b/pdf-as-lib/src/main/java/at/knowcenter/wag/exactparser/parsing/results/ParseResult.java
new file mode 100644
index 0000000..12c4b19
--- /dev/null
+++ b/pdf-as-lib/src/main/java/at/knowcenter/wag/exactparser/parsing/results/ParseResult.java
@@ -0,0 +1,50 @@
+/**
+ * <copyright> Copyright 2006 by Know-Center, Graz, Austria </copyright>
+ * PDF-AS has been contracted by the E-Government Innovation Center EGIZ, a
+ * joint initiative of the Federal Chancellery Austria and Graz University of
+ * Technology.
+ *
+ * Licensed under the EUPL, Version 1.1 or - as soon they will be approved by
+ * the European Commission - subsequent versions of the EUPL (the "Licence");
+ * You may not use this work except in compliance with the Licence.
+ * You may obtain a copy of the Licence at:
+ * http://www.osor.eu/eupl/
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the Licence is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the Licence for the specific language governing permissions and
+ * limitations under the Licence.
+ *
+ * This product combines work with different licenses. See the "NOTICE" text
+ * file for details on the various modules and licenses.
+ * The "NOTICE" text file is part of the distribution. Any derivative works
+ * that you distribute must include a readable copy of the "NOTICE" text file.
+ *
+ * $Id: ParseResult.java,v 1.1 2006/08/25 17:00:59 wprinz Exp $
+ */
+package at.knowcenter.wag.exactparser.parsing.results;
+
+/**
+ * Base class of all parse results.
+ *
+ * @author wprinz
+ */
+public class ParseResult {
+
+ /**
+ * The start index, where the parser started its work and where the parsed
+ * entity begins.
+ */
+ public int start_index = -1;
+
+ /**
+ * The index of the next entity following the currently parsed entity.
+ *
+ * <p>
+ * This is the index of the first byte not belonging to this entity anymore.
+ * </p>
+ */
+ public int next_index = -1;
+
+}
diff --git a/pdf-as-lib/src/main/java/at/knowcenter/wag/exactparser/parsing/results/StartXRefParseResult.java b/pdf-as-lib/src/main/java/at/knowcenter/wag/exactparser/parsing/results/StartXRefParseResult.java
new file mode 100644
index 0000000..a1f6792
--- /dev/null
+++ b/pdf-as-lib/src/main/java/at/knowcenter/wag/exactparser/parsing/results/StartXRefParseResult.java
@@ -0,0 +1,36 @@
+/**
+ * <copyright> Copyright 2006 by Know-Center, Graz, Austria </copyright>
+ * PDF-AS has been contracted by the E-Government Innovation Center EGIZ, a
+ * joint initiative of the Federal Chancellery Austria and Graz University of
+ * Technology.
+ *
+ * Licensed under the EUPL, Version 1.1 or - as soon they will be approved by
+ * the European Commission - subsequent versions of the EUPL (the "Licence");
+ * You may not use this work except in compliance with the Licence.
+ * You may obtain a copy of the Licence at:
+ * http://www.osor.eu/eupl/
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the Licence is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the Licence for the specific language governing permissions and
+ * limitations under the Licence.
+ *
+ * This product combines work with different licenses. See the "NOTICE" text
+ * file for details on the various modules and licenses.
+ * The "NOTICE" text file is part of the distribution. Any derivative works
+ * that you distribute must include a readable copy of the "NOTICE" text file.
+ *
+ * $Id: StartXRefParseResult.java,v 1.1 2006/08/25 17:00:59 wprinz Exp $
+ */
+package at.knowcenter.wag.exactparser.parsing.results;
+
+
+/**
+ * The ParseResult of parsing a startxref entry.
+ * @author wprinz
+ */
+public class StartXRefParseResult extends ParseResult {
+
+ public int xref_index;
+}
diff --git a/pdf-as-lib/src/main/java/at/knowcenter/wag/exactparser/parsing/results/StreamParseResult.java b/pdf-as-lib/src/main/java/at/knowcenter/wag/exactparser/parsing/results/StreamParseResult.java
new file mode 100644
index 0000000..16da12a
--- /dev/null
+++ b/pdf-as-lib/src/main/java/at/knowcenter/wag/exactparser/parsing/results/StreamParseResult.java
@@ -0,0 +1,41 @@
+/**
+ * <copyright> Copyright 2006 by Know-Center, Graz, Austria </copyright>
+ * PDF-AS has been contracted by the E-Government Innovation Center EGIZ, a
+ * joint initiative of the Federal Chancellery Austria and Graz University of
+ * Technology.
+ *
+ * Licensed under the EUPL, Version 1.1 or - as soon they will be approved by
+ * the European Commission - subsequent versions of the EUPL (the "Licence");
+ * You may not use this work except in compliance with the Licence.
+ * You may obtain a copy of the Licence at:
+ * http://www.osor.eu/eupl/
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the Licence is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the Licence for the specific language governing permissions and
+ * limitations under the Licence.
+ *
+ * This product combines work with different licenses. See the "NOTICE" text
+ * file for details on the various modules and licenses.
+ * The "NOTICE" text file is part of the distribution. Any derivative works
+ * that you distribute must include a readable copy of the "NOTICE" text file.
+ *
+ * $Id: StreamParseResult.java,v 1.1 2006/08/25 17:00:59 wprinz Exp $
+ */
+package at.knowcenter.wag.exactparser.parsing.results;
+
+
+/**
+ * The result of parsing a stream.
+ *
+ * @see at.knowcenter.wag.exactparser.parsing.results.LiteralStringParseResult
+ *
+ * @author wprinz
+ */
+public class StreamParseResult extends ContainerParseResult {
+
+ public DictionaryParseResult stream_dictionary = null;
+
+ public int stream_start_index = -1;
+}
diff --git a/pdf-as-lib/src/main/java/at/knowcenter/wag/exactparser/parsing/results/TrailerParseResult.java b/pdf-as-lib/src/main/java/at/knowcenter/wag/exactparser/parsing/results/TrailerParseResult.java
new file mode 100644
index 0000000..4589ee8
--- /dev/null
+++ b/pdf-as-lib/src/main/java/at/knowcenter/wag/exactparser/parsing/results/TrailerParseResult.java
@@ -0,0 +1,84 @@
+/**
+ * <copyright> Copyright 2006 by Know-Center, Graz, Austria </copyright>
+ * PDF-AS has been contracted by the E-Government Innovation Center EGIZ, a
+ * joint initiative of the Federal Chancellery Austria and Graz University of
+ * Technology.
+ *
+ * Licensed under the EUPL, Version 1.1 or - as soon they will be approved by
+ * the European Commission - subsequent versions of the EUPL (the "Licence");
+ * You may not use this work except in compliance with the Licence.
+ * You may obtain a copy of the Licence at:
+ * http://www.osor.eu/eupl/
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the Licence is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the Licence for the specific language governing permissions and
+ * limitations under the Licence.
+ *
+ * This product combines work with different licenses. See the "NOTICE" text
+ * file for details on the various modules and licenses.
+ * The "NOTICE" text file is part of the distribution. Any derivative works
+ * that you distribute must include a readable copy of the "NOTICE" text file.
+ *
+ * $Id: TrailerParseResult.java,v 1.1 2006/08/25 17:00:59 wprinz Exp $
+ */
+package at.knowcenter.wag.exactparser.parsing.results;
+
+/**
+ * The ParseResult of parsing the trailer.
+ *
+ * @author wprinz
+ */
+public class TrailerParseResult extends ParseResult {
+
+ public int contents_index = -1;
+ public int contents_end_index = -1;
+
+ public DictionaryParseResult dpr = null;
+
+ public IndirectObjectReferenceParseResult info;
+
+ public IndirectObjectReferenceParseResult root;
+
+ /**
+ * The content of the "/Size" entry.
+ */
+ public int size;
+
+ /**
+ * Tells whether this PDF footer has a predecessor (as specified by
+ * the /Prev entry).
+ */
+ public boolean has_predecessor = false;
+
+ /**
+ * The index of the predecessor.
+ *
+ * <p>
+ * Only valid if has_predecessor is true.
+ * </p>
+ * <p>
+ * Use getPrev and setPrev to access this member variable.
+ * </p>
+ *
+ * @see #getPrev()
+ * @see #setPrev(int)
+ */
+ private int prev = -1;
+
+ public int getPrev() {
+ assert has_predecessor;
+ return prev;
+ }
+
+ public void setPrev(int prev) {
+ assert has_predecessor : "Set has_predecessor to true first.";
+ this.prev = prev;
+ }
+
+
+
+
+
+}
diff --git a/pdf-as-lib/src/main/java/at/knowcenter/wag/exactparser/parsing/results/XRefLineParseResult.java b/pdf-as-lib/src/main/java/at/knowcenter/wag/exactparser/parsing/results/XRefLineParseResult.java
new file mode 100644
index 0000000..8039153
--- /dev/null
+++ b/pdf-as-lib/src/main/java/at/knowcenter/wag/exactparser/parsing/results/XRefLineParseResult.java
@@ -0,0 +1,40 @@
+/**
+ * <copyright> Copyright 2006 by Know-Center, Graz, Austria </copyright>
+ * PDF-AS has been contracted by the E-Government Innovation Center EGIZ, a
+ * joint initiative of the Federal Chancellery Austria and Graz University of
+ * Technology.
+ *
+ * Licensed under the EUPL, Version 1.1 or - as soon they will be approved by
+ * the European Commission - subsequent versions of the EUPL (the "Licence");
+ * You may not use this work except in compliance with the Licence.
+ * You may obtain a copy of the Licence at:
+ * http://www.osor.eu/eupl/
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the Licence is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the Licence for the specific language governing permissions and
+ * limitations under the Licence.
+ *
+ * This product combines work with different licenses. See the "NOTICE" text
+ * file for details on the various modules and licenses.
+ * The "NOTICE" text file is part of the distribution. Any derivative works
+ * that you distribute must include a readable copy of the "NOTICE" text file.
+ *
+ * $Id: XRefLineParseResult.java,v 1.1 2006/08/25 17:00:59 wprinz Exp $
+ */
+package at.knowcenter.wag.exactparser.parsing.results;
+
+/**
+ * The ParseResult of parsing a single xref line.
+ *
+ * @author wprinz
+ */
+public class XRefLineParseResult extends ParseResult {
+
+ public int object_offset;
+
+ public int generation_number;
+
+ public byte object_usage;
+}
diff --git a/pdf-as-lib/src/main/java/at/knowcenter/wag/exactparser/parsing/results/XRefSectionParseResult.java b/pdf-as-lib/src/main/java/at/knowcenter/wag/exactparser/parsing/results/XRefSectionParseResult.java
new file mode 100644
index 0000000..eedea81
--- /dev/null
+++ b/pdf-as-lib/src/main/java/at/knowcenter/wag/exactparser/parsing/results/XRefSectionParseResult.java
@@ -0,0 +1,66 @@
+/**
+ * <copyright> Copyright 2006 by Know-Center, Graz, Austria </copyright>
+ * PDF-AS has been contracted by the E-Government Innovation Center EGIZ, a
+ * joint initiative of the Federal Chancellery Austria and Graz University of
+ * Technology.
+ *
+ * Licensed under the EUPL, Version 1.1 or - as soon they will be approved by
+ * the European Commission - subsequent versions of the EUPL (the "Licence");
+ * You may not use this work except in compliance with the Licence.
+ * You may obtain a copy of the Licence at:
+ * http://www.osor.eu/eupl/
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the Licence is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the Licence for the specific language governing permissions and
+ * limitations under the Licence.
+ *
+ * This product combines work with different licenses. See the "NOTICE" text
+ * file for details on the various modules and licenses.
+ * The "NOTICE" text file is part of the distribution. Any derivative works
+ * that you distribute must include a readable copy of the "NOTICE" text file.
+ *
+ * $Id: XRefSectionParseResult.java,v 1.1 2006/08/25 17:00:59 wprinz Exp $
+ */
+package at.knowcenter.wag.exactparser.parsing.results;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * The ParseResult of an xref parsing operation.
+ *
+ * <p>
+ * This contains one whole xref table section. An xref section starts with the
+ * word xref and contains one or more xref sub-sections.
+ * </p>
+ * <p>
+ * Due to Incremental Updates, there may be more than one xref section in a
+ * document. All xref sections together are called the xref table. Using this
+ * aggregated xref table, an application has the full access to all indirect
+ * objects in the document.
+ * </p>
+ * <p>
+ * In many PDF libraries and applications one xref section is also informally
+ * called xref table.
+ * </p>
+ *
+ * @author wprinz
+ */
+public class XRefSectionParseResult extends ParseResult
+{
+
+ public List xref_subsections = new ArrayList();
+
+ /**
+ * Appends another cross-reference (xref) sub-section to this xref section.
+ *
+ * @param xref_section
+ * The xref sub-section to be appended.
+ */
+ public void appendXRefSubSection(XRefSubSectionParseResult xref_section)
+ {
+ xref_subsections.add(xref_section);
+ }
+}
diff --git a/pdf-as-lib/src/main/java/at/knowcenter/wag/exactparser/parsing/results/XRefSubSectionParseResult.java b/pdf-as-lib/src/main/java/at/knowcenter/wag/exactparser/parsing/results/XRefSubSectionParseResult.java
new file mode 100644
index 0000000..ec19004
--- /dev/null
+++ b/pdf-as-lib/src/main/java/at/knowcenter/wag/exactparser/parsing/results/XRefSubSectionParseResult.java
@@ -0,0 +1,59 @@
+/**
+ * <copyright> Copyright 2006 by Know-Center, Graz, Austria </copyright>
+ * PDF-AS has been contracted by the E-Government Innovation Center EGIZ, a
+ * joint initiative of the Federal Chancellery Austria and Graz University of
+ * Technology.
+ *
+ * Licensed under the EUPL, Version 1.1 or - as soon they will be approved by
+ * the European Commission - subsequent versions of the EUPL (the "Licence");
+ * You may not use this work except in compliance with the Licence.
+ * You may obtain a copy of the Licence at:
+ * http://www.osor.eu/eupl/
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the Licence is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the Licence for the specific language governing permissions and
+ * limitations under the Licence.
+ *
+ * This product combines work with different licenses. See the "NOTICE" text
+ * file for details on the various modules and licenses.
+ * The "NOTICE" text file is part of the distribution. Any derivative works
+ * that you distribute must include a readable copy of the "NOTICE" text file.
+ *
+ * $Id: XRefSubSectionParseResult.java,v 1.1 2006/08/25 17:00:59 wprinz Exp $
+ */
+package at.knowcenter.wag.exactparser.parsing.results;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Contains an xref sub-section.
+ *
+ * <p>
+ * An xref sub-section is an ordered list of xref lines. The object numbers of the
+ * corresponding objects are numbered incrementally.
+ * </p>
+ * <p>
+ * xref sub-sections are important in Incremental Updates because they allow
+ * specifying explicitly which objects (object numbers) are contained in the xref.
+ * </p>
+ *
+ * @author wprinz
+ */
+public class XRefSubSectionParseResult extends ParseResult {
+
+ public int start_obj_number;
+
+ public int num_objects;
+
+ public List xref_lines = new ArrayList();
+
+ public void appendXRefLine(XRefLineParseResult xref_line) {
+ assert xref_lines.size() < num_objects;
+
+ xref_lines.add(xref_line);
+ }
+
+}