From 6025b6016517c6d898d8957d1d7e03ba71431912 Mon Sep 17 00:00:00 2001 From: tknall Date: Fri, 1 Dec 2006 12:20:24 +0000 Subject: Initial import of release 2.2. git-svn-id: https://joinup.ec.europa.eu/svn/pdf-as/trunk@4 7b5415b0-85f9-ee4d-85bd-d5d0c3b42d1c --- .../knowcenter/wag/exactparser/ByteArrayUtils.java | 140 ++ .../knowcenter/wag/exactparser/ParseDocument.java | 265 ++++ .../parsing/IndirectObjectReference.java | 49 + .../wag/exactparser/parsing/PDFNames.java | 176 +++ .../wag/exactparser/parsing/PDFUtils.java | 1393 ++++++++++++++++++++ .../parsing/results/ArrayParseResult.java | 34 + .../parsing/results/BooleanParseResult.java | 30 + .../parsing/results/ContainerParseResult.java | 37 + .../parsing/results/DictionaryParseResult.java | 33 + .../parsing/results/EOFParseResult.java | 39 + .../parsing/results/FooterParseResult.java | 45 + .../parsing/results/HeaderParseResult.java | 40 + .../parsing/results/HexStringParseResult.java | 28 + .../IndirectObjectReferenceParseResult.java | 36 + .../parsing/results/IntegerParseResult.java | 28 + .../parsing/results/LiteralStringParseResult.java | 29 + .../parsing/results/NameParseResult.java | 27 + .../parsing/results/NullParseResult.java | 26 + .../parsing/results/NumberParseResult.java | 33 + .../parsing/results/ObjectHeaderParseResult.java | 43 + .../parsing/results/ObjectParseResult.java | 42 + .../exactparser/parsing/results/ParseResult.java | 42 + .../parsing/results/StartXRefParseResult.java | 28 + .../parsing/results/StreamParseResult.java | 33 + .../parsing/results/TrailerParseResult.java | 76 ++ .../parsing/results/XRefLineParseResult.java | 32 + .../parsing/results/XRefSectionParseResult.java | 58 + .../parsing/results/XRefSubSectionParseResult.java | 51 + 28 files changed, 2893 insertions(+) create mode 100644 src/main/java/at/knowcenter/wag/exactparser/ByteArrayUtils.java create mode 100644 src/main/java/at/knowcenter/wag/exactparser/ParseDocument.java create mode 100644 
src/main/java/at/knowcenter/wag/exactparser/parsing/IndirectObjectReference.java create mode 100644 src/main/java/at/knowcenter/wag/exactparser/parsing/PDFNames.java create mode 100644 src/main/java/at/knowcenter/wag/exactparser/parsing/PDFUtils.java create mode 100644 src/main/java/at/knowcenter/wag/exactparser/parsing/results/ArrayParseResult.java create mode 100644 src/main/java/at/knowcenter/wag/exactparser/parsing/results/BooleanParseResult.java create mode 100644 src/main/java/at/knowcenter/wag/exactparser/parsing/results/ContainerParseResult.java create mode 100644 src/main/java/at/knowcenter/wag/exactparser/parsing/results/DictionaryParseResult.java create mode 100644 src/main/java/at/knowcenter/wag/exactparser/parsing/results/EOFParseResult.java create mode 100644 src/main/java/at/knowcenter/wag/exactparser/parsing/results/FooterParseResult.java create mode 100644 src/main/java/at/knowcenter/wag/exactparser/parsing/results/HeaderParseResult.java create mode 100644 src/main/java/at/knowcenter/wag/exactparser/parsing/results/HexStringParseResult.java create mode 100644 src/main/java/at/knowcenter/wag/exactparser/parsing/results/IndirectObjectReferenceParseResult.java create mode 100644 src/main/java/at/knowcenter/wag/exactparser/parsing/results/IntegerParseResult.java create mode 100644 src/main/java/at/knowcenter/wag/exactparser/parsing/results/LiteralStringParseResult.java create mode 100644 src/main/java/at/knowcenter/wag/exactparser/parsing/results/NameParseResult.java create mode 100644 src/main/java/at/knowcenter/wag/exactparser/parsing/results/NullParseResult.java create mode 100644 src/main/java/at/knowcenter/wag/exactparser/parsing/results/NumberParseResult.java create mode 100644 src/main/java/at/knowcenter/wag/exactparser/parsing/results/ObjectHeaderParseResult.java create mode 100644 src/main/java/at/knowcenter/wag/exactparser/parsing/results/ObjectParseResult.java create mode 100644 
src/main/java/at/knowcenter/wag/exactparser/parsing/results/ParseResult.java create mode 100644 src/main/java/at/knowcenter/wag/exactparser/parsing/results/StartXRefParseResult.java create mode 100644 src/main/java/at/knowcenter/wag/exactparser/parsing/results/StreamParseResult.java create mode 100644 src/main/java/at/knowcenter/wag/exactparser/parsing/results/TrailerParseResult.java create mode 100644 src/main/java/at/knowcenter/wag/exactparser/parsing/results/XRefLineParseResult.java create mode 100644 src/main/java/at/knowcenter/wag/exactparser/parsing/results/XRefSectionParseResult.java create mode 100644 src/main/java/at/knowcenter/wag/exactparser/parsing/results/XRefSubSectionParseResult.java (limited to 'src/main/java/at/knowcenter/wag/exactparser') diff --git a/src/main/java/at/knowcenter/wag/exactparser/ByteArrayUtils.java b/src/main/java/at/knowcenter/wag/exactparser/ByteArrayUtils.java new file mode 100644 index 0000000..e6b32ec --- /dev/null +++ b/src/main/java/at/knowcenter/wag/exactparser/ByteArrayUtils.java @@ -0,0 +1,140 @@ +/** + * Copyright (c) 2006 by Know-Center, Graz, Austria + * + * This software is the confidential and proprietary information of Know-Center, + * Graz, Austria. You shall not disclose such Confidential Information and shall + * use it only in accordance with the terms of the license agreement you entered + * into with Know-Center. + * + * KNOW-CENTER MAKES NO REPRESENTATIONS OR WARRANTIES ABOUT THE SUITABILITY OF + * THE SOFTWARE, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE + * IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, OR + * NON-INFRINGEMENT. KNOW-CENTER SHALL NOT BE LIABLE FOR ANY DAMAGES SUFFERED BY + * LICENSEE AS A RESULT OF USING, MODIFYING OR DISTRIBUTING THIS SOFTWARE OR ITS + * DERIVATIVES. 
+ *
+ * $Id: ByteArrayUtils.java,v 1.1 2006/08/25 17:00:59 wprinz Exp $
+ */
+package at.knowcenter.wag.exactparser;
+
+import java.io.UnsupportedEncodingException;
+
+/**
+ * Static helpers for decoding, searching and comparing raw byte arrays.
+ *
+ * @author wprinz
+ */
+public abstract class ByteArrayUtils {
+
+  /** Name of the charset used by {@link #convertByteArrayToString(byte[])}. */
+  public static final String BYTE_ARRAY_ENCODING = "ISO-8859-1";
+
+  /**
+   * Decodes the byte array into a String using {@link #BYTE_ARRAY_ENCODING}.
+   *
+   * @param data
+   *          The bytes to decode.
+   * @return Returns the decoded String.
+   * @throws UnsupportedEncodingException
+   *           Forwarded from the String constructor.
+   */
+  public static String convertByteArrayToString(final byte[] data) throws UnsupportedEncodingException {
+    final String decoded = new String(data, BYTE_ARRAY_ENCODING);
+    return decoded;
+  }
+
+  /**
+   * Searches forward for the first occurrence of search in data, starting at
+   * the given index.
+   *
+   * @param data
+   *          The array that is searched.
+   * @param index
+   *          The first position to test.
+   * @param search
+   *          The byte sequence to look for.
+   * @return Returns the position of the first match, or -1 if there is none.
+   */
+  public static int indexOf(final byte[] data, final int index, final byte[] search) {
+    // Last position at which search still fits completely into data.
+    final int last_candidate = data.length - search.length;
+    int position = index;
+    while (position <= last_candidate) {
+      if (compareByteArrays(data, position, search)) {
+        return position;
+      }
+      position++;
+    }
+    return -1;
+  }
+
+  /**
+   * Searches backward for the last occurrence of search in data.
+   *
+   * @param data
+   *          The array that is searched.
+   * @param search
+   *          The byte sequence to look for.
+   * @return Returns the position of the last match, or -1 if there is none.
+   */
+  public static int lastIndexOf(final byte[] data, byte[] search) {
+    int position = data.length - search.length;
+    while (position >= 0) {
+      if (compareByteArrays(data, position, search)) {
+        return position;
+      }
+      position--;
+    }
+    return -1;
+  }
+
+  /**
+   * Tests whether data contains the bytes of search at the given position.
+   *
+   * @param data
+   *          The source array.
+   * @param index
+   *          The position in data at which the comparison starts.
+   * @param search
+   *          The byte sequence expected at that position.
+   * @return Returns true if the search.length bytes of data starting at index
+   *         equal search; false if they differ or if search does not fit
+   *         into the remainder of data.
+   * @throws IndexOutOfBoundsException
+   *           If index is negative or not smaller than data.length.
+   */
+  public static boolean compareByteArrays(final byte[] data, final int index, byte[] search) {
+    final boolean index_in_range = 0 <= index && index < data.length;
+    if (!index_in_range) {
+      throw new IndexOutOfBoundsException("The index " + index + " is out of bounds");
+    }
+
+    // index is non-negative here, so this single test also covers the case
+    // of search being longer than the whole of data.
+    final int remaining = data.length - index;
+    if (search.length > remaining) {
+      return false;
+    }
+
+    int offset = 0;
+    while (offset < search.length) {
+      if (data[index + offset] != search[offset]) {
+        return false;
+      }
+      offset++;
+    }
+    return true;
+  }
+
+  /**
+   * Tests whether the given byte occurs anywhere in the byte array.
+   *
+   * @param byte_array
+   *          The array that is searched.
+   * @param data
+   *          The byte to look for.
+   * @return Returns true if the byte occurs at least once, false otherwise.
+   */
+  public static boolean contains(final byte[] byte_array, final byte data) {
+    int position = 0;
+    while (position < byte_array.length) {
+      if (byte_array[position] == data) {
+        return true;
+      }
+      position++;
+    }
+    return false;
+  }
+
+}
diff --git a/src/main/java/at/knowcenter/wag/exactparser/ParseDocument.java b/src/main/java/at/knowcenter/wag/exactparser/ParseDocument.java
new file mode 100644
index 0000000..a1ad10d
--- /dev/null
+++ b/src/main/java/at/knowcenter/wag/exactparser/ParseDocument.java
@@ -0,0 +1,265 @@
+/**
+ * Copyright (c) 2006 by Know-Center, Graz, Austria
+ *
+ * This software is the confidential and proprietary information of Know-Center,
+ * Graz, Austria. You shall not disclose such Confidential Information and shall
+ * use it only in accordance with the terms of the license agreement you entered
+ * into with Know-Center.
+ * + * KNOW-CENTER MAKES NO REPRESENTATIONS OR WARRANTIES ABOUT THE SUITABILITY OF + * THE SOFTWARE, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE + * IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, OR + * NON-INFRINGEMENT. KNOW-CENTER SHALL NOT BE LIABLE FOR ANY DAMAGES SUFFERED BY + * LICENSEE AS A RESULT OF USING, MODIFYING OR DISTRIBUTING THIS SOFTWARE OR ITS + * DERIVATIVES. + * + * $Id: ParseDocument.java,v 1.1 2006/08/25 17:00:59 wprinz Exp $ + */ +package at.knowcenter.wag.exactparser; + +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; + +import at.knowcenter.wag.exactparser.parsing.PDFUtils; +import at.knowcenter.wag.exactparser.parsing.results.DictionaryParseResult; +import at.knowcenter.wag.exactparser.parsing.results.FooterParseResult; +import at.knowcenter.wag.exactparser.parsing.results.IndirectObjectReferenceParseResult; +import at.knowcenter.wag.exactparser.parsing.results.NameParseResult; +import at.knowcenter.wag.exactparser.parsing.results.NumberParseResult; +import at.knowcenter.wag.exactparser.parsing.results.ObjectParseResult; +import at.knowcenter.wag.exactparser.parsing.results.StartXRefParseResult; +import at.knowcenter.wag.exactparser.parsing.results.TrailerParseResult; +import at.knowcenter.wag.exactparser.parsing.results.XRefSectionParseResult; + + +/** + * Test class. 
+ * @author wprinz
+ */
+public class ParseDocument
+{
+
+  /** Document parsed by main when no path is given on the command line. */
+  public static final String DOCUMENT = "C:/wprinz/temp.pdf";
+
+  /** Name of the EGIZ signature dictionary entry looked up in the root dictionary. */
+  public static final byte[] EGIZ_DICT_NAME = { 'E', 'G', 'I', 'Z', 'S', 'i',
+      'g', 'D', 'i', 'c', 't' };
+
+  /** Name of the entry holding the original document size (ODS). */
+  public static final byte[] EGIZ_ODS_NAME = { 'O', 'D', 'S' };
+
+  public static final byte[] EGIZ_XOBJ_NAME = { 'S', 'i', 'g', 'X', 'O', 'b',
+      'j', 'e', 'c', 't' };
+
+  /**
+   * Parses a PDF file and prints the index range of every footer block found
+   * by {@link #parseDocument(byte[])}, marking the blocks whose root
+   * dictionary contains the EGIZ signature dictionary entry.
+   *
+   * @param args
+   *          Optional. If present, args[0] is the path of the PDF file;
+   *          otherwise {@link #DOCUMENT} is used.
+   */
+  public static void main(String[] args)
+  {
+
+    try
+    {
+      String path = (args != null && args.length > 0) ? args[0] : DOCUMENT;
+      byte[] pdf = readFile(new File(path));
+
+      List blocks = parseDocument(pdf);
+
+      Iterator it = blocks.iterator();
+      while (it.hasNext())
+      {
+        FooterParseResult bpr = (FooterParseResult) it.next();
+
+        System.out.print("block from " + bpr.start_index + " to " + bpr.next_index);
+
+        if (bpr.tpr.root != null)
+        {
+          int root_index = PDFUtils.getObjectOffsetFromXRefByIndirectObjectReference(bpr.xpr, bpr.tpr.root.ior);
+          ObjectParseResult root_opr = PDFUtils.parseObject(pdf, root_index);
+          DictionaryParseResult root_dpr = (DictionaryParseResult) root_opr.object;
+
+          int egiz_index = PDFUtils.indexOfName(pdf, root_dpr.names, EGIZ_DICT_NAME);
+          if (egiz_index >= 0)
+          {
+            System.out.print(" == EGIZDict");
+          }
+        }
+
+        System.out.println();
+      }
+
+    }
+    catch (IOException e)
+    {
+      e.printStackTrace();
+    }
+  }
+
+  /**
+   * Collects the footers of the document by starting at the last startxref
+   * and following the trailers' Prev entries until a trailer without a
+   * predecessor is reached.
+   *
+   * @param pdf
+   *          The complete PDF data.
+   * @return Returns the FooterParseResult objects, the one parsed last (the
+   *         end of the Prev chain) first.
+   * @throws IOException
+   *           Declared by the original signature.
+   */
+  public static List parseDocument(final byte[] pdf) throws IOException
+  {
+    //HeaderParseResult hpr = PDFUtils.parseHeader(pdf, 0);
+    //System.out.println("PDF-version = " + hpr.major + "." + hpr.minor);
+
+
+    List blocks = new ArrayList();
+
+    int last_start_xref = PDFUtils.findLastStartXRef(pdf);
+    StartXRefParseResult last_sxpr = PDFUtils.parseStartXRef(pdf, last_start_xref);
+    int xref_index = last_sxpr.xref_index;
+
+    for (;;)
+    {
+      FooterParseResult fpr = PDFUtils.parseFooter(pdf, xref_index);
+      // Insert at the front so that the list ends with the footer parsed first.
+      blocks.add(0, fpr);
+
+      //System.out.println("tpr.has_predecessor = " + fpr.tpr.has_predecessor);
+      if (!fpr.tpr.has_predecessor)
+      {
+        // eventually parse the PDF header here.
+        break;
+      }
+
+      //System.out.println("tpr.prev = " + fpr.tpr.getPrev());
+
+      xref_index = fpr.tpr.getPrev();
+    }
+
+    return blocks;
+  }
+
+  // public static void parseEGIZ()
+  // {
+  //
+  // int root_index =
+  // PDFUtils.getObjectOffsetFromXRefByIndirectObjectReference(bpr.xpr,
+  // bpr.tpr.root.ior);
+  // ObjectParseResult root_opr = PDFUtils.parseObject(pdf, root_index);
+  // DictionaryParseResult root_dpr = (DictionaryParseResult) root_opr.object;
+  //
+  // int egiz_index = PDFUtils.indexOfName(pdf, root_dpr.names, EGIZ_DICT_NAME);
+  // if (egiz_index >= 0)
+  // {
+  // IndirectObjectReferenceParseResult egiz_iorpr =
+  // (IndirectObjectReferenceParseResult) root_dpr.values.get(egiz_index);
+  // System.out.println("EGIZ signature info at = " + egiz_iorpr);
+  //
+  // int egiz_dict_index =
+  // PDFUtils.getObjectOffsetFromXRefByIndirectObjectReference(bpr.xpr,
+  // egiz_iorpr.ior);
+  // ObjectParseResult opr = PDFUtils.parseObject(pdf, egiz_dict_index);
+  // DictionaryParseResult egiz_dict = (DictionaryParseResult) opr.object;
+  //
+  // for (int i = 0; i < egiz_dict.names.size(); i++)
+  // {
+  // NameParseResult npr = egiz_dict.names.get(i);
+  // int len = npr.next_index - npr.name_start_index;
+  // byte[] name = new byte[len];
+  // System.arraycopy(pdf, npr.name_start_index, name, 0, len);
+  // System.out.print(" " + new String(name, "US-ASCII") + " = ");
+  //
+  // System.out.println(egiz_dict.values.get(i));
+  // }
+  //
+  // // int key = PDFUtils.indexOfName(pdf, egiz_dict.names, new byte [] { 'K',
+  // // 'e', 'y'});
+  // // IndirectObjectReferenceParseResult key_iorpr =
+  // // (IndirectObjectReferenceParseResult) egiz_dict.values.get(key);
+  // // int key_offset =
+  // // PDFUtils.getObjectOffsetFromXRefByIndirectObjectReference(xpr,
+  // // key_iorpr.ior);
+  // // ObjectParseResult key_opr = PDFUtils.parseObject(pdf, key_offset);
+  // // StreamParseResult spr = (StreamParseResult) key_opr.object;
+  // // System.out.println(" key stream from " + spr.content_start_index + " to
+  // // " + spr.content_end_index);
+  // //
+  // // int data_len = spr.content_end_index - spr.content_start_index;
+  // // byte [] data = new byte[data_len];
+  // // System.arraycopy(pdf, spr.content_start_index, data, 0, data_len);
+  // // System.out.println(new String(data, "US-ASCII"));
+  //
+  // }
+  // else
+  // {
+  // System.out.println("No EGIZ block found.");
+  // }
+  //
+  // }
+
+  /**
+   * Returns the original (unsigned) document contained in the given file.
+   *
+   * <p>
+   * If the root dictionary of the last trailer contains the EGIZ signature
+   * dictionary entry, the first ODS bytes of the file are returned, where ODS
+   * is the number stored under {@link #EGIZ_ODS_NAME} in that dictionary.
+   * Otherwise the complete file content is returned.
+   * </p>
+   *
+   * @param file_name
+   *          The PDF file.
+   * @return Returns the bytes of the original document.
+   * @throws IOException
+   *           If the file cannot be read completely, if the signature
+   *           dictionary has no ODS entry, or if the ODS value does not lie
+   *           within the file.
+   */
+  public static byte[] getOriginalDocument(final File file_name) throws IOException
+  {
+    byte[] pdf = readFile(file_name);
+
+    int last_start_xref = PDFUtils.findLastStartXRef(pdf);
+
+    StartXRefParseResult sxpr = PDFUtils.parseStartXRef(pdf, last_start_xref);
+
+    XRefSectionParseResult xpr = PDFUtils.parseXRefSection(pdf, sxpr.xref_index);
+
+    TrailerParseResult tpr = PDFUtils.parseTrailer(pdf, xpr.next_index);
+
+    System.out.println("tpr.info = " + tpr.info);
+    System.out.println("tpr.root = " + tpr.root);
+    System.out.println("tpr.size = " + tpr.size);
+
+    System.out.println("tpr.has_predecessor = " + tpr.has_predecessor);
+    if (tpr.has_predecessor)
+    {
+      System.out.println("tpr.prev = " + tpr.getPrev());
+    }
+
+    int root_index = PDFUtils.getObjectOffsetFromXRefByIndirectObjectReference(xpr, tpr.root.ior);
+    ObjectParseResult root_opr = PDFUtils.parseObject(pdf, root_index);
+    DictionaryParseResult root_dpr = (DictionaryParseResult) root_opr.object;
+
+    int egiz_index = PDFUtils.indexOfName(pdf, root_dpr.names, EGIZ_DICT_NAME);
+    if (egiz_index >= 0)
+    {
+      System.out.println("The document is EGIZ-signed. ==> extract original document");
+
+      IndirectObjectReferenceParseResult egiz_iorpr = (IndirectObjectReferenceParseResult) root_dpr.values.get(egiz_index);
+      System.out.println("EGIZ signature info at = " + egiz_iorpr);
+
+      int egiz_dict_index = PDFUtils.getObjectOffsetFromXRefByIndirectObjectReference(xpr, egiz_iorpr.ior);
+      ObjectParseResult opr = PDFUtils.parseObject(pdf, egiz_dict_index);
+      DictionaryParseResult egiz_dict = (DictionaryParseResult) opr.object;
+
+      for (int i = 0; i < egiz_dict.names.size(); i++)
+      {
+        NameParseResult npr = (NameParseResult) egiz_dict.names.get(i);
+        int len = npr.next_index - npr.name_start_index;
+        byte[] name = new byte[len];
+        System.arraycopy(pdf, npr.name_start_index, name, 0, len);
+        System.out.print(" " + new String(name, "US-ASCII") + " = ");
+
+        System.out.println(egiz_dict.values.get(i));
+      }
+
+      // Original document size. A negative index is treated as "not found",
+      // in the same way as the EGIZ dictionary lookup above.
+      int key = PDFUtils.indexOfName(pdf, egiz_dict.names, EGIZ_ODS_NAME);
+      if (key < 0)
+      {
+        throw new IOException("The EGIZ signature dictionary contains no ODS entry.");
+      }
+      NumberParseResult ods = (NumberParseResult) egiz_dict.values.get(key);
+
+      int original_document_size = ods.number;
+      System.out.println("Original Document Size = " + original_document_size);
+
+      // The size comes from the document itself, so validate it before it is
+      // used as an array length and copy count.
+      if (original_document_size < 0 || original_document_size > pdf.length)
+      {
+        throw new IOException("Invalid original document size " + original_document_size + " for a document of " + pdf.length + " bytes.");
+      }
+
+      byte[] original = new byte[original_document_size];
+      System.arraycopy(pdf, 0, original, 0, original_document_size);
+
+      return original;
+    }
+
+    System.out.println("No EGIZ block found. ==> the whold document is the original document");
+    return pdf;
+  }
+
+  /**
+   * Reads the complete content of the file into a byte array.
+   *
+   * @param file
+   *          The file to read.
+   * @return Returns file.length() bytes read from the file.
+   * @throws IOException
+   *           If the file cannot be opened or ends before file.length() bytes
+   *           were read.
+   */
+  private static byte[] readFile(final File file) throws IOException
+  {
+    FileInputStream fis = new FileInputStream(file);
+    try
+    {
+      byte[] content = new byte[(int) file.length()];
+      int offset = 0;
+      // A single read call may deliver fewer bytes than requested, so keep
+      // reading until the buffer is full.
+      while (offset < content.length)
+      {
+        int count = fis.read(content, offset, content.length - offset);
+        if (count < 0)
+        {
+          throw new IOException("Unexpected end of file after " + offset + " of " + content.length + " bytes: " + file);
+        }
+        offset += count;
+      }
+      return content;
+    }
+    finally
+    {
+      // Release the file handle on the error path as well.
+      fis.close();
+    }
+  }
+
+}
diff --git a/src/main/java/at/knowcenter/wag/exactparser/parsing/IndirectObjectReference.java b/src/main/java/at/knowcenter/wag/exactparser/parsing/IndirectObjectReference.java
new file mode 100644
index 0000000..fa68bf6
--- /dev/null
+++ b/src/main/java/at/knowcenter/wag/exactparser/parsing/IndirectObjectReference.java
@@ -0,0 +1,49 @@
+/**
+ * Copyright (c) 2006 by Know-Center, Graz, Austria
+ *
+ * This software is the confidential and proprietary information of Know-Center,
+ * Graz, Austria. You shall not disclose such Confidential Information and shall
+ * use it only in accordance with the terms of the license agreement you entered
+ * into with Know-Center.
+ *
+ * KNOW-CENTER MAKES NO REPRESENTATIONS OR WARRANTIES ABOUT THE SUITABILITY OF
+ * THE SOFTWARE, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, OR
+ * NON-INFRINGEMENT. KNOW-CENTER SHALL NOT BE LIABLE FOR ANY DAMAGES SUFFERED BY
+ * LICENSEE AS A RESULT OF USING, MODIFYING OR DISTRIBUTING THIS SOFTWARE OR ITS
+ * DERIVATIVES.
+ *
+ * $Id: IndirectObjectReference.java,v 1.1 2006/08/25 17:00:59 wprinz Exp $
+ */
+package at.knowcenter.wag.exactparser.parsing;
+
+/**
+ * The IndirectObjectReference class completely holds a so called object
+ * identifier of an indirect object.
+ *
+ *

+ * An indirect object is an object not contained within another object. In + * accordance, a direct object is structurally part of another object. For + * example, a direct String object that is the value of some key in a dictionary + * object. + *

+ *

+ * An object identifier uniquely identifies a specific indirect object by the + * object number and the generation number. In PDF such an object identifier may + * be used to refer to the object. + *

+ *
+ * @author wprinz
+ */
+public class IndirectObjectReference {
+
+  /** The object number of the identifier. */
+  public int object_number;
+
+  /** The generation number of the identifier. */
+  public int generation_number;
+
+  /**
+   * Renders the identifier as the object number, one space and the
+   * generation number.
+   *
+   * @return Returns the textual form of the identifier.
+   */
+  //@Override
+  public String toString() {
+    final String separator = " ";
+    return object_number + separator + generation_number;
+  }
+
+}
diff --git a/src/main/java/at/knowcenter/wag/exactparser/parsing/PDFNames.java b/src/main/java/at/knowcenter/wag/exactparser/parsing/PDFNames.java
new file mode 100644
index 0000000..66e1931
--- /dev/null
+++ b/src/main/java/at/knowcenter/wag/exactparser/parsing/PDFNames.java
@@ -0,0 +1,176 @@
+/**
+ * Copyright (c) 2006 by Know-Center, Graz, Austria
+ *
+ * This software is the confidential and proprietary information of Know-Center,
+ * Graz, Austria. You shall not disclose such Confidential Information and shall
+ * use it only in accordance with the terms of the license agreement you entered
+ * into with Know-Center.
+ *
+ * KNOW-CENTER MAKES NO REPRESENTATIONS OR WARRANTIES ABOUT THE SUITABILITY OF
+ * THE SOFTWARE, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, OR
+ * NON-INFRINGEMENT. KNOW-CENTER SHALL NOT BE LIABLE FOR ANY DAMAGES SUFFERED BY
+ * LICENSEE AS A RESULT OF USING, MODIFYING OR DISTRIBUTING THIS SOFTWARE OR ITS
+ * DERIVATIVES.
+ *
+ * $Id: PDFNames.java,v 1.1 2006/08/25 17:00:59 wprinz Exp $
+ */
+package at.knowcenter.wag.exactparser.parsing;
+
+/**
+ * Abstract class that contains several frequently used PDF constants.
+ *
+ *

+ * The PDF specification partitions the character set (ASCII) into three groups: + *

+ * + *

+ * Newlines consist per default of CR and LF, but also LF and even CR alone are + * allowed. It seems that all variations of newlines may exist within a single + * document. + *

+ * + * @author wprinz + */ +public abstract class PDFNames +{ + + /** + * The standard encoding of PDF tokens and names. + * + *

+ * PDF is usually an 8 bit format. Binary data etc. can be saved just as it + * is. Nevertheless all PDF tokens ('xref', 'obj', etc.) and PDF Names + * ('/Size', '/Pages', '/Type', etc.) must be in 7 bit US-ASCII encoding. + *

+ *

+ * Therefore, whenever using Java Strings to convert e.g. numbers to such PDF + * tokens, use this encoding constant. + *

+ *

+ * The same applies to PDF token/name byte arrays that are transformed back to + * Java Strings. + *

+ */ + public static final String PDF_STANDARD_ENCODING = "US-ASCII"; + + // Whitespace characters + + // TABLE 3.1 White-space characters + // DECIMAL HEXADECIMAL OCTAL NAME + // 0 00 000 Null (NUL) + // 9 09 011 Tab (HT) + // 10 0A 012 Line feed (LF) + // 12 0C 014 Form feed (FF) + // 13 0D 015 Carriage return + // 32 20 040 Space (SP) + + public static final byte WHITESPACE_NUL = 0x00; + + public static final byte WHITESPACE_HT = 0x09; + + public static final byte WHITESPACE_LF = 0x0A; + + public static final byte WHITESPACE_FF = 0x0C; + + public static final byte WHITESPACE_CR = 0x0D; + + public static final byte WHITESPACE_SP = 0x20; + + public static final byte[] WHITESPACE_CHARACTERS = { WHITESPACE_NUL, + WHITESPACE_HT, WHITESPACE_LF, WHITESPACE_FF, WHITESPACE_CR, WHITESPACE_SP }; + + // comment character + + public static final byte COMMENT = '%'; + + // PDF-version + + public static final byte[] PDF_VERSION_STR = { 'P', 'D', 'F', '-' }; + + public static final byte PDF_VERSION_SEPARATOR = '.'; + + // delimiter characters + + public static final byte DELIMITER_STRING_OPEN = '('; + + public static final byte DELIMITER_STRING_CLOSE = ')'; + + public static final byte DELIMITER_HEXSTRING_OPEN = '<'; + + public static final byte DELIMITER_HEXSTRING_CLOSE = '>'; + + public static final byte DELIMITER_ARRAY_OPEN = '['; + + public static final byte DELIMITER_ARRAY_CLOSE = ']'; + + public static final byte DELIMITER_CURLY_OPEN = '{'; + + public static final byte DELIMITER_CURLY_CLOSE = '}'; + + public static final byte DELIMITER_NAME = '/'; + + public static final byte[] DELIMITER_CHARACTERS = { DELIMITER_STRING_OPEN, + DELIMITER_STRING_CLOSE, DELIMITER_HEXSTRING_OPEN, + DELIMITER_HEXSTRING_CLOSE, DELIMITER_ARRAY_OPEN, DELIMITER_ARRAY_CLOSE, + DELIMITER_CURLY_OPEN, DELIMITER_CURLY_CLOSE, DELIMITER_NAME }; + + // Footer + + public static final byte[] XREF_STR = { 'x', 'r', 'e', 'f' }; + + public static final byte[] TRAILER_STR = { 't', 'r', 'a', 'i', 'l', 'e', 'r' 
}; + + public static final byte[] STARTXREF_STR = { 's', 't', 'a', 'r', 't', 'x', + 'r', 'e', 'f' }; + + public static final byte[] EOF_STR = { '%', '%', 'E', 'O', 'F' }; + + // objects + + public static final byte[] OBJ_STR = { 'o', 'b', 'j' }; + + public static final byte[] ENDOBJ_STR = { 'e', 'n', 'd', 'o', 'b', 'j' }; + + public static final byte[] DICT_START_STR = { DELIMITER_HEXSTRING_OPEN, + DELIMITER_HEXSTRING_OPEN }; + + public static final byte[] DICT_END_STR = { DELIMITER_HEXSTRING_CLOSE, + DELIMITER_HEXSTRING_CLOSE }; + + public static final byte[] STREAM_STR = { 's', 't', 'r', 'e', 'a', 'm' }; + + public static final byte[] ENDSTREAM_STR = { 'e', 'n', 'd', 's', 't', 'r', + 'e', 'a', 'm' }; + + public static final byte[] NULL_STR = { 'n', 'u', 'l', 'l' }; + + public static final byte[] TRUE_STR = { 't', 'r', 'u', 'e' }; + + public static final byte[] FALSE_STR = { 'f', 'a', 'l', 's', 'e' }; + + // indirect object references + + public static final byte[] REFERENCE_STR = { 'R' }; + + // Dictionary keys + + public static final byte[] SIZE_STR = { 'S', 'i', 'z', 'e' }; + + public static final byte[] PREV_STR = { 'P', 'r', 'e', 'v' }; + + public static final byte[] ROOT_STR = { 'R', 'o', 'o', 't' }; + + public static final byte[] INFO_STR = { 'I', 'n', 'f', 'o' }; + + public static final byte[] LENGTH_STR = { 'L', 'e', 'n', 'g', 't', 'h' }; + +} diff --git a/src/main/java/at/knowcenter/wag/exactparser/parsing/PDFUtils.java b/src/main/java/at/knowcenter/wag/exactparser/parsing/PDFUtils.java new file mode 100644 index 0000000..9a2f738 --- /dev/null +++ b/src/main/java/at/knowcenter/wag/exactparser/parsing/PDFUtils.java @@ -0,0 +1,1393 @@ +/** + * Copyright (c) 2006 by Know-Center, Graz, Austria + * + * This software is the confidential and proprietary information of Know-Center, + * Graz, Austria. 
You shall not disclose such Confidential Information and shall + * use it only in accordance with the terms of the license agreement you entered + * into with Know-Center. + * + * KNOW-CENTER MAKES NO REPRESENTATIONS OR WARRANTIES ABOUT THE SUITABILITY OF + * THE SOFTWARE, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE + * IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, OR + * NON-INFRINGEMENT. KNOW-CENTER SHALL NOT BE LIABLE FOR ANY DAMAGES SUFFERED BY + * LICENSEE AS A RESULT OF USING, MODIFYING OR DISTRIBUTING THIS SOFTWARE OR ITS + * DERIVATIVES. + * + * $Id: PDFUtils.java,v 1.1 2006/08/25 17:00:59 wprinz Exp $ + */ +package at.knowcenter.wag.exactparser.parsing; + +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; + +import at.knowcenter.wag.exactparser.ByteArrayUtils; +import at.knowcenter.wag.exactparser.parsing.results.ArrayParseResult; +import at.knowcenter.wag.exactparser.parsing.results.BooleanParseResult; +import at.knowcenter.wag.exactparser.parsing.results.DictionaryParseResult; +import at.knowcenter.wag.exactparser.parsing.results.EOFParseResult; +import at.knowcenter.wag.exactparser.parsing.results.FooterParseResult; +import at.knowcenter.wag.exactparser.parsing.results.HeaderParseResult; +import at.knowcenter.wag.exactparser.parsing.results.HexStringParseResult; +import at.knowcenter.wag.exactparser.parsing.results.IndirectObjectReferenceParseResult; +import at.knowcenter.wag.exactparser.parsing.results.IntegerParseResult; +import at.knowcenter.wag.exactparser.parsing.results.LiteralStringParseResult; +import at.knowcenter.wag.exactparser.parsing.results.NameParseResult; +import at.knowcenter.wag.exactparser.parsing.results.NullParseResult; +import at.knowcenter.wag.exactparser.parsing.results.NumberParseResult; +import at.knowcenter.wag.exactparser.parsing.results.ObjectHeaderParseResult; +import at.knowcenter.wag.exactparser.parsing.results.ObjectParseResult; +import 
at.knowcenter.wag.exactparser.parsing.results.ParseResult; +import at.knowcenter.wag.exactparser.parsing.results.StartXRefParseResult; +import at.knowcenter.wag.exactparser.parsing.results.StreamParseResult; +import at.knowcenter.wag.exactparser.parsing.results.TrailerParseResult; +import at.knowcenter.wag.exactparser.parsing.results.XRefLineParseResult; +import at.knowcenter.wag.exactparser.parsing.results.XRefSectionParseResult; +import at.knowcenter.wag.exactparser.parsing.results.XRefSubSectionParseResult; + + + +/** + * Abstract class that contains several static utility methods for parsing and + * analyzing PDF documents on the lowest level. + * + *

+ * Most operations require random access to the PDF data (mostly to verify the + * syntax). So the whole PDF document has to be provided as a byte array. The + * term "pdf+index" denotes a specific position index within this byte array. + *

+ *
+ * @author wprinz
+ *
+ */
+public abstract class PDFUtils
+{
+
+  /**
+   * Tests whether the byte is one of PDFNames.WHITESPACE_CHARACTERS.
+   *
+   * @param data
+   *          The byte to classify.
+   * @return Returns true if the byte is a whitespace character.
+   */
+  public static boolean isWhitespace(final byte data)
+  {
+    final byte[] whitespace_set = PDFNames.WHITESPACE_CHARACTERS;
+    return ByteArrayUtils.contains(whitespace_set, data);
+  }
+
+  /**
+   * Tests whether the byte is one of PDFNames.DELIMITER_CHARACTERS.
+   *
+   * @param data
+   *          The byte to classify.
+   * @return Returns true if the byte is a delimiter character.
+   */
+  public static boolean isDelimiter(final byte data)
+  {
+    final byte[] delimiter_set = PDFNames.DELIMITER_CHARACTERS;
+    return ByteArrayUtils.contains(delimiter_set, data);
+  }
+
+  /**
+   * Tests whether the byte is a regular character, i.e. neither whitespace
+   * nor a delimiter.
+   *
+   * @param data
+   *          The byte to classify.
+   * @return Returns true if the byte is neither whitespace nor a delimiter.
+   */
+  protected static boolean isRegular(final byte data)
+  {
+    if (isWhitespace(data))
+    {
+      return false;
+    }
+    return !isDelimiter(data);
+  }
+
+  /**
+   * Skips whitespace.
+   *
+   *

+ * Skips all whitespace, which may be none, one or multiple whitespace + * characters. + *

+ *

+ * Note that this also skips newline characters (which belong to whitespace as + * well). + *

+   *
+   * @param data
+   *          The PDF data.
+   * @param index
+   *          The position at which skipping starts.
+   * @return Returns the position of the first byte at or after index that is
+   *         not whitespace. This equals index if the byte at index is not
+   *         whitespace.
+   */
+  public static int skipWhitespace(final byte[] data, final int index)
+  {
+    int position;
+    for (position = index; isWhitespace(data[position]); position++)
+    {
+      // nothing to do - the loop header advances over the whitespace byte
+    }
+    return position;
+  }
+
+  /**
+   * Skips bytes until whitespace is reached.
+   *
+   *

+ * Skips all non whitespace characters, which may be none at all. + *

+   *
+   * @param data
+   *          The PDF data.
+   * @param index
+   *          The index.
+   * @return Returns the index of the first whitespace character. This may be
+   *         equal to index if no non whitespaces were skipped at all.
+   */
+  public static int skipToWhitespace(final byte[] data, final int index)
+  {
+    int whitespace_index = index;
+    while (!isWhitespace(data[whitespace_index]))
+    {
+      whitespace_index++;
+    }
+    return whitespace_index;
+  }
+
+  /** Line terminator made of a carriage return followed by a line feed. */
+  protected static final byte[] LINE_TERMINATOR_CRLF = {
+      PDFNames.WHITESPACE_CR, PDFNames.WHITESPACE_LF };
+
+  /** Line terminator made of a single carriage return. */
+  protected static final byte[] LINE_TERMINATOR_CRALONE = { PDFNames.WHITESPACE_CR };
+
+  /** Line terminator made of a single line feed. */
+  protected static final byte[] LINE_TERMINATOR_LF = { PDFNames.WHITESPACE_LF };
+
+  /**
+   * Tests whether a line terminator (LF, CR LF or CR alone) starts at the
+   * given index.
+   *
+   * @param data
+   *          The PDF data.
+   * @param index
+   *          The index to test.
+   * @return Returns true if a line terminator starts at index.
+   */
+  public static boolean isNewline(final byte[] data, final int index)
+  {
+    if (ByteArrayUtils.compareByteArrays(data, index, LINE_TERMINATOR_LF))
+    {
+      return true;
+    }
+    if (ByteArrayUtils.compareByteArrays(data, index, LINE_TERMINATOR_CRLF))
+    {
+      return true;
+    }
+    // although not specified by PDF, some applications use the CR alone as line
+    // terminator
+    if (ByteArrayUtils.compareByteArrays(data, index, LINE_TERMINATOR_CRALONE))
+    {
+      return true;
+    }
+    return false;
+  }
+
+  /**
+   * Skips the line terminator that starts at the given index.
+   *
+   * @param data
+   *          The PDF data.
+   * @param index
+   *          The index at which a line terminator is expected.
+   * @return Returns the index of the first byte after the line terminator. If
+   *         no line terminator starts at index, an assertion fails (when
+   *         assertions are enabled) and index is returned unchanged.
+   */
+  public static int skipNewline(final byte[] data, final int index)
+  {
+    if (ByteArrayUtils.compareByteArrays(data, index, LINE_TERMINATOR_LF))
+    {
+      return index + LINE_TERMINATOR_LF.length;
+    }
+    // CR LF has to be tested before CR alone so that both bytes are skipped.
+    if (ByteArrayUtils.compareByteArrays(data, index, LINE_TERMINATOR_CRLF))
+    {
+      return index + LINE_TERMINATOR_CRLF.length;
+    }
+    // although not specified by PDF, some applications use the CR alone as line
+    // terminator
+    if (ByteArrayUtils.compareByteArrays(data, index, LINE_TERMINATOR_CRALONE))
+    {
+      return index + LINE_TERMINATOR_CRALONE.length;
+    }
+
+    assert false : "don't call this if you don't expect a newline - call skipWhitespace instead";
+    return index;
+  }
+
+  /**
+   * Skips the rest of the current line including its line terminator.
+   *
+   * <p>
+   * Starting at index, the data is scanned for the next line terminator (LF,
+   * CR LF or CR alone). All three forms are tested at the current scan
+   * position, so a line ended by CR LF or by CR alone is recognised wherever
+   * it ends and not only at the start index.
+   * </p>
+   *
+   * @param data
+   *          The PDF data.
+   * @param index
+   *          The index at which the scan starts.
+   * @return Returns the index of the first byte after the next line
+   *         terminator.
+   * @throws IndexOutOfBoundsException
+   *           If the end of the data is reached without finding a line
+   *           terminator (thrown by ByteArrayUtils.compareByteArrays).
+   */
+  public static int skipToNewline(final byte[] data, final int index)
+  {
+    int current_index = index;
+    while (!isNewline(data, current_index))
+    {
+      current_index++;
+    }
+    // current_index now marks the start of a line terminator.
+    return skipNewline(data, current_index);
+  }
+
+  /**
+   * Parses a boolean value.
+   *
+   * @param pdf
+   *          The PDF data.
+   * @param index
+   *          The index.
+   * @return Returns the result of the parsing operation.
+   * @throws RuntimeException
+   *           If neither the true nor the false keyword starts at index.
+   */
+  public static BooleanParseResult parseBoolean(final byte[] pdf,
+      final int index)
+  {
+    BooleanParseResult bpr = new BooleanParseResult();
+    bpr.start_index = index;
+
+    if (ByteArrayUtils.compareByteArrays(pdf, bpr.start_index, PDFNames.TRUE_STR))
+    {
+      bpr.value = true;
+      bpr.next_index = bpr.start_index + PDFNames.TRUE_STR.length;
+
+      return bpr;
+    }
+    if (ByteArrayUtils.compareByteArrays(pdf, bpr.start_index, PDFNames.FALSE_STR))
+    {
+      bpr.value = false;
+      bpr.next_index = bpr.start_index + PDFNames.FALSE_STR.length;
+
+      return bpr;
+    }
+
+    throw new RuntimeException("Boolean couldn't be parsed at index " + index);
+  }
+
+  /**
+   * Tests whether the byte is a sign character.
+   *
+   * @param data
+   *          The byte to test.
+   * @return Returns true if the byte is '+' or '-'.
+   */
+  public static boolean isSign(final byte data)
+  {
+    return data == '+' || data == '-';
+  }
+
+  /**
+   * Tests whether the byte is an ASCII decimal digit.
+   *
+   * @param data
+   *          The byte to test.
+   * @return Returns true if the byte lies between '0' and '9' inclusive.
+   */
+  public static boolean isNumeric(final byte data)
+  {
+    return '0' <= data && data <= '9';
+  }
+
+  /**
+   * Reads the (positive integer) number from the data. The number must be
+   * terminated by the end of line.
+   *
+   * @param data
+   *          The data.
+   * @param index
+   *          The index.
+   * @return Returns the read number.
+   */
+  public static int readNumberFromByteArray(final byte[] data, final int index)
+  {
+    NumberParseResult npr = parseNumberFromByteArray(data, index);
+
+    assert npr.number >= 0;
+    return npr.number;
+  }
+
+  /**
+   * Parses an unsigned integer.
+   *
+   *

+ * The integer must be a block of successive number characters. It must not be + * preceded by a sign (not even '+'). + *

+ * + * @param pdf + * The PDF data. + * @param index + * The index. + * @return Returns the result of the parsing operation. + */ + public static IntegerParseResult parseUnsignedInteger(final byte[] pdf, + final int index) + { + assert isNumeric(pdf[index]); + + String number = ""; + + int cur_index = index; + while (isNumeric(pdf[cur_index])) + { + + number += (char) pdf[cur_index]; + + cur_index++; + } + + // TODO: make better + int int_value = Integer.parseInt(number); + + assert int_value >= 0; + + IntegerParseResult ipr = new IntegerParseResult(); + ipr.start_index = index; + ipr.next_index = cur_index; + ipr.number = int_value; + return ipr; + } + + /** + * Parses a (potentially) signed integer. + * + *

+ * The integer must be a block of successive number characters. It may be + * preceded by a sign character ('+' or '-'). + *

+ * + * @param pdf + * The PDF data. + * @param index + * The index. + * @return Returns the result of the parsing operation. + */ + public static IntegerParseResult parseInteger(final byte[] pdf, + final int index) + { + assert isSign(pdf[index]) || isNumeric(pdf[index]); + + int sign = +1; + int number_start = index; + if (pdf[index] == '+') + { + sign = +1; + number_start++; + } + else + { + if (pdf[index] == '-') + { + sign = -1; + number_start++; + } + else + { + assert isNumeric(pdf[index]); + } + } + + IntegerParseResult ipr = parseUnsignedInteger(pdf, number_start); + ipr.start_index = index; + ipr.number *= sign; + return ipr; + } + + /** + * Parses an arbitrary number; + * + * @param pdf + * The PDF data. + * @param index + * The index. + * @return Returns the result of the parsing operation. + */ + public static NumberParseResult parseNumberFromByteArray(final byte[] pdf, + int index) + { + String number = ""; + + assert isSign(pdf[index]) || isNumeric(pdf[index]); + + int sign = +1; + if (pdf[index] == '+') + { + sign = +1; + index++; + } + else + { + if (pdf[index] == '-') + { + sign = -1; + index++; + } + else + { + assert isNumeric(pdf[index]); + } + } + + while (isNumeric(pdf[index]) || pdf[index] == '.') + { + + number += (char) pdf[index]; + + index++; + } + + NumberParseResult npr = new NumberParseResult(); + npr.next_index = index; + // TODO: make better + try + { + npr.number = Integer.parseInt(number) * sign; + } + catch (NumberFormatException e) + { + npr.floating = Float.parseFloat(number) * sign; + } + + return npr; + } + + /** + * Searches the last occurrence of the "startxref" entry ... in other words + * starts the search from the end of the document and works reversely. + * + * @param pdf + * The complete PDF file data. + * @return Returns the offset (byte index) of the "startxref" entry. 
+ */ + public static int findLastStartXRef(final byte[] pdf) + { + return ByteArrayUtils.lastIndexOf(pdf, PDFNames.STARTXREF_STR); + } + + /** + * Parses the xref section at pdf+index. + * + *

+ * An xref section starts with 'xref' and contains one or more xref + * sub-sections. + *

+ * + * @param pdf + * The PDF data. + * @param index + * The start index of the xref table. + * @return Returns the result of the parsing operation. + */ + public static XRefSectionParseResult parseXRefSection(final byte[] pdf, + final int index) + { + at.knowcenter.wag.exactparser.parsing.results.XRefSectionParseResult xpr = new XRefSectionParseResult(); + xpr.start_index = index; + + assert ByteArrayUtils.compareByteArrays(pdf, xpr.start_index, PDFNames.XREF_STR); + assert isNewline(pdf, xpr.start_index + PDFNames.XREF_STR.length); + + int cur_index = skipWhitespace(pdf, xpr.start_index + PDFNames.XREF_STR.length); + // skipNewline(pdf, xpr.start_index + PDFNames.XREF_STR.length); + + for (;;) + { + // trailer ends the xref section. + if (ByteArrayUtils.compareByteArrays(pdf, cur_index, PDFNames.TRAILER_STR)) + { + break; + } + + // no trailer ==> another xref section + + XRefSubSectionParseResult sspr = parseXRefSubSection(pdf, cur_index); + xpr.appendXRefSubSection(sspr); + + cur_index = sspr.next_index; + } + + xpr.next_index = cur_index; + assert ByteArrayUtils.compareByteArrays(pdf, xpr.next_index, PDFNames.TRAILER_STR); + + return xpr; + } + + /** + * Parses a xref sub-section. + * + * @param pdf + * The PDF data. + * @param index + * The index. + * @return Returns the result of the parsing operation. 
+ */ + public static XRefSubSectionParseResult parseXRefSubSection(final byte[] pdf, + final int index) + { + XRefSubSectionParseResult sspr = new XRefSubSectionParseResult(); + sspr.start_index = index; + + NumberParseResult start_obj_num_npr = parseNumberFromByteArray(pdf, sspr.start_index); + sspr.start_obj_number = start_obj_num_npr.number; + assert sspr.start_obj_number >= 0; + + assert isWhitespace(pdf[start_obj_num_npr.next_index]); + int num_obj_index = skipWhitespace(pdf, start_obj_num_npr.next_index); + + NumberParseResult num_obj_npr = parseNumberFromByteArray(pdf, num_obj_index); + sspr.num_objects = num_obj_npr.number; + + // assert isNewline(pdf, num_obj_npr.next_index); + assert isWhitespace(pdf[num_obj_npr.next_index]); + int start_of_line = skipWhitespace(pdf, num_obj_npr.next_index); + // skipNewline(pdf, num_obj_npr.next_index); + + for (int i = 0; i < sspr.num_objects; i++) + { + final int cur_object_number = sspr.start_obj_number + i; + + XRefLineParseResult lpr = parseXrefLine(pdf, start_of_line); + sspr.appendXRefLine(lpr); + + // System.out.println("xref line of object " + (oc.start_obj_number + i) + + // " at " + lpr.start_index + ": " + lpr.object_offset + " " + + // lpr.generation_number + " " + (char) lpr.object_usage); + + if (lpr.object_usage == 'n') + { + // check the line - this simple check may make problems with object + // streams and xref streams + ObjectHeaderParseResult ohpr = parseObjectHeader(pdf, lpr.object_offset); + assert ohpr.object_number == cur_object_number; + assert ohpr.generation_number == lpr.generation_number; + } + + start_of_line = lpr.next_index; + } + + sspr.next_index = start_of_line; + return sspr; + } + + /** + * Parses a single 20 bytes xref line at pdf+index. + * + * @param pdf + * The PDF data. + * @param index + * The index. + * @return Returns the result of the parsing operation. 
+ */ + public static XRefLineParseResult parseXrefLine(final byte[] pdf, + final int index) + { + XRefLineParseResult lpr = new XRefLineParseResult(); + + lpr.start_index = index; + + IntegerParseResult object_offset_ipr = parseUnsignedInteger(pdf, lpr.start_index); + lpr.object_offset = object_offset_ipr.number; + assert lpr.object_offset >= 0; + assert lpr.object_offset < pdf.length; + assert object_offset_ipr.next_index == lpr.start_index + 10; + + assert pdf[object_offset_ipr.next_index] == PDFNames.WHITESPACE_SP; // Standard + // explicitely + // says 1 + // single + // SPACE + int generation_number_index = object_offset_ipr.next_index + 1; + + IntegerParseResult generation_number_ipr = parseUnsignedInteger(pdf, generation_number_index); + lpr.generation_number = generation_number_ipr.number; + assert generation_number_ipr.next_index == lpr.start_index + 16; + + assert pdf[generation_number_ipr.next_index] == PDFNames.WHITESPACE_SP; + int usage_index = generation_number_ipr.next_index + 1; + + lpr.object_usage = pdf[usage_index]; + assert lpr.object_usage == 'n' || lpr.object_usage == 'f'; + + if (pdf[usage_index + 1] == PDFNames.WHITESPACE_SP) + { + assert pdf[usage_index + 2] == PDFNames.WHITESPACE_CR || pdf[usage_index + 2] == PDFNames.WHITESPACE_LF; + } + else + { + assert pdf[usage_index + 1] == PDFNames.WHITESPACE_CR; + assert pdf[usage_index + 2] == PDFNames.WHITESPACE_LF; + } + + lpr.next_index = usage_index + 3; + + assert lpr.next_index == lpr.start_index + 20; + + return lpr; + } + + public static int indexOfName(final byte[] pdf, List names, + byte[] sought) + { + for (int i = 0; i < names.size(); i++) + { + NameParseResult name = (NameParseResult) names.get(i); + if (ByteArrayUtils.compareByteArrays(pdf, name.name_start_index, sought)) + { + return i; + } + } + return -1; + } + + public static TrailerParseResult parseTrailer(final byte[] pdf, + final int index) + { + TrailerParseResult tpr = new TrailerParseResult(); + tpr.start_index = index; + 
tpr.has_predecessor = false; + + assert ByteArrayUtils.compareByteArrays(pdf, tpr.start_index, PDFNames.TRAILER_STR); + + // assert isWhitespace(pdf[tpr.start_index + PDFNames.TRAILER_STR.length]); + tpr.contents_index = skipWhitespace(pdf, tpr.start_index + PDFNames.TRAILER_STR.length); + + int trailer_dict_index = skipWhitespace(pdf, tpr.contents_index); + + assert ByteArrayUtils.compareByteArrays(pdf, trailer_dict_index, PDFNames.DICT_START_STR); + + tpr.dpr = parseDictionary(pdf, trailer_dict_index); + + int cur_index = tpr.dpr.next_index; + + int info_index = indexOfName(pdf, tpr.dpr.names, PDFNames.INFO_STR); + if (info_index >= 0) + { + tpr.info = (IndirectObjectReferenceParseResult) tpr.dpr.values.get(info_index); + } + + int root_index = indexOfName(pdf, tpr.dpr.names, PDFNames.ROOT_STR); + if (root_index >= 0) + { + tpr.root = (IndirectObjectReferenceParseResult) tpr.dpr.values.get(root_index); + } + + tpr.size = ((NumberParseResult) tpr.dpr.values.get(indexOfName(pdf, tpr.dpr.names, PDFNames.SIZE_STR))).number; + + int prev_index = indexOfName(pdf, tpr.dpr.names, PDFNames.PREV_STR); + if (prev_index >= 0) + { + tpr.has_predecessor = true; + tpr.setPrev(((NumberParseResult) tpr.dpr.values.get(prev_index)).number); + } + + // + // int cur_index = skipWhitespace(pdf, trailer_dict_index + + // PDFNames.DICT_START_STR.length); + // for (;;) { + // if (ByteArrayUtils.compareByteArrays(pdf, cur_index, + // PDFNames.DICT_END_STR)) { + // cur_index += PDFNames.DICT_END_STR.length; + // break; + // } + // + // assert pdf[cur_index] == PDFNames.DELIMITER_NAME; + // cur_index++; + // + // if (ByteArrayUtils.compareByteArrays(pdf, cur_index, PDFNames.INFO_STR)) + // { + // assert isWhitespace(pdf[cur_index + PDFNames.INFO_STR.length]); + // int ir_index = skipWhitespace(pdf, cur_index + PDFNames.INFO_STR.length); + // + // IndirectObjectReferenceParseResult iorpr = + // parseIndirectObjectReference(pdf, ir_index); + // tpr.info = iorpr; + // + // cur_index = 
skipWhitespace(pdf, iorpr.next_index); + // continue; + // } + // + // if (ByteArrayUtils.compareByteArrays(pdf, cur_index, PDFNames.ROOT_STR)) + // { + // assert isWhitespace(pdf[cur_index + PDFNames.ROOT_STR.length]); + // int ir_index = skipWhitespace(pdf, cur_index + PDFNames.ROOT_STR.length); + // + // IndirectObjectReferenceParseResult iorpr = + // parseIndirectObjectReference(pdf, ir_index); + // tpr.root = iorpr; + // + // cur_index = skipWhitespace(pdf, iorpr.next_index); + // continue; + // } + // + // if (ByteArrayUtils.compareByteArrays(pdf, cur_index, PDFNames.SIZE_STR)) + // { + // assert isWhitespace(pdf[cur_index + PDFNames.SIZE_STR.length]); + // int size_index = skipWhitespace(pdf, cur_index + + // PDFNames.SIZE_STR.length); + // + // NumberParseResult npr = parseNumberFromByteArray(pdf, size_index); + // tpr.size = npr.number; + // assert tpr.size > 0; + // + // cur_index = skipWhitespace(pdf, npr.next_index); + // continue; + // } + // + // if (ByteArrayUtils.compareByteArrays(pdf, cur_index, PDFNames.PREV_STR)) + // { + // assert isWhitespace(pdf[cur_index + PDFNames.PREV_STR.length]); + // int prev_index = skipWhitespace(pdf, cur_index + + // PDFNames.PREV_STR.length); + // + // NumberParseResult npr = parseNumberFromByteArray(pdf, prev_index); + // tpr.has_predecessor = true; + // tpr.setPrev(npr.number); + // assert tpr.getPrev() >= 0; + // assert tpr.getPrev() < pdf.length; + // + // assert ByteArrayUtils.compareByteArrays(pdf, tpr.getPrev(), + // PDFNames.XREF_STR); + // + // cur_index = skipWhitespace(pdf, npr.next_index); + // continue; + // } + // + // // unrecognized type + // // skip to next delimiter + // // TODO: this will not work with nested dicts. 
+ // while (pdf[cur_index] != PDFNames.DELIMITER_NAME) { + // cur_index++; + // } + // } + + tpr.contents_end_index = cur_index; + tpr.next_index = skipWhitespace(pdf, tpr.contents_end_index); + + assert ByteArrayUtils.compareByteArrays(pdf, tpr.next_index, PDFNames.STARTXREF_STR); + return tpr; + } + + /** + * Parses the startxref section at pdf+index. + * + * @param pdf + * The complete PDF file data. + * @param index + * The index of the startxref section. + * @return Returns the retsult of the parsing operation. + */ + public static StartXRefParseResult parseStartXRef(final byte[] pdf, + final int index) + { + StartXRefParseResult spr = new StartXRefParseResult(); + spr.next_index = index; + + assert ByteArrayUtils.compareByteArrays(pdf, index, PDFNames.STARTXREF_STR); + assert isNewline(pdf, index + PDFNames.STARTXREF_STR.length); + + int index_of_number = skipWhitespace(pdf, index + PDFNames.STARTXREF_STR.length); + // skipNewline(pdf, index + PDFNames.STARTXREF_STR.length); + NumberParseResult npr = parseNumberFromByteArray(pdf, index_of_number); + spr.xref_index = npr.number; + + assert isNewline(pdf, npr.next_index); + spr.next_index = skipWhitespace(pdf, npr.next_index); + // skipNewline(pdf, npr.next_index); + + assert ByteArrayUtils.compareByteArrays(pdf, spr.next_index, PDFNames.EOF_STR); + + assert spr.xref_index >= 0; + assert spr.xref_index < pdf.length; + + // A linearized document sets the startxref value of the first page's footer + // to 0. + if (spr.xref_index != 0) + { + assert ByteArrayUtils.compareByteArrays(pdf, spr.xref_index, PDFNames.XREF_STR); + } + + return spr; + } + + /** + * Parses the End Of File (EOF) marker at pdf+index. + * + * @param pdf + * The PDF data. + * @param index + * The index where to start the parsing. + * @return Returns the result of the parsing operation. 
+ */ + public static EOFParseResult parseEOF(final byte[] pdf, final int index) + { + EOFParseResult eofpr = new EOFParseResult(); + eofpr.start_index = index; + + assert ByteArrayUtils.compareByteArrays(pdf, eofpr.start_index, PDFNames.EOF_STR); + + eofpr.eof_end_index = eofpr.start_index + PDFNames.EOF_STR.length; + + // Note: The EOF marker is not necessarily terminated with a + // newline. + + // perhaps explicitely determine a newline. + + eofpr.next_index = eofpr.eof_end_index; + + return eofpr; + } + + public static boolean isIndirectObjectReference(final byte[] pdf, + final int index) + { + IndirectObjectReferenceParseResult iorpr = new IndirectObjectReferenceParseResult(); + iorpr.ior = new IndirectObjectReference(); + iorpr.start_index = index; + + if (!PDFUtils.isNumeric(pdf[iorpr.start_index])) + { + return false; + } + NumberParseResult object_number_npr = parseNumberFromByteArray(pdf, iorpr.start_index); + iorpr.ior.object_number = object_number_npr.number; + if (iorpr.ior.object_number <= 0) + { + return false; + } + + if (!isWhitespace(pdf[object_number_npr.next_index])) + { + return false; + } + int generation_number_index = skipWhitespace(pdf, object_number_npr.next_index); + + if (!PDFUtils.isNumeric(pdf[generation_number_index])) + { + return false; + } + NumberParseResult generation_number_npr = parseNumberFromByteArray(pdf, generation_number_index); + iorpr.ior.generation_number = generation_number_npr.number; + if (iorpr.ior.generation_number < 0) + { + return false; + } + + if (!isWhitespace(pdf[generation_number_npr.next_index])) + { + return false; + } + int R_index = skipWhitespace(pdf, generation_number_npr.next_index); + + if (!ByteArrayUtils.compareByteArrays(pdf, R_index, PDFNames.REFERENCE_STR)) + { + return false; + } + + iorpr.next_index = R_index + PDFNames.REFERENCE_STR.length; + + return true; + } + + /** + * Parses an indirect object reference. + * + * @param pdf + * The PDF data. + * @param index + * The index. 
+ * @return Returns the result of the parsing operation. + */ + public static IndirectObjectReferenceParseResult parseIndirectObjectReference( + final byte[] pdf, final int index) + { + + assert isIndirectObjectReference(pdf, index); + + IndirectObjectReferenceParseResult iorpr = new IndirectObjectReferenceParseResult(); + iorpr.ior = new IndirectObjectReference(); + iorpr.start_index = index; + + NumberParseResult object_number_npr = parseNumberFromByteArray(pdf, iorpr.start_index); + iorpr.ior.object_number = object_number_npr.number; + assert iorpr.ior.object_number > 0; + + assert isWhitespace(pdf[object_number_npr.next_index]); + int generation_number_index = skipWhitespace(pdf, object_number_npr.next_index); + + NumberParseResult generation_number_npr = parseNumberFromByteArray(pdf, generation_number_index); + iorpr.ior.generation_number = generation_number_npr.number; + assert iorpr.ior.generation_number >= 0; + + assert isWhitespace(pdf[generation_number_npr.next_index]); + int R_index = skipWhitespace(pdf, generation_number_npr.next_index); + + assert ByteArrayUtils.compareByteArrays(pdf, R_index, PDFNames.REFERENCE_STR); + + iorpr.next_index = R_index + PDFNames.REFERENCE_STR.length; + + return iorpr; + } + + /** + * Parses the object header at pdf+index. + * + * @param pdf + * The PDF data. + * @param index + * The index. + * @return Returns the result of the parsing operation. 
+ */ + public static ObjectHeaderParseResult parseObjectHeader(final byte[] pdf, + final int index) + { + ObjectHeaderParseResult ohpr = new ObjectHeaderParseResult(); + + ohpr.start_index = index; + + NumberParseResult object_number_npr = parseNumberFromByteArray(pdf, ohpr.start_index); + ohpr.object_number = object_number_npr.number; + assert ohpr.object_number > 0; + + assert isWhitespace(pdf[object_number_npr.next_index]); + int generation_number_index = skipWhitespace(pdf, object_number_npr.next_index); + + NumberParseResult generation_number_npr = parseNumberFromByteArray(pdf, generation_number_index); + ohpr.generation_number = generation_number_npr.number; + assert ohpr.generation_number >= 0; + + assert isWhitespace(pdf[generation_number_npr.next_index]); + int obj_index = skipWhitespace(pdf, generation_number_npr.next_index); + + assert ByteArrayUtils.compareByteArrays(pdf, obj_index, PDFNames.OBJ_STR); + + // not all pdfwriters make a newline after obj... + // assert isNewline(pdf, obj_index + PDFNames.OBJ_STR.length); + // ohpr.next_index = skipNewline(pdf, obj_index + PDFNames.OBJ_STR.length); + ohpr.next_index = skipWhitespace(pdf, obj_index + PDFNames.OBJ_STR.length); + + return ohpr; + } + + public static ObjectParseResult parseObject(final byte[] pdf, final int index) + { + ObjectParseResult opr = new ObjectParseResult(); + opr.start_index = index; + + opr.header = parseObjectHeader(pdf, opr.start_index); + opr.content_index = opr.header.next_index; + + int cur_index = skipWhitespace(pdf, opr.content_index); + + opr.object = parseUnknownObject(pdf, cur_index); + + cur_index = skipWhitespace(pdf, opr.object.next_index); + + opr.end_of_content_index = cur_index; + assert ByteArrayUtils.compareByteArrays(pdf, opr.end_of_content_index, PDFNames.ENDOBJ_STR); + + cur_index = opr.end_of_content_index + PDFNames.ENDOBJ_STR.length; + + opr.next_index = cur_index; + //assert isNewline(pdf, cur_index); + //opr.next_index = skipNewline(pdf, cur_index); + + 
return opr; + } + + public static ParseResult parseUnknownObject(final byte[] pdf, final int index) + { + if (ByteArrayUtils.compareByteArrays(pdf, index, PDFNames.DICT_START_STR)) + { + DictionaryParseResult dpr = parseDictionary(pdf, index); + + int possible_stream_index = skipWhitespace(pdf, dpr.next_index); + if (ByteArrayUtils.compareByteArrays(pdf, possible_stream_index, PDFNames.STREAM_STR)) + { + return parseStream(pdf, possible_stream_index, dpr); + } + + return dpr; + } + + if (ByteArrayUtils.compareByteArrays(pdf, index, PDFNames.NULL_STR)) + { + return parseNull(pdf, index); + } + + if (ByteArrayUtils.compareByteArrays(pdf, index, PDFNames.TRUE_STR) || ByteArrayUtils.compareByteArrays(pdf, index, PDFNames.FALSE_STR)) + { + return parseBoolean(pdf, index); + } + + final byte first_byte = pdf[index]; + + if (isNumeric(first_byte) || isSign(first_byte)) + { + + // try to parse a Indirect reference first - if this fails, parse a number + if (isIndirectObjectReference(pdf, index)) + { + return parseIndirectObjectReference(pdf, index); + } + + return parseNumberFromByteArray(pdf, index); + } + + ParseResult pr = null; + + switch (first_byte) + { + case PDFNames.DELIMITER_STRING_OPEN: + pr = parseLiteralString(pdf, index); + break; + case PDFNames.DELIMITER_HEXSTRING_OPEN: + pr = parseHexString(pdf, index); + break; + case PDFNames.DELIMITER_ARRAY_OPEN: + pr = parseArray(pdf, index); + break; + case PDFNames.DELIMITER_NAME: + pr = parseName(pdf, index); + break; + default: + throw new RuntimeException("Unknown first_byte when parsing an unknown object at index=" + index + "."); + // assert false : "nyi or invalid char"; + } + assert pr != null; + + return pr; + } + + /** + * Parses a literal string. + * + *

+ * A literal string is a string of ASCII characters enclosed by '(' and ')'. + * Balanced pairs of '(' and ')' are allowed within the string. Unbalanced '(' + * or ')' must be escaped as '\(' or '\)'. + *

+ * + * @param pdf + * The PDF data. + * @param index + * The index. + * @return Returns the result of the parsing operation. + */ + public static LiteralStringParseResult parseLiteralString(final byte[] pdf, + final int index) + { + LiteralStringParseResult lspr = new LiteralStringParseResult(); + lspr.start_index = index; + + assert pdf[lspr.start_index] == PDFNames.DELIMITER_STRING_OPEN; + + lspr.content_start_index = lspr.start_index + 1; + + int cur_index = lspr.content_start_index; + int parenthesis_stack = 0; + for (;;) + { + if (pdf[cur_index] == '\\' && (pdf[cur_index + 1] == PDFNames.DELIMITER_STRING_CLOSE || pdf[cur_index + 1] == PDFNames.DELIMITER_STRING_OPEN)) + { + cur_index += 2; + continue; + } + if (pdf[cur_index] == PDFNames.DELIMITER_STRING_OPEN) + { + parenthesis_stack++; + } + if (pdf[cur_index] == PDFNames.DELIMITER_STRING_CLOSE) + { + assert parenthesis_stack >= 0; + + if (parenthesis_stack == 0) + { + break; + } + + assert parenthesis_stack > 0; + parenthesis_stack--; + + } + + cur_index++; + } + + lspr.content_end_index = cur_index; + assert pdf[lspr.content_end_index] == PDFNames.DELIMITER_STRING_CLOSE; + + lspr.next_index = lspr.content_end_index + 1; + + return lspr; + } + + protected static boolean isHex(final byte data) + { + return isNumeric(data) || ('a' <= data && data <= 'f') || ('A' <= data && data <= 'f'); + } + + /** + * Parses a hexadecimal string. + * + * @param pdf + * The PDF data. + * @param index + * The index. + * @return Returns the result of the parsing operation. 
+ */ + public static HexStringParseResult parseHexString(final byte[] pdf, + final int index) + { + HexStringParseResult hspr = new HexStringParseResult(); + hspr.start_index = index; + + assert pdf[hspr.start_index] == PDFNames.DELIMITER_HEXSTRING_OPEN; + + hspr.content_start_index = hspr.start_index + 1; + + int cur_index = hspr.content_start_index; + while (isHex(pdf[cur_index]) || isWhitespace(pdf[cur_index])) + { + cur_index++; + } + + hspr.content_end_index = cur_index; + assert pdf[hspr.content_end_index] == PDFNames.DELIMITER_HEXSTRING_CLOSE; + + hspr.next_index = hspr.content_end_index + 1; + + return hspr; + } + + public static ArrayParseResult parseArray(final byte[] pdf, final int index) + { + ArrayParseResult apr = new ArrayParseResult(); + apr.start_index = index; + assert pdf[apr.start_index] == PDFNames.DELIMITER_ARRAY_OPEN; + + apr.content_start_index = apr.start_index + 1; + + apr.elements = new ArrayList(); + + int cur_index = skipWhitespace(pdf, apr.content_start_index); + for (;;) + { + if (pdf[cur_index] == PDFNames.DELIMITER_ARRAY_CLOSE) + { + break; + } + + ParseResult pr = parseUnknownObject(pdf, cur_index); + apr.elements.add(pr); + + cur_index = skipWhitespace(pdf, pr.next_index); + } + assert pdf[cur_index] == PDFNames.DELIMITER_ARRAY_CLOSE; + + apr.content_end_index = cur_index; + assert pdf[apr.content_end_index] == PDFNames.DELIMITER_ARRAY_CLOSE; + + apr.next_index = apr.content_end_index + 1; + return apr; + } + + /** + * Parses a PDF Name. + * + * @param pdf + * The PDF data. + * @param index + * The index. + * @return Returns the result of this parsing operation. 
+ */ + public static NameParseResult parseName(final byte[] pdf, final int index) + { + NameParseResult npr = new NameParseResult(); + npr.start_index = index; + + assert pdf[npr.start_index] == PDFNames.DELIMITER_NAME; + + npr.name_start_index = npr.start_index + 1; + + assert isRegular(pdf[npr.name_start_index]); + + int cur_index = npr.name_start_index; + while (isRegular(pdf[cur_index])) + { + cur_index++; + } + assert !isRegular(pdf[cur_index]); + + npr.next_index = cur_index; + + return npr; + } + + public static DictionaryParseResult parseDictionary(final byte[] pdf, + final int index) + { + DictionaryParseResult dpr = new DictionaryParseResult(); + dpr.start_index = index; + + assert ByteArrayUtils.compareByteArrays(pdf, index, PDFNames.DICT_START_STR); + + dpr.content_start_index = dpr.start_index + PDFNames.DICT_START_STR.length; + + dpr.names = new ArrayList(); + dpr.values = new ArrayList(); + + int cur_index = skipWhitespace(pdf, dpr.content_start_index); + for (;;) + { + if (ByteArrayUtils.compareByteArrays(pdf, cur_index, PDFNames.DICT_END_STR)) + { + break; + } + + NameParseResult npr = parseName(pdf, cur_index); + dpr.names.add(npr); + + cur_index = npr.next_index; + cur_index = skipWhitespace(pdf, cur_index); + + ParseResult pr = parseUnknownObject(pdf, cur_index); + dpr.values.add(pr); + + cur_index = pr.next_index; + cur_index = skipWhitespace(pdf, cur_index); + } + + dpr.content_end_index = cur_index; + assert ByteArrayUtils.compareByteArrays(pdf, dpr.content_end_index, PDFNames.DICT_END_STR); + dpr.next_index = dpr.content_end_index + PDFNames.DICT_END_STR.length; + + return dpr; + } + + /** + * Parses a stream. + * + * @param pdf + * The PDF data. + * @param index + * The index. + * @param dpr + * The DictionaryParseResult of the stream's dictionary. This + * dictionary must precede the stream keyword. Usually this is + * provided in the stream object's dictionary via the /Length field. 
+ * @return Returns the result of this parsing operation. + */ + public static StreamParseResult parseStream(final byte[] pdf, + final int index, final DictionaryParseResult dpr) + { + StreamParseResult spr = new StreamParseResult(); + spr.stream_dictionary = dpr; + spr.start_index = spr.stream_dictionary.start_index; + spr.stream_start_index = index; + assert ByteArrayUtils.compareByteArrays(pdf, index, PDFNames.STREAM_STR); + + // assert that the provided dictionary really belongs to this stream + assert spr.stream_start_index == skipWhitespace(pdf, spr.stream_dictionary.next_index); + + // see PDF Spec 1.4 chapter 3.2.7 + assert pdf[spr.stream_start_index + PDFNames.STREAM_STR.length] == PDFNames.WHITESPACE_LF || (pdf[spr.stream_start_index + PDFNames.STREAM_STR.length] == PDFNames.WHITESPACE_CR && pdf[spr.stream_start_index + PDFNames.STREAM_STR.length + 1] == PDFNames.WHITESPACE_LF); + spr.content_start_index = skipNewline(pdf, spr.stream_start_index + PDFNames.STREAM_STR.length); + + int length = -1; + for (int i = 0; i < spr.stream_dictionary.names.size(); i++) + { + NameParseResult name = (NameParseResult) spr.stream_dictionary.names.get(i); + if (ByteArrayUtils.compareByteArrays(pdf, name.name_start_index, PDFNames.LENGTH_STR)) + { + ParseResult pr = (ParseResult) spr.stream_dictionary.values.get(i); + NumberParseResult npr = null; + if (pr instanceof IndirectObjectReferenceParseResult) + { + System.out.println("An object stream with indirect length - cannot parse this instantly - parse later again."); + spr.content_end_index = -1; + spr.next_index = -1; + return spr; + + } + else + { + npr = (NumberParseResult) pr; + } + assert npr != null; + + length = npr.number; + break; + } + + } + assert length >= 0; + + spr.content_end_index = spr.content_start_index + length; + + int endstr_index = spr.content_end_index; + if (isNewline(pdf, endstr_index)) + { + endstr_index = skipWhitespace(pdf, endstr_index); + } + assert ByteArrayUtils.compareByteArrays(pdf, 
endstr_index, PDFNames.ENDSTREAM_STR); + + spr.next_index = endstr_index + PDFNames.ENDSTREAM_STR.length; + + return spr; + } + + public static NullParseResult parseNull(final byte[] pdf, final int index) + { + NullParseResult npr = new NullParseResult(); + npr.start_index = index; + + assert ByteArrayUtils.compareByteArrays(pdf, npr.start_index, PDFNames.NULL_STR); + + npr.next_index = npr.start_index + PDFNames.NULL_STR.length; + + return npr; + } + + public static int getObjectOffsetFromXRefByIndirectObjectReference( + XRefSectionParseResult xpr, IndirectObjectReference ior) + { + Iterator it = xpr.xref_subsections.iterator(); + while (it.hasNext()) + { + XRefSubSectionParseResult section = (XRefSubSectionParseResult) it.next(); + + for (int i = 0; i < section.xref_lines.size(); i++) + { + if (section.start_obj_number + i == ior.object_number) + { + XRefLineParseResult lpr = (XRefLineParseResult) section.xref_lines.get(i); + return lpr.object_offset; + } + } + } + + return -1; + } + + public static HeaderParseResult parseHeader(final byte[] pdf, final int index) + { + HeaderParseResult hpr = new HeaderParseResult(); + hpr.start_index = index; + + assert pdf[hpr.start_index] == PDFNames.COMMENT; + + assert ByteArrayUtils.compareByteArrays(pdf, hpr.start_index + 1, PDFNames.PDF_VERSION_STR); + + hpr.major_index = hpr.start_index + 1 + PDFNames.PDF_VERSION_STR.length; + + IntegerParseResult major_ipr = parseUnsignedInteger(pdf, hpr.major_index); + hpr.major = major_ipr.number; + assert hpr.major >= 1; + + assert pdf[major_ipr.next_index] == PDFNames.PDF_VERSION_SEPARATOR; + + hpr.minor_index = major_ipr.next_index + 1; + + IntegerParseResult minor_ipr = parseUnsignedInteger(pdf, hpr.minor_index); + hpr.minor = minor_ipr.number; + assert hpr.minor >= 0; + + assert isWhitespace(pdf[minor_ipr.next_index]); + hpr.binary_characters_index = skipWhitespace(pdf, minor_ipr.next_index); + + assert pdf[hpr.binary_characters_index] == PDFNames.COMMENT; + + hpr.next_index = 
skipToNewline(pdf, hpr.binary_characters_index); + return hpr; + } + + /** + * Parses a PDF footer. + * + *

+ * A PDF footer starts with the xref, followed by the trailer, the startxref + * and the EOF marker. + *

+ * + * @param pdf + * The PDF data. + * @param index + * The index at which the footer (its xref section) starts. + * @return Returns the result of the parsing operation. + * + * @see FooterParseResult + */ + public static FooterParseResult parseFooter(final byte[] pdf, final int index) + { + FooterParseResult fpr = new FooterParseResult(); + fpr.start_index = index; + + fpr.xpr = PDFUtils.parseXRefSection(pdf, fpr.start_index); + + fpr.tpr = PDFUtils.parseTrailer(pdf, fpr.xpr.next_index); + + fpr.sxpr = PDFUtils.parseStartXRef(pdf, fpr.tpr.next_index); + + fpr.eofpr = PDFUtils.parseEOF(pdf, fpr.sxpr.next_index); + + fpr.next_index = fpr.eofpr.next_index; + return fpr; + } + +} diff --git a/src/main/java/at/knowcenter/wag/exactparser/parsing/results/ArrayParseResult.java b/src/main/java/at/knowcenter/wag/exactparser/parsing/results/ArrayParseResult.java new file mode 100644 index 0000000..53d2838 --- /dev/null +++ b/src/main/java/at/knowcenter/wag/exactparser/parsing/results/ArrayParseResult.java @@ -0,0 +1,34 @@ +/** + * Copyright (c) 2006 by Know-Center, Graz, Austria + * + * This software is the confidential and proprietary information of Know-Center, + * Graz, Austria. You shall not disclose such Confidential Information and shall + * use it only in accordance with the terms of the license agreement you entered + * into with Know-Center. + * + * KNOW-CENTER MAKES NO REPRESENTATIONS OR WARRANTIES ABOUT THE SUITABILITY OF + * THE SOFTWARE, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE + * IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, OR + * NON-INFRINGEMENT. KNOW-CENTER SHALL NOT BE LIABLE FOR ANY DAMAGES SUFFERED BY + * LICENSEE AS A RESULT OF USING, MODIFYING OR DISTRIBUTING THIS SOFTWARE OR ITS + * DERIVATIVES. + * + * $Id: ArrayParseResult.java,v 1.1 2006/08/25 17:00:59 wprinz Exp $ + */ +package at.knowcenter.wag.exactparser.parsing.results; + +import java.util.List; + +/** + * The result of parsing an array.
+ * + * @see at.knowcenter.wag.exactparser.parsing.results.LiteralStringParseResult + * + * @author wprinz + */ +public class ArrayParseResult extends ContainerParseResult { + + public List elements = null; + + +} diff --git a/src/main/java/at/knowcenter/wag/exactparser/parsing/results/BooleanParseResult.java b/src/main/java/at/knowcenter/wag/exactparser/parsing/results/BooleanParseResult.java new file mode 100644 index 0000000..5b6c31d --- /dev/null +++ b/src/main/java/at/knowcenter/wag/exactparser/parsing/results/BooleanParseResult.java @@ -0,0 +1,30 @@ +/** + * Copyright (c) 2006 by Know-Center, Graz, Austria + * + * This software is the confidential and proprietary information of Know-Center, + * Graz, Austria. You shall not disclose such Confidential Information and shall + * use it only in accordance with the terms of the license agreement you entered + * into with Know-Center. + * + * KNOW-CENTER MAKES NO REPRESENTATIONS OR WARRANTIES ABOUT THE SUITABILITY OF + * THE SOFTWARE, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE + * IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, OR + * NON-INFRINGEMENT. KNOW-CENTER SHALL NOT BE LIABLE FOR ANY DAMAGES SUFFERED BY + * LICENSEE AS A RESULT OF USING, MODIFYING OR DISTRIBUTING THIS SOFTWARE OR ITS + * DERIVATIVES. + * + * $Id: BooleanParseResult.java,v 1.1 2006/08/25 17:00:59 wprinz Exp $ + */ +package at.knowcenter.wag.exactparser.parsing.results; + +/** + * Parse result of parsing a boolean value. 
+ * + * @author wprinz + */ +public class BooleanParseResult extends ParseResult +{ + + public boolean value = false; + +} diff --git a/src/main/java/at/knowcenter/wag/exactparser/parsing/results/ContainerParseResult.java b/src/main/java/at/knowcenter/wag/exactparser/parsing/results/ContainerParseResult.java new file mode 100644 index 0000000..3ca8dc2 --- /dev/null +++ b/src/main/java/at/knowcenter/wag/exactparser/parsing/results/ContainerParseResult.java @@ -0,0 +1,37 @@ +/** + * Copyright (c) 2006 by Know-Center, Graz, Austria + * + * This software is the confidential and proprietary information of Know-Center, + * Graz, Austria. You shall not disclose such Confidential Information and shall + * use it only in accordance with the terms of the license agreement you entered + * into with Know-Center. + * + * KNOW-CENTER MAKES NO REPRESENTATIONS OR WARRANTIES ABOUT THE SUITABILITY OF + * THE SOFTWARE, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE + * IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, OR + * NON-INFRINGEMENT. KNOW-CENTER SHALL NOT BE LIABLE FOR ANY DAMAGES SUFFERED BY + * LICENSEE AS A RESULT OF USING, MODIFYING OR DISTRIBUTING THIS SOFTWARE OR ITS + * DERIVATIVES. + * + * $Id: ContainerParseResult.java,v 1.1 2006/08/25 17:00:59 wprinz Exp $ + */ +package at.knowcenter.wag.exactparser.parsing.results; + +/** + * Base class of container parse results. + * + *

+ * Containers are types that include some content. + * E.g. literal strings include string data as content, + * arrays include elements as content etc. + *

+ * + * @author wprinz + */ +public class ContainerParseResult extends ParseResult { + + public int content_start_index = -1; + public int content_end_index = -1; + + +} diff --git a/src/main/java/at/knowcenter/wag/exactparser/parsing/results/DictionaryParseResult.java b/src/main/java/at/knowcenter/wag/exactparser/parsing/results/DictionaryParseResult.java new file mode 100644 index 0000000..b976bd2 --- /dev/null +++ b/src/main/java/at/knowcenter/wag/exactparser/parsing/results/DictionaryParseResult.java @@ -0,0 +1,33 @@ +/** + * Copyright (c) 2006 by Know-Center, Graz, Austria + * + * This software is the confidential and proprietary information of Know-Center, + * Graz, Austria. You shall not disclose such Confidential Information and shall + * use it only in accordance with the terms of the license agreement you entered + * into with Know-Center. + * + * KNOW-CENTER MAKES NO REPRESENTATIONS OR WARRANTIES ABOUT THE SUITABILITY OF + * THE SOFTWARE, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE + * IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, OR + * NON-INFRINGEMENT. KNOW-CENTER SHALL NOT BE LIABLE FOR ANY DAMAGES SUFFERED BY + * LICENSEE AS A RESULT OF USING, MODIFYING OR DISTRIBUTING THIS SOFTWARE OR ITS + * DERIVATIVES. + * + * $Id: DictionaryParseResult.java,v 1.1 2006/08/25 17:00:59 wprinz Exp $ + */ +package at.knowcenter.wag.exactparser.parsing.results; + +import java.util.List; + +/** + * The result of parsing a dictionary. 
+ * + * @author wprinz + */ +public class DictionaryParseResult extends ContainerParseResult +{ + + public List names = null; + + public List values = null; +} diff --git a/src/main/java/at/knowcenter/wag/exactparser/parsing/results/EOFParseResult.java b/src/main/java/at/knowcenter/wag/exactparser/parsing/results/EOFParseResult.java new file mode 100644 index 0000000..19d864d --- /dev/null +++ b/src/main/java/at/knowcenter/wag/exactparser/parsing/results/EOFParseResult.java @@ -0,0 +1,39 @@ +/** + * Copyright (c) 2006 by Know-Center, Graz, Austria + * + * This software is the confidential and proprietary information of Know-Center, + * Graz, Austria. You shall not disclose such Confidential Information and shall + * use it only in accordance with the terms of the license agreement you entered + * into with Know-Center. + * + * KNOW-CENTER MAKES NO REPRESENTATIONS OR WARRANTIES ABOUT THE SUITABILITY OF + * THE SOFTWARE, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE + * IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, OR + * NON-INFRINGEMENT. KNOW-CENTER SHALL NOT BE LIABLE FOR ANY DAMAGES SUFFERED BY + * LICENSEE AS A RESULT OF USING, MODIFYING OR DISTRIBUTING THIS SOFTWARE OR ITS + * DERIVATIVES. + * + * $Id: EOFParseResult.java,v 1.1 2006/08/25 17:00:59 wprinz Exp $ + */ +package at.knowcenter.wag.exactparser.parsing.results; + +/** + * The result of parsing the End Of File marker. + * + * @author wprinz + */ +public class EOFParseResult extends ParseResult +{ + + /** + * The index of the byte after the EOF marker. + * + *

+ * A newline is not necessary after the EOF marker, but if it is present it will be considered + * as part of it. + * So eof_end_index marks this newline. + * If eof_end_index == next_index, then no new line is present. + *

+ */ + public int eof_end_index = -1; +} diff --git a/src/main/java/at/knowcenter/wag/exactparser/parsing/results/FooterParseResult.java b/src/main/java/at/knowcenter/wag/exactparser/parsing/results/FooterParseResult.java new file mode 100644 index 0000000..d8eb2e1 --- /dev/null +++ b/src/main/java/at/knowcenter/wag/exactparser/parsing/results/FooterParseResult.java @@ -0,0 +1,45 @@ +/** + * Copyright (c) 2006 by Know-Center, Graz, Austria + * + * This software is the confidential and proprietary information of Know-Center, + * Graz, Austria. You shall not disclose such Confidential Information and shall + * use it only in accordance with the terms of the license agreement you entered + * into with Know-Center. + * + * KNOW-CENTER MAKES NO REPRESENTATIONS OR WARRANTIES ABOUT THE SUITABILITY OF + * THE SOFTWARE, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE + * IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, OR + * NON-INFRINGEMENT. KNOW-CENTER SHALL NOT BE LIABLE FOR ANY DAMAGES SUFFERED BY + * LICENSEE AS A RESULT OF USING, MODIFYING OR DISTRIBUTING THIS SOFTWARE OR ITS + * DERIVATIVES. + * + * $Id: FooterParseResult.java,v 1.1 2006/08/25 17:00:59 wprinz Exp $ + */ +package at.knowcenter.wag.exactparser.parsing.results; + + +/** + * The result of parsing a PDF footer block. + * + *

+ * A PDF footer block starts with the xref table followed by the trailer, the + * startxref and finally the EOF marker. Usually the footer should be at the end + * of the file. All object offsets in the footer's xref table should be before + * the footer itself. Nevertheless, there are PDF Writers (e.g. Microsoft Word) + * that put the footer at the beginning of the document so that all indirect + * objects are after the EOF marker. + *

+ * + * @author wprinz + */ +public class FooterParseResult extends ParseResult +{ + + public StartXRefParseResult sxpr = null; + + public EOFParseResult eofpr = null; + + public XRefSectionParseResult xpr = null; + + public TrailerParseResult tpr = null; +} diff --git a/src/main/java/at/knowcenter/wag/exactparser/parsing/results/HeaderParseResult.java b/src/main/java/at/knowcenter/wag/exactparser/parsing/results/HeaderParseResult.java new file mode 100644 index 0000000..893fa07 --- /dev/null +++ b/src/main/java/at/knowcenter/wag/exactparser/parsing/results/HeaderParseResult.java @@ -0,0 +1,40 @@ +/** + * Copyright (c) 2006 by Know-Center, Graz, Austria + * + * This software is the confidential and proprietary information of Know-Center, + * Graz, Austria. You shall not disclose such Confidential Information and shall + * use it only in accordance with the terms of the license agreement you entered + * into with Know-Center. + * + * KNOW-CENTER MAKES NO REPRESENTATIONS OR WARRANTIES ABOUT THE SUITABILITY OF + * THE SOFTWARE, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE + * IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, OR + * NON-INFRINGEMENT. KNOW-CENTER SHALL NOT BE LIABLE FOR ANY DAMAGES SUFFERED BY + * LICENSEE AS A RESULT OF USING, MODIFYING OR DISTRIBUTING THIS SOFTWARE OR ITS + * DERIVATIVES. + * + * $Id: HeaderParseResult.java,v 1.1 2006/08/25 17:00:59 wprinz Exp $ + */ +package at.knowcenter.wag.exactparser.parsing.results; + +/** + * The result of parsing the PDF header. + * + *

+ * The header contains the PDF version and is usually followed by some binary + * characters. + *

+ * + * @author wprinz + */ +public class HeaderParseResult extends ParseResult +{ + public int major_index = -1; + public int minor_index = -1; + + public int major = -1; + public int minor = -1; + + public int binary_characters_index = -1; + +} diff --git a/src/main/java/at/knowcenter/wag/exactparser/parsing/results/HexStringParseResult.java b/src/main/java/at/knowcenter/wag/exactparser/parsing/results/HexStringParseResult.java new file mode 100644 index 0000000..fdaaf66 --- /dev/null +++ b/src/main/java/at/knowcenter/wag/exactparser/parsing/results/HexStringParseResult.java @@ -0,0 +1,28 @@ +/** + * Copyright (c) 2006 by Know-Center, Graz, Austria + * + * This software is the confidential and proprietary information of Know-Center, + * Graz, Austria. You shall not disclose such Confidential Information and shall + * use it only in accordance with the terms of the license agreement you entered + * into with Know-Center. + * + * KNOW-CENTER MAKES NO REPRESENTATIONS OR WARRANTIES ABOUT THE SUITABILITY OF + * THE SOFTWARE, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE + * IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, OR + * NON-INFRINGEMENT. KNOW-CENTER SHALL NOT BE LIABLE FOR ANY DAMAGES SUFFERED BY + * LICENSEE AS A RESULT OF USING, MODIFYING OR DISTRIBUTING THIS SOFTWARE OR ITS + * DERIVATIVES. + * + * $Id: HexStringParseResult.java,v 1.1 2006/08/25 17:00:59 wprinz Exp $ + */ +package at.knowcenter.wag.exactparser.parsing.results; + +/** + * The result of parsing a hex string. 
+ * + * @see at.knowcenter.wag.exactparser.parsing.results.LiteralStringParseResult + * + * @author wprinz + */ +public class HexStringParseResult extends ContainerParseResult { +} diff --git a/src/main/java/at/knowcenter/wag/exactparser/parsing/results/IndirectObjectReferenceParseResult.java b/src/main/java/at/knowcenter/wag/exactparser/parsing/results/IndirectObjectReferenceParseResult.java new file mode 100644 index 0000000..d839004 --- /dev/null +++ b/src/main/java/at/knowcenter/wag/exactparser/parsing/results/IndirectObjectReferenceParseResult.java @@ -0,0 +1,36 @@ +/** + * Copyright (c) 2006 by Know-Center, Graz, Austria + * + * This software is the confidential and proprietary information of Know-Center, + * Graz, Austria. You shall not disclose such Confidential Information and shall + * use it only in accordance with the terms of the license agreement you entered + * into with Know-Center. + * + * KNOW-CENTER MAKES NO REPRESENTATIONS OR WARRANTIES ABOUT THE SUITABILITY OF + * THE SOFTWARE, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE + * IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, OR + * NON-INFRINGEMENT. KNOW-CENTER SHALL NOT BE LIABLE FOR ANY DAMAGES SUFFERED BY + * LICENSEE AS A RESULT OF USING, MODIFYING OR DISTRIBUTING THIS SOFTWARE OR ITS + * DERIVATIVES. + * + * $Id: IndirectObjectReferenceParseResult.java,v 1.1 2006/08/25 17:00:59 wprinz Exp $ + */ +package at.knowcenter.wag.exactparser.parsing.results; + +import at.knowcenter.wag.exactparser.parsing.IndirectObjectReference; + +/** + * The ParseResult of parsing an indirect object reference. 
+ * + * @author wprinz + */ +public class IndirectObjectReferenceParseResult extends ParseResult { + + public IndirectObjectReference ior; + + //@Override + public String toString() + { + return ior.toString() + " R"; + } +} diff --git a/src/main/java/at/knowcenter/wag/exactparser/parsing/results/IntegerParseResult.java b/src/main/java/at/knowcenter/wag/exactparser/parsing/results/IntegerParseResult.java new file mode 100644 index 0000000..5eec5e5 --- /dev/null +++ b/src/main/java/at/knowcenter/wag/exactparser/parsing/results/IntegerParseResult.java @@ -0,0 +1,28 @@ +/** + * Copyright (c) 2006 by Know-Center, Graz, Austria + * + * This software is the confidential and proprietary information of Know-Center, + * Graz, Austria. You shall not disclose such Confidential Information and shall + * use it only in accordance with the terms of the license agreement you entered + * into with Know-Center. + * + * KNOW-CENTER MAKES NO REPRESENTATIONS OR WARRANTIES ABOUT THE SUITABILITY OF + * THE SOFTWARE, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE + * IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, OR + * NON-INFRINGEMENT. KNOW-CENTER SHALL NOT BE LIABLE FOR ANY DAMAGES SUFFERED BY + * LICENSEE AS A RESULT OF USING, MODIFYING OR DISTRIBUTING THIS SOFTWARE OR ITS + * DERIVATIVES. 
+ * + * $Id: IntegerParseResult.java,v 1.1 2006/08/25 17:00:59 wprinz Exp $ + */ +package at.knowcenter.wag.exactparser.parsing.results; + +/** + * @author wprinz + */ +public class IntegerParseResult extends ParseResult +{ + + public int number; + +} diff --git a/src/main/java/at/knowcenter/wag/exactparser/parsing/results/LiteralStringParseResult.java b/src/main/java/at/knowcenter/wag/exactparser/parsing/results/LiteralStringParseResult.java new file mode 100644 index 0000000..0c7872d --- /dev/null +++ b/src/main/java/at/knowcenter/wag/exactparser/parsing/results/LiteralStringParseResult.java @@ -0,0 +1,29 @@ +/** + * Copyright (c) 2006 by Know-Center, Graz, Austria + * + * This software is the confidential and proprietary information of Know-Center, + * Graz, Austria. You shall not disclose such Confidential Information and shall + * use it only in accordance with the terms of the license agreement you entered + * into with Know-Center. + * + * KNOW-CENTER MAKES NO REPRESENTATIONS OR WARRANTIES ABOUT THE SUITABILITY OF + * THE SOFTWARE, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE + * IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, OR + * NON-INFRINGEMENT. KNOW-CENTER SHALL NOT BE LIABLE FOR ANY DAMAGES SUFFERED BY + * LICENSEE AS A RESULT OF USING, MODIFYING OR DISTRIBUTING THIS SOFTWARE OR ITS + * DERIVATIVES. + * + * $Id: LiteralStringParseResult.java,v 1.1 2006/08/25 17:00:59 wprinz Exp $ + */ +package at.knowcenter.wag.exactparser.parsing.results; + +/** + * The result of parsing a simple string (ASCII string). 
+ * + * @see at.knowcenter.wag.exactparser.parsing.results.HexStringParseResult + * + * @author wprinz + */ +public class LiteralStringParseResult extends ContainerParseResult { + +} diff --git a/src/main/java/at/knowcenter/wag/exactparser/parsing/results/NameParseResult.java b/src/main/java/at/knowcenter/wag/exactparser/parsing/results/NameParseResult.java new file mode 100644 index 0000000..9a8aa39 --- /dev/null +++ b/src/main/java/at/knowcenter/wag/exactparser/parsing/results/NameParseResult.java @@ -0,0 +1,27 @@ +/** + * Copyright (c) 2006 by Know-Center, Graz, Austria + * + * This software is the confidential and proprietary information of Know-Center, + * Graz, Austria. You shall not disclose such Confidential Information and shall + * use it only in accordance with the terms of the license agreement you entered + * into with Know-Center. + * + * KNOW-CENTER MAKES NO REPRESENTATIONS OR WARRANTIES ABOUT THE SUITABILITY OF + * THE SOFTWARE, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE + * IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, OR + * NON-INFRINGEMENT. KNOW-CENTER SHALL NOT BE LIABLE FOR ANY DAMAGES SUFFERED BY + * LICENSEE AS A RESULT OF USING, MODIFYING OR DISTRIBUTING THIS SOFTWARE OR ITS + * DERIVATIVES. 
+ * + * $Id: NameParseResult.java,v 1.1 2006/08/25 17:00:59 wprinz Exp $ + */ +package at.knowcenter.wag.exactparser.parsing.results; + +/** + * @author wprinz + */ +public class NameParseResult extends ParseResult { + + public int name_start_index = -1; + +} diff --git a/src/main/java/at/knowcenter/wag/exactparser/parsing/results/NullParseResult.java b/src/main/java/at/knowcenter/wag/exactparser/parsing/results/NullParseResult.java new file mode 100644 index 0000000..fd6e57d --- /dev/null +++ b/src/main/java/at/knowcenter/wag/exactparser/parsing/results/NullParseResult.java @@ -0,0 +1,26 @@ +/** + * Copyright (c) 2006 by Know-Center, Graz, Austria + * + * This software is the confidential and proprietary information of Know-Center, + * Graz, Austria. You shall not disclose such Confidential Information and shall + * use it only in accordance with the terms of the license agreement you entered + * into with Know-Center. + * + * KNOW-CENTER MAKES NO REPRESENTATIONS OR WARRANTIES ABOUT THE SUITABILITY OF + * THE SOFTWARE, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE + * IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, OR + * NON-INFRINGEMENT. KNOW-CENTER SHALL NOT BE LIABLE FOR ANY DAMAGES SUFFERED BY + * LICENSEE AS A RESULT OF USING, MODIFYING OR DISTRIBUTING THIS SOFTWARE OR ITS + * DERIVATIVES. + * + * $Id: NullParseResult.java,v 1.1 2006/08/25 17:00:59 wprinz Exp $ + */ +package at.knowcenter.wag.exactparser.parsing.results; + +/** + * The result of parsing a "null". 
+ * + * @author wprinz + */ +public class NullParseResult extends ParseResult { +} diff --git a/src/main/java/at/knowcenter/wag/exactparser/parsing/results/NumberParseResult.java b/src/main/java/at/knowcenter/wag/exactparser/parsing/results/NumberParseResult.java new file mode 100644 index 0000000..a6882c1 --- /dev/null +++ b/src/main/java/at/knowcenter/wag/exactparser/parsing/results/NumberParseResult.java @@ -0,0 +1,33 @@ +/** + * Copyright (c) 2006 by Know-Center, Graz, Austria + * + * This software is the confidential and proprietary information of Know-Center, + * Graz, Austria. You shall not disclose such Confidential Information and shall + * use it only in accordance with the terms of the license agreement you entered + * into with Know-Center. + * + * KNOW-CENTER MAKES NO REPRESENTATIONS OR WARRANTIES ABOUT THE SUITABILITY OF + * THE SOFTWARE, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE + * IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, OR + * NON-INFRINGEMENT. KNOW-CENTER SHALL NOT BE LIABLE FOR ANY DAMAGES SUFFERED BY + * LICENSEE AS A RESULT OF USING, MODIFYING OR DISTRIBUTING THIS SOFTWARE OR ITS + * DERIVATIVES. + * + * $Id: NumberParseResult.java,v 1.1 2006/08/25 17:00:59 wprinz Exp $ + */ +package at.knowcenter.wag.exactparser.parsing.results; + +/** + * The ParseResult of parsing an integer number. + * + * @author wprinz + */ +public class NumberParseResult extends ParseResult { + /** + * The (signed) integer number. 
+ */ + public int number; + + // TODO: make better + public float floating; +} \ No newline at end of file diff --git a/src/main/java/at/knowcenter/wag/exactparser/parsing/results/ObjectHeaderParseResult.java b/src/main/java/at/knowcenter/wag/exactparser/parsing/results/ObjectHeaderParseResult.java new file mode 100644 index 0000000..5a2265a --- /dev/null +++ b/src/main/java/at/knowcenter/wag/exactparser/parsing/results/ObjectHeaderParseResult.java @@ -0,0 +1,43 @@ +/** + * Copyright (c) 2006 by Know-Center, Graz, Austria + * + * This software is the confidential and proprietary information of Know-Center, + * Graz, Austria. You shall not disclose such Confidential Information and shall + * use it only in accordance with the terms of the license agreement you entered + * into with Know-Center. + * + * KNOW-CENTER MAKES NO REPRESENTATIONS OR WARRANTIES ABOUT THE SUITABILITY OF + * THE SOFTWARE, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE + * IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, OR + * NON-INFRINGEMENT. KNOW-CENTER SHALL NOT BE LIABLE FOR ANY DAMAGES SUFFERED BY + * LICENSEE AS A RESULT OF USING, MODIFYING OR DISTRIBUTING THIS SOFTWARE OR ITS + * DERIVATIVES. + * + * $Id: ObjectHeaderParseResult.java,v 1.1 2006/08/25 17:00:59 wprinz Exp $ + */ +package at.knowcenter.wag.exactparser.parsing.results; + +/** + * The ParseResult of a parsing an object header. + * + *

+ * Note that this information covers only the object header and not the + * contents of the object itself (meaning: next_index points to the contents and + * not to the end of the whole object). + *

+ * + * @author Administrator + */ +public class ObjectHeaderParseResult extends ParseResult { + + /** + * The object's object number. + */ + public int object_number = -1; + + /** + * The object's generation number. + */ + public int generation_number = -1; + +} diff --git a/src/main/java/at/knowcenter/wag/exactparser/parsing/results/ObjectParseResult.java b/src/main/java/at/knowcenter/wag/exactparser/parsing/results/ObjectParseResult.java new file mode 100644 index 0000000..4d9c224 --- /dev/null +++ b/src/main/java/at/knowcenter/wag/exactparser/parsing/results/ObjectParseResult.java @@ -0,0 +1,42 @@ +/** + * Copyright (c) 2006 by Know-Center, Graz, Austria + * + * This software is the confidential and proprietary information of Know-Center, + * Graz, Austria. You shall not disclose such Confidential Information and shall + * use it only in accordance with the terms of the license agreement you entered + * into with Know-Center. + * + * KNOW-CENTER MAKES NO REPRESENTATIONS OR WARRANTIES ABOUT THE SUITABILITY OF + * THE SOFTWARE, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE + * IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, OR + * NON-INFRINGEMENT. KNOW-CENTER SHALL NOT BE LIABLE FOR ANY DAMAGES SUFFERED BY + * LICENSEE AS A RESULT OF USING, MODIFYING OR DISTRIBUTING THIS SOFTWARE OR ITS + * DERIVATIVES. + * + * $Id: ObjectParseResult.java,v 1.1 2006/08/25 17:00:59 wprinz Exp $ + */ +package at.knowcenter.wag.exactparser.parsing.results; + + +/** + * The ParseResult of parsing an indirect object. 
+ * @author wprinz + */ +public class ObjectParseResult extends ParseResult { + + public int content_index = -1; + public int end_of_content_index = -1; + + public ObjectHeaderParseResult header = null; + +/* enum ObjectType + { + UNKNOWN_TO_PARSER, + OBJ_DICTIONARY + }; + + public ObjectType object_type = ObjectType.UNKNOWN_TO_PARSER; + */ + public ParseResult object = null; + +} diff --git a/src/main/java/at/knowcenter/wag/exactparser/parsing/results/ParseResult.java b/src/main/java/at/knowcenter/wag/exactparser/parsing/results/ParseResult.java new file mode 100644 index 0000000..d7ad4e9 --- /dev/null +++ b/src/main/java/at/knowcenter/wag/exactparser/parsing/results/ParseResult.java @@ -0,0 +1,42 @@ +/** + * Copyright (c) 2006 by Know-Center, Graz, Austria + * + * This software is the confidential and proprietary information of Know-Center, + * Graz, Austria. You shall not disclose such Confidential Information and shall + * use it only in accordance with the terms of the license agreement you entered + * into with Know-Center. + * + * KNOW-CENTER MAKES NO REPRESENTATIONS OR WARRANTIES ABOUT THE SUITABILITY OF + * THE SOFTWARE, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE + * IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, OR + * NON-INFRINGEMENT. KNOW-CENTER SHALL NOT BE LIABLE FOR ANY DAMAGES SUFFERED BY + * LICENSEE AS A RESULT OF USING, MODIFYING OR DISTRIBUTING THIS SOFTWARE OR ITS + * DERIVATIVES. + * + * $Id: ParseResult.java,v 1.1 2006/08/25 17:00:59 wprinz Exp $ + */ +package at.knowcenter.wag.exactparser.parsing.results; + +/** + * Base class of all parse results. + * + * @author wprinz + */ +public class ParseResult { + + /** + * The start index, where the parser started its work and where the parsed + * entity begins. + */ + public int start_index = -1; + + /** + * The index of the next entity following the currently parsed entity. + * + *

+ * This is the index of the first byte not belonging to this entity anymore. + *

+ */ + public int next_index = -1; + +} diff --git a/src/main/java/at/knowcenter/wag/exactparser/parsing/results/StartXRefParseResult.java b/src/main/java/at/knowcenter/wag/exactparser/parsing/results/StartXRefParseResult.java new file mode 100644 index 0000000..801e04b --- /dev/null +++ b/src/main/java/at/knowcenter/wag/exactparser/parsing/results/StartXRefParseResult.java @@ -0,0 +1,28 @@ +/** + * Copyright (c) 2006 by Know-Center, Graz, Austria + * + * This software is the confidential and proprietary information of Know-Center, + * Graz, Austria. You shall not disclose such Confidential Information and shall + * use it only in accordance with the terms of the license agreement you entered + * into with Know-Center. + * + * KNOW-CENTER MAKES NO REPRESENTATIONS OR WARRANTIES ABOUT THE SUITABILITY OF + * THE SOFTWARE, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE + * IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, OR + * NON-INFRINGEMENT. KNOW-CENTER SHALL NOT BE LIABLE FOR ANY DAMAGES SUFFERED BY + * LICENSEE AS A RESULT OF USING, MODIFYING OR DISTRIBUTING THIS SOFTWARE OR ITS + * DERIVATIVES. + * + * $Id: StartXRefParseResult.java,v 1.1 2006/08/25 17:00:59 wprinz Exp $ + */ +package at.knowcenter.wag.exactparser.parsing.results; + + +/** + * The ParseResult of parsing a startxref entry. + * @author wprinz + */ +public class StartXRefParseResult extends ParseResult { + + public int xref_index; +} diff --git a/src/main/java/at/knowcenter/wag/exactparser/parsing/results/StreamParseResult.java b/src/main/java/at/knowcenter/wag/exactparser/parsing/results/StreamParseResult.java new file mode 100644 index 0000000..6682d55 --- /dev/null +++ b/src/main/java/at/knowcenter/wag/exactparser/parsing/results/StreamParseResult.java @@ -0,0 +1,33 @@ +/** + * Copyright (c) 2006 by Know-Center, Graz, Austria + * + * This software is the confidential and proprietary information of Know-Center, + * Graz, Austria. 
You shall not disclose such Confidential Information and shall + * use it only in accordance with the terms of the license agreement you entered + * into with Know-Center. + * + * KNOW-CENTER MAKES NO REPRESENTATIONS OR WARRANTIES ABOUT THE SUITABILITY OF + * THE SOFTWARE, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE + * IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, OR + * NON-INFRINGEMENT. KNOW-CENTER SHALL NOT BE LIABLE FOR ANY DAMAGES SUFFERED BY + * LICENSEE AS A RESULT OF USING, MODIFYING OR DISTRIBUTING THIS SOFTWARE OR ITS + * DERIVATIVES. + * + * $Id: StreamParseResult.java,v 1.1 2006/08/25 17:00:59 wprinz Exp $ + */ +package at.knowcenter.wag.exactparser.parsing.results; + + +/** + * The result of parsing a stream. + * + * @see at.knowcenter.wag.exactparser.parsing.results.LiteralStringParseResult + * + * @author wprinz + */ +public class StreamParseResult extends ContainerParseResult { + + public DictionaryParseResult stream_dictionary = null; + + public int stream_start_index = -1; +} diff --git a/src/main/java/at/knowcenter/wag/exactparser/parsing/results/TrailerParseResult.java b/src/main/java/at/knowcenter/wag/exactparser/parsing/results/TrailerParseResult.java new file mode 100644 index 0000000..d958cdb --- /dev/null +++ b/src/main/java/at/knowcenter/wag/exactparser/parsing/results/TrailerParseResult.java @@ -0,0 +1,76 @@ +/** + * Copyright (c) 2006 by Know-Center, Graz, Austria + * + * This software is the confidential and proprietary information of Know-Center, + * Graz, Austria. You shall not disclose such Confidential Information and shall + * use it only in accordance with the terms of the license agreement you entered + * into with Know-Center.
+ * + * KNOW-CENTER MAKES NO REPRESENTATIONS OR WARRANTIES ABOUT THE SUITABILITY OF + * THE SOFTWARE, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE + * IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, OR + * NON-INFRINGEMENT. KNOW-CENTER SHALL NOT BE LIABLE FOR ANY DAMAGES SUFFERED BY + * LICENSEE AS A RESULT OF USING, MODIFYING OR DISTRIBUTING THIS SOFTWARE OR ITS + * DERIVATIVES. + * + * $Id: TrailerParseResult.java,v 1.1 2006/08/25 17:00:59 wprinz Exp $ + */ +package at.knowcenter.wag.exactparser.parsing.results; + +/** + * The ParseResult of parsing the trailer. + * + * @author wprinz + */ +public class TrailerParseResult extends ParseResult { + + public int contents_index = -1; + public int contents_end_index = -1; + + public DictionaryParseResult dpr = null; + + public IndirectObjectReferenceParseResult info; + + public IndirectObjectReferenceParseResult root; + + /** + * The content of the "/Size" entry. + */ + public int size; + + /** + * Tells, if this PDF footer has a predecessor (as specified by + * the /Prev entry). + */ + public boolean has_predecessor = false; + + /** + * The index of the predecessor. + * + *

+ * Only valid if has_predecessor is true. + *

+ *

+ * Use getPrev and setPrev to access this member variable. + *

+ * + * @see #getPrev() + * @see #setPrev(int) + */ + private int prev = -1; + + public int getPrev() { + assert has_predecessor; + return prev; + } + + public void setPrev(int prev) { + assert has_predecessor : "Set has_predecessor to true first."; + this.prev = prev; + } + + + + + +} diff --git a/src/main/java/at/knowcenter/wag/exactparser/parsing/results/XRefLineParseResult.java b/src/main/java/at/knowcenter/wag/exactparser/parsing/results/XRefLineParseResult.java new file mode 100644 index 0000000..e04e88d --- /dev/null +++ b/src/main/java/at/knowcenter/wag/exactparser/parsing/results/XRefLineParseResult.java @@ -0,0 +1,32 @@ +/** + * Copyright (c) 2006 by Know-Center, Graz, Austria + * + * This software is the confidential and proprietary information of Know-Center, + * Graz, Austria. You shall not disclose such Confidential Information and shall + * use it only in accordance with the terms of the license agreement you entered + * into with Know-Center. + * + * KNOW-CENTER MAKES NO REPRESENTATIONS OR WARRANTIES ABOUT THE SUITABILITY OF + * THE SOFTWARE, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE + * IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, OR + * NON-INFRINGEMENT. KNOW-CENTER SHALL NOT BE LIABLE FOR ANY DAMAGES SUFFERED BY + * LICENSEE AS A RESULT OF USING, MODIFYING OR DISTRIBUTING THIS SOFTWARE OR ITS + * DERIVATIVES. + * + * $Id: XRefLineParseResult.java,v 1.1 2006/08/25 17:00:59 wprinz Exp $ + */ +package at.knowcenter.wag.exactparser.parsing.results; + +/** + * The ParseResult of parsing a single xref line. 
+ * + * @author wprinz + */ +public class XRefLineParseResult extends ParseResult { + + public int object_offset; + + public int generation_number; + + public byte object_usage; +} diff --git a/src/main/java/at/knowcenter/wag/exactparser/parsing/results/XRefSectionParseResult.java b/src/main/java/at/knowcenter/wag/exactparser/parsing/results/XRefSectionParseResult.java new file mode 100644 index 0000000..8b2858c --- /dev/null +++ b/src/main/java/at/knowcenter/wag/exactparser/parsing/results/XRefSectionParseResult.java @@ -0,0 +1,58 @@ +/** + * Copyright (c) 2006 by Know-Center, Graz, Austria + * + * This software is the confidential and proprietary information of Know-Center, + * Graz, Austria. You shall not disclose such Confidential Information and shall + * use it only in accordance with the terms of the license agreement you entered + * into with Know-Center. + * + * KNOW-CENTER MAKES NO REPRESENTATIONS OR WARRANTIES ABOUT THE SUITABILITY OF + * THE SOFTWARE, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE + * IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, OR + * NON-INFRINGEMENT. KNOW-CENTER SHALL NOT BE LIABLE FOR ANY DAMAGES SUFFERED BY + * LICENSEE AS A RESULT OF USING, MODIFYING OR DISTRIBUTING THIS SOFTWARE OR ITS + * DERIVATIVES. + * + * $Id: XRefSectionParseResult.java,v 1.1 2006/08/25 17:00:59 wprinz Exp $ + */ +package at.knowcenter.wag.exactparser.parsing.results; + +import java.util.ArrayList; +import java.util.List; + +/** + * The ParseResult of an xref parsing operation. + * + *

+ * This contains one whole xref table section. An xref section starts with the + * word xref and contains one or more xref sub-sections. + *

+ *

+ * Due to Incremental Updates, there may be more than one xref section in a + * document. All xref sections together are called the xref table. Using this + * aggregated xref table, an application has full access to all indirect + * objects in the document. + *

+ *

+ * In many PDF libraries and applications one xref section is also informally + * called xref table. + *

+ * + * @author wprinz + */ +public class XRefSectionParseResult extends ParseResult +{ + + public List xref_subsections = new ArrayList(); + + /** + * Appends another cross-reference (xref) sub-section to the xref table. + * + * @param xref_section + * The xref sub-section to be appended. + */ + public void appendXRefSubSection(XRefSubSectionParseResult xref_section) + { + xref_subsections.add(xref_section); + } +} diff --git a/src/main/java/at/knowcenter/wag/exactparser/parsing/results/XRefSubSectionParseResult.java b/src/main/java/at/knowcenter/wag/exactparser/parsing/results/XRefSubSectionParseResult.java new file mode 100644 index 0000000..41982c4 --- /dev/null +++ b/src/main/java/at/knowcenter/wag/exactparser/parsing/results/XRefSubSectionParseResult.java @@ -0,0 +1,51 @@ +/** + * Copyright (c) 2006 by Know-Center, Graz, Austria + * + * This software is the confidential and proprietary information of Know-Center, + * Graz, Austria. You shall not disclose such Confidential Information and shall + * use it only in accordance with the terms of the license agreement you entered + * into with Know-Center. + * + * KNOW-CENTER MAKES NO REPRESENTATIONS OR WARRANTIES ABOUT THE SUITABILITY OF + * THE SOFTWARE, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE + * IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, OR + * NON-INFRINGEMENT. KNOW-CENTER SHALL NOT BE LIABLE FOR ANY DAMAGES SUFFERED BY + * LICENSEE AS A RESULT OF USING, MODIFYING OR DISTRIBUTING THIS SOFTWARE OR ITS + * DERIVATIVES. + * + * $Id: XRefSubSectionParseResult.java,v 1.1 2006/08/25 17:00:59 wprinz Exp $ + */ +package at.knowcenter.wag.exactparser.parsing.results; + +import java.util.ArrayList; +import java.util.List; + +/** + * Contains an xref sub-section. + * + *

+ * An xref sub-section is an ordered list of xref lines. The object numbers of the + * corresponding objects are numbered incrementally. + *

+ *

+ * xref sections are important in Incremental Updates because they allow one to + * specify explicitly which objects (object numbers) are contained in the xref. + *

+ * + * @author wprinz + */ +public class XRefSubSectionParseResult extends ParseResult { + + public int start_obj_number; + + public int num_objects; + + public List xref_lines = new ArrayList(); + + public void appendXRefLine(XRefLineParseResult xref_line) { + assert xref_lines.size() < num_objects; + + xref_lines.add(xref_line); + } + +} -- cgit v1.2.3