From 535a04fa05f739ec16dd81666e3b0f82dfbd442d Mon Sep 17 00:00:00 2001 From: tknall Date: Wed, 9 Jan 2013 15:41:29 +0000 Subject: pdf-as-lib maven project files moved to pdf-as-lib git-svn-id: https://joinup.ec.europa.eu/svn/pdf-as/pdf-as/trunk@926 7b5415b0-85f9-ee4d-85bd-d5d0c3b42d1c --- .../knowcenter/wag/exactparser/ByteArrayUtils.java | 148 --- .../knowcenter/wag/exactparser/ParseDocument.java | 272 ---- .../parsing/IndirectObjectReference.java | 57 - .../wag/exactparser/parsing/PDFNames.java | 184 --- .../wag/exactparser/parsing/PDFUtils.java | 1405 -------------------- .../parsing/results/ArrayParseResult.java | 42 - .../parsing/results/BooleanParseResult.java | 38 - .../parsing/results/ContainerParseResult.java | 45 - .../parsing/results/DictionaryParseResult.java | 41 - .../parsing/results/EOFParseResult.java | 47 - .../parsing/results/FooterParseResult.java | 53 - .../parsing/results/HeaderParseResult.java | 48 - .../parsing/results/HexStringParseResult.java | 36 - .../IndirectObjectReferenceParseResult.java | 44 - .../parsing/results/IntegerParseResult.java | 36 - .../parsing/results/LiteralStringParseResult.java | 37 - .../parsing/results/NameParseResult.java | 35 - .../parsing/results/NullParseResult.java | 34 - .../parsing/results/NumberParseResult.java | 41 - .../parsing/results/ObjectHeaderParseResult.java | 51 - .../parsing/results/ObjectParseResult.java | 50 - .../exactparser/parsing/results/ParseResult.java | 50 - .../parsing/results/StartXRefParseResult.java | 36 - .../parsing/results/StreamParseResult.java | 41 - .../parsing/results/TrailerParseResult.java | 84 -- .../parsing/results/XRefLineParseResult.java | 40 - .../parsing/results/XRefSectionParseResult.java | 66 - .../parsing/results/XRefSubSectionParseResult.java | 59 - 28 files changed, 3120 deletions(-) delete mode 100644 src/main/java/at/knowcenter/wag/exactparser/ByteArrayUtils.java delete mode 100644 src/main/java/at/knowcenter/wag/exactparser/ParseDocument.java delete mode 100644 src/main/java/at/knowcenter/wag/exactparser/parsing/IndirectObjectReference.java delete mode 100644 src/main/java/at/knowcenter/wag/exactparser/parsing/PDFNames.java delete mode 100644 src/main/java/at/knowcenter/wag/exactparser/parsing/PDFUtils.java delete mode 100644 src/main/java/at/knowcenter/wag/exactparser/parsing/results/ArrayParseResult.java delete mode 100644 src/main/java/at/knowcenter/wag/exactparser/parsing/results/BooleanParseResult.java delete mode 100644 src/main/java/at/knowcenter/wag/exactparser/parsing/results/ContainerParseResult.java delete mode 100644 src/main/java/at/knowcenter/wag/exactparser/parsing/results/DictionaryParseResult.java delete mode 100644 src/main/java/at/knowcenter/wag/exactparser/parsing/results/EOFParseResult.java delete mode 100644 src/main/java/at/knowcenter/wag/exactparser/parsing/results/FooterParseResult.java delete mode 100644 src/main/java/at/knowcenter/wag/exactparser/parsing/results/HeaderParseResult.java delete mode 100644 src/main/java/at/knowcenter/wag/exactparser/parsing/results/HexStringParseResult.java delete mode 100644 src/main/java/at/knowcenter/wag/exactparser/parsing/results/IndirectObjectReferenceParseResult.java delete mode 100644 src/main/java/at/knowcenter/wag/exactparser/parsing/results/IntegerParseResult.java delete mode 100644 src/main/java/at/knowcenter/wag/exactparser/parsing/results/LiteralStringParseResult.java delete mode 100644 src/main/java/at/knowcenter/wag/exactparser/parsing/results/NameParseResult.java delete mode 100644 src/main/java/at/knowcenter/wag/exactparser/parsing/results/NullParseResult.java delete mode 100644 src/main/java/at/knowcenter/wag/exactparser/parsing/results/NumberParseResult.java delete mode 100644 src/main/java/at/knowcenter/wag/exactparser/parsing/results/ObjectHeaderParseResult.java delete mode 100644 src/main/java/at/knowcenter/wag/exactparser/parsing/results/ObjectParseResult.java delete mode 100644 src/main/java/at/knowcenter/wag/exactparser/parsing/results/ParseResult.java delete mode 100644 src/main/java/at/knowcenter/wag/exactparser/parsing/results/StartXRefParseResult.java delete mode 100644 src/main/java/at/knowcenter/wag/exactparser/parsing/results/StreamParseResult.java delete mode 100644 src/main/java/at/knowcenter/wag/exactparser/parsing/results/TrailerParseResult.java delete mode 100644 src/main/java/at/knowcenter/wag/exactparser/parsing/results/XRefLineParseResult.java delete mode 100644 src/main/java/at/knowcenter/wag/exactparser/parsing/results/XRefSectionParseResult.java delete mode 100644 src/main/java/at/knowcenter/wag/exactparser/parsing/results/XRefSubSectionParseResult.java (limited to 'src/main/java/at/knowcenter/wag/exactparser') diff --git a/src/main/java/at/knowcenter/wag/exactparser/ByteArrayUtils.java b/src/main/java/at/knowcenter/wag/exactparser/ByteArrayUtils.java deleted file mode 100644 index 4442650..0000000 --- a/src/main/java/at/knowcenter/wag/exactparser/ByteArrayUtils.java +++ /dev/null @@ -1,148 +0,0 @@ -/** - * Copyright 2006 by Know-Center, Graz, Austria - * PDF-AS has been contracted by the E-Government Innovation Center EGIZ, a - * joint initiative of the Federal Chancellery Austria and Graz University of - * Technology. - * - * Licensed under the EUPL, Version 1.1 or - as soon they will be approved by - * the European Commission - subsequent versions of the EUPL (the "Licence"); - * You may not use this work except in compliance with the Licence. - * You may obtain a copy of the Licence at: - * http://www.osor.eu/eupl/ - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the Licence is distributed on an "AS IS" basis, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the Licence for the specific language governing permissions and - * limitations under the Licence. - * - * This product combines work with different licenses. See the "NOTICE" text - * file for details on the various modules and licenses. - * The "NOTICE" text file is part of the distribution. Any derivative works - * that you distribute must include a readable copy of the "NOTICE" text file. - * - * $Id: ByteArrayUtils.java,v 1.1 2006/08/25 17:00:59 wprinz Exp $ - */ -package at.knowcenter.wag.exactparser; - -import java.io.UnsupportedEncodingException; - -/** - * Abstract class that contains utility methods for handling byte arrays. - * - * @author wprinz - */ -public abstract class ByteArrayUtils { - - public static final String BYTE_ARRAY_ENCODING = "ISO-8859-1"; - - /** - * Converts the byte array to a String. - * - * @param data - * The byte array. - * @return Returns the String. - * @throws UnsupportedEncodingException - * Forwarded exception - */ - public static String convertByteArrayToString(final byte[] data) throws UnsupportedEncodingException { - return new String(data, BYTE_ARRAY_ENCODING); - } - - /** - * Finds the first occurance of search in data starting to search from the - * given index. - * - * @param data - * The big array. - * @param index - * The index to start searching from. - * @param search - * The sought array. - * @return Returns the index of the found occurence or -1 if nothing was - * found. - */ - public static int indexOf(final byte[] data, final int index, final byte[] search) { - for (int i = index; i <= data.length - search.length; i++) { - if (compareByteArrays(data, i, search)) { - return i; - } - } - return -1; - } - - /** - * Finds the last occurance of the array. - * - * @param data - * The source array to be searched. - * @param search - * The sought array. - * @return Returns the index of the last occurance - or -1 if nothing was - * found. - */ - public static int lastIndexOf(final byte[] data, byte[] search) { - for (int index = data.length - search.length; index >= 0; index--) { - if (compareByteArrays(data, index, search)) { - return index; - } - } - return -1; - } - - /** - * Compares the two byte arrays for equality. - * - * @param data - * The source array. - * @param index - * In index into the source array marking where the comparison should - * start. - * @param search - * The sought array. - * @return Returns true if the first search.length bytes of data+index and - * search match exactly. Returns false otherwise. - */ - public static boolean compareByteArrays(final byte[] data, final int index, byte[] search) { - if (index < 0 || index >= data.length) { - throw new IndexOutOfBoundsException("The index " + index + " is out of bounds"); - } - - if (search.length > data.length) { - return false; - } - - if (search.length > data.length - index) { - return false; - } - - for (int i = 0; i < search.length; i++) { - if (data[index + i] != search[i]) { - return false; - } - } - - return true; - } - - /** - * Checks, if the sought data byte is contained within the byte array. - * - * @param byte_array - * The byte array. - * @param data - * A data byte sought within the byte array. - * @return Returns true, if the data byte was found (at least once) in the - * byte array, false otherwise. - */ - public static boolean contains(final byte[] byte_array, final byte data) { - for (int i = 0; i < byte_array.length; i++) { - byte b = byte_array[i]; - if (b == data) { - return true; - } - } - return false; - } - -} diff --git a/src/main/java/at/knowcenter/wag/exactparser/ParseDocument.java b/src/main/java/at/knowcenter/wag/exactparser/ParseDocument.java deleted file mode 100644 index fbaa4de..0000000 --- a/src/main/java/at/knowcenter/wag/exactparser/ParseDocument.java +++ /dev/null @@ -1,272 +0,0 @@ -/** - * Copyright 2006 by Know-Center, Graz, Austria - * PDF-AS has been contracted by the E-Government Innovation Center EGIZ, a - * joint initiative of the Federal Chancellery Austria and Graz University of - * Technology. - * - * Licensed under the EUPL, Version 1.1 or - as soon they will be approved by - * the European Commission - subsequent versions of the EUPL (the "Licence"); - * You may not use this work except in compliance with the Licence. - * You may obtain a copy of the Licence at: - * http://www.osor.eu/eupl/ - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the Licence is distributed on an "AS IS" basis, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the Licence for the specific language governing permissions and - * limitations under the Licence. - * - * This product combines work with different licenses. See the "NOTICE" text - * file for details on the various modules and licenses. - * The "NOTICE" text file is part of the distribution. Any derivative works - * that you distribute must include a readable copy of the "NOTICE" text file. - * - * $Id: ParseDocument.java,v 1.1 2006/08/25 17:00:59 wprinz Exp $ - */ -package at.knowcenter.wag.exactparser; - -import java.io.File; -import java.io.FileInputStream; -import java.io.IOException; -import java.util.ArrayList; -import java.util.Iterator; -import java.util.List; - -import at.knowcenter.wag.exactparser.parsing.PDFUtils; -import at.knowcenter.wag.exactparser.parsing.results.DictionaryParseResult; -import at.knowcenter.wag.exactparser.parsing.results.FooterParseResult; -import at.knowcenter.wag.exactparser.parsing.results.IndirectObjectReferenceParseResult; -import at.knowcenter.wag.exactparser.parsing.results.NameParseResult; -import at.knowcenter.wag.exactparser.parsing.results.NumberParseResult; -import at.knowcenter.wag.exactparser.parsing.results.ObjectParseResult; -import at.knowcenter.wag.exactparser.parsing.results.StartXRefParseResult; -import at.knowcenter.wag.exactparser.parsing.results.TrailerParseResult; -import at.knowcenter.wag.exactparser.parsing.results.XRefSectionParseResult; - - -/** - * Test class. - * @author wprinz - */ -public class ParseDocument -{ - - public static final String DOCUMENT = "C:/wprinz/temp.pdf"; - - public static final byte[] EGIZ_DICT_NAME = { 'E', 'G', 'I', 'Z', 'S', 'i', - 'g', 'D', 'i', 'c', 't' }; - - public static final byte[] EGIZ_ODS_NAME = { 'O', 'D', 'S' }; - - public static final byte[] EGIZ_XOBJ_NAME = { 'S', 'i', 'g', 'X', 'O', 'b', - 'j', 'e', 'c', 't' }; - - /** - * @param args - */ - public static void main(String[] args) - { - - try - { - File in = new File(DOCUMENT); - FileInputStream fis = new FileInputStream(in); - byte[] pdf = new byte[(int) in.length()]; - fis.read(pdf); - fis.close(); - fis = null; - - List blocks = parseDocument(pdf); - - Iterator it = blocks.iterator(); - while (it.hasNext()) - { - FooterParseResult bpr = (FooterParseResult) it.next(); - - System.out.print("block from " + bpr.start_index + " to " + bpr.next_index); - - if (bpr.tpr.root != null) - { - int root_index = PDFUtils.getObjectOffsetFromXRefByIndirectObjectReference(bpr.xpr, bpr.tpr.root.ior); - ObjectParseResult root_opr = PDFUtils.parseObject(pdf, root_index); - DictionaryParseResult root_dpr = (DictionaryParseResult) root_opr.object; - - int egiz_index = PDFUtils.indexOfName(pdf, root_dpr.names, EGIZ_DICT_NAME); - if (egiz_index >= 0) - { - System.out.print(" == EGIZDict"); - } - } - - System.out.println(); - } - - } - catch (IOException e) - { - e.printStackTrace(); - } - } - - public static List parseDocument(final byte[] pdf) throws IOException - { - //HeaderParseResult hpr = PDFUtils.parseHeader(pdf, 0); - //System.out.println("PDF-version = " + hpr.major + "." + hpr.minor); - - List blocks = new ArrayList(); - - int last_start_xref = PDFUtils.findLastStartXRef(pdf); - StartXRefParseResult last_sxpr = PDFUtils.parseStartXRef(pdf, last_start_xref); - int xref_index = last_sxpr.xref_index; - - for (;;) - { - FooterParseResult fpr = PDFUtils.parseFooter(pdf, xref_index); - blocks.add(0, fpr); - - //System.out.println("tpr.has_predecessor = " + fpr.tpr.has_predecessor); - if (!fpr.tpr.has_predecessor) - { - // eventually parse the PDF header here. - break; - } - - //System.out.println("tpr.prev = " + fpr.tpr.getPrev()); - - xref_index = fpr.tpr.getPrev(); - } - - return blocks; - } - - // public static void parseEGIZ() - // { - // - // int root_index = - // PDFUtils.getObjectOffsetFromXRefByIndirectObjectReference(bpr.xpr, - // bpr.tpr.root.ior); - // ObjectParseResult root_opr = PDFUtils.parseObject(pdf, root_index); - // DictionaryParseResult root_dpr = (DictionaryParseResult) root_opr.object; - // - // int egiz_index = PDFUtils.indexOfName(pdf, root_dpr.names, EGIZ_DICT_NAME); - // if (egiz_index >= 0) - // { - // IndirectObjectReferenceParseResult egiz_iorpr = - // (IndirectObjectReferenceParseResult) root_dpr.values.get(egiz_index); - // System.out.println("EGIZ signature info at = " + egiz_iorpr); - // - // int egiz_dict_index = - // PDFUtils.getObjectOffsetFromXRefByIndirectObjectReference(bpr.xpr, - // egiz_iorpr.ior); - // ObjectParseResult opr = PDFUtils.parseObject(pdf, egiz_dict_index); - // DictionaryParseResult egiz_dict = (DictionaryParseResult) opr.object; - // - // for (int i = 0; i < egiz_dict.names.size(); i++) - // { - // NameParseResult npr = egiz_dict.names.get(i); - // int len = npr.next_index - npr.name_start_index; - // byte[] name = new byte[len]; - // System.arraycopy(pdf, npr.name_start_index, name, 0, len); - // System.out.print(" " + new String(name, "US-ASCII") + " = "); - // - // System.out.println(egiz_dict.values.get(i)); - // } - // - // // int key = PDFUtils.indexOfName(pdf, egiz_dict.names, new byte [] { 'K', - // // 'e', 'y'}); - // // IndirectObjectReferenceParseResult key_iorpr = - // // (IndirectObjectReferenceParseResult) egiz_dict.values.get(key); - // // int key_offset = - // // PDFUtils.getObjectOffsetFromXRefByIndirectObjectReference(xpr, - // // key_iorpr.ior); - // // ObjectParseResult key_opr = PDFUtils.parseObject(pdf, key_offset); - // // StreamParseResult spr = (StreamParseResult) key_opr.object; - // // System.out.println(" key stream from " + spr.content_start_index + " to - // // " + spr.content_end_index); - // // - // // int data_len = spr.content_end_index - spr.content_start_index; - // // byte [] data = new byte[data_len]; - // // System.arraycopy(pdf, spr.content_start_index, data, 0, data_len); - // // System.out.println(new String(data, "US-ASCII")); - // - // } - // else - // { - // System.out.println("No EGIZ block found."); - // } - // - // } - - public static byte[] getOriginalDocument(final File file_name) throws IOException - { - FileInputStream fis = new FileInputStream(file_name); - byte[] pdf = new byte[(int) file_name.length()]; - fis.read(pdf); - fis.close(); - fis = null; - - int last_start_xref = PDFUtils.findLastStartXRef(pdf); - - StartXRefParseResult sxpr = PDFUtils.parseStartXRef(pdf, last_start_xref); - - XRefSectionParseResult xpr = PDFUtils.parseXRefSection(pdf, sxpr.xref_index); - - TrailerParseResult tpr = PDFUtils.parseTrailer(pdf, xpr.next_index); - - System.out.println("tpr.info = " + tpr.info); - System.out.println("tpr.root = " + tpr.root); - System.out.println("tpr.size = " + tpr.size); - - System.out.println("tpr.has_predecessor = " + tpr.has_predecessor); - if (tpr.has_predecessor) - { - System.out.println("tpr.prev = " + tpr.getPrev()); - } - - int root_index = PDFUtils.getObjectOffsetFromXRefByIndirectObjectReference(xpr, tpr.root.ior); - ObjectParseResult root_opr = PDFUtils.parseObject(pdf, root_index); - DictionaryParseResult root_dpr = (DictionaryParseResult) root_opr.object; - - byte[] EGIZ_TYPE = new String("EGIZSigDict").getBytes("US-ASCII"); - int egiz_index = PDFUtils.indexOfName(pdf, root_dpr.names, EGIZ_TYPE); - if (egiz_index >= 0) - { - System.out.println("The document is EGIZ-signed. ==> extract original document"); - - IndirectObjectReferenceParseResult egiz_iorpr = (IndirectObjectReferenceParseResult) root_dpr.values.get(egiz_index); - System.out.println("EGIZ signature info at = " + egiz_iorpr); - - int egiz_dict_index = PDFUtils.getObjectOffsetFromXRefByIndirectObjectReference(xpr, egiz_iorpr.ior); - ObjectParseResult opr = PDFUtils.parseObject(pdf, egiz_dict_index); - DictionaryParseResult egiz_dict = (DictionaryParseResult) opr.object; - - for (int i = 0; i < egiz_dict.names.size(); i++) - { - NameParseResult npr = (NameParseResult) egiz_dict.names.get(i); - int len = npr.next_index - npr.name_start_index; - byte[] name = new byte[len]; - System.arraycopy(pdf, npr.name_start_index, name, 0, len); - System.out.print(" " + new String(name, "US-ASCII") + " = "); - - System.out.println(egiz_dict.values.get(i)); - } - - // Original document size - int key = PDFUtils.indexOfName(pdf, egiz_dict.names, new byte[] { 'O', - 'D', 'S' }); - NumberParseResult ods = (NumberParseResult) egiz_dict.values.get(key); - - int original_document_size = ods.number; - System.out.println("Original Document Size = " + original_document_size); - - byte[] original = new byte[original_document_size]; - System.arraycopy(pdf, 0, original, 0, original_document_size); - - return original; - } - - System.out.println("No EGIZ block found. ==> the whold document is the original document"); - return pdf; - } - -} diff --git a/src/main/java/at/knowcenter/wag/exactparser/parsing/IndirectObjectReference.java b/src/main/java/at/knowcenter/wag/exactparser/parsing/IndirectObjectReference.java deleted file mode 100644 index 2bfdf56..0000000 --- a/src/main/java/at/knowcenter/wag/exactparser/parsing/IndirectObjectReference.java +++ /dev/null @@ -1,57 +0,0 @@ -/** - * Copyright 2006 by Know-Center, Graz, Austria - * PDF-AS has been contracted by the E-Government Innovation Center EGIZ, a - * joint initiative of the Federal Chancellery Austria and Graz University of - * Technology. - * - * Licensed under the EUPL, Version 1.1 or - as soon they will be approved by - * the European Commission - subsequent versions of the EUPL (the "Licence"); - * You may not use this work except in compliance with the Licence. - * You may obtain a copy of the Licence at: - * http://www.osor.eu/eupl/ - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the Licence is distributed on an "AS IS" basis, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the Licence for the specific language governing permissions and - * limitations under the Licence. - * - * This product combines work with different licenses. See the "NOTICE" text - * file for details on the various modules and licenses. - * The "NOTICE" text file is part of the distribution. Any derivative works - * that you distribute must include a readable copy of the "NOTICE" text file. - * - * $Id: IndirectObjectReference.java,v 1.1 2006/08/25 17:00:59 wprinz Exp $ - */ -package at.knowcenter.wag.exactparser.parsing; - -/** - * The IndirectObjectReference class completely holds a so called object - * identifier of an indirect object. - * - *

- * An indirect object is an object not contained within another object. In - * accordance, a direct object is structurally part of another object. For - * example, a direct String object that is the value of some key in a dictionary - * object. - *

- *

- * An object identifier uniquely identifies a specific indirect object by the - * object number and the generation number. In PDF such an object identifier may - * be used to reference to the object. - *

- * - * @author wprinz - */ -public class IndirectObjectReference { - - public int object_number; - - public int generation_number; - - //@Override - public String toString() { - return object_number + " " + generation_number; - } - -} diff --git a/src/main/java/at/knowcenter/wag/exactparser/parsing/PDFNames.java b/src/main/java/at/knowcenter/wag/exactparser/parsing/PDFNames.java deleted file mode 100644 index 0ee5863..0000000 --- a/src/main/java/at/knowcenter/wag/exactparser/parsing/PDFNames.java +++ /dev/null @@ -1,184 +0,0 @@ -/** - * Copyright 2006 by Know-Center, Graz, Austria - * PDF-AS has been contracted by the E-Government Innovation Center EGIZ, a - * joint initiative of the Federal Chancellery Austria and Graz University of - * Technology. - * - * Licensed under the EUPL, Version 1.1 or - as soon they will be approved by - * the European Commission - subsequent versions of the EUPL (the "Licence"); - * You may not use this work except in compliance with the Licence. - * You may obtain a copy of the Licence at: - * http://www.osor.eu/eupl/ - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the Licence is distributed on an "AS IS" basis, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the Licence for the specific language governing permissions and - * limitations under the Licence. - * - * This product combines work with different licenses. See the "NOTICE" text - * file for details on the various modules and licenses. - * The "NOTICE" text file is part of the distribution. Any derivative works - * that you distribute must include a readable copy of the "NOTICE" text file. - * - * $Id: PDFNames.java,v 1.1 2006/08/25 17:00:59 wprinz Exp $ - */ -package at.knowcenter.wag.exactparser.parsing; - -/** - * Abstract class that contains several frequently used PDF constants. - * - *

- * The PDF specification partitions the character set (ASCII) into three groups: - *

- * - *

- * Newlines consist per default of CR and LF, but also LF and even CR alone are - * allowed. It seems that all variations of newlines may exist within a single - * document. - *

- * - * @author wprinz - */ -public abstract class PDFNames -{ - - /** - * The standard encoding of PDF tokens and names. - * - *

- * PDF is usually an 8 bit format. Binary data etc. can be saves just as it - * is. Nevertheless all PDF tokens ('xref', 'obj', etc.) and PDF Names - * ('/Size', '/Pages', '/Type', etc.) must be in 7 bit ASCII US encoding. - *

- *

- * Therefor, whenever using Java Strings to convert e.g. numbers to such PDF - * tokens use this encoding constant. - *

- *

- * The same applies for PDF token/name byte arrays that are retransfromed to - * Java Strings. - *

- */ - public static final String PDF_STANDARD_ENCODING = "US-ASCII"; - - // Whitespace characters - - // TABLE 3.1 White-space characters - // DECIMAL HEXADECIMAL OCTAL NAME - // 0 00 000 Null (NUL) - // 9 09 011 Tab (HT) - // 10 0A 012 Line feed (LF) - // 12 0C 014 Form feed (FF) - // 13 0D 015 Carriage return - // 32 20 040 Space (SP) - - public static final byte WHITESPACE_NUL = 0x00; - - public static final byte WHITESPACE_HT = 0x09; - - public static final byte WHITESPACE_LF = 0x0A; - - public static final byte WHITESPACE_FF = 0x0C; - - public static final byte WHITESPACE_CR = 0x0D; - - public static final byte WHITESPACE_SP = 0x20; - - public static final byte[] WHITESPACE_CHARACTERS = { WHITESPACE_NUL, - WHITESPACE_HT, WHITESPACE_LF, WHITESPACE_FF, WHITESPACE_CR, WHITESPACE_SP }; - - // comment character - - public static final byte COMMENT = '%'; - - // PDF-version - - public static final byte[] PDF_VERSION_STR = { 'P', 'D', 'F', '-' }; - - public static final byte PDF_VERSION_SEPARATOR = '.'; - - // delimiter characters - - public static final byte DELIMITER_STRING_OPEN = '('; - - public static final byte DELIMITER_STRING_CLOSE = ')'; - - public static final byte DELIMITER_HEXSTRING_OPEN = '<'; - - public static final byte DELIMITER_HEXSTRING_CLOSE = '>'; - - public static final byte DELIMITER_ARRAY_OPEN = '['; - - public static final byte DELIMITER_ARRAY_CLOSE = ']'; - - public static final byte DELIMITER_CURLY_OPEN = '{'; - - public static final byte DELIMITER_CURLY_CLOSE = '}'; - - public static final byte DELIMITER_NAME = '/'; - - public static final byte[] DELIMITER_CHARACTERS = { DELIMITER_STRING_OPEN, - DELIMITER_STRING_CLOSE, DELIMITER_HEXSTRING_OPEN, - DELIMITER_HEXSTRING_CLOSE, DELIMITER_ARRAY_OPEN, DELIMITER_ARRAY_CLOSE, - DELIMITER_CURLY_OPEN, DELIMITER_CURLY_CLOSE, DELIMITER_NAME }; - - // Footer - - public static final byte[] XREF_STR = { 'x', 'r', 'e', 'f' }; - - public static final byte[] TRAILER_STR = { 't', 'r', 'a', 'i', 'l', 'e', 'r' }; - - public static final byte[] STARTXREF_STR = { 's', 't', 'a', 'r', 't', 'x', - 'r', 'e', 'f' }; - - public static final byte[] EOF_STR = { '%', '%', 'E', 'O', 'F' }; - - // objects - - public static final byte[] OBJ_STR = { 'o', 'b', 'j' }; - - public static final byte[] ENDOBJ_STR = { 'e', 'n', 'd', 'o', 'b', 'j' }; - - public static final byte[] DICT_START_STR = { DELIMITER_HEXSTRING_OPEN, - DELIMITER_HEXSTRING_OPEN }; - - public static final byte[] DICT_END_STR = { DELIMITER_HEXSTRING_CLOSE, - DELIMITER_HEXSTRING_CLOSE }; - - public static final byte[] STREAM_STR = { 's', 't', 'r', 'e', 'a', 'm' }; - - public static final byte[] ENDSTREAM_STR = { 'e', 'n', 'd', 's', 't', 'r', - 'e', 'a', 'm' }; - - public static final byte[] NULL_STR = { 'n', 'u', 'l', 'l' }; - - public static final byte[] TRUE_STR = { 't', 'r', 'u', 'e' }; - - public static final byte[] FALSE_STR = { 'f', 'a', 'l', 's', 'e' }; - - // indirect object references - - public static final byte[] REFERENCE_STR = { 'R' }; - - // Dictionary keys - - public static final byte[] SIZE_STR = { 'S', 'i', 'z', 'e' }; - - public static final byte[] PREV_STR = { 'P', 'r', 'e', 'v' }; - - public static final byte[] ROOT_STR = { 'R', 'o', 'o', 't' }; - - public static final byte[] INFO_STR = { 'I', 'n', 'f', 'o' }; - - public static final byte[] LENGTH_STR = { 'L', 'e', 'n', 'g', 't', 'h' }; - -} diff --git a/src/main/java/at/knowcenter/wag/exactparser/parsing/PDFUtils.java b/src/main/java/at/knowcenter/wag/exactparser/parsing/PDFUtils.java deleted file mode 100644 index de356c9..0000000 --- a/src/main/java/at/knowcenter/wag/exactparser/parsing/PDFUtils.java +++ /dev/null @@ -1,1405 +0,0 @@ -/** - * Copyright 2006 by Know-Center, Graz, Austria - * PDF-AS has been contracted by the E-Government Innovation Center EGIZ, a - * joint initiative of the Federal Chancellery Austria and Graz University of - * Technology. - * - * Licensed under the EUPL, Version 1.1 or - as soon they will be approved by - * the European Commission - subsequent versions of the EUPL (the "Licence"); - * You may not use this work except in compliance with the Licence. - * You may obtain a copy of the Licence at: - * http://www.osor.eu/eupl/ - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the Licence is distributed on an "AS IS" basis, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the Licence for the specific language governing permissions and - * limitations under the Licence. - * - * This product combines work with different licenses. See the "NOTICE" text - * file for details on the various modules and licenses. - * The "NOTICE" text file is part of the distribution. Any derivative works - * that you distribute must include a readable copy of the "NOTICE" text file. - * - * $Id: PDFUtils.java,v 1.1 2006/08/25 17:00:59 wprinz Exp $ - */ -package at.knowcenter.wag.exactparser.parsing; - -import java.util.ArrayList; -import java.util.Iterator; -import java.util.List; - -import org.apache.log4j.Logger; - -import at.knowcenter.wag.exactparser.ByteArrayUtils; -import at.knowcenter.wag.exactparser.parsing.results.ArrayParseResult; -import at.knowcenter.wag.exactparser.parsing.results.BooleanParseResult; -import at.knowcenter.wag.exactparser.parsing.results.DictionaryParseResult; -import at.knowcenter.wag.exactparser.parsing.results.EOFParseResult; -import at.knowcenter.wag.exactparser.parsing.results.FooterParseResult; -import at.knowcenter.wag.exactparser.parsing.results.HeaderParseResult; -import at.knowcenter.wag.exactparser.parsing.results.HexStringParseResult; -import at.knowcenter.wag.exactparser.parsing.results.IndirectObjectReferenceParseResult; -import at.knowcenter.wag.exactparser.parsing.results.IntegerParseResult; -import at.knowcenter.wag.exactparser.parsing.results.LiteralStringParseResult; -import at.knowcenter.wag.exactparser.parsing.results.NameParseResult; -import at.knowcenter.wag.exactparser.parsing.results.NullParseResult; -import at.knowcenter.wag.exactparser.parsing.results.NumberParseResult; -import at.knowcenter.wag.exactparser.parsing.results.ObjectHeaderParseResult; -import at.knowcenter.wag.exactparser.parsing.results.ObjectParseResult; -import at.knowcenter.wag.exactparser.parsing.results.ParseResult; -import at.knowcenter.wag.exactparser.parsing.results.StartXRefParseResult; -import at.knowcenter.wag.exactparser.parsing.results.StreamParseResult; -import at.knowcenter.wag.exactparser.parsing.results.TrailerParseResult; -import at.knowcenter.wag.exactparser.parsing.results.XRefLineParseResult; -import at.knowcenter.wag.exactparser.parsing.results.XRefSectionParseResult; -import at.knowcenter.wag.exactparser.parsing.results.XRefSubSectionParseResult; - - - -/** - * Abstract class that contains several static utility methods for parsing and - * analyzing PDF documents on the lowest level. - * - *

- * Most operations require random access to the PDF data (mostly to verify the - * synthax). So the whole PDF document has to be provided as a byte array. The - * term "pdf+index" states a specific position index within this byte array. - *

- * - * @author wprinz - * - */ -public abstract class PDFUtils -{ - private static Logger log = Logger.getLogger(PDFUtils.class); - - public static boolean isWhitespace(final byte data) - { - return ByteArrayUtils.contains(PDFNames.WHITESPACE_CHARACTERS, data); - } - - public static boolean isDelimiter(final byte data) - { - return ByteArrayUtils.contains(PDFNames.DELIMITER_CHARACTERS, data); - } - - protected static boolean isRegular(final byte data) - { - return !(isWhitespace(data) || isDelimiter(data)); - } - - /** - * Skips whitespace. - * - *

- * Skips all whitespace, which may be none, one or multiple whitespace - * characters. - *

- *

- * Note that this also skips newline characters (which belong to whitespace as - * well). - *

- * - * @param data - * The PDF data. - * @param index - * The index. - * @return Returns the index of the first non whitespace character. This may - * be equal to index if no whitespaces were skipped at all. - */ - public static int skipWhitespace(final byte[] data, final int index) - { - int non_whitespace_index = index; - while (isWhitespace(data[non_whitespace_index])) - { - non_whitespace_index++; - } - return non_whitespace_index; - } - - /** - * Skips bytes until whitespace is reached. - * - *

- * Skips all non whitespace characters, which may be none at all. - *

- * - * @param data - * The PDF data. - * @param index - * The index. - * @return Returns the index of the first whitespace character. This may be - * equal to index if no non whitespaces were skipped at all. - */ - public static int skipToWhitespace(final byte[] data, final int index) - { - int whitespace_index = index; - while (!isWhitespace(data[whitespace_index])) - { - whitespace_index++; - } - return whitespace_index; - } - - protected static final byte[] LINE_TERMINATOR_CRLF = { - PDFNames.WHITESPACE_CR, PDFNames.WHITESPACE_LF }; - - protected static final byte[] LINE_TERMINATOR_CRALONE = { PDFNames.WHITESPACE_CR }; - - protected static final byte[] LINE_TERMINATOR_LF = { PDFNames.WHITESPACE_LF }; - - public static boolean isNewline(final byte[] data, final int index) - { - if (ByteArrayUtils.compareByteArrays(data, index, LINE_TERMINATOR_LF)) - { - return true; - } - if (ByteArrayUtils.compareByteArrays(data, index, LINE_TERMINATOR_CRLF)) - { - return true; - } - // although not specified by PDF, some applications use the CR alone as line - // terminator - if (ByteArrayUtils.compareByteArrays(data, index, LINE_TERMINATOR_CRALONE)) - { - return true; - } - return false; - } - - public static int skipNewline(final byte[] data, final int index) - { - if (ByteArrayUtils.compareByteArrays(data, index, LINE_TERMINATOR_LF)) - { - return index + LINE_TERMINATOR_LF.length; - } - if (ByteArrayUtils.compareByteArrays(data, index, LINE_TERMINATOR_CRLF)) - { - return index + LINE_TERMINATOR_CRLF.length; - } - // although not specified by PDF, some applications use the CR alone as line - // terminator - if (ByteArrayUtils.compareByteArrays(data, index, LINE_TERMINATOR_CRALONE)) - { - return index + LINE_TERMINATOR_CRALONE.length; - } - - assert false : "don't call this if you don't expect a newline - call skipWhitespace instead"; - return index; - } - - public static int skipToNewline(final byte[] data, final int index) - { - int current_index = index; - for (;;) - { - if (ByteArrayUtils.compareByteArrays(data, current_index, LINE_TERMINATOR_LF)) - { - return current_index + LINE_TERMINATOR_LF.length; - } - if (ByteArrayUtils.compareByteArrays(data, index, LINE_TERMINATOR_CRLF)) - { - return index + LINE_TERMINATOR_CRLF.length; - } - // although not specified by PDF, some applications use the CR alone as - // line terminator - if (ByteArrayUtils.compareByteArrays(data, index, LINE_TERMINATOR_CRALONE)) - { - return index + LINE_TERMINATOR_CRALONE.length; - } - current_index++; - } - } - - /** - * Parses a boolean value. - * - * @param pdf - * The PDF data. - * @param index - * The index. - * @return Returns the result of the parsing operation. - */ - public static BooleanParseResult parseBoolean(final byte[] pdf, - final int index) - { - BooleanParseResult bpr = new BooleanParseResult(); - bpr.start_index = index; - - if (ByteArrayUtils.compareByteArrays(pdf, bpr.start_index, PDFNames.TRUE_STR)) - { - bpr.value = true; - bpr.next_index = bpr.start_index + PDFNames.TRUE_STR.length; - - return bpr; - } - if (ByteArrayUtils.compareByteArrays(pdf, bpr.start_index, PDFNames.FALSE_STR)) - { - bpr.value = false; - bpr.next_index = bpr.start_index + PDFNames.FALSE_STR.length; - - return bpr; - } - - throw new RuntimeException("Boolean couldn't be parsed at index " + index); - } - - public static boolean isSign(final byte data) - { - return data == '+' || data == '-'; - } - - public static boolean isNumeric(final byte data) - { - return '0' <= data && data <= '9'; - } - - /** - * Reads the (positive integer) number from the data. The number must be - * terminated by the end of line. - * - * @param data - * The data. - * @param index - * The index. - * @return Returns the read number. - */ - public static int readNumberFromByteArray(final byte[] data, final int index) - { - NumberParseResult npr = parseNumberFromByteArray(data, index); - - assert npr.number >= 0; - return npr.number; - } - - /** - * Parses an unsigned integer. - * - *

- * The integer must be a block of successive number characters. It must not be - * preceded by a sign (not even '+'). - *

- * - * @param pdf - * The PDF data. - * @param index - * The index. - * @return Returns the result of the parsing operation. - */ - public static IntegerParseResult parseUnsignedInteger(final byte[] pdf, - final int index) - { - assert isNumeric(pdf[index]); - - String number = ""; - - int cur_index = index; - while (isNumeric(pdf[cur_index])) - { - - number += (char) pdf[cur_index]; - - cur_index++; - } - - // TODO: make better - int int_value = Integer.parseInt(number); - - assert int_value >= 0; - - IntegerParseResult ipr = new IntegerParseResult(); - ipr.start_index = index; - ipr.next_index = cur_index; - ipr.number = int_value; - return ipr; - } - - /** - * Parses a (potentially) signed integer. - * - *

- * The integer must be a block of successive number characters. It may be - * preceded by a sign character ('+' or '-'). - *

- * - * @param pdf - * The PDF data. - * @param index - * The index. - * @return Returns the result of the parsing operation. - */ - public static IntegerParseResult parseInteger(final byte[] pdf, - final int index) - { - assert isSign(pdf[index]) || isNumeric(pdf[index]); - - int sign = +1; - int number_start = index; - if (pdf[index] == '+') - { - sign = +1; - number_start++; - } - else - { - if (pdf[index] == '-') - { - sign = -1; - number_start++; - } - else - { - assert isNumeric(pdf[index]); - } - } - - IntegerParseResult ipr = parseUnsignedInteger(pdf, number_start); - ipr.start_index = index; - ipr.number *= sign; - return ipr; - } - - /** - * Parses an arbitrary number; - * - * @param pdf - * The PDF data. - * @param index - * The index. - * @return Returns the result of the parsing operation. - */ - public static NumberParseResult parseNumberFromByteArray(final byte[] pdf, - int index) - { - String number = ""; - - assert isSign(pdf[index]) || isNumeric(pdf[index]); - - int sign = +1; - if (pdf[index] == '+') - { - sign = +1; - index++; - } - else - { - if (pdf[index] == '-') - { - sign = -1; - index++; - } - else - { - assert isNumeric(pdf[index]); - } - } - - while (isNumeric(pdf[index]) || pdf[index] == '.') - { - - char digit = (char) pdf[index]; - number += digit; - - index++; - } - - NumberParseResult npr = new NumberParseResult(); - npr.next_index = index; - // TODO: make better - try - { - npr.number = Integer.parseInt(number) * sign; - } - catch (NumberFormatException e) - { - npr.floating = Float.parseFloat(number) * sign; - } - - return npr; - } - - /** - * Searches the last occurrence of the "startxref" entry ... in other words - * starts the search from the end of the document and works reversely. - * - * @param pdf - * The complete PDF file data. - * @return Returns the offset (byte index) of the "startxref" entry. - */ - public static int findLastStartXRef(final byte[] pdf) - { - return ByteArrayUtils.lastIndexOf(pdf, PDFNames.STARTXREF_STR); - } - - /** - * Parses the xref section at pdf+index. - * - *

- * An xref section starts with 'xref' and contains one or more xref - * sub-sections. - *

- * - * @param pdf - * The PDF data. - * @param index - * The start index of the xref table. - * @return Returns the result of the parsing operation. - */ - public static XRefSectionParseResult parseXRefSection(final byte[] pdf, - final int index) - { - at.knowcenter.wag.exactparser.parsing.results.XRefSectionParseResult xpr = new XRefSectionParseResult(); - xpr.start_index = index; - - assert ByteArrayUtils.compareByteArrays(pdf, xpr.start_index, PDFNames.XREF_STR); - assert isNewline(pdf, xpr.start_index + PDFNames.XREF_STR.length); - - int cur_index = skipWhitespace(pdf, xpr.start_index + PDFNames.XREF_STR.length); - // skipNewline(pdf, xpr.start_index + PDFNames.XREF_STR.length); - - for (;;) - { - // trailer ends the xref section. - if (ByteArrayUtils.compareByteArrays(pdf, cur_index, PDFNames.TRAILER_STR)) - { - break; - } - - // no trailer ==> another xref section - - XRefSubSectionParseResult sspr = parseXRefSubSection(pdf, cur_index); - xpr.appendXRefSubSection(sspr); - - cur_index = sspr.next_index; - } - - xpr.next_index = cur_index; - assert ByteArrayUtils.compareByteArrays(pdf, xpr.next_index, PDFNames.TRAILER_STR); - - return xpr; - } - - /** - * Parses a xref sub-section. - * - * @param pdf - * The PDF data. - * @param index - * The index. - * @return Returns the result of the parsing operation. - */ - public static XRefSubSectionParseResult parseXRefSubSection(final byte[] pdf, - final int index) - { - XRefSubSectionParseResult sspr = new XRefSubSectionParseResult(); - sspr.start_index = index; - - NumberParseResult start_obj_num_npr = parseNumberFromByteArray(pdf, sspr.start_index); - sspr.start_obj_number = start_obj_num_npr.number; - assert sspr.start_obj_number >= 0; - - assert isWhitespace(pdf[start_obj_num_npr.next_index]); - int num_obj_index = skipWhitespace(pdf, start_obj_num_npr.next_index); - - NumberParseResult num_obj_npr = parseNumberFromByteArray(pdf, num_obj_index); - sspr.num_objects = num_obj_npr.number; - - // assert isNewline(pdf, num_obj_npr.next_index); - assert isWhitespace(pdf[num_obj_npr.next_index]); - int start_of_line = skipWhitespace(pdf, num_obj_npr.next_index); - // skipNewline(pdf, num_obj_npr.next_index); - - for (int i = 0; i < sspr.num_objects; i++) - { - final int cur_object_number = sspr.start_obj_number + i; - - XRefLineParseResult lpr = parseXrefLine(pdf, start_of_line); - sspr.appendXRefLine(lpr); - - // System.out.println("xref line of object " + (oc.start_obj_number + i) + - // " at " + lpr.start_index + ": " + lpr.object_offset + " " + - // lpr.generation_number + " " + (char) lpr.object_usage); - - if (lpr.object_usage == 'n') - { - // check the line - this simple check may make problems with object - // streams and xref streams - ObjectHeaderParseResult ohpr = parseObjectHeader(pdf, lpr.object_offset); - assert ohpr.object_number == cur_object_number; - assert ohpr.generation_number == lpr.generation_number; - } - - start_of_line = lpr.next_index; - } - - sspr.next_index = start_of_line; - return sspr; - } - - /** - * Parses a single 20 bytes xref line at pdf+index. - * - * @param pdf - * The PDF data. - * @param index - * The index. - * @return Returns the result of the parsing operation. - */ - public static XRefLineParseResult parseXrefLine(final byte[] pdf, - final int index) - { - XRefLineParseResult lpr = new XRefLineParseResult(); - - lpr.start_index = index; - - IntegerParseResult object_offset_ipr = parseUnsignedInteger(pdf, lpr.start_index); - lpr.object_offset = object_offset_ipr.number; - assert lpr.object_offset >= 0; - assert lpr.object_offset < pdf.length; - assert object_offset_ipr.next_index == lpr.start_index + 10; - - assert pdf[object_offset_ipr.next_index] == PDFNames.WHITESPACE_SP; // Standard - // explicitely - // says 1 - // single - // SPACE - int generation_number_index = object_offset_ipr.next_index + 1; - - IntegerParseResult generation_number_ipr = parseUnsignedInteger(pdf, generation_number_index); - lpr.generation_number = generation_number_ipr.number; - assert generation_number_ipr.next_index == lpr.start_index + 16; - - assert pdf[generation_number_ipr.next_index] == PDFNames.WHITESPACE_SP; - int usage_index = generation_number_ipr.next_index + 1; - - lpr.object_usage = pdf[usage_index]; - assert lpr.object_usage == 'n' || lpr.object_usage == 'f'; - - if (pdf[usage_index + 1] == PDFNames.WHITESPACE_SP) - { - assert pdf[usage_index + 2] == PDFNames.WHITESPACE_CR || pdf[usage_index + 2] == PDFNames.WHITESPACE_LF; - } - else - { - assert pdf[usage_index + 1] == PDFNames.WHITESPACE_CR; - assert pdf[usage_index + 2] == PDFNames.WHITESPACE_LF; - } - - lpr.next_index = usage_index + 3; - - assert lpr.next_index == lpr.start_index + 20; - - return lpr; - } - - public static int indexOfName(final byte[] pdf, List names, - byte[] sought) - { - for (int i = 0; i < names.size(); i++) - { - NameParseResult name = (NameParseResult) names.get(i); - if (ByteArrayUtils.compareByteArrays(pdf, name.name_start_index, sought)) - { - return i; - } - } - return -1; - } - - public static TrailerParseResult parseTrailer(final byte[] pdf, - final int index) - { - TrailerParseResult tpr = new TrailerParseResult(); - tpr.start_index = index; - tpr.has_predecessor = false; - - assert ByteArrayUtils.compareByteArrays(pdf, tpr.start_index, PDFNames.TRAILER_STR); - - // assert isWhitespace(pdf[tpr.start_index + PDFNames.TRAILER_STR.length]); - tpr.contents_index = skipWhitespace(pdf, tpr.start_index + PDFNames.TRAILER_STR.length); - - int trailer_dict_index = skipWhitespace(pdf, tpr.contents_index); - - assert ByteArrayUtils.compareByteArrays(pdf, trailer_dict_index, PDFNames.DICT_START_STR); - - tpr.dpr = parseDictionary(pdf, trailer_dict_index); - - int cur_index = tpr.dpr.next_index; - - int info_index = indexOfName(pdf, tpr.dpr.names, PDFNames.INFO_STR); - if (info_index >= 0) - { - tpr.info = (IndirectObjectReferenceParseResult) tpr.dpr.values.get(info_index); - } - - int root_index = indexOfName(pdf, tpr.dpr.names, PDFNames.ROOT_STR); - if (root_index >= 0) - { - tpr.root = (IndirectObjectReferenceParseResult) tpr.dpr.values.get(root_index); - } - - tpr.size = ((NumberParseResult) tpr.dpr.values.get(indexOfName(pdf, tpr.dpr.names, PDFNames.SIZE_STR))).number; - - int prev_index = indexOfName(pdf, tpr.dpr.names, PDFNames.PREV_STR); - if (prev_index >= 0) - { - tpr.has_predecessor = true; - tpr.setPrev(((NumberParseResult) tpr.dpr.values.get(prev_index)).number); - } - - // - // int cur_index = skipWhitespace(pdf, trailer_dict_index + - // PDFNames.DICT_START_STR.length); - // for (;;) { - // if (ByteArrayUtils.compareByteArrays(pdf, cur_index, - // PDFNames.DICT_END_STR)) { - // cur_index += PDFNames.DICT_END_STR.length; - // break; - // } - // - // assert pdf[cur_index] == PDFNames.DELIMITER_NAME; - // cur_index++; - // - // if (ByteArrayUtils.compareByteArrays(pdf, cur_index, PDFNames.INFO_STR)) - // { - // assert isWhitespace(pdf[cur_index + PDFNames.INFO_STR.length]); - // int ir_index = skipWhitespace(pdf, cur_index + PDFNames.INFO_STR.length); - // - // IndirectObjectReferenceParseResult iorpr = - // parseIndirectObjectReference(pdf, ir_index); - // tpr.info = iorpr; - // - // cur_index = skipWhitespace(pdf, iorpr.next_index); - // continue; - // } - // - // if (ByteArrayUtils.compareByteArrays(pdf, cur_index, PDFNames.ROOT_STR)) - // { - // assert isWhitespace(pdf[cur_index + PDFNames.ROOT_STR.length]); - // int ir_index = skipWhitespace(pdf, cur_index + PDFNames.ROOT_STR.length); - // - // IndirectObjectReferenceParseResult iorpr = - // parseIndirectObjectReference(pdf, ir_index); - // tpr.root = iorpr; - // - // cur_index = skipWhitespace(pdf, iorpr.next_index); - // continue; - // } - // - // if (ByteArrayUtils.compareByteArrays(pdf, cur_index, PDFNames.SIZE_STR)) - // { - // assert isWhitespace(pdf[cur_index + PDFNames.SIZE_STR.length]); - // int size_index = skipWhitespace(pdf, cur_index + - // PDFNames.SIZE_STR.length); - // - // NumberParseResult npr = parseNumberFromByteArray(pdf, size_index); - // tpr.size = npr.number; - // assert tpr.size > 0; - // - // cur_index = skipWhitespace(pdf, npr.next_index); - // continue; - // } - // - // if (ByteArrayUtils.compareByteArrays(pdf, cur_index, PDFNames.PREV_STR)) - // { - // assert isWhitespace(pdf[cur_index + PDFNames.PREV_STR.length]); - // int prev_index = skipWhitespace(pdf, cur_index + - // PDFNames.PREV_STR.length); - // - // NumberParseResult npr = parseNumberFromByteArray(pdf, prev_index); - // tpr.has_predecessor = true; - // tpr.setPrev(npr.number); - // assert tpr.getPrev() >= 0; - // assert tpr.getPrev() < pdf.length; - // - // assert ByteArrayUtils.compareByteArrays(pdf, tpr.getPrev(), - // PDFNames.XREF_STR); - // - // cur_index = skipWhitespace(pdf, npr.next_index); - // continue; - // } - // - // // unrecognized type - // // skip to next delimiter - // // TODO: this will not work with nested dicts. - already deprecated - // while (pdf[cur_index] != PDFNames.DELIMITER_NAME) { - // cur_index++; - // } - // } - - tpr.contents_end_index = cur_index; - tpr.next_index = skipWhitespace(pdf, tpr.contents_end_index); - - assert ByteArrayUtils.compareByteArrays(pdf, tpr.next_index, PDFNames.STARTXREF_STR); - return tpr; - } - - /** - * Parses the startxref section at pdf+index. - * - * @param pdf - * The complete PDF file data. - * @param index - * The index of the startxref section. - * @return Returns the retsult of the parsing operation. - */ - public static StartXRefParseResult parseStartXRef(final byte[] pdf, - final int index) - { - StartXRefParseResult spr = new StartXRefParseResult(); - spr.next_index = index; - - assert ByteArrayUtils.compareByteArrays(pdf, index, PDFNames.STARTXREF_STR); - assert isNewline(pdf, index + PDFNames.STARTXREF_STR.length); - - int index_of_number = skipWhitespace(pdf, index + PDFNames.STARTXREF_STR.length); - // skipNewline(pdf, index + PDFNames.STARTXREF_STR.length); - NumberParseResult npr = parseNumberFromByteArray(pdf, index_of_number); - spr.xref_index = npr.number; - - assert isNewline(pdf, npr.next_index); - spr.next_index = skipWhitespace(pdf, npr.next_index); - // skipNewline(pdf, npr.next_index); - - assert ByteArrayUtils.compareByteArrays(pdf, spr.next_index, PDFNames.EOF_STR); - - assert spr.xref_index >= 0; - assert spr.xref_index < pdf.length; - - // A linearized document sets the startxref value of the first page's footer - // to 0. - if (spr.xref_index != 0) - { - assert ByteArrayUtils.compareByteArrays(pdf, spr.xref_index, PDFNames.XREF_STR); - } - - return spr; - } - - /** - * Parses the End Of File (EOF) marker at pdf+index. - * - * @param pdf - * The PDF data. - * @param index - * The index where to start the parsing. - * @return Returns the result of the parsing operation. - */ - public static EOFParseResult parseEOF(final byte[] pdf, final int index) - { - EOFParseResult eofpr = new EOFParseResult(); - eofpr.start_index = index; - - assert ByteArrayUtils.compareByteArrays(pdf, eofpr.start_index, PDFNames.EOF_STR); - - eofpr.eof_end_index = eofpr.start_index + PDFNames.EOF_STR.length; - - // Note: The EOF marker is not necessarily terminated with a - // newline. - - // perhaps explicitely determine a newline. - - eofpr.next_index = eofpr.eof_end_index; - - return eofpr; - } - - public static boolean isIndirectObjectReference(final byte[] pdf, - final int index) - { - IndirectObjectReferenceParseResult iorpr = new IndirectObjectReferenceParseResult(); - iorpr.ior = new IndirectObjectReference(); - iorpr.start_index = index; - - if (!PDFUtils.isNumeric(pdf[iorpr.start_index])) - { - return false; - } - NumberParseResult object_number_npr = parseNumberFromByteArray(pdf, iorpr.start_index); - iorpr.ior.object_number = object_number_npr.number; - if (iorpr.ior.object_number <= 0) - { - return false; - } - - if (!isWhitespace(pdf[object_number_npr.next_index])) - { - return false; - } - int generation_number_index = skipWhitespace(pdf, object_number_npr.next_index); - - if (!PDFUtils.isNumeric(pdf[generation_number_index])) - { - return false; - } - NumberParseResult generation_number_npr = parseNumberFromByteArray(pdf, generation_number_index); - iorpr.ior.generation_number = generation_number_npr.number; - if (iorpr.ior.generation_number < 0) - { - return false; - } - - if (!isWhitespace(pdf[generation_number_npr.next_index])) - { - return false; - } - int R_index = skipWhitespace(pdf, generation_number_npr.next_index); - - if (!ByteArrayUtils.compareByteArrays(pdf, R_index, PDFNames.REFERENCE_STR)) - { - return false; - } - - iorpr.next_index = R_index + PDFNames.REFERENCE_STR.length; - - return true; - } - - /** - * Parses an indirect object reference. - * - * @param pdf - * The PDF data. - * @param index - * The index. - * @return Returns the result of the parsing operation. - */ - public static IndirectObjectReferenceParseResult parseIndirectObjectReference( - final byte[] pdf, final int index) - { - - assert isIndirectObjectReference(pdf, index); - - IndirectObjectReferenceParseResult iorpr = new IndirectObjectReferenceParseResult(); - iorpr.ior = new IndirectObjectReference(); - iorpr.start_index = index; - - NumberParseResult object_number_npr = parseNumberFromByteArray(pdf, iorpr.start_index); - iorpr.ior.object_number = object_number_npr.number; - assert iorpr.ior.object_number > 0; - - assert isWhitespace(pdf[object_number_npr.next_index]); - int generation_number_index = skipWhitespace(pdf, object_number_npr.next_index); - - NumberParseResult generation_number_npr = parseNumberFromByteArray(pdf, generation_number_index); - iorpr.ior.generation_number = generation_number_npr.number; - assert iorpr.ior.generation_number >= 0; - - assert isWhitespace(pdf[generation_number_npr.next_index]); - int R_index = skipWhitespace(pdf, generation_number_npr.next_index); - - assert ByteArrayUtils.compareByteArrays(pdf, R_index, PDFNames.REFERENCE_STR); - - iorpr.next_index = R_index + PDFNames.REFERENCE_STR.length; - - return iorpr; - } - - /** - * Parses the object header at pdf+index. - * - * @param pdf - * The PDF data. - * @param index - * The index. - * @return Returns the result of the parsing operation. - */ - public static ObjectHeaderParseResult parseObjectHeader(final byte[] pdf, - final int index) - { - ObjectHeaderParseResult ohpr = new ObjectHeaderParseResult(); - - ohpr.start_index = index; - - NumberParseResult object_number_npr = parseNumberFromByteArray(pdf, ohpr.start_index); - ohpr.object_number = object_number_npr.number; - assert ohpr.object_number > 0; - - assert isWhitespace(pdf[object_number_npr.next_index]); - int generation_number_index = skipWhitespace(pdf, object_number_npr.next_index); - - NumberParseResult generation_number_npr = parseNumberFromByteArray(pdf, generation_number_index); - ohpr.generation_number = generation_number_npr.number; - assert ohpr.generation_number >= 0; - - assert isWhitespace(pdf[generation_number_npr.next_index]); - int obj_index = skipWhitespace(pdf, generation_number_npr.next_index); - - assert ByteArrayUtils.compareByteArrays(pdf, obj_index, PDFNames.OBJ_STR); - - // not all pdfwriters make a newline after obj... - // assert isNewline(pdf, obj_index + PDFNames.OBJ_STR.length); - // ohpr.next_index = skipNewline(pdf, obj_index + PDFNames.OBJ_STR.length); - ohpr.next_index = skipWhitespace(pdf, obj_index + PDFNames.OBJ_STR.length); - - return ohpr; - } - - public static ObjectParseResult parseObject(final byte[] pdf, final int index) - { - ObjectParseResult opr = new ObjectParseResult(); - opr.start_index = index; - - opr.header = parseObjectHeader(pdf, opr.start_index); - opr.content_index = opr.header.next_index; - - int cur_index = skipWhitespace(pdf, opr.content_index); - - opr.object = parseUnknownObject(pdf, cur_index); - - cur_index = skipWhitespace(pdf, opr.object.next_index); - - opr.end_of_content_index = cur_index; - assert ByteArrayUtils.compareByteArrays(pdf, opr.end_of_content_index, PDFNames.ENDOBJ_STR); - - cur_index = opr.end_of_content_index + PDFNames.ENDOBJ_STR.length; - - opr.next_index = cur_index; - //assert isNewline(pdf, cur_index); - //opr.next_index = skipNewline(pdf, cur_index); - - return opr; - } - - public static ParseResult parseUnknownObject(final byte[] pdf, final int index) - { - if (ByteArrayUtils.compareByteArrays(pdf, index, PDFNames.DICT_START_STR)) - { - DictionaryParseResult dpr = parseDictionary(pdf, index); - - int possible_stream_index = skipWhitespace(pdf, dpr.next_index); - if (ByteArrayUtils.compareByteArrays(pdf, possible_stream_index, PDFNames.STREAM_STR)) - { - return parseStream(pdf, possible_stream_index, dpr); - } - - return dpr; - } - - if (ByteArrayUtils.compareByteArrays(pdf, index, PDFNames.NULL_STR)) - { - return parseNull(pdf, index); - } - - if (ByteArrayUtils.compareByteArrays(pdf, index, PDFNames.TRUE_STR) || ByteArrayUtils.compareByteArrays(pdf, index, PDFNames.FALSE_STR)) - { - return parseBoolean(pdf, index); - } - - final byte first_byte = pdf[index]; - - if (isNumeric(first_byte) || isSign(first_byte)) - { - - // try to parse a Indirect reference first - if this fails, parse a number - if (isIndirectObjectReference(pdf, index)) - { - return parseIndirectObjectReference(pdf, index); - } - - return parseNumberFromByteArray(pdf, index); - } - - ParseResult pr = null; - - switch (first_byte) - { - case PDFNames.DELIMITER_STRING_OPEN: - pr = parseLiteralString(pdf, index); - break; - case PDFNames.DELIMITER_HEXSTRING_OPEN: - pr = parseHexString(pdf, index); - break; - case PDFNames.DELIMITER_ARRAY_OPEN: - pr = parseArray(pdf, index); - break; - case PDFNames.DELIMITER_NAME: - pr = parseName(pdf, index); - break; - default: - throw new RuntimeException("Unknown first_byte " + first_byte + "' when parsing an unknown object at index=" + index + "."); - // assert false : "nyi or invalid char"; - } - assert pr != null; - - return pr; - } - - /** - * Parses a literal string. - * - *

- * A literal string is a string of ASCII characters enclosed by '(' and ')'. - * Balanced pairs of '(' and ')' are allowed within the string. Unbalanced '(' - * or ')' must be escaped as '\(' or '\)'. - *

- * - * @param pdf - * The PDF data. - * @param index - * The index. - * @return Returns the result of the parsing operation. - */ - public static LiteralStringParseResult parseLiteralString(final byte[] pdf, - final int index) - { - LiteralStringParseResult lspr = new LiteralStringParseResult(); - lspr.start_index = index; - - assert pdf[lspr.start_index] == PDFNames.DELIMITER_STRING_OPEN; - - lspr.content_start_index = lspr.start_index + 1; - - int cur_index = lspr.content_start_index; - int parenthesis_stack = 0; - for (;;) - { - if (pdf[cur_index] == '\\' && (pdf[cur_index + 1] == PDFNames.DELIMITER_STRING_CLOSE || pdf[cur_index + 1] == PDFNames.DELIMITER_STRING_OPEN)) - { - cur_index += 2; - continue; - } - if (pdf[cur_index] == PDFNames.DELIMITER_STRING_OPEN) - { - parenthesis_stack++; - } - if (pdf[cur_index] == PDFNames.DELIMITER_STRING_CLOSE) - { - assert parenthesis_stack >= 0; - - if (parenthesis_stack == 0) - { - break; - } - - assert parenthesis_stack > 0; - parenthesis_stack--; - - } - - cur_index++; - } - - lspr.content_end_index = cur_index; - assert pdf[lspr.content_end_index] == PDFNames.DELIMITER_STRING_CLOSE; - - lspr.next_index = lspr.content_end_index + 1; - - return lspr; - } - - protected static boolean isHex(final byte data) - { - return isNumeric(data) || ('a' <= data && data <= 'f') || ('A' <= data && data <= 'f'); - } - - /** - * Parses a hexadecimal string. - * - * @param pdf - * The PDF data. - * @param index - * The index. - * @return Returns the result of the parsing operation. - */ - public static HexStringParseResult parseHexString(final byte[] pdf, - final int index) - { - HexStringParseResult hspr = new HexStringParseResult(); - hspr.start_index = index; - - assert pdf[hspr.start_index] == PDFNames.DELIMITER_HEXSTRING_OPEN; - - hspr.content_start_index = hspr.start_index + 1; - - int cur_index = hspr.content_start_index; - while (isHex(pdf[cur_index]) || isWhitespace(pdf[cur_index])) - { - cur_index++; - } - - hspr.content_end_index = cur_index; - assert pdf[hspr.content_end_index] == PDFNames.DELIMITER_HEXSTRING_CLOSE; - - hspr.next_index = hspr.content_end_index + 1; - - return hspr; - } - - public static ArrayParseResult parseArray(final byte[] pdf, final int index) - { - ArrayParseResult apr = new ArrayParseResult(); - apr.start_index = index; - assert pdf[apr.start_index] == PDFNames.DELIMITER_ARRAY_OPEN; - - apr.content_start_index = apr.start_index + 1; - - apr.elements = new ArrayList(); - - int cur_index = skipWhitespace(pdf, apr.content_start_index); - for (;;) - { - if (pdf[cur_index] == PDFNames.DELIMITER_ARRAY_CLOSE) - { - break; - } - - ParseResult pr = parseUnknownObject(pdf, cur_index); - apr.elements.add(pr); - - cur_index = skipWhitespace(pdf, pr.next_index); - } - assert pdf[cur_index] == PDFNames.DELIMITER_ARRAY_CLOSE; - - apr.content_end_index = cur_index; - assert pdf[apr.content_end_index] == PDFNames.DELIMITER_ARRAY_CLOSE; - - apr.next_index = apr.content_end_index + 1; - return apr; - } - - /** - * Parses a PDF Name. - * - * @param pdf - * The PDF data. - * @param index - * The index. - * @return Returns the result of this parsing operation. - */ - public static NameParseResult parseName(final byte[] pdf, final int index) - { - NameParseResult npr = new NameParseResult(); - npr.start_index = index; - - assert pdf[npr.start_index] == PDFNames.DELIMITER_NAME; - - npr.name_start_index = npr.start_index + 1; - - assert isRegular(pdf[npr.name_start_index]); - - int cur_index = npr.name_start_index; - while (isRegular(pdf[cur_index])) - { - cur_index++; - } - assert !isRegular(pdf[cur_index]); - - npr.next_index = cur_index; - - return npr; - } - - public static DictionaryParseResult parseDictionary(final byte[] pdf, - final int index) - { - DictionaryParseResult dpr = new DictionaryParseResult(); - dpr.start_index = index; - - assert ByteArrayUtils.compareByteArrays(pdf, index, PDFNames.DICT_START_STR); - - dpr.content_start_index = dpr.start_index + PDFNames.DICT_START_STR.length; - - dpr.names = new ArrayList(); - dpr.values = new ArrayList(); - - int cur_index = skipWhitespace(pdf, dpr.content_start_index); - for (;;) - { - if (ByteArrayUtils.compareByteArrays(pdf, cur_index, PDFNames.DICT_END_STR)) - { - break; - } - - NameParseResult npr = parseName(pdf, cur_index); - dpr.names.add(npr); - - cur_index = npr.next_index; - cur_index = skipWhitespace(pdf, cur_index); - - ParseResult pr = parseUnknownObject(pdf, cur_index); - dpr.values.add(pr); - - cur_index = pr.next_index; - cur_index = skipWhitespace(pdf, cur_index); - } - - dpr.content_end_index = cur_index; - assert ByteArrayUtils.compareByteArrays(pdf, dpr.content_end_index, PDFNames.DICT_END_STR); - dpr.next_index = dpr.content_end_index + PDFNames.DICT_END_STR.length; - - return dpr; - } - - /** - * Parses a stream. - * - * @param pdf - * The PDF data. - * @param index - * The index. - * @param dpr - * The DictionaryParseResult of the stream's dictionary. This - * dictionary must precede the stream keyword. Usually this is - * provided in the stream object's dictionary via the /Length field. - * @return Returns the result of this parsing operation. - */ - public static StreamParseResult parseStream(final byte[] pdf, - final int index, final DictionaryParseResult dpr) - { - StreamParseResult spr = new StreamParseResult(); - spr.stream_dictionary = dpr; - spr.start_index = spr.stream_dictionary.start_index; - spr.stream_start_index = index; - assert ByteArrayUtils.compareByteArrays(pdf, index, PDFNames.STREAM_STR); - - // assert that the provided dictionary really belongs to this stream - assert spr.stream_start_index == skipWhitespace(pdf, spr.stream_dictionary.next_index); - - // see PDF Spec 1.4 chapter 3.2.7 - assert pdf[spr.stream_start_index + PDFNames.STREAM_STR.length] == PDFNames.WHITESPACE_LF || (pdf[spr.stream_start_index + PDFNames.STREAM_STR.length] == PDFNames.WHITESPACE_CR && pdf[spr.stream_start_index + PDFNames.STREAM_STR.length + 1] == PDFNames.WHITESPACE_LF); - spr.content_start_index = skipNewline(pdf, spr.stream_start_index + PDFNames.STREAM_STR.length); - - int length = -1; - for (int i = 0; i < spr.stream_dictionary.names.size(); i++) - { - NameParseResult name = (NameParseResult) spr.stream_dictionary.names.get(i); - if (ByteArrayUtils.compareByteArrays(pdf, name.name_start_index, PDFNames.LENGTH_STR)) - { - ParseResult pr = (ParseResult) spr.stream_dictionary.values.get(i); - NumberParseResult npr = null; - if (pr instanceof IndirectObjectReferenceParseResult) - { - log.debug("An object stream with indirect length - cannot parse this instantly - parse later again."); - spr.content_end_index = -1; - spr.next_index = -1; - return spr; - - } - else - { - npr = (NumberParseResult) pr; - } - assert npr != null; - - length = npr.number; - break; - } - - } - assert length >= 0; - - spr.content_end_index = spr.content_start_index + length; - - int endstr_index = spr.content_end_index; - if (isNewline(pdf, endstr_index)) - { - endstr_index = skipWhitespace(pdf, endstr_index); - } - assert ByteArrayUtils.compareByteArrays(pdf, endstr_index, PDFNames.ENDSTREAM_STR); - - spr.next_index = endstr_index + PDFNames.ENDSTREAM_STR.length; - - return spr; - } - - public static NullParseResult parseNull(final byte[] pdf, final int index) - { - NullParseResult npr = new NullParseResult(); - npr.start_index = index; - - assert ByteArrayUtils.compareByteArrays(pdf, npr.start_index, PDFNames.NULL_STR); - - npr.next_index = npr.start_index + PDFNames.NULL_STR.length; - - return npr; - } - - public static int getObjectOffsetFromXRefByIndirectObjectReference( - XRefSectionParseResult xpr, IndirectObjectReference ior) - { - Iterator it = xpr.xref_subsections.iterator(); - while (it.hasNext()) - { - XRefSubSectionParseResult section = (XRefSubSectionParseResult) it.next(); - - for (int i = 0; i < section.xref_lines.size(); i++) - { - if (section.start_obj_number + i == ior.object_number) - { - XRefLineParseResult lpr = (XRefLineParseResult) section.xref_lines.get(i); - return lpr.object_offset; - } - } - } - - return -1; - } - - public static HeaderParseResult parseHeader(final byte[] pdf, final int index) - { - HeaderParseResult hpr = new HeaderParseResult(); - hpr.start_index = index; - - assert pdf[hpr.start_index] == PDFNames.COMMENT; - - assert ByteArrayUtils.compareByteArrays(pdf, hpr.start_index + 1, PDFNames.PDF_VERSION_STR); - - hpr.major_index = hpr.start_index + 1 + PDFNames.PDF_VERSION_STR.length; - - IntegerParseResult major_ipr = parseUnsignedInteger(pdf, hpr.major_index); - hpr.major = major_ipr.number; - assert hpr.major >= 1; - - assert pdf[major_ipr.next_index] == PDFNames.PDF_VERSION_SEPARATOR; - - hpr.minor_index = major_ipr.next_index + 1; - - IntegerParseResult minor_ipr = parseUnsignedInteger(pdf, hpr.minor_index); - hpr.minor = minor_ipr.number; - assert hpr.minor >= 0; - - assert isWhitespace(pdf[minor_ipr.next_index]); - hpr.binary_characters_index = skipWhitespace(pdf, minor_ipr.next_index); - - assert pdf[hpr.binary_characters_index] == PDFNames.COMMENT; - - hpr.next_index = skipToNewline(pdf, hpr.binary_characters_index); - return hpr; - } - - /** - * Parses a PDF footer. - * - *

- * A PDF footer starts with the xref, followed by the trailer, the startxref - * and the EOF marker. - *

- * - * @param pdf - * The PDF data. - * @param index - * The index. - * @return Returns the result of the parsing operation. - * - * @see FooterParseResult - */ - public static FooterParseResult parseFooter(final byte[] pdf, final int index) - { - FooterParseResult fpr = new FooterParseResult(); - fpr.start_index = index; - - fpr.xpr = PDFUtils.parseXRefSection(pdf, fpr.start_index); - - fpr.tpr = PDFUtils.parseTrailer(pdf, fpr.xpr.next_index); - - fpr.sxpr = PDFUtils.parseStartXRef(pdf, fpr.tpr.next_index); - - fpr.eofpr = PDFUtils.parseEOF(pdf, fpr.sxpr.next_index); - - fpr.next_index = fpr.eofpr.next_index; - return fpr; - } - -} diff --git a/src/main/java/at/knowcenter/wag/exactparser/parsing/results/ArrayParseResult.java b/src/main/java/at/knowcenter/wag/exactparser/parsing/results/ArrayParseResult.java deleted file mode 100644 index 9d0a745..0000000 --- a/src/main/java/at/knowcenter/wag/exactparser/parsing/results/ArrayParseResult.java +++ /dev/null @@ -1,42 +0,0 @@ -/** - * Copyright 2006 by Know-Center, Graz, Austria - * PDF-AS has been contracted by the E-Government Innovation Center EGIZ, a - * joint initiative of the Federal Chancellery Austria and Graz University of - * Technology. - * - * Licensed under the EUPL, Version 1.1 or - as soon they will be approved by - * the European Commission - subsequent versions of the EUPL (the "Licence"); - * You may not use this work except in compliance with the Licence. - * You may obtain a copy of the Licence at: - * http://www.osor.eu/eupl/ - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the Licence is distributed on an "AS IS" basis, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the Licence for the specific language governing permissions and - * limitations under the Licence. - * - * This product combines work with different licenses. See the "NOTICE" text - * file for details on the various modules and licenses. - * The "NOTICE" text file is part of the distribution. Any derivative works - * that you distribute must include a readable copy of the "NOTICE" text file. - * - * $Id: ArrayParseResult.java,v 1.1 2006/08/25 17:00:59 wprinz Exp $ - */ -package at.knowcenter.wag.exactparser.parsing.results; - -import java.util.List; - -/** - * The result of parsing a hex string. - * - * @see at.knowcenter.wag.exactparser.parsing.results.LiteralStringParseResult - * - * @author wprinz - */ -public class ArrayParseResult extends ContainerParseResult { - - public List elements = null; - - -} diff --git a/src/main/java/at/knowcenter/wag/exactparser/parsing/results/BooleanParseResult.java b/src/main/java/at/knowcenter/wag/exactparser/parsing/results/BooleanParseResult.java deleted file mode 100644 index e0bc276..0000000 --- a/src/main/java/at/knowcenter/wag/exactparser/parsing/results/BooleanParseResult.java +++ /dev/null @@ -1,38 +0,0 @@ -/** - * Copyright 2006 by Know-Center, Graz, Austria - * PDF-AS has been contracted by the E-Government Innovation Center EGIZ, a - * joint initiative of the Federal Chancellery Austria and Graz University of - * Technology. - * - * Licensed under the EUPL, Version 1.1 or - as soon they will be approved by - * the European Commission - subsequent versions of the EUPL (the "Licence"); - * You may not use this work except in compliance with the Licence. - * You may obtain a copy of the Licence at: - * http://www.osor.eu/eupl/ - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the Licence is distributed on an "AS IS" basis, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the Licence for the specific language governing permissions and - * limitations under the Licence. - * - * This product combines work with different licenses. See the "NOTICE" text - * file for details on the various modules and licenses. - * The "NOTICE" text file is part of the distribution. Any derivative works - * that you distribute must include a readable copy of the "NOTICE" text file. - * - * $Id: BooleanParseResult.java,v 1.1 2006/08/25 17:00:59 wprinz Exp $ - */ -package at.knowcenter.wag.exactparser.parsing.results; - -/** - * Parse result of parsing a boolean value. - * - * @author wprinz - */ -public class BooleanParseResult extends ParseResult -{ - - public boolean value = false; - -} diff --git a/src/main/java/at/knowcenter/wag/exactparser/parsing/results/ContainerParseResult.java b/src/main/java/at/knowcenter/wag/exactparser/parsing/results/ContainerParseResult.java deleted file mode 100644 index 1974ade..0000000 --- a/src/main/java/at/knowcenter/wag/exactparser/parsing/results/ContainerParseResult.java +++ /dev/null @@ -1,45 +0,0 @@ -/** - * Copyright 2006 by Know-Center, Graz, Austria - * PDF-AS has been contracted by the E-Government Innovation Center EGIZ, a - * joint initiative of the Federal Chancellery Austria and Graz University of - * Technology. - * - * Licensed under the EUPL, Version 1.1 or - as soon they will be approved by - * the European Commission - subsequent versions of the EUPL (the "Licence"); - * You may not use this work except in compliance with the Licence. - * You may obtain a copy of the Licence at: - * http://www.osor.eu/eupl/ - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the Licence is distributed on an "AS IS" basis, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the Licence for the specific language governing permissions and - * limitations under the Licence. - * - * This product combines work with different licenses. See the "NOTICE" text - * file for details on the various modules and licenses. - * The "NOTICE" text file is part of the distribution. Any derivative works - * that you distribute must include a readable copy of the "NOTICE" text file. - * - * $Id: ContainerParseResult.java,v 1.1 2006/08/25 17:00:59 wprinz Exp $ - */ -package at.knowcenter.wag.exactparser.parsing.results; - -/** - * Base class of container parse results. - * - *

- * Containers are types that include some content. - * E.g. literal strings include string data as content, - * arrays include elements as content etc. - *

- * - * @author wprinz - */ -public class ContainerParseResult extends ParseResult { - - public int content_start_index = -1; - public int content_end_index = -1; - - -} diff --git a/src/main/java/at/knowcenter/wag/exactparser/parsing/results/DictionaryParseResult.java b/src/main/java/at/knowcenter/wag/exactparser/parsing/results/DictionaryParseResult.java deleted file mode 100644 index 47101e0..0000000 --- a/src/main/java/at/knowcenter/wag/exactparser/parsing/results/DictionaryParseResult.java +++ /dev/null @@ -1,41 +0,0 @@ -/** - * Copyright 2006 by Know-Center, Graz, Austria - * PDF-AS has been contracted by the E-Government Innovation Center EGIZ, a - * joint initiative of the Federal Chancellery Austria and Graz University of - * Technology. - * - * Licensed under the EUPL, Version 1.1 or - as soon they will be approved by - * the European Commission - subsequent versions of the EUPL (the "Licence"); - * You may not use this work except in compliance with the Licence. - * You may obtain a copy of the Licence at: - * http://www.osor.eu/eupl/ - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the Licence is distributed on an "AS IS" basis, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the Licence for the specific language governing permissions and - * limitations under the Licence. - * - * This product combines work with different licenses. See the "NOTICE" text - * file for details on the various modules and licenses. - * The "NOTICE" text file is part of the distribution. Any derivative works - * that you distribute must include a readable copy of the "NOTICE" text file. - * - * $Id: DictionaryParseResult.java,v 1.1 2006/08/25 17:00:59 wprinz Exp $ - */ -package at.knowcenter.wag.exactparser.parsing.results; - -import java.util.List; - -/** - * The result of parsing a dictionary. - * - * @author wprinz - */ -public class DictionaryParseResult extends ContainerParseResult -{ - - public List names = null; - - public List values = null; -} diff --git a/src/main/java/at/knowcenter/wag/exactparser/parsing/results/EOFParseResult.java b/src/main/java/at/knowcenter/wag/exactparser/parsing/results/EOFParseResult.java deleted file mode 100644 index dea1d22..0000000 --- a/src/main/java/at/knowcenter/wag/exactparser/parsing/results/EOFParseResult.java +++ /dev/null @@ -1,47 +0,0 @@ -/** - * Copyright 2006 by Know-Center, Graz, Austria - * PDF-AS has been contracted by the E-Government Innovation Center EGIZ, a - * joint initiative of the Federal Chancellery Austria and Graz University of - * Technology. - * - * Licensed under the EUPL, Version 1.1 or - as soon they will be approved by - * the European Commission - subsequent versions of the EUPL (the "Licence"); - * You may not use this work except in compliance with the Licence. - * You may obtain a copy of the Licence at: - * http://www.osor.eu/eupl/ - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the Licence is distributed on an "AS IS" basis, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the Licence for the specific language governing permissions and - * limitations under the Licence. - * - * This product combines work with different licenses. See the "NOTICE" text - * file for details on the various modules and licenses. - * The "NOTICE" text file is part of the distribution. Any derivative works - * that you distribute must include a readable copy of the "NOTICE" text file. - * - * $Id: EOFParseResult.java,v 1.1 2006/08/25 17:00:59 wprinz Exp $ - */ -package at.knowcenter.wag.exactparser.parsing.results; - -/** - * The result of parsing the End Of File marker. - * - * @author wprinz - */ -public class EOFParseResult extends ParseResult -{ - - /** - * The index of the byte after the EOF marker. - * - *

- * A newline is not necessary after the EOF marker, but if it is present it will be considered - * as part of it. - * So eof_end_index marks this newline. - * If eof_end_index == next_index, then no new line is present. - *

- */ - public int eof_end_index = -1; -} diff --git a/src/main/java/at/knowcenter/wag/exactparser/parsing/results/FooterParseResult.java b/src/main/java/at/knowcenter/wag/exactparser/parsing/results/FooterParseResult.java deleted file mode 100644 index 2a52aa6..0000000 --- a/src/main/java/at/knowcenter/wag/exactparser/parsing/results/FooterParseResult.java +++ /dev/null @@ -1,53 +0,0 @@ -/** - * Copyright 2006 by Know-Center, Graz, Austria - * PDF-AS has been contracted by the E-Government Innovation Center EGIZ, a - * joint initiative of the Federal Chancellery Austria and Graz University of - * Technology. - * - * Licensed under the EUPL, Version 1.1 or - as soon they will be approved by - * the European Commission - subsequent versions of the EUPL (the "Licence"); - * You may not use this work except in compliance with the Licence. - * You may obtain a copy of the Licence at: - * http://www.osor.eu/eupl/ - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the Licence is distributed on an "AS IS" basis, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the Licence for the specific language governing permissions and - * limitations under the Licence. - * - * This product combines work with different licenses. See the "NOTICE" text - * file for details on the various modules and licenses. - * The "NOTICE" text file is part of the distribution. Any derivative works - * that you distribute must include a readable copy of the "NOTICE" text file. - * - * $Id: FooterParseResult.java,v 1.1 2006/08/25 17:00:59 wprinz Exp $ - */ -package at.knowcenter.wag.exactparser.parsing.results; - - -/** - * The result of parsing a PDF footer block. - * - *

- * A PDF footer block starts with the xref table followed by the trailer, the - * startxref and finally the EOF marker. Usually the footer should be at the end - * of the file. All object offsets in the footer's xref table should be before - * the footer itself. Nevertheless, there are PDF Writers (e.g. Microsoft Word) - * that put the footer at the beginning of the document so that all indirect - * objects are after the EOF marker. - *

- * - * @author wprinz - */ -public class FooterParseResult extends ParseResult -{ - - public StartXRefParseResult sxpr = null; - - public EOFParseResult eofpr = null; - - public XRefSectionParseResult xpr = null; - - public TrailerParseResult tpr = null; -} diff --git a/src/main/java/at/knowcenter/wag/exactparser/parsing/results/HeaderParseResult.java b/src/main/java/at/knowcenter/wag/exactparser/parsing/results/HeaderParseResult.java deleted file mode 100644 index 3befda3..0000000 --- a/src/main/java/at/knowcenter/wag/exactparser/parsing/results/HeaderParseResult.java +++ /dev/null @@ -1,48 +0,0 @@ -/** - * Copyright 2006 by Know-Center, Graz, Austria - * PDF-AS has been contracted by the E-Government Innovation Center EGIZ, a - * joint initiative of the Federal Chancellery Austria and Graz University of - * Technology. - * - * Licensed under the EUPL, Version 1.1 or - as soon they will be approved by - * the European Commission - subsequent versions of the EUPL (the "Licence"); - * You may not use this work except in compliance with the Licence. - * You may obtain a copy of the Licence at: - * http://www.osor.eu/eupl/ - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the Licence is distributed on an "AS IS" basis, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the Licence for the specific language governing permissions and - * limitations under the Licence. - * - * This product combines work with different licenses. See the "NOTICE" text - * file for details on the various modules and licenses. - * The "NOTICE" text file is part of the distribution. Any derivative works - * that you distribute must include a readable copy of the "NOTICE" text file. - * - * $Id: HeaderParseResult.java,v 1.1 2006/08/25 17:00:59 wprinz Exp $ - */ -package at.knowcenter.wag.exactparser.parsing.results; - -/** - * The result of parsing the PDF header. - * - *

- * The header contains the PDF version and is usually followed by some binary - * characers. - *

- * - * @author wprinz - */ -public class HeaderParseResult extends ParseResult -{ - public int major_index = -1; - public int minor_index = -1; - - public int major = -1; - public int minor = -1; - - public int binary_characters_index = -1; - -} diff --git a/src/main/java/at/knowcenter/wag/exactparser/parsing/results/HexStringParseResult.java b/src/main/java/at/knowcenter/wag/exactparser/parsing/results/HexStringParseResult.java deleted file mode 100644 index 27dbf70..0000000 --- a/src/main/java/at/knowcenter/wag/exactparser/parsing/results/HexStringParseResult.java +++ /dev/null @@ -1,36 +0,0 @@ -/** - * Copyright 2006 by Know-Center, Graz, Austria - * PDF-AS has been contracted by the E-Government Innovation Center EGIZ, a - * joint initiative of the Federal Chancellery Austria and Graz University of - * Technology. - * - * Licensed under the EUPL, Version 1.1 or - as soon they will be approved by - * the European Commission - subsequent versions of the EUPL (the "Licence"); - * You may not use this work except in compliance with the Licence. - * You may obtain a copy of the Licence at: - * http://www.osor.eu/eupl/ - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the Licence is distributed on an "AS IS" basis, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the Licence for the specific language governing permissions and - * limitations under the Licence. - * - * This product combines work with different licenses. See the "NOTICE" text - * file for details on the various modules and licenses. - * The "NOTICE" text file is part of the distribution. Any derivative works - * that you distribute must include a readable copy of the "NOTICE" text file. - * - * $Id: HexStringParseResult.java,v 1.1 2006/08/25 17:00:59 wprinz Exp $ - */ -package at.knowcenter.wag.exactparser.parsing.results; - -/** - * The result of parsing a hex string. - * - * @see at.knowcenter.wag.exactparser.parsing.results.LiteralStringParseResult - * - * @author wprinz - */ -public class HexStringParseResult extends ContainerParseResult { -} diff --git a/src/main/java/at/knowcenter/wag/exactparser/parsing/results/IndirectObjectReferenceParseResult.java b/src/main/java/at/knowcenter/wag/exactparser/parsing/results/IndirectObjectReferenceParseResult.java deleted file mode 100644 index 797678e..0000000 --- a/src/main/java/at/knowcenter/wag/exactparser/parsing/results/IndirectObjectReferenceParseResult.java +++ /dev/null @@ -1,44 +0,0 @@ -/** - * Copyright 2006 by Know-Center, Graz, Austria - * PDF-AS has been contracted by the E-Government Innovation Center EGIZ, a - * joint initiative of the Federal Chancellery Austria and Graz University of - * Technology. - * - * Licensed under the EUPL, Version 1.1 or - as soon they will be approved by - * the European Commission - subsequent versions of the EUPL (the "Licence"); - * You may not use this work except in compliance with the Licence. - * You may obtain a copy of the Licence at: - * http://www.osor.eu/eupl/ - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the Licence is distributed on an "AS IS" basis, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the Licence for the specific language governing permissions and - * limitations under the Licence. - * - * This product combines work with different licenses. See the "NOTICE" text - * file for details on the various modules and licenses. - * The "NOTICE" text file is part of the distribution. Any derivative works - * that you distribute must include a readable copy of the "NOTICE" text file. - * - * $Id: IndirectObjectReferenceParseResult.java,v 1.1 2006/08/25 17:00:59 wprinz Exp $ - */ -package at.knowcenter.wag.exactparser.parsing.results; - -import at.knowcenter.wag.exactparser.parsing.IndirectObjectReference; - -/** - * The ParseResult of parsing an indirect object reference. - * - * @author wprinz - */ -public class IndirectObjectReferenceParseResult extends ParseResult { - - public IndirectObjectReference ior; - - //@Override - public String toString() - { - return ior.toString() + " R"; - } -} diff --git a/src/main/java/at/knowcenter/wag/exactparser/parsing/results/IntegerParseResult.java b/src/main/java/at/knowcenter/wag/exactparser/parsing/results/IntegerParseResult.java deleted file mode 100644 index 48ea7d2..0000000 --- a/src/main/java/at/knowcenter/wag/exactparser/parsing/results/IntegerParseResult.java +++ /dev/null @@ -1,36 +0,0 @@ -/** - * Copyright 2006 by Know-Center, Graz, Austria - * PDF-AS has been contracted by the E-Government Innovation Center EGIZ, a - * joint initiative of the Federal Chancellery Austria and Graz University of - * Technology. - * - * Licensed under the EUPL, Version 1.1 or - as soon they will be approved by - * the European Commission - subsequent versions of the EUPL (the "Licence"); - * You may not use this work except in compliance with the Licence. - * You may obtain a copy of the Licence at: - * http://www.osor.eu/eupl/ - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the Licence is distributed on an "AS IS" basis, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the Licence for the specific language governing permissions and - * limitations under the Licence. - * - * This product combines work with different licenses. See the "NOTICE" text - * file for details on the various modules and licenses. - * The "NOTICE" text file is part of the distribution. Any derivative works - * that you distribute must include a readable copy of the "NOTICE" text file. - * - * $Id: IntegerParseResult.java,v 1.1 2006/08/25 17:00:59 wprinz Exp $ - */ -package at.knowcenter.wag.exactparser.parsing.results; - -/** - * @author wprinz - */ -public class IntegerParseResult extends ParseResult -{ - - public int number; - -} diff --git a/src/main/java/at/knowcenter/wag/exactparser/parsing/results/LiteralStringParseResult.java b/src/main/java/at/knowcenter/wag/exactparser/parsing/results/LiteralStringParseResult.java deleted file mode 100644 index 60fc277..0000000 --- a/src/main/java/at/knowcenter/wag/exactparser/parsing/results/LiteralStringParseResult.java +++ /dev/null @@ -1,37 +0,0 @@ -/** - * Copyright 2006 by Know-Center, Graz, Austria - * PDF-AS has been contracted by the E-Government Innovation Center EGIZ, a - * joint initiative of the Federal Chancellery Austria and Graz University of - * Technology. - * - * Licensed under the EUPL, Version 1.1 or - as soon they will be approved by - * the European Commission - subsequent versions of the EUPL (the "Licence"); - * You may not use this work except in compliance with the Licence. - * You may obtain a copy of the Licence at: - * http://www.osor.eu/eupl/ - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the Licence is distributed on an "AS IS" basis, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the Licence for the specific language governing permissions and - * limitations under the Licence. - * - * This product combines work with different licenses. See the "NOTICE" text - * file for details on the various modules and licenses. - * The "NOTICE" text file is part of the distribution. Any derivative works - * that you distribute must include a readable copy of the "NOTICE" text file. - * - * $Id: LiteralStringParseResult.java,v 1.1 2006/08/25 17:00:59 wprinz Exp $ - */ -package at.knowcenter.wag.exactparser.parsing.results; - -/** - * The result of parsing a simple string (ASCII string). - * - * @see at.knowcenter.wag.exactparser.parsing.results.HexStringParseResult - * - * @author wprinz - */ -public class LiteralStringParseResult extends ContainerParseResult { - -} diff --git a/src/main/java/at/knowcenter/wag/exactparser/parsing/results/NameParseResult.java b/src/main/java/at/knowcenter/wag/exactparser/parsing/results/NameParseResult.java deleted file mode 100644 index e564285..0000000 --- a/src/main/java/at/knowcenter/wag/exactparser/parsing/results/NameParseResult.java +++ /dev/null @@ -1,35 +0,0 @@ -/** - * Copyright 2006 by Know-Center, Graz, Austria - * PDF-AS has been contracted by the E-Government Innovation Center EGIZ, a - * joint initiative of the Federal Chancellery Austria and Graz University of - * Technology. - * - * Licensed under the EUPL, Version 1.1 or - as soon they will be approved by - * the European Commission - subsequent versions of the EUPL (the "Licence"); - * You may not use this work except in compliance with the Licence. - * You may obtain a copy of the Licence at: - * http://www.osor.eu/eupl/ - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the Licence is distributed on an "AS IS" basis, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the Licence for the specific language governing permissions and - * limitations under the Licence. - * - * This product combines work with different licenses. See the "NOTICE" text - * file for details on the various modules and licenses. - * The "NOTICE" text file is part of the distribution. Any derivative works - * that you distribute must include a readable copy of the "NOTICE" text file. - * - * $Id: NameParseResult.java,v 1.1 2006/08/25 17:00:59 wprinz Exp $ - */ -package at.knowcenter.wag.exactparser.parsing.results; - -/** - * @author wprinz - */ -public class NameParseResult extends ParseResult { - - public int name_start_index = -1; - -} diff --git a/src/main/java/at/knowcenter/wag/exactparser/parsing/results/NullParseResult.java b/src/main/java/at/knowcenter/wag/exactparser/parsing/results/NullParseResult.java deleted file mode 100644 index 49d9dfb..0000000 --- a/src/main/java/at/knowcenter/wag/exactparser/parsing/results/NullParseResult.java +++ /dev/null @@ -1,34 +0,0 @@ -/** - * Copyright 2006 by Know-Center, Graz, Austria - * PDF-AS has been contracted by the E-Government Innovation Center EGIZ, a - * joint initiative of the Federal Chancellery Austria and Graz University of - * Technology. - * - * Licensed under the EUPL, Version 1.1 or - as soon they will be approved by - * the European Commission - subsequent versions of the EUPL (the "Licence"); - * You may not use this work except in compliance with the Licence. - * You may obtain a copy of the Licence at: - * http://www.osor.eu/eupl/ - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the Licence is distributed on an "AS IS" basis, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the Licence for the specific language governing permissions and - * limitations under the Licence. - * - * This product combines work with different licenses. See the "NOTICE" text - * file for details on the various modules and licenses. - * The "NOTICE" text file is part of the distribution. Any derivative works - * that you distribute must include a readable copy of the "NOTICE" text file. - * - * $Id: NullParseResult.java,v 1.1 2006/08/25 17:00:59 wprinz Exp $ - */ -package at.knowcenter.wag.exactparser.parsing.results; - -/** - * The result of parsing a "null". - * - * @author wprinz - */ -public class NullParseResult extends ParseResult { -} diff --git a/src/main/java/at/knowcenter/wag/exactparser/parsing/results/NumberParseResult.java b/src/main/java/at/knowcenter/wag/exactparser/parsing/results/NumberParseResult.java deleted file mode 100644 index e88596c..0000000 --- a/src/main/java/at/knowcenter/wag/exactparser/parsing/results/NumberParseResult.java +++ /dev/null @@ -1,41 +0,0 @@ -/** - * Copyright 2006 by Know-Center, Graz, Austria - * PDF-AS has been contracted by the E-Government Innovation Center EGIZ, a - * joint initiative of the Federal Chancellery Austria and Graz University of - * Technology. - * - * Licensed under the EUPL, Version 1.1 or - as soon they will be approved by - * the European Commission - subsequent versions of the EUPL (the "Licence"); - * You may not use this work except in compliance with the Licence. - * You may obtain a copy of the Licence at: - * http://www.osor.eu/eupl/ - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the Licence is distributed on an "AS IS" basis, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the Licence for the specific language governing permissions and - * limitations under the Licence. - * - * This product combines work with different licenses. See the "NOTICE" text - * file for details on the various modules and licenses. - * The "NOTICE" text file is part of the distribution. Any derivative works - * that you distribute must include a readable copy of the "NOTICE" text file. - * - * $Id: NumberParseResult.java,v 1.1 2006/08/25 17:00:59 wprinz Exp $ - */ -package at.knowcenter.wag.exactparser.parsing.results; - -/** - * The ParseResult of parsing an integer number. - * - * @author wprinz - */ -public class NumberParseResult extends ParseResult { - /** - * The (signed) integer number. - */ - public int number; - - // TODO: make better - public float floating; -} \ No newline at end of file diff --git a/src/main/java/at/knowcenter/wag/exactparser/parsing/results/ObjectHeaderParseResult.java b/src/main/java/at/knowcenter/wag/exactparser/parsing/results/ObjectHeaderParseResult.java deleted file mode 100644 index 0729108..0000000 --- a/src/main/java/at/knowcenter/wag/exactparser/parsing/results/ObjectHeaderParseResult.java +++ /dev/null @@ -1,51 +0,0 @@ -/** - * Copyright 2006 by Know-Center, Graz, Austria - * PDF-AS has been contracted by the E-Government Innovation Center EGIZ, a - * joint initiative of the Federal Chancellery Austria and Graz University of - * Technology. - * - * Licensed under the EUPL, Version 1.1 or - as soon they will be approved by - * the European Commission - subsequent versions of the EUPL (the "Licence"); - * You may not use this work except in compliance with the Licence. - * You may obtain a copy of the Licence at: - * http://www.osor.eu/eupl/ - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the Licence is distributed on an "AS IS" basis, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the Licence for the specific language governing permissions and - * limitations under the Licence. - * - * This product combines work with different licenses. See the "NOTICE" text - * file for details on the various modules and licenses. - * The "NOTICE" text file is part of the distribution. Any derivative works - * that you distribute must include a readable copy of the "NOTICE" text file. - * - * $Id: ObjectHeaderParseResult.java,v 1.1 2006/08/25 17:00:59 wprinz Exp $ - */ -package at.knowcenter.wag.exactparser.parsing.results; - -/** - * The ParseResult of a parsing an object header. - * - *

- * Note that this information regards only the object header and not the - * contents of the object itself. (meaning: next points to the contents and not - * to the end of the whole object) - *

- * - * @author Administrator - */ -public class ObjectHeaderParseResult extends ParseResult { - - /** - * The object's object number. - */ - public int object_number = -1; - - /** - * The object's generation number. - */ - public int generation_number = -1; - -} diff --git a/src/main/java/at/knowcenter/wag/exactparser/parsing/results/ObjectParseResult.java b/src/main/java/at/knowcenter/wag/exactparser/parsing/results/ObjectParseResult.java deleted file mode 100644 index 2fdde34..0000000 --- a/src/main/java/at/knowcenter/wag/exactparser/parsing/results/ObjectParseResult.java +++ /dev/null @@ -1,50 +0,0 @@ -/** - * Copyright 2006 by Know-Center, Graz, Austria - * PDF-AS has been contracted by the E-Government Innovation Center EGIZ, a - * joint initiative of the Federal Chancellery Austria and Graz University of - * Technology. - * - * Licensed under the EUPL, Version 1.1 or - as soon they will be approved by - * the European Commission - subsequent versions of the EUPL (the "Licence"); - * You may not use this work except in compliance with the Licence. - * You may obtain a copy of the Licence at: - * http://www.osor.eu/eupl/ - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the Licence is distributed on an "AS IS" basis, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the Licence for the specific language governing permissions and - * limitations under the Licence. - * - * This product combines work with different licenses. See the "NOTICE" text - * file for details on the various modules and licenses. - * The "NOTICE" text file is part of the distribution. Any derivative works - * that you distribute must include a readable copy of the "NOTICE" text file. - * - * $Id: ObjectParseResult.java,v 1.1 2006/08/25 17:00:59 wprinz Exp $ - */ -package at.knowcenter.wag.exactparser.parsing.results; - - -/** - * The ParseResult of parsing an indirect object. - * @author wprinz - */ -public class ObjectParseResult extends ParseResult { - - public int content_index = -1; - public int end_of_content_index = -1; - - public ObjectHeaderParseResult header = null; - -/* enum ObjectType - { - UNKNOWN_TO_PARSER, - OBJ_DICTIONARY - }; - - public ObjectType object_type = ObjectType.UNKNOWN_TO_PARSER; - */ - public ParseResult object = null; - -} diff --git a/src/main/java/at/knowcenter/wag/exactparser/parsing/results/ParseResult.java b/src/main/java/at/knowcenter/wag/exactparser/parsing/results/ParseResult.java deleted file mode 100644 index 12c4b19..0000000 --- a/src/main/java/at/knowcenter/wag/exactparser/parsing/results/ParseResult.java +++ /dev/null @@ -1,50 +0,0 @@ -/** - * Copyright 2006 by Know-Center, Graz, Austria - * PDF-AS has been contracted by the E-Government Innovation Center EGIZ, a - * joint initiative of the Federal Chancellery Austria and Graz University of - * Technology. - * - * Licensed under the EUPL, Version 1.1 or - as soon they will be approved by - * the European Commission - subsequent versions of the EUPL (the "Licence"); - * You may not use this work except in compliance with the Licence. - * You may obtain a copy of the Licence at: - * http://www.osor.eu/eupl/ - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the Licence is distributed on an "AS IS" basis, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the Licence for the specific language governing permissions and - * limitations under the Licence. - * - * This product combines work with different licenses. See the "NOTICE" text - * file for details on the various modules and licenses. - * The "NOTICE" text file is part of the distribution. Any derivative works - * that you distribute must include a readable copy of the "NOTICE" text file. - * - * $Id: ParseResult.java,v 1.1 2006/08/25 17:00:59 wprinz Exp $ - */ -package at.knowcenter.wag.exactparser.parsing.results; - -/** - * Base class of all parse results. - * - * @author wprinz - */ -public class ParseResult { - - /** - * The start index, where the parser started its work and where the parsed - * entity begins. - */ - public int start_index = -1; - - /** - * The index of the next entity following the currently parsed entity. - * - *

- * This is the index of the first byte not belonging to this entity anymore. - *

- */ - public int next_index = -1; - -} diff --git a/src/main/java/at/knowcenter/wag/exactparser/parsing/results/StartXRefParseResult.java b/src/main/java/at/knowcenter/wag/exactparser/parsing/results/StartXRefParseResult.java deleted file mode 100644 index a1f6792..0000000 --- a/src/main/java/at/knowcenter/wag/exactparser/parsing/results/StartXRefParseResult.java +++ /dev/null @@ -1,36 +0,0 @@ -/** - * Copyright 2006 by Know-Center, Graz, Austria - * PDF-AS has been contracted by the E-Government Innovation Center EGIZ, a - * joint initiative of the Federal Chancellery Austria and Graz University of - * Technology. - * - * Licensed under the EUPL, Version 1.1 or - as soon they will be approved by - * the European Commission - subsequent versions of the EUPL (the "Licence"); - * You may not use this work except in compliance with the Licence. - * You may obtain a copy of the Licence at: - * http://www.osor.eu/eupl/ - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the Licence is distributed on an "AS IS" basis, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the Licence for the specific language governing permissions and - * limitations under the Licence. - * - * This product combines work with different licenses. See the "NOTICE" text - * file for details on the various modules and licenses. - * The "NOTICE" text file is part of the distribution. Any derivative works - * that you distribute must include a readable copy of the "NOTICE" text file. - * - * $Id: StartXRefParseResult.java,v 1.1 2006/08/25 17:00:59 wprinz Exp $ - */ -package at.knowcenter.wag.exactparser.parsing.results; - - -/** - * The ParseResult of parsing a startxref entry. - * @author wprinz - */ -public class StartXRefParseResult extends ParseResult { - - public int xref_index; -} diff --git a/src/main/java/at/knowcenter/wag/exactparser/parsing/results/StreamParseResult.java b/src/main/java/at/knowcenter/wag/exactparser/parsing/results/StreamParseResult.java deleted file mode 100644 index 16da12a..0000000 --- a/src/main/java/at/knowcenter/wag/exactparser/parsing/results/StreamParseResult.java +++ /dev/null @@ -1,41 +0,0 @@ -/** - * Copyright 2006 by Know-Center, Graz, Austria - * PDF-AS has been contracted by the E-Government Innovation Center EGIZ, a - * joint initiative of the Federal Chancellery Austria and Graz University of - * Technology. - * - * Licensed under the EUPL, Version 1.1 or - as soon they will be approved by - * the European Commission - subsequent versions of the EUPL (the "Licence"); - * You may not use this work except in compliance with the Licence. - * You may obtain a copy of the Licence at: - * http://www.osor.eu/eupl/ - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the Licence is distributed on an "AS IS" basis, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the Licence for the specific language governing permissions and - * limitations under the Licence. - * - * This product combines work with different licenses. See the "NOTICE" text - * file for details on the various modules and licenses. - * The "NOTICE" text file is part of the distribution. Any derivative works - * that you distribute must include a readable copy of the "NOTICE" text file. - * - * $Id: StreamParseResult.java,v 1.1 2006/08/25 17:00:59 wprinz Exp $ - */ -package at.knowcenter.wag.exactparser.parsing.results; - - -/** - * The result of parsing a hex string. - * - * @see at.knowcenter.wag.exactparser.parsing.results.LiteralStringParseResult - * - * @author wprinz - */ -public class StreamParseResult extends ContainerParseResult { - - public DictionaryParseResult stream_dictionary = null; - - public int stream_start_index = -1; -} diff --git a/src/main/java/at/knowcenter/wag/exactparser/parsing/results/TrailerParseResult.java b/src/main/java/at/knowcenter/wag/exactparser/parsing/results/TrailerParseResult.java deleted file mode 100644 index 4589ee8..0000000 --- a/src/main/java/at/knowcenter/wag/exactparser/parsing/results/TrailerParseResult.java +++ /dev/null @@ -1,84 +0,0 @@ -/** - * Copyright 2006 by Know-Center, Graz, Austria - * PDF-AS has been contracted by the E-Government Innovation Center EGIZ, a - * joint initiative of the Federal Chancellery Austria and Graz University of - * Technology. - * - * Licensed under the EUPL, Version 1.1 or - as soon they will be approved by - * the European Commission - subsequent versions of the EUPL (the "Licence"); - * You may not use this work except in compliance with the Licence. - * You may obtain a copy of the Licence at: - * http://www.osor.eu/eupl/ - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the Licence is distributed on an "AS IS" basis, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the Licence for the specific language governing permissions and - * limitations under the Licence. - * - * This product combines work with different licenses. See the "NOTICE" text - * file for details on the various modules and licenses. - * The "NOTICE" text file is part of the distribution. Any derivative works - * that you distribute must include a readable copy of the "NOTICE" text file. - * - * $Id: TrailerParseResult.java,v 1.1 2006/08/25 17:00:59 wprinz Exp $ - */ -package at.knowcenter.wag.exactparser.parsing.results; - -/** - * The ParseResult of parsing the trailer. - * - * @author wprinz - */ -public class TrailerParseResult extends ParseResult { - - public int contents_index = -1; - public int contents_end_index = -1; - - public DictionaryParseResult dpr = null; - - public IndirectObjectReferenceParseResult info; - - public IndirectObjectReferenceParseResult root; - - /** - * The content of the "/Size" entry. - */ - public int size; - - /** - * Tells, if this PDF footer has a predecessor (as specified by - * the /Prev entry). - */ - public boolean has_predecessor = false; - - /** - * The index of the predecessor. - * - *

- * Only valid if has_predecessor is true. - *

- *

- * Use getPrev and setPrev to access this member variable. - *

- * - * @see #getPrev() - * @see #setPrev(int) - */ - private int prev = -1; - - public int getPrev() { - assert has_predecessor; - return prev; - } - - public void setPrev(int prev) { - assert has_predecessor : "Set has_predecessor to true first."; - this.prev = prev; - } - - - - - -} diff --git a/src/main/java/at/knowcenter/wag/exactparser/parsing/results/XRefLineParseResult.java b/src/main/java/at/knowcenter/wag/exactparser/parsing/results/XRefLineParseResult.java deleted file mode 100644 index 8039153..0000000 --- a/src/main/java/at/knowcenter/wag/exactparser/parsing/results/XRefLineParseResult.java +++ /dev/null @@ -1,40 +0,0 @@ -/** - * Copyright 2006 by Know-Center, Graz, Austria - * PDF-AS has been contracted by the E-Government Innovation Center EGIZ, a - * joint initiative of the Federal Chancellery Austria and Graz University of - * Technology. - * - * Licensed under the EUPL, Version 1.1 or - as soon they will be approved by - * the European Commission - subsequent versions of the EUPL (the "Licence"); - * You may not use this work except in compliance with the Licence. - * You may obtain a copy of the Licence at: - * http://www.osor.eu/eupl/ - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the Licence is distributed on an "AS IS" basis, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the Licence for the specific language governing permissions and - * limitations under the Licence. - * - * This product combines work with different licenses. See the "NOTICE" text - * file for details on the various modules and licenses. - * The "NOTICE" text file is part of the distribution. Any derivative works - * that you distribute must include a readable copy of the "NOTICE" text file. - * - * $Id: XRefLineParseResult.java,v 1.1 2006/08/25 17:00:59 wprinz Exp $ - */ -package at.knowcenter.wag.exactparser.parsing.results; - -/** - * The ParseResult of parsing a single xref line. - * - * @author wprinz - */ -public class XRefLineParseResult extends ParseResult { - - public int object_offset; - - public int generation_number; - - public byte object_usage; -} diff --git a/src/main/java/at/knowcenter/wag/exactparser/parsing/results/XRefSectionParseResult.java b/src/main/java/at/knowcenter/wag/exactparser/parsing/results/XRefSectionParseResult.java deleted file mode 100644 index eedea81..0000000 --- a/src/main/java/at/knowcenter/wag/exactparser/parsing/results/XRefSectionParseResult.java +++ /dev/null @@ -1,66 +0,0 @@ -/** - * Copyright 2006 by Know-Center, Graz, Austria - * PDF-AS has been contracted by the E-Government Innovation Center EGIZ, a - * joint initiative of the Federal Chancellery Austria and Graz University of - * Technology. - * - * Licensed under the EUPL, Version 1.1 or - as soon they will be approved by - * the European Commission - subsequent versions of the EUPL (the "Licence"); - * You may not use this work except in compliance with the Licence. - * You may obtain a copy of the Licence at: - * http://www.osor.eu/eupl/ - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the Licence is distributed on an "AS IS" basis, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the Licence for the specific language governing permissions and - * limitations under the Licence. - * - * This product combines work with different licenses. See the "NOTICE" text - * file for details on the various modules and licenses. - * The "NOTICE" text file is part of the distribution. Any derivative works - * that you distribute must include a readable copy of the "NOTICE" text file. - * - * $Id: XRefSectionParseResult.java,v 1.1 2006/08/25 17:00:59 wprinz Exp $ - */ -package at.knowcenter.wag.exactparser.parsing.results; - -import java.util.ArrayList; -import java.util.List; - -/** - * The ParseResult of an xref parsing operation. - * - *

- * This contains one whole xref table section. An xref section starts with the - * word xref and contains one or more xref sub-sections. - *

- *

- * Due to Incremental Updates, there may be more than one xref section in a - * document. All xref section together are called the xref table. Using this - * aggregated xref table, an application has the full access to all indirect - * objects in the document. - *

- *

- * In many PDF libraries and applications one xref section is also informally - * called xref table. - *

- * - * @author wprinz - */ -public class XRefSectionParseResult extends ParseResult -{ - - public List xref_subsections = new ArrayList(); - - /** - * Appends another cross-reference (xref) sub-section to the xref table. - * - * @param xref_section - * The xref section to be appended. - */ - public void appendXRefSubSection(XRefSubSectionParseResult xref_section) - { - xref_subsections.add(xref_section); - } -} diff --git a/src/main/java/at/knowcenter/wag/exactparser/parsing/results/XRefSubSectionParseResult.java b/src/main/java/at/knowcenter/wag/exactparser/parsing/results/XRefSubSectionParseResult.java deleted file mode 100644 index ec19004..0000000 --- a/src/main/java/at/knowcenter/wag/exactparser/parsing/results/XRefSubSectionParseResult.java +++ /dev/null @@ -1,59 +0,0 @@ -/** - * Copyright 2006 by Know-Center, Graz, Austria - * PDF-AS has been contracted by the E-Government Innovation Center EGIZ, a - * joint initiative of the Federal Chancellery Austria and Graz University of - * Technology. - * - * Licensed under the EUPL, Version 1.1 or - as soon they will be approved by - * the European Commission - subsequent versions of the EUPL (the "Licence"); - * You may not use this work except in compliance with the Licence. - * You may obtain a copy of the Licence at: - * http://www.osor.eu/eupl/ - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the Licence is distributed on an "AS IS" basis, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the Licence for the specific language governing permissions and - * limitations under the Licence. - * - * This product combines work with different licenses. See the "NOTICE" text - * file for details on the various modules and licenses. - * The "NOTICE" text file is part of the distribution. Any derivative works - * that you distribute must include a readable copy of the "NOTICE" text file. - * - * $Id: XRefSubSectionParseResult.java,v 1.1 2006/08/25 17:00:59 wprinz Exp $ - */ -package at.knowcenter.wag.exactparser.parsing.results; - -import java.util.ArrayList; -import java.util.List; - -/** - * Contains an xref sub-section. - * - *

- * An xref sub-section is an ordered list of xref lines. The object numbers of the - * corresponding objects are numbered incrementally. - *

- *

- * xref sections are important in Incremental Updates because they allow to - * specify explicitely which objects (object numbers) are contained in the xref. - *

- * - * @author wprinz - */ -public class XRefSubSectionParseResult extends ParseResult { - - public int start_obj_number; - - public int num_objects; - - public List xref_lines = new ArrayList(); - - public void appendXRefLine(XRefLineParseResult xref_line) { - assert xref_lines.size() < num_objects; - - xref_lines.add(xref_line); - } - -} -- cgit v1.2.3