diff options
author | tknall <tknall@7b5415b0-85f9-ee4d-85bd-d5d0c3b42d1c> | 2013-01-09 15:41:29 +0000 |
---|---|---|
committer | tknall <tknall@7b5415b0-85f9-ee4d-85bd-d5d0c3b42d1c> | 2013-01-09 15:41:29 +0000 |
commit | 535a04fa05f739ec16dd81666e3b0f82dfbd442d (patch) | |
tree | 0804f301c1a9ceb303a8441b7b29244fc8eb7ff0 /src/main/java/at/knowcenter/wag/exactparser | |
parent | 1efaf6fd5619dfa95c9d7e8c71eda4c2ffba4998 (diff) | |
download | pdf-as-3-535a04fa05f739ec16dd81666e3b0f82dfbd442d.tar.gz pdf-as-3-535a04fa05f739ec16dd81666e3b0f82dfbd442d.tar.bz2 pdf-as-3-535a04fa05f739ec16dd81666e3b0f82dfbd442d.zip |
pdf-as-lib maven project files moved to pdf-as-lib
git-svn-id: https://joinup.ec.europa.eu/svn/pdf-as/pdf-as/trunk@926 7b5415b0-85f9-ee4d-85bd-d5d0c3b42d1c
Diffstat (limited to 'src/main/java/at/knowcenter/wag/exactparser')
28 files changed, 0 insertions, 3120 deletions
diff --git a/src/main/java/at/knowcenter/wag/exactparser/ByteArrayUtils.java b/src/main/java/at/knowcenter/wag/exactparser/ByteArrayUtils.java deleted file mode 100644 index 4442650..0000000 --- a/src/main/java/at/knowcenter/wag/exactparser/ByteArrayUtils.java +++ /dev/null @@ -1,148 +0,0 @@ -/**
- * <copyright> Copyright 2006 by Know-Center, Graz, Austria </copyright>
- * PDF-AS has been contracted by the E-Government Innovation Center EGIZ, a
- * joint initiative of the Federal Chancellery Austria and Graz University of
- * Technology.
- *
- * Licensed under the EUPL, Version 1.1 or - as soon they will be approved by
- * the European Commission - subsequent versions of the EUPL (the "Licence");
- * You may not use this work except in compliance with the Licence.
- * You may obtain a copy of the Licence at:
- * http://www.osor.eu/eupl/
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the Licence is distributed on an "AS IS" basis,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the Licence for the specific language governing permissions and
- * limitations under the Licence.
- *
- * This product combines work with different licenses. See the "NOTICE" text
- * file for details on the various modules and licenses.
- * The "NOTICE" text file is part of the distribution. Any derivative works
- * that you distribute must include a readable copy of the "NOTICE" text file.
- *
- * $Id: ByteArrayUtils.java,v 1.1 2006/08/25 17:00:59 wprinz Exp $
- */
-package at.knowcenter.wag.exactparser;
-
-import java.io.UnsupportedEncodingException;
-
-/**
- * Static utility methods for searching and comparing byte arrays.
- *
- * @author wprinz
- */
-public abstract class ByteArrayUtils {
-
- /** Name of the charset used by {@link #convertByteArrayToString(byte[])}. */
- public static final String BYTE_ARRAY_ENCODING = "ISO-8859-1";
-
- /**
-  * Converts the byte array to a String using {@link #BYTE_ARRAY_ENCODING}.
-  *
-  * @param data
-  *          The byte array.
-  * @return Returns the String.
-  * @throws UnsupportedEncodingException
-  *           Forwarded exception
-  */
- public static String convertByteArrayToString(final byte[] data) throws UnsupportedEncodingException {
-   return new String(data, BYTE_ARRAY_ENCODING);
- }
-
- /**
-  * Finds the first occurrence of search in data, starting the search at the
-  * given index.
-  *
-  * <p>
-  * An empty search array matches at every position, including the position
-  * directly behind the last byte of data.
-  * </p>
-  *
-  * @param data
-  *          The big array.
-  * @param index
-  *          The index to start searching from.
-  * @param search
-  *          The sought array.
-  * @return Returns the index of the found occurrence or -1 if nothing was
-  *         found.
-  * @throws IndexOutOfBoundsException
-  *           If a negative position has to be examined.
-  */
- public static int indexOf(final byte[] data, final int index, final byte[] search) {
-   final int last_start = data.length - search.length;
-   for (int i = index; i <= last_start; i++) {
-     if (i == data.length) {
-       // Only reachable for an empty search array. compareByteArrays would
-       // reject this position, although the empty array matches here.
-       return i;
-     }
-     if (compareByteArrays(data, i, search)) {
-       return i;
-     }
-   }
-   return -1;
- }
-
- /**
-  * Finds the last occurrence of the array.
-  *
-  * <p>
-  * An empty search array is found at data.length, mirroring
-  * {@link String#lastIndexOf(String)}.
-  * </p>
-  *
-  * @param data
-  *          The source array to be searched.
-  * @param search
-  *          The sought array.
-  * @return Returns the index of the last occurrence - or -1 if nothing was
-  *         found.
-  */
- public static int lastIndexOf(final byte[] data, byte[] search) {
-   if (search.length == 0) {
-     // The loop below would start at data.length, which compareByteArrays
-     // rejects with an IndexOutOfBoundsException.
-     return data.length;
-   }
-   for (int index = data.length - search.length; index >= 0; index--) {
-     if (compareByteArrays(data, index, search)) {
-       return index;
-     }
-   }
-   return -1;
- }
-
- /**
-  * Checks whether search is found in data at exactly the given position.
-  *
-  * @param data
-  *          The source array.
-  * @param index
-  *          An index into the source array marking where the comparison
-  *          should start.
-  * @param search
-  *          The sought array.
-  * @return Returns true if the first search.length bytes of data+index and
-  *         search match exactly. Returns false otherwise, also when fewer
-  *         than search.length bytes are left behind index.
-  * @throws IndexOutOfBoundsException
-  *           If index is negative or not smaller than data.length.
-  */
- public static boolean compareByteArrays(final byte[] data, final int index, byte[] search) {
-   if (index < 0 || index >= data.length) {
-     throw new IndexOutOfBoundsException("The index " + index + " is out of bounds");
-   }
-
-   // index is within [0, data.length) here, so this single check also covers
-   // a search array that is longer than the whole data array.
-   if (search.length > data.length - index) {
-     return false;
-   }
-
-   for (int i = 0; i < search.length; i++) {
-     if (data[index + i] != search[i]) {
-       return false;
-     }
-   }
-
-   return true;
- }
-
- /**
-  * Checks if the sought data byte is contained within the byte array.
-  *
-  * @param byte_array
-  *          The byte array.
-  * @param data
-  *          A data byte sought within the byte array.
-  * @return Returns true if the data byte was found (at least once) in the
-  *         byte array, false otherwise.
-  */
- public static boolean contains(final byte[] byte_array, final byte data) {
-   for (int i = 0; i < byte_array.length; i++) {
-     if (byte_array[i] == data) {
-       return true;
-     }
-   }
-   return false;
- }
-
-}
diff --git a/src/main/java/at/knowcenter/wag/exactparser/ParseDocument.java b/src/main/java/at/knowcenter/wag/exactparser/ParseDocument.java deleted file mode 100644 index fbaa4de..0000000 --- a/src/main/java/at/knowcenter/wag/exactparser/ParseDocument.java +++ /dev/null @@ -1,272 +0,0 @@ -/**
- * <copyright> Copyright 2006 by Know-Center, Graz, Austria </copyright>
- * PDF-AS has been contracted by the E-Government Innovation Center EGIZ, a
- * joint initiative of the Federal Chancellery Austria and Graz University of
- * Technology.
- *
- * Licensed under the EUPL, Version 1.1 or - as soon they will be approved by
- * the European Commission - subsequent versions of the EUPL (the "Licence");
- * You may not use this work except in compliance with the Licence.
- * You may obtain a copy of the Licence at:
- * http://www.osor.eu/eupl/
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the Licence is distributed on an "AS IS" basis,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the Licence for the specific language governing permissions and
- * limitations under the Licence.
- *
- * This product combines work with different licenses. See the "NOTICE" text
- * file for details on the various modules and licenses.
- * The "NOTICE" text file is part of the distribution. Any derivative works
- * that you distribute must include a readable copy of the "NOTICE" text file.
- *
- * $Id: ParseDocument.java,v 1.1 2006/08/25 17:00:59 wprinz Exp $
- */
-package at.knowcenter.wag.exactparser;
-
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Iterator;
-import java.util.List;
-
-import at.knowcenter.wag.exactparser.parsing.PDFUtils;
-import at.knowcenter.wag.exactparser.parsing.results.DictionaryParseResult;
-import at.knowcenter.wag.exactparser.parsing.results.FooterParseResult;
-import at.knowcenter.wag.exactparser.parsing.results.IndirectObjectReferenceParseResult;
-import at.knowcenter.wag.exactparser.parsing.results.NameParseResult;
-import at.knowcenter.wag.exactparser.parsing.results.NumberParseResult;
-import at.knowcenter.wag.exactparser.parsing.results.ObjectParseResult;
-import at.knowcenter.wag.exactparser.parsing.results.StartXRefParseResult;
-import at.knowcenter.wag.exactparser.parsing.results.TrailerParseResult;
-import at.knowcenter.wag.exactparser.parsing.results.XRefSectionParseResult;
-
-
-/**
- * Test class that walks the footers of a PDF document and extracts the
- * original document from an EGIZ-signed one.
- *
- * @author wprinz
- */
-public class ParseDocument
-{
-
- /** Path of the document inspected by {@link #main(String[])}. */
- public static final String DOCUMENT = "C:/wprinz/temp.pdf";
-
- /** Name looked up in the root dictionary to detect the EGIZ dictionary. */
- public static final byte[] EGIZ_DICT_NAME = { 'E', 'G', 'I', 'Z', 'S', 'i',
-     'g', 'D', 'i', 'c', 't' };
-
- /** Name of the EGIZ dictionary entry holding the original document size. */
- public static final byte[] EGIZ_ODS_NAME = { 'O', 'D', 'S' };
-
- public static final byte[] EGIZ_XOBJ_NAME = { 'S', 'i', 'g', 'X', 'O', 'b',
-     'j', 'e', 'c', 't' };
-
- /**
-  * Prints the start and end index of every footer block of {@link #DOCUMENT}
-  * and marks the blocks whose root dictionary contains
-  * {@link #EGIZ_DICT_NAME}.
-  *
-  * @param args
-  *          Not used.
-  */
- public static void main(String[] args)
- {
-
-   try
-   {
-     byte[] pdf = readFile(new File(DOCUMENT));
-
-     List blocks = parseDocument(pdf);
-
-     Iterator it = blocks.iterator();
-     while (it.hasNext())
-     {
-       FooterParseResult bpr = (FooterParseResult) it.next();
-
-       System.out.print("block from " + bpr.start_index + " to " + bpr.next_index);
-
-       if (bpr.tpr.root != null)
-       {
-         int root_index = PDFUtils.getObjectOffsetFromXRefByIndirectObjectReference(bpr.xpr, bpr.tpr.root.ior);
-         ObjectParseResult root_opr = PDFUtils.parseObject(pdf, root_index);
-         DictionaryParseResult root_dpr = (DictionaryParseResult) root_opr.object;
-
-         int egiz_index = PDFUtils.indexOfName(pdf, root_dpr.names, EGIZ_DICT_NAME);
-         if (egiz_index >= 0)
-         {
-           System.out.print(" == EGIZDict");
-         }
-       }
-
-       System.out.println();
-     }
-
-   }
-   catch (IOException e)
-   {
-     e.printStackTrace();
-   }
- }
-
- /**
-  * Collects the footers of the document, starting at the xref section named
-  * by the last startxref and following the trailers' Prev entries.
-  *
-  * @param pdf
-  *          The PDF data.
-  * @return Returns a List of {@link FooterParseResult}s. Each visited footer
-  *         is inserted at the front, so the footer reached through the last
-  *         startxref is the last element.
-  * @throws IOException
-  *           Declared for callers; not thrown by the code visible here.
-  */
- public static List parseDocument(final byte[] pdf) throws IOException
- {
-   List blocks = new ArrayList();
-
-   int last_start_xref = PDFUtils.findLastStartXRef(pdf);
-   StartXRefParseResult last_sxpr = PDFUtils.parseStartXRef(pdf, last_start_xref);
-   int xref_index = last_sxpr.xref_index;
-
-   for (;;)
-   {
-     FooterParseResult fpr = PDFUtils.parseFooter(pdf, xref_index);
-     blocks.add(0, fpr);
-
-     if (!fpr.tpr.has_predecessor)
-     {
-       // eventually parse the PDF header here.
-       break;
-     }
-
-     xref_index = fpr.tpr.getPrev();
-   }
-
-   return blocks;
- }
-
- /**
-  * Reads the file and, if its root dictionary references an EGIZ dictionary,
-  * cuts the data down to the original document size stored under
-  * {@link #EGIZ_ODS_NAME}.
-  *
-  * @param file_name
-  *          The PDF file.
-  * @return Returns the first ODS bytes of the file if it is EGIZ-signed, the
-  *         whole file content otherwise.
-  * @throws IOException
-  *           If the file cannot be read completely or the EGIZ dictionary
-  *           has no ODS entry.
-  */
- public static byte[] getOriginalDocument(final File file_name) throws IOException
- {
-   byte[] pdf = readFile(file_name);
-
-   int last_start_xref = PDFUtils.findLastStartXRef(pdf);
-
-   StartXRefParseResult sxpr = PDFUtils.parseStartXRef(pdf, last_start_xref);
-
-   XRefSectionParseResult xpr = PDFUtils.parseXRefSection(pdf, sxpr.xref_index);
-
-   TrailerParseResult tpr = PDFUtils.parseTrailer(pdf, xpr.next_index);
-
-   System.out.println("tpr.info = " + tpr.info);
-   System.out.println("tpr.root = " + tpr.root);
-   System.out.println("tpr.size = " + tpr.size);
-
-   System.out.println("tpr.has_predecessor = " + tpr.has_predecessor);
-   if (tpr.has_predecessor)
-   {
-     System.out.println("tpr.prev = " + tpr.getPrev());
-   }
-
-   // main() treats a missing root as a regular case; without a root
-   // dictionary there is nowhere an EGIZ dictionary could be referenced.
-   if (tpr.root != null)
-   {
-     int root_index = PDFUtils.getObjectOffsetFromXRefByIndirectObjectReference(xpr, tpr.root.ior);
-     ObjectParseResult root_opr = PDFUtils.parseObject(pdf, root_index);
-     DictionaryParseResult root_dpr = (DictionaryParseResult) root_opr.object;
-
-     int egiz_index = PDFUtils.indexOfName(pdf, root_dpr.names, EGIZ_DICT_NAME);
-     if (egiz_index >= 0)
-     {
-       System.out.println("The document is EGIZ-signed. ==> extract original document");
-
-       IndirectObjectReferenceParseResult egiz_iorpr = (IndirectObjectReferenceParseResult) root_dpr.values.get(egiz_index);
-       System.out.println("EGIZ signature info at = " + egiz_iorpr);
-
-       int egiz_dict_index = PDFUtils.getObjectOffsetFromXRefByIndirectObjectReference(xpr, egiz_iorpr.ior);
-       ObjectParseResult opr = PDFUtils.parseObject(pdf, egiz_dict_index);
-       DictionaryParseResult egiz_dict = (DictionaryParseResult) opr.object;
-
-       for (int i = 0; i < egiz_dict.names.size(); i++)
-       {
-         NameParseResult npr = (NameParseResult) egiz_dict.names.get(i);
-         int len = npr.next_index - npr.name_start_index;
-         byte[] name = new byte[len];
-         System.arraycopy(pdf, npr.name_start_index, name, 0, len);
-         System.out.print(" " + new String(name, "US-ASCII") + " = ");
-
-         System.out.println(egiz_dict.values.get(i));
-       }
-
-       // Original document size
-       int key = PDFUtils.indexOfName(pdf, egiz_dict.names, EGIZ_ODS_NAME);
-       if (key < 0)
-       {
-         throw new IOException("The EGIZ dictionary contains no ODS entry.");
-       }
-       NumberParseResult ods = (NumberParseResult) egiz_dict.values.get(key);
-
-       int original_document_size = ods.number;
-       System.out.println("Original Document Size = " + original_document_size);
-
-       byte[] original = new byte[original_document_size];
-       System.arraycopy(pdf, 0, original, 0, original_document_size);
-
-       return original;
-     }
-   }
-
-   System.out.println("No EGIZ block found. ==> the whold document is the original document");
-   return pdf;
- }
-
- /**
-  * Reads the whole file into a byte array and closes the stream in any case.
-  *
-  * @param file
-  *          The file to read.
-  * @return Returns the complete file content.
-  * @throws IOException
-  *           If the file is too large for a byte array, ends before the
-  *           announced length was read, or cannot be read at all.
-  */
- private static byte[] readFile(final File file) throws IOException
- {
-   final long length = file.length();
-   if (length > Integer.MAX_VALUE)
-   {
-     throw new IOException("File too large to load into memory: " + file);
-   }
-
-   final byte[] content = new byte[(int) length];
-   final FileInputStream fis = new FileInputStream(file);
-   try
-   {
-     // A single read() call may return fewer bytes than requested.
-     int offset = 0;
-     while (offset < content.length)
-     {
-       final int read = fis.read(content, offset, content.length - offset);
-       if (read < 0)
-       {
-         throw new IOException("Unexpected end of file after " + offset + " of " + content.length + " bytes: " + file);
-       }
-       offset += read;
-     }
-   }
-   finally
-   {
-     fis.close();
-   }
-   return content;
- }
-
-}
diff --git a/src/main/java/at/knowcenter/wag/exactparser/parsing/IndirectObjectReference.java b/src/main/java/at/knowcenter/wag/exactparser/parsing/IndirectObjectReference.java deleted file mode 100644 index 2bfdf56..0000000 --- a/src/main/java/at/knowcenter/wag/exactparser/parsing/IndirectObjectReference.java +++ /dev/null @@ -1,57 +0,0 @@ -/**
- * <copyright> Copyright 2006 by Know-Center, Graz, Austria </copyright>
- * PDF-AS has been contracted by the E-Government Innovation Center EGIZ, a
- * joint initiative of the Federal Chancellery Austria and Graz University of
- * Technology.
- *
- * Licensed under the EUPL, Version 1.1 or - as soon they will be approved by
- * the European Commission - subsequent versions of the EUPL (the "Licence");
- * You may not use this work except in compliance with the Licence.
- * You may obtain a copy of the Licence at:
- * http://www.osor.eu/eupl/
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the Licence is distributed on an "AS IS" basis,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the Licence for the specific language governing permissions and
- * limitations under the Licence.
- *
- * This product combines work with different licenses. See the "NOTICE" text
- * file for details on the various modules and licenses.
- * The "NOTICE" text file is part of the distribution. Any derivative works
- * that you distribute must include a readable copy of the "NOTICE" text file.
- *
- * $Id: IndirectObjectReference.java,v 1.1 2006/08/25 17:00:59 wprinz Exp $
- */
-package at.knowcenter.wag.exactparser.parsing;
-
-/**
- * Holds the object identifier of an indirect object.
- *
- * <p>
- * An indirect object is not contained within another object, whereas a direct
- * object is structurally part of another object - for example a String that
- * is the value of some key in a dictionary.
- * </p>
- * <p>
- * The object identifier is the pair of object number and generation number.
- * It identifies one specific indirect object and may be used in PDF to refer
- * to that object.
- * </p>
- *
- * @author wprinz
- */
-public class IndirectObjectReference {
-
- /** The object number part of the identifier. */
- public int object_number;
-
- /** The generation number part of the identifier. */
- public int generation_number;
-
- /**
-  * @return Returns the object number and the generation number separated by
-  *         a single space.
-  */
- //@Override
- public String toString() {
-   final StringBuffer text = new StringBuffer();
-   text.append(object_number);
-   text.append(" ");
-   text.append(generation_number);
-   return text.toString();
- }
-
-}
diff --git a/src/main/java/at/knowcenter/wag/exactparser/parsing/PDFNames.java b/src/main/java/at/knowcenter/wag/exactparser/parsing/PDFNames.java deleted file mode 100644 index 0ee5863..0000000 --- a/src/main/java/at/knowcenter/wag/exactparser/parsing/PDFNames.java +++ /dev/null @@ -1,184 +0,0 @@ -/**
- * <copyright> Copyright 2006 by Know-Center, Graz, Austria </copyright>
- * PDF-AS has been contracted by the E-Government Innovation Center EGIZ, a
- * joint initiative of the Federal Chancellery Austria and Graz University of
- * Technology.
- *
- * Licensed under the EUPL, Version 1.1 or - as soon they will be approved by
- * the European Commission - subsequent versions of the EUPL (the "Licence");
- * You may not use this work except in compliance with the Licence.
- * You may obtain a copy of the Licence at:
- * http://www.osor.eu/eupl/
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the Licence is distributed on an "AS IS" basis,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the Licence for the specific language governing permissions and
- * limitations under the Licence.
- *
- * This product combines work with different licenses. See the "NOTICE" text
- * file for details on the various modules and licenses.
- * The "NOTICE" text file is part of the distribution. Any derivative works
- * that you distribute must include a readable copy of the "NOTICE" text file.
- *
- * $Id: PDFNames.java,v 1.1 2006/08/25 17:00:59 wprinz Exp $
- */
-package at.knowcenter.wag.exactparser.parsing;
-
-/**
- * Abstract class that contains several frequently used PDF constants.
- *
- * <p>
- * The PDF specification partitions the character set (ASCII) into three groups:
- * </p>
- * <ul>
- * <li>Whitespace characters (space, tab, etc., but also newline and carriage
- * return) used to separate tokens. Unless otherwise specified a group of
- * consecutive whitespace characters behaves like a single whitespace character.</li>
- * <li>Delimiter characters ('(', '<', etc., but also '/', which precedes the
- * PDF Key Names in dictionaries) that are used to enclose semantic groups.</li>
- * <li>Regular characters are by definition the remaining characters that are
- * neither whitespace nor delimiters.</li>
- * </ul>
- * <p>
- * Newlines consist by default of CR and LF, but LF alone and even CR alone are
- * also allowed. It seems that all variations of newlines may exist within a
- * single document.
- * </p>
- *
- * @author wprinz
- */
-public abstract class PDFNames
-{
-
- /**
- * The standard encoding of PDF tokens and names.
- *
- * <p>
- * PDF is usually an 8 bit format. Binary data etc. can be saved just as it
- * is. Nevertheless all PDF tokens ('xref', 'obj', etc.) and PDF Names
- * ('/Size', '/Pages', '/Type', etc.) must be in 7 bit US-ASCII encoding.
- * </p>
- * <p>
- * Therefore, whenever using Java Strings to convert e.g. numbers to such PDF
- * tokens, use this encoding constant.
- * </p>
- * <p>
- * The same applies to PDF token/name byte arrays that are converted back to
- * Java Strings.
- * </p>
- */
- public static final String PDF_STANDARD_ENCODING = "US-ASCII";
-
- // Whitespace characters
-
- // TABLE 3.1 White-space characters
- // DECIMAL HEXADECIMAL OCTAL NAME
- // 0 00 000 Null (NUL)
- // 9 09 011 Tab (HT)
- // 10 0A 012 Line feed (LF)
- // 12 0C 014 Form feed (FF)
- // 13 0D 015 Carriage return
- // 32 20 040 Space (SP)
-
- public static final byte WHITESPACE_NUL = 0x00;
-
- public static final byte WHITESPACE_HT = 0x09;
-
- public static final byte WHITESPACE_LF = 0x0A;
-
- public static final byte WHITESPACE_FF = 0x0C;
-
- public static final byte WHITESPACE_CR = 0x0D;
-
- public static final byte WHITESPACE_SP = 0x20;
-
- // All six whitespace bytes listed in the table above.
- public static final byte[] WHITESPACE_CHARACTERS = { WHITESPACE_NUL,
- WHITESPACE_HT, WHITESPACE_LF, WHITESPACE_FF, WHITESPACE_CR, WHITESPACE_SP };
-
- // comment character
-
- public static final byte COMMENT = '%';
-
- // PDF-version
-
- public static final byte[] PDF_VERSION_STR = { 'P', 'D', 'F', '-' };
-
- public static final byte PDF_VERSION_SEPARATOR = '.';
-
- // delimiter characters
-
- public static final byte DELIMITER_STRING_OPEN = '(';
-
- public static final byte DELIMITER_STRING_CLOSE = ')';
-
- public static final byte DELIMITER_HEXSTRING_OPEN = '<';
-
- public static final byte DELIMITER_HEXSTRING_CLOSE = '>';
-
- public static final byte DELIMITER_ARRAY_OPEN = '[';
-
- public static final byte DELIMITER_ARRAY_CLOSE = ']';
-
- public static final byte DELIMITER_CURLY_OPEN = '{';
-
- public static final byte DELIMITER_CURLY_CLOSE = '}';
-
- public static final byte DELIMITER_NAME = '/';
-
- // All delimiter bytes declared above. The comment character '%' is not
- // part of this list.
- public static final byte[] DELIMITER_CHARACTERS = { DELIMITER_STRING_OPEN,
- DELIMITER_STRING_CLOSE, DELIMITER_HEXSTRING_OPEN,
- DELIMITER_HEXSTRING_CLOSE, DELIMITER_ARRAY_OPEN, DELIMITER_ARRAY_CLOSE,
- DELIMITER_CURLY_OPEN, DELIMITER_CURLY_CLOSE, DELIMITER_NAME };
-
- // Footer
-
- public static final byte[] XREF_STR = { 'x', 'r', 'e', 'f' };
-
- public static final byte[] TRAILER_STR = { 't', 'r', 'a', 'i', 'l', 'e', 'r' };
-
- public static final byte[] STARTXREF_STR = { 's', 't', 'a', 'r', 't', 'x',
- 'r', 'e', 'f' };
-
- public static final byte[] EOF_STR = { '%', '%', 'E', 'O', 'F' };
-
- // objects
-
- public static final byte[] OBJ_STR = { 'o', 'b', 'j' };
-
- public static final byte[] ENDOBJ_STR = { 'e', 'n', 'd', 'o', 'b', 'j' };
-
- // A dictionary is opened by two hex string open delimiters, i.e. "<<".
- public static final byte[] DICT_START_STR = { DELIMITER_HEXSTRING_OPEN,
- DELIMITER_HEXSTRING_OPEN };
-
- // A dictionary is closed by two hex string close delimiters, i.e. ">>".
- public static final byte[] DICT_END_STR = { DELIMITER_HEXSTRING_CLOSE,
- DELIMITER_HEXSTRING_CLOSE };
-
- public static final byte[] STREAM_STR = { 's', 't', 'r', 'e', 'a', 'm' };
-
- public static final byte[] ENDSTREAM_STR = { 'e', 'n', 'd', 's', 't', 'r',
- 'e', 'a', 'm' };
-
- public static final byte[] NULL_STR = { 'n', 'u', 'l', 'l' };
-
- public static final byte[] TRUE_STR = { 't', 'r', 'u', 'e' };
-
- public static final byte[] FALSE_STR = { 'f', 'a', 'l', 's', 'e' };
-
- // indirect object references
-
- public static final byte[] REFERENCE_STR = { 'R' };
-
- // Dictionary keys (stored without the leading '/' name delimiter)
-
- public static final byte[] SIZE_STR = { 'S', 'i', 'z', 'e' };
-
- public static final byte[] PREV_STR = { 'P', 'r', 'e', 'v' };
-
- public static final byte[] ROOT_STR = { 'R', 'o', 'o', 't' };
-
- public static final byte[] INFO_STR = { 'I', 'n', 'f', 'o' };
-
- public static final byte[] LENGTH_STR = { 'L', 'e', 'n', 'g', 't', 'h' };
-
-}
diff --git a/src/main/java/at/knowcenter/wag/exactparser/parsing/PDFUtils.java b/src/main/java/at/knowcenter/wag/exactparser/parsing/PDFUtils.java deleted file mode 100644 index de356c9..0000000 --- a/src/main/java/at/knowcenter/wag/exactparser/parsing/PDFUtils.java +++ /dev/null @@ -1,1405 +0,0 @@ -/**
- * <copyright> Copyright 2006 by Know-Center, Graz, Austria </copyright>
- * PDF-AS has been contracted by the E-Government Innovation Center EGIZ, a
- * joint initiative of the Federal Chancellery Austria and Graz University of
- * Technology.
- *
- * Licensed under the EUPL, Version 1.1 or - as soon they will be approved by
- * the European Commission - subsequent versions of the EUPL (the "Licence");
- * You may not use this work except in compliance with the Licence.
- * You may obtain a copy of the Licence at:
- * http://www.osor.eu/eupl/
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the Licence is distributed on an "AS IS" basis,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the Licence for the specific language governing permissions and
- * limitations under the Licence.
- *
- * This product combines work with different licenses. See the "NOTICE" text
- * file for details on the various modules and licenses.
- * The "NOTICE" text file is part of the distribution. Any derivative works
- * that you distribute must include a readable copy of the "NOTICE" text file.
- *
- * $Id: PDFUtils.java,v 1.1 2006/08/25 17:00:59 wprinz Exp $
- */
-package at.knowcenter.wag.exactparser.parsing;
-
-import java.util.ArrayList;
-import java.util.Iterator;
-import java.util.List;
-
-import org.apache.log4j.Logger;
-
-import at.knowcenter.wag.exactparser.ByteArrayUtils;
-import at.knowcenter.wag.exactparser.parsing.results.ArrayParseResult;
-import at.knowcenter.wag.exactparser.parsing.results.BooleanParseResult;
-import at.knowcenter.wag.exactparser.parsing.results.DictionaryParseResult;
-import at.knowcenter.wag.exactparser.parsing.results.EOFParseResult;
-import at.knowcenter.wag.exactparser.parsing.results.FooterParseResult;
-import at.knowcenter.wag.exactparser.parsing.results.HeaderParseResult;
-import at.knowcenter.wag.exactparser.parsing.results.HexStringParseResult;
-import at.knowcenter.wag.exactparser.parsing.results.IndirectObjectReferenceParseResult;
-import at.knowcenter.wag.exactparser.parsing.results.IntegerParseResult;
-import at.knowcenter.wag.exactparser.parsing.results.LiteralStringParseResult;
-import at.knowcenter.wag.exactparser.parsing.results.NameParseResult;
-import at.knowcenter.wag.exactparser.parsing.results.NullParseResult;
-import at.knowcenter.wag.exactparser.parsing.results.NumberParseResult;
-import at.knowcenter.wag.exactparser.parsing.results.ObjectHeaderParseResult;
-import at.knowcenter.wag.exactparser.parsing.results.ObjectParseResult;
-import at.knowcenter.wag.exactparser.parsing.results.ParseResult;
-import at.knowcenter.wag.exactparser.parsing.results.StartXRefParseResult;
-import at.knowcenter.wag.exactparser.parsing.results.StreamParseResult;
-import at.knowcenter.wag.exactparser.parsing.results.TrailerParseResult;
-import at.knowcenter.wag.exactparser.parsing.results.XRefLineParseResult;
-import at.knowcenter.wag.exactparser.parsing.results.XRefSectionParseResult;
-import at.knowcenter.wag.exactparser.parsing.results.XRefSubSectionParseResult;
-
-
-
-/**
- * Abstract class that contains several static utility methods for parsing and
- * analyzing PDF documents on the lowest level.
- *
- * <p>
- * Most operations require random access to the PDF data (mostly to verify the
- * syntax). So the whole PDF document has to be provided as a byte array. The
- * term "pdf+index" states a specific position index within this byte array.
- * </p>
- *
- * @author wprinz
- *
- */
-public abstract class PDFUtils
-{
- // log4j logger registered under the PDFUtils class.
- private static Logger log = Logger.getLogger(PDFUtils.class);
-
- /**
-  * Tells whether the byte is one of {@link PDFNames#WHITESPACE_CHARACTERS}.
-  *
-  * @param data
-  *          The byte to classify.
-  * @return Returns true if the byte is a whitespace character, false
-  *         otherwise.
-  */
- public static boolean isWhitespace(final byte data)
- {
-   final byte[] whitespace_characters = PDFNames.WHITESPACE_CHARACTERS;
-   final boolean is_whitespace = ByteArrayUtils.contains(whitespace_characters, data);
-   return is_whitespace;
- }
-
- /**
-  * Tells whether the byte is one of {@link PDFNames#DELIMITER_CHARACTERS}.
-  *
-  * @param data
-  *          The byte to classify.
-  * @return Returns true if the byte is a delimiter character, false
-  *         otherwise.
-  */
- public static boolean isDelimiter(final byte data)
- {
-   final byte[] delimiter_characters = PDFNames.DELIMITER_CHARACTERS;
-   final boolean is_delimiter = ByteArrayUtils.contains(delimiter_characters, data);
-   return is_delimiter;
- }
-
- /**
-  * Tells whether the byte is a regular character, i.e. neither whitespace
-  * nor delimiter.
-  *
-  * @param data
-  *          The byte to classify.
-  * @return Returns true if the byte is neither a whitespace nor a delimiter
-  *         character.
-  */
- protected static boolean isRegular(final byte data)
- {
-   return !isWhitespace(data) && !isDelimiter(data);
- }
-
- /**
-  * Skips whitespace.
-  *
-  * <p>
-  * Skips all whitespace, which may be none, one or multiple whitespace
-  * characters.
-  * </p>
-  * <p>
-  * Note that this also skips newline characters (which belong to whitespace
-  * as well).
-  * </p>
-  *
-  * @param data
-  *          The PDF data.
-  * @param index
-  *          The index.
-  * @return Returns the index of the first non whitespace character. This may
-  *         be equal to index if no whitespaces were skipped at all. If only
-  *         whitespace is left up to the end of the data, data.length is
-  *         returned.
-  */
- public static int skipWhitespace(final byte[] data, final int index)
- {
-   int non_whitespace_index = index;
-   // The length check keeps trailing whitespace at the very end of the data
-   // from running past the last byte.
-   while (non_whitespace_index < data.length && isWhitespace(data[non_whitespace_index]))
-   {
-     non_whitespace_index++;
-   }
-   return non_whitespace_index;
- }
-
- /**
-  * Skips bytes until whitespace is reached.
-  *
-  * <p>
-  * Skips all non whitespace characters, which may be none at all.
-  * </p>
-  *
-  * @param data
-  *          The PDF data.
-  * @param index
-  *          The index.
-  * @return Returns the index of the first whitespace character. This may be
-  *         equal to index if no non whitespaces were skipped at all. If no
-  *         whitespace follows up to the end of the data, data.length is
-  *         returned.
-  */
- public static int skipToWhitespace(final byte[] data, final int index)
- {
-   int whitespace_index = index;
-   // The length check keeps a token that ends exactly at the end of the data
-   // from running past the last byte.
-   while (whitespace_index < data.length && !isWhitespace(data[whitespace_index]))
-   {
-     whitespace_index++;
-   }
-   return whitespace_index;
- }
-
- // Line terminator made of carriage return followed by line feed.
- protected static final byte[] LINE_TERMINATOR_CRLF = {
- PDFNames.WHITESPACE_CR, PDFNames.WHITESPACE_LF };
-
- // Line terminator made of a single carriage return.
- protected static final byte[] LINE_TERMINATOR_CRALONE = { PDFNames.WHITESPACE_CR };
-
- // Line terminator made of a single line feed.
- protected static final byte[] LINE_TERMINATOR_LF = { PDFNames.WHITESPACE_LF };
-
- /**
-  * Tells whether a line terminator (LF, CR LF or CR alone) starts at the
-  * given index.
-  *
-  * @param data
-  *          The PDF data.
-  * @param index
-  *          The index.
-  * @return Returns true if one of the line terminators starts at index.
-  */
- public static boolean isNewline(final byte[] data, final int index)
- {
-   // although not specified by PDF, some applications use the CR alone as line
-   // terminator
-   return ByteArrayUtils.compareByteArrays(data, index, LINE_TERMINATOR_LF)
-       || ByteArrayUtils.compareByteArrays(data, index, LINE_TERMINATOR_CRLF)
-       || ByteArrayUtils.compareByteArrays(data, index, LINE_TERMINATOR_CRALONE);
- }
-
- /**
-  * Skips the line terminator that starts at the given index.
-  *
-  * @param data
-  *          The PDF data.
-  * @param index
-  *          The index, expected to point at a line terminator.
-  * @return Returns the index directly behind the line terminator. If no line
-  *         terminator starts at index, an assertion fails (when assertions
-  *         are enabled) and index is returned unchanged.
-  */
- public static int skipNewline(final byte[] data, final int index)
- {
-   // Probed in this order, so that CR LF is consumed as a whole before the
-   // CR alone variant is considered. Although not specified by PDF, some
-   // applications use the CR alone as line terminator.
-   final byte[][] terminators = { LINE_TERMINATOR_LF, LINE_TERMINATOR_CRLF, LINE_TERMINATOR_CRALONE };
-   for (int t = 0; t < terminators.length; t++)
-   {
-     if (ByteArrayUtils.compareByteArrays(data, index, terminators[t]))
-     {
-       return index + terminators[t].length;
-     }
-   }
-
-   assert false : "don't call this if you don't expect a newline - call skipWhitespace instead";
-   return index;
- }
-
- /**
-  * Advances to the next line terminator (LF, CR LF or CR alone) at or behind
-  * the given index and skips it.
-  *
-  * @param data
-  *          The PDF data.
-  * @param index
-  *          The index to start searching from.
-  * @return Returns the index directly behind the first line terminator found.
-  * @throws IndexOutOfBoundsException
-  *           If the end of the data is reached without finding a line
-  *           terminator (raised by the comparison at the position behind
-  *           the last byte).
-  */
- public static int skipToNewline(final byte[] data, final int index)
- {
-   int current_index = index;
-   for (;;)
-   {
-     if (ByteArrayUtils.compareByteArrays(data, current_index, LINE_TERMINATOR_LF))
-     {
-       return current_index + LINE_TERMINATOR_LF.length;
-     }
-     // The CR based terminators have to be probed at the moving position as
-     // well; probing the start index only would never find them further on.
-     if (ByteArrayUtils.compareByteArrays(data, current_index, LINE_TERMINATOR_CRLF))
-     {
-       return current_index + LINE_TERMINATOR_CRLF.length;
-     }
-     // although not specified by PDF, some applications use the CR alone as
-     // line terminator
-     if (ByteArrayUtils.compareByteArrays(data, current_index, LINE_TERMINATOR_CRALONE))
-     {
-       return current_index + LINE_TERMINATOR_CRALONE.length;
-     }
-     current_index++;
-   }
- }
-
- /**
-  * Parses the boolean token ("true" or "false") that starts at the given
-  * index.
-  *
-  * @param pdf
-  *          The PDF data.
-  * @param index
-  *          The index.
-  * @return Returns the result of the parsing operation; its next_index points
-  *         directly behind the token.
-  * @throws RuntimeException
-  *           If neither token starts at index.
-  */
- public static BooleanParseResult parseBoolean(final byte[] pdf,
-     final int index)
- {
-   final BooleanParseResult result = new BooleanParseResult();
-   result.start_index = index;
-
-   final byte[] token;
-   if (ByteArrayUtils.compareByteArrays(pdf, result.start_index, PDFNames.TRUE_STR))
-   {
-     token = PDFNames.TRUE_STR;
-     result.value = true;
-   }
-   else if (ByteArrayUtils.compareByteArrays(pdf, result.start_index, PDFNames.FALSE_STR))
-   {
-     token = PDFNames.FALSE_STR;
-     result.value = false;
-   }
-   else
-   {
-     throw new RuntimeException("Boolean couldn't be parsed at index " + index);
-   }
-
-   result.next_index = result.start_index + token.length;
-   return result;
- }
-
- /**
- * Tells whether the byte is a sign character.
- *
- * @param data
- * The byte to test.
- * @return Returns true for '+' and '-', false otherwise.
- */
- public static boolean isSign(final byte data)
- {
-   switch (data)
-   {
-     case '+':
-     case '-':
-       return true;
-     default:
-       return false;
-   }
- }
-
- /**
- * Tells whether the byte is an ASCII decimal digit.
- *
- * @param data
- * The byte to test.
- * @return Returns true for '0' to '9', false otherwise.
- */
- public static boolean isNumeric(final byte data)
- {
-   final int distance_from_zero = data - '0';
-   return distance_from_zero >= 0 && distance_from_zero <= 9;
- }
-
- /**
- * Reads a number from the data and returns its integer value.
- *
- * <p>
- * This is a thin wrapper around parseNumberFromByteArray that hands back the
- * integer part of the result. The value is expected to be non-negative, which
- * is checked by an assertion only.
- * </p>
- *
- * @param data
- * The data.
- * @param index
- * The index where the number starts.
- * @return Returns the read number.
- */
- public static int readNumberFromByteArray(final byte[] data, final int index)
- {
-   final int value = parseNumberFromByteArray(data, index).number;
-
-   assert value >= 0;
-   return value;
- }
-
- /**
- * Parses an unsigned integer.
- *
- * <p>
- * The integer is the run of successive digit characters that starts at index.
- * No sign character (not even '+') may precede it.
- * </p>
- *
- * @param pdf
- * The PDF data.
- * @param index
- * The index of the first digit.
- * @return Returns the parse result holding the value and the index of the
- * first byte that is not a digit.
- */
- public static IntegerParseResult parseUnsignedInteger(final byte[] pdf,
-     final int index)
- {
-   assert isNumeric(pdf[index]);
-
-   // collect the digits
-   final StringBuffer digits = new StringBuffer();
-   int end_of_digits = index;
-   for (; isNumeric(pdf[end_of_digits]); end_of_digits++)
-   {
-     digits.append((char) pdf[end_of_digits]);
-   }
-
-   // TODO: make better
-   final int value = Integer.parseInt(digits.toString());
-   assert value >= 0;
-
-   final IntegerParseResult result = new IntegerParseResult();
-   result.start_index = index;
-   result.next_index = end_of_digits;
-   result.number = value;
-   return result;
- }
-
- /**
- * Parses an integer that may carry a sign.
- *
- * <p>
- * An optional leading '+' or '-' is consumed, the following run of digits is
- * handed to parseUnsignedInteger and the sign is applied to its value.
- * </p>
- *
- * @param pdf
- * The PDF data.
- * @param index
- * The index of the sign or of the first digit.
- * @return Returns the parse result; its start_index is the given index, i.e.
- * it includes the sign.
- */
- public static IntegerParseResult parseInteger(final byte[] pdf,
-     final int index)
- {
-   final byte first = pdf[index];
-   assert isSign(first) || isNumeric(first);
-
-   int factor = +1;
-   int digits_index = index;
-   switch (first)
-   {
-     case '+':
-       digits_index = index + 1;
-       break;
-     case '-':
-       factor = -1;
-       digits_index = index + 1;
-       break;
-     default:
-       assert isNumeric(first);
-       break;
-   }
-
-   final IntegerParseResult result = parseUnsignedInteger(pdf, digits_index);
-   result.start_index = index;
-   result.number = result.number * factor;
-   return result;
- }
-
- /**
- * Parses an arbitrary number.
- *
- * <p>
- * An optional sign is consumed, followed by the run of digits and '.'
- * characters. If that run is a valid integer, the signed value is stored in
- * the result's number field; otherwise it is parsed as a float and stored in
- * the floating field.
- * </p>
- *
- * @param pdf
- * The PDF data.
- * @param index
- * The index of the sign or of the first digit.
- * @return Returns the parse result; start_index is the given index and
- * next_index is the first byte behind the number.
- * @throws NumberFormatException
- * Thrown if the collected characters are neither a valid integer
- * nor a valid float (e.g. when no digit is present at all).
- */
- public static NumberParseResult parseNumberFromByteArray(final byte[] pdf,
-     int index)
- {
-   // remember where the number (including its sign) begins
-   final int start = index;
-
-   assert isSign(pdf[index]) || isNumeric(pdf[index]);
-
-   int sign = +1;
-   if (pdf[index] == '+')
-   {
-     index++;
-   }
-   else if (pdf[index] == '-')
-   {
-     sign = -1;
-     index++;
-   }
-   else
-   {
-     assert isNumeric(pdf[index]);
-   }
-
-   // The length check lets a number that ends exactly at the end of the data
-   // be parsed instead of running past the array.
-   final StringBuffer digits = new StringBuffer();
-   while (index < pdf.length && (isNumeric(pdf[index]) || pdf[index] == '.'))
-   {
-     digits.append((char) pdf[index]);
-     index++;
-   }
-   final String number = digits.toString();
-
-   NumberParseResult npr = new NumberParseResult();
-   npr.start_index = start;
-   npr.next_index = index;
-   // TODO: make better
-   try
-   {
-     npr.number = Integer.parseInt(number) * sign;
-   }
-   catch (NumberFormatException e)
-   {
-     // not an integer (e.g. it contains '.'): fall back to a float
-     npr.floating = Float.parseFloat(number) * sign;
-   }
-
-   return npr;
- }
-
- /**
- * Finds the last "startxref" entry of the document.
- *
- * <p>
- * The lookup is delegated to ByteArrayUtils.lastIndexOf.
- * </p>
- *
- * @param pdf
- * The complete PDF file data.
- * @return Returns the offset (byte index) of the "startxref" entry as
- * reported by ByteArrayUtils.lastIndexOf.
- */
- public static int findLastStartXRef(final byte[] pdf)
- {
-   final byte[] keyword = PDFNames.STARTXREF_STR;
-   return ByteArrayUtils.lastIndexOf(pdf, keyword);
- }
-
- /**
- * Parses the xref section at pdf+index.
- *
- * <p>
- * The section starts with the 'xref' keyword. Sub-sections are parsed one
- * after the other until the 'trailer' keyword is reached.
- * </p>
- *
- * @param pdf
- * The PDF data.
- * @param index
- * The start index of the xref table.
- * @return Returns the parse result; its next_index is the index of the
- * 'trailer' keyword.
- */
- public static XRefSectionParseResult parseXRefSection(final byte[] pdf,
-     final int index)
- {
-   final XRefSectionParseResult section = new XRefSectionParseResult();
-   section.start_index = index;
-
-   final int after_keyword = section.start_index + PDFNames.XREF_STR.length;
-   assert ByteArrayUtils.compareByteArrays(pdf, section.start_index, PDFNames.XREF_STR);
-   assert isNewline(pdf, after_keyword);
-
-   int position = skipWhitespace(pdf, after_keyword);
-
-   // as long as the trailer does not start here, another sub-section follows
-   while (!ByteArrayUtils.compareByteArrays(pdf, position, PDFNames.TRAILER_STR))
-   {
-     final XRefSubSectionParseResult sub_section = parseXRefSubSection(pdf, position);
-     section.appendXRefSubSection(sub_section);
-     position = sub_section.next_index;
-   }
-
-   section.next_index = position;
-   assert ByteArrayUtils.compareByteArrays(pdf, section.next_index, PDFNames.TRAILER_STR);
-
-   return section;
- }
-
- /**
- * Parses a xref sub-section.
- *
- * <p>
- * A sub-section consists of the number of its first object, the number of
- * entries and then one xref line per entry. For entries flagged 'n' the
- * object header at the stated offset is parsed as well, and assertions check
- * that it carries the expected object and generation number.
- * </p>
- *
- * @param pdf
- * The PDF data.
- * @param index
- * The index of the sub-section's first number.
- * @return Returns the parse result; its next_index is the index behind the
- * last xref line.
- */
- public static XRefSubSectionParseResult parseXRefSubSection(final byte[] pdf,
-     final int index)
- {
-   final XRefSubSectionParseResult sub_section = new XRefSubSectionParseResult();
-   sub_section.start_index = index;
-
-   final NumberParseResult first_object = parseNumberFromByteArray(pdf, sub_section.start_index);
-   sub_section.start_obj_number = first_object.number;
-   assert sub_section.start_obj_number >= 0;
-
-   assert isWhitespace(pdf[first_object.next_index]);
-   final int count_index = skipWhitespace(pdf, first_object.next_index);
-   final NumberParseResult count = parseNumberFromByteArray(pdf, count_index);
-   sub_section.num_objects = count.number;
-
-   assert isWhitespace(pdf[count.next_index]);
-   int line_start = skipWhitespace(pdf, count.next_index);
-
-   int i = 0;
-   while (i < sub_section.num_objects)
-   {
-     final XRefLineParseResult line = parseXrefLine(pdf, line_start);
-     sub_section.appendXRefLine(line);
-
-     if (line.object_usage == 'n')
-     {
-       // check the line - this simple check may make problems with object
-       // streams and xref streams
-       final ObjectHeaderParseResult header = parseObjectHeader(pdf, line.object_offset);
-       assert header.object_number == sub_section.start_obj_number + i;
-       assert header.generation_number == line.generation_number;
-     }
-
-     line_start = line.next_index;
-     i++;
-   }
-
-   sub_section.next_index = line_start;
-   return sub_section;
- }
-
- /**
- * Parses a single 20 bytes xref line at pdf+index.
- *
- * <p>
- * The layout checked by the assertions is: a 10 digit object offset, one
- * space, a 5 digit generation number, one space, the usage flag ('n' or 'f')
- * and a two byte end-of-line sequence (SP CR, SP LF or CR LF). These checks
- * are plain Java assertions and therefore only run with assertions enabled.
- * </p>
- *
- * @param pdf
- * The PDF data.
- * @param index
- * The index of the first byte of the line.
- * @return Returns the result of the parsing operation; its next_index is set
- * three bytes behind the usage flag.
- */
- public static XRefLineParseResult parseXrefLine(final byte[] pdf,
- final int index)
- {
- XRefLineParseResult lpr = new XRefLineParseResult();
-
- lpr.start_index = index;
-
- // byte offset of the object, expected to occupy exactly 10 digits
- IntegerParseResult object_offset_ipr = parseUnsignedInteger(pdf, lpr.start_index);
- lpr.object_offset = object_offset_ipr.number;
- assert lpr.object_offset >= 0;
- assert lpr.object_offset < pdf.length;
- assert object_offset_ipr.next_index == lpr.start_index + 10;
-
- // The standard explicitly says 1 single SPACE.
- assert pdf[object_offset_ipr.next_index] == PDFNames.WHITESPACE_SP;
- int generation_number_index = object_offset_ipr.next_index + 1;
-
- // generation number, expected to end at byte 16 of the line
- IntegerParseResult generation_number_ipr = parseUnsignedInteger(pdf, generation_number_index);
- lpr.generation_number = generation_number_ipr.number;
- assert generation_number_ipr.next_index == lpr.start_index + 16;
-
- assert pdf[generation_number_ipr.next_index] == PDFNames.WHITESPACE_SP;
- int usage_index = generation_number_ipr.next_index + 1;
-
- // usage flag: 'n' or 'f'
- lpr.object_usage = pdf[usage_index];
- assert lpr.object_usage == 'n' || lpr.object_usage == 'f';
-
- // the two byte end-of-line sequence is either SP + (CR or LF) or CR LF
- if (pdf[usage_index + 1] == PDFNames.WHITESPACE_SP)
- {
- assert pdf[usage_index + 2] == PDFNames.WHITESPACE_CR || pdf[usage_index + 2] == PDFNames.WHITESPACE_LF;
- }
- else
- {
- assert pdf[usage_index + 1] == PDFNames.WHITESPACE_CR;
- assert pdf[usage_index + 2] == PDFNames.WHITESPACE_LF;
- }
-
- // usage flag plus the two end-of-line bytes
- lpr.next_index = usage_index + 3;
-
- assert lpr.next_index == lpr.start_index + 20;
-
- return lpr;
- }
-
- /**
- * Looks up a name in a list of parsed names.
- *
- * @param pdf
- * The PDF data the names refer to.
- * @param names
- * The list of NameParseResult objects to search.
- * @param sought
- * The bytes of the sought name (compared at each name's
- * name_start_index).
- * @return Returns the list position of the first matching name, or -1 if
- * there is none.
- */
- public static int indexOfName(final byte[] pdf, List names,
-     byte[] sought)
- {
-   int position = 0;
-   while (position < names.size())
-   {
-     final NameParseResult candidate = (NameParseResult) names.get(position);
-     if (ByteArrayUtils.compareByteArrays(pdf, candidate.name_start_index, sought))
-     {
-       return position;
-     }
-     position++;
-   }
-   return -1;
- }
-
- /**
- * Parses the trailer at pdf+index.
- *
- * <p>
- * The trailer dictionary is parsed with parseDictionary. Its /Info, /Root,
- * /Size and /Prev entries are then looked up by name and copied into the
- * result. /Info, /Root and /Prev are optional; /Size is read unconditionally,
- * so the list lookup fails if the entry is missing.
- * </p>
- *
- * @param pdf
- * The PDF data.
- * @param index
- * The index of the 'trailer' keyword.
- * @return Returns the parse result; its next_index is the index of the first
- * non-whitespace byte behind the trailer dictionary.
- */
- public static TrailerParseResult parseTrailer(final byte[] pdf,
-     final int index)
- {
-   final TrailerParseResult trailer = new TrailerParseResult();
-   trailer.start_index = index;
-   trailer.has_predecessor = false;
-
-   assert ByteArrayUtils.compareByteArrays(pdf, trailer.start_index, PDFNames.TRAILER_STR);
-
-   trailer.contents_index = skipWhitespace(pdf, trailer.start_index + PDFNames.TRAILER_STR.length);
-
-   final int dictionary_index = skipWhitespace(pdf, trailer.contents_index);
-   assert ByteArrayUtils.compareByteArrays(pdf, dictionary_index, PDFNames.DICT_START_STR);
-
-   trailer.dpr = parseDictionary(pdf, dictionary_index);
-   final int end_of_dictionary = trailer.dpr.next_index;
-
-   final List keys = trailer.dpr.names;
-   final List entries = trailer.dpr.values;
-
-   final int info_position = indexOfName(pdf, keys, PDFNames.INFO_STR);
-   if (info_position >= 0)
-   {
-     trailer.info = (IndirectObjectReferenceParseResult) entries.get(info_position);
-   }
-
-   final int root_position = indexOfName(pdf, keys, PDFNames.ROOT_STR);
-   if (root_position >= 0)
-   {
-     trailer.root = (IndirectObjectReferenceParseResult) entries.get(root_position);
-   }
-
-   final int size_position = indexOfName(pdf, keys, PDFNames.SIZE_STR);
-   final NumberParseResult size = (NumberParseResult) entries.get(size_position);
-   trailer.size = size.number;
-
-   final int prev_position = indexOfName(pdf, keys, PDFNames.PREV_STR);
-   if (prev_position >= 0)
-   {
-     final NumberParseResult prev = (NumberParseResult) entries.get(prev_position);
-     trailer.has_predecessor = true;
-     trailer.setPrev(prev.number);
-   }
-
-   trailer.contents_end_index = end_of_dictionary;
-   trailer.next_index = skipWhitespace(pdf, trailer.contents_end_index);
-
-   assert ByteArrayUtils.compareByteArrays(pdf, trailer.next_index, PDFNames.STARTXREF_STR);
-   return trailer;
- }
-
- /**
- * Parses the startxref section at pdf+index.
- *
- * <p>
- * The section consists of the 'startxref' keyword, the byte offset of the
- * xref table and is followed by the EOF marker. The structural expectations
- * are checked by assertions only.
- * </p>
- *
- * @param pdf
- * The complete PDF file data.
- * @param index
- * The index of the startxref section.
- * @return Returns the result of the parsing operation; xref_index holds the
- * parsed offset and next_index the index of the EOF marker.
- */
- public static StartXRefParseResult parseStartXRef(final byte[] pdf,
-     final int index)
- {
-   StartXRefParseResult spr = new StartXRefParseResult();
-   // Record where the section begins. next_index is assigned further down,
-   // once the offset has been consumed.
-   spr.start_index = index;
-
-   assert ByteArrayUtils.compareByteArrays(pdf, index, PDFNames.STARTXREF_STR);
-   assert isNewline(pdf, index + PDFNames.STARTXREF_STR.length);
-
-   int index_of_number = skipWhitespace(pdf, index + PDFNames.STARTXREF_STR.length);
-   NumberParseResult npr = parseNumberFromByteArray(pdf, index_of_number);
-   spr.xref_index = npr.number;
-
-   assert isNewline(pdf, npr.next_index);
-   spr.next_index = skipWhitespace(pdf, npr.next_index);
-
-   assert ByteArrayUtils.compareByteArrays(pdf, spr.next_index, PDFNames.EOF_STR);
-
-   assert spr.xref_index >= 0;
-   assert spr.xref_index < pdf.length;
-
-   // A linearized document sets the startxref value of the first page's footer
-   // to 0.
-   if (spr.xref_index != 0)
-   {
-     assert ByteArrayUtils.compareByteArrays(pdf, spr.xref_index, PDFNames.XREF_STR);
-   }
-
-   return spr;
- }
-
- /**
- * Parses the End Of File (EOF) marker at pdf+index.
- *
- * <p>
- * The marker is not necessarily terminated with a newline, so no newline is
- * looked for: next_index equals the end of the marker itself.
- * </p>
- *
- * @param pdf
- * The PDF data.
- * @param index
- * The index of the EOF marker.
- * @return Returns the result of the parsing operation.
- */
- public static EOFParseResult parseEOF(final byte[] pdf, final int index)
- {
-   final EOFParseResult result = new EOFParseResult();
-   result.start_index = index;
-
-   assert ByteArrayUtils.compareByteArrays(pdf, result.start_index, PDFNames.EOF_STR);
-
-   final int marker_end = result.start_index + PDFNames.EOF_STR.length;
-   result.eof_end_index = marker_end;
-   // perhaps explicitly determine a newline here
-   result.next_index = marker_end;
-
-   return result;
- }
-
- /**
- * Tells whether an indirect object reference ("objnum gennum R") starts at
- * the given position.
- *
- * <p>
- * Nothing but the answer is produced; use parseIndirectObjectReference to
- * obtain the parsed reference.
- * </p>
- *
- * @param pdf
- * The PDF data.
- * @param index
- * The index to inspect.
- * @return Returns true if a positive object number, whitespace, a
- * non-negative generation number, whitespace and the reference
- * keyword follow each other at index.
- */
- public static boolean isIndirectObjectReference(final byte[] pdf,
-     final int index)
- {
-   // object number: must start with a digit and be positive
-   if (!PDFUtils.isNumeric(pdf[index]))
-   {
-     return false;
-   }
-   final NumberParseResult object_number = parseNumberFromByteArray(pdf, index);
-   if (object_number.number <= 0)
-   {
-     return false;
-   }
-
-   if (!isWhitespace(pdf[object_number.next_index]))
-   {
-     return false;
-   }
-   final int generation_number_index = skipWhitespace(pdf, object_number.next_index);
-
-   // generation number: must start with a digit and not be negative
-   if (!PDFUtils.isNumeric(pdf[generation_number_index]))
-   {
-     return false;
-   }
-   final NumberParseResult generation_number = parseNumberFromByteArray(pdf, generation_number_index);
-   if (generation_number.number < 0)
-   {
-     return false;
-   }
-
-   if (!isWhitespace(pdf[generation_number.next_index]))
-   {
-     return false;
-   }
-   final int R_index = skipWhitespace(pdf, generation_number.next_index);
-
-   return ByteArrayUtils.compareByteArrays(pdf, R_index, PDFNames.REFERENCE_STR);
- }
-
- /**
- * Parses an indirect object reference ("objnum gennum R").
- *
- * <p>
- * The caller is expected to have checked the position with
- * isIndirectObjectReference; here this is verified by assertions only.
- * </p>
- *
- * @param pdf
- * The PDF data.
- * @param index
- * The index of the object number.
- * @return Returns the parse result holding the object and generation number;
- * next_index is the index behind the reference keyword.
- */
- public static IndirectObjectReferenceParseResult parseIndirectObjectReference(
-     final byte[] pdf, final int index)
- {
-   assert isIndirectObjectReference(pdf, index);
-
-   final IndirectObjectReferenceParseResult reference = new IndirectObjectReferenceParseResult();
-   reference.ior = new IndirectObjectReference();
-   reference.start_index = index;
-
-   final NumberParseResult object_number = parseNumberFromByteArray(pdf, reference.start_index);
-   reference.ior.object_number = object_number.number;
-   assert reference.ior.object_number > 0;
-
-   final int after_object_number = object_number.next_index;
-   assert isWhitespace(pdf[after_object_number]);
-
-   final NumberParseResult generation_number = parseNumberFromByteArray(pdf, skipWhitespace(pdf, after_object_number));
-   reference.ior.generation_number = generation_number.number;
-   assert reference.ior.generation_number >= 0;
-
-   final int after_generation_number = generation_number.next_index;
-   assert isWhitespace(pdf[after_generation_number]);
-
-   final int keyword_index = skipWhitespace(pdf, after_generation_number);
-   assert ByteArrayUtils.compareByteArrays(pdf, keyword_index, PDFNames.REFERENCE_STR);
-
-   reference.next_index = keyword_index + PDFNames.REFERENCE_STR.length;
-   return reference;
- }
-
- /**
- * Parses the object header ("objnum gennum obj") at pdf+index.
- *
- * @param pdf
- * The PDF data.
- * @param index
- * The index of the object number.
- * @return Returns the parse result holding the object and generation number;
- * next_index is the first non-whitespace byte behind the obj
- * keyword.
- */
- public static ObjectHeaderParseResult parseObjectHeader(final byte[] pdf,
-     final int index)
- {
-   final ObjectHeaderParseResult header = new ObjectHeaderParseResult();
-   header.start_index = index;
-
-   final NumberParseResult object_number = parseNumberFromByteArray(pdf, header.start_index);
-   header.object_number = object_number.number;
-   assert header.object_number > 0;
-
-   final int after_object_number = object_number.next_index;
-   assert isWhitespace(pdf[after_object_number]);
-
-   final NumberParseResult generation_number = parseNumberFromByteArray(pdf, skipWhitespace(pdf, after_object_number));
-   header.generation_number = generation_number.number;
-   assert header.generation_number >= 0;
-
-   final int after_generation_number = generation_number.next_index;
-   assert isWhitespace(pdf[after_generation_number]);
-
-   final int keyword_index = skipWhitespace(pdf, after_generation_number);
-   assert ByteArrayUtils.compareByteArrays(pdf, keyword_index, PDFNames.OBJ_STR);
-
-   // Not all pdf writers put a newline after obj, so any whitespace is
-   // skipped instead of insisting on a newline.
-   header.next_index = skipWhitespace(pdf, keyword_index + PDFNames.OBJ_STR.length);
-   return header;
- }
-
- /**
- * Parses a complete indirect object: header, content and the endobj keyword.
- *
- * @param pdf
- * The PDF data.
- * @param index
- * The index of the object header.
- * @return Returns the parse result; next_index is the index directly behind
- * the endobj keyword (a following newline is not consumed).
- */
- public static ObjectParseResult parseObject(final byte[] pdf, final int index)
- {
-   final ObjectParseResult result = new ObjectParseResult();
-   result.start_index = index;
-
-   result.header = parseObjectHeader(pdf, result.start_index);
-   result.content_index = result.header.next_index;
-
-   final int object_index = skipWhitespace(pdf, result.content_index);
-   result.object = parseUnknownObject(pdf, object_index);
-
-   result.end_of_content_index = skipWhitespace(pdf, result.object.next_index);
-   assert ByteArrayUtils.compareByteArrays(pdf, result.end_of_content_index, PDFNames.ENDOBJ_STR);
-
-   result.next_index = result.end_of_content_index + PDFNames.ENDOBJ_STR.length;
-   return result;
- }
-
- /**
- * Parses the object at pdf+index, determining its type from its first bytes.
- *
- * <p>
- * Recognized are dictionaries (optionally followed by a stream), null,
- * booleans, indirect object references, numbers, literal strings, hex
- * strings, arrays and names.
- * </p>
- *
- * @param pdf
- * The PDF data.
- * @param index
- * The index of the object's first byte.
- * @return Returns the parse result of the recognized object type.
- * @throws RuntimeException
- * Thrown if the first byte does not start any recognized object.
- */
- public static ParseResult parseUnknownObject(final byte[] pdf, final int index)
- {
-   if (ByteArrayUtils.compareByteArrays(pdf, index, PDFNames.DICT_START_STR))
-   {
-     DictionaryParseResult dpr = parseDictionary(pdf, index);
-
-     // a dictionary followed by the stream keyword is a stream's dictionary
-     int possible_stream_index = skipWhitespace(pdf, dpr.next_index);
-     if (ByteArrayUtils.compareByteArrays(pdf, possible_stream_index, PDFNames.STREAM_STR))
-     {
-       return parseStream(pdf, possible_stream_index, dpr);
-     }
-
-     return dpr;
-   }
-
-   if (ByteArrayUtils.compareByteArrays(pdf, index, PDFNames.NULL_STR))
-   {
-     return parseNull(pdf, index);
-   }
-
-   if (ByteArrayUtils.compareByteArrays(pdf, index, PDFNames.TRUE_STR) || ByteArrayUtils.compareByteArrays(pdf, index, PDFNames.FALSE_STR))
-   {
-     return parseBoolean(pdf, index);
-   }
-
-   final byte first_byte = pdf[index];
-
-   if (isNumeric(first_byte) || isSign(first_byte))
-   {
-     // try to parse a Indirect reference first - if this fails, parse a number
-     if (isIndirectObjectReference(pdf, index))
-     {
-       return parseIndirectObjectReference(pdf, index);
-     }
-
-     return parseNumberFromByteArray(pdf, index);
-   }
-
-   switch (first_byte)
-   {
-     case PDFNames.DELIMITER_STRING_OPEN:
-       return parseLiteralString(pdf, index);
-     case PDFNames.DELIMITER_HEXSTRING_OPEN:
-       return parseHexString(pdf, index);
-     case PDFNames.DELIMITER_ARRAY_OPEN:
-       return parseArray(pdf, index);
-     case PDFNames.DELIMITER_NAME:
-       return parseName(pdf, index);
-     default:
-       // the byte value is quoted on both sides
-       throw new RuntimeException("Unknown first_byte '" + first_byte + "' when parsing an unknown object at index=" + index + ".");
-   }
- }
-
- /**
- * Parses a literal string.
- *
- * <p>
- * A literal string is a string of characters enclosed by '(' and ')'.
- * Balanced pairs of '(' and ')' are allowed within the string. Unbalanced '('
- * or ')' must be escaped as '\(' or '\)'. A backslash always escapes the byte
- * that follows it, so an escaped backslash ('\\') in front of a parenthesis
- * does not escape that parenthesis.
- * </p>
- *
- * @param pdf
- * The PDF data.
- * @param index
- * The index of the opening '('.
- * @return Returns the result of the parsing operation; content_end_index is
- * the index of the closing ')' and next_index the index behind it.
- */
- public static LiteralStringParseResult parseLiteralString(final byte[] pdf,
-     final int index)
- {
-   LiteralStringParseResult lspr = new LiteralStringParseResult();
-   lspr.start_index = index;
-
-   assert pdf[lspr.start_index] == PDFNames.DELIMITER_STRING_OPEN;
-
-   lspr.content_start_index = lspr.start_index + 1;
-
-   int cur_index = lspr.content_start_index;
-   // number of currently open, unescaped inner parentheses
-   int parenthesis_stack = 0;
-   for (;;)
-   {
-     final byte current = pdf[cur_index];
-
-     if (current == '\\')
-     {
-       // Skip the backslash together with the byte it escapes, whatever that
-       // byte is. Treating only '\(' and '\)' as escapes would misread the
-       // second backslash of '\\' as the start of an escape.
-       cur_index += 2;
-       continue;
-     }
-
-     if (current == PDFNames.DELIMITER_STRING_OPEN)
-     {
-       parenthesis_stack++;
-     }
-     else if (current == PDFNames.DELIMITER_STRING_CLOSE)
-     {
-       assert parenthesis_stack >= 0;
-
-       if (parenthesis_stack == 0)
-       {
-         // the parenthesis that closes the string itself
-         break;
-       }
-
-       parenthesis_stack--;
-     }
-
-     cur_index++;
-   }
-
-   lspr.content_end_index = cur_index;
-   assert pdf[lspr.content_end_index] == PDFNames.DELIMITER_STRING_CLOSE;
-
-   lspr.next_index = lspr.content_end_index + 1;
-
-   return lspr;
- }
-
- /**
- * Tells whether the byte is a hexadecimal digit.
- *
- * @param data
- * The byte to test.
- * @return Returns true for '0'-'9', 'a'-'f' and 'A'-'F', false otherwise.
- */
- protected static boolean isHex(final byte data)
- {
-   // The upper-case range ends at 'F'. Ending it at 'f' would also accept
-   // 'G'-'Z' and the punctuation between 'Z' and 'a'.
-   return isNumeric(data) || ('a' <= data && data <= 'f') || ('A' <= data && data <= 'F');
- }
-
- /**
- * Parses a hexadecimal string.
- *
- * <p>
- * Starting behind the opening delimiter, hex digits and whitespace are
- * skipped; the byte that stops the scan is expected to be the closing
- * delimiter (checked by an assertion only).
- * </p>
- *
- * @param pdf
- * The PDF data.
- * @param index
- * The index of the opening delimiter.
- * @return Returns the result of the parsing operation; content_end_index is
- * the index of the byte that ended the scan.
- */
- public static HexStringParseResult parseHexString(final byte[] pdf,
-     final int index)
- {
-   final HexStringParseResult result = new HexStringParseResult();
-   result.start_index = index;
-
-   assert pdf[result.start_index] == PDFNames.DELIMITER_HEXSTRING_OPEN;
-
-   result.content_start_index = result.start_index + 1;
-
-   int position = result.content_start_index;
-   for (;;)
-   {
-     final byte current = pdf[position];
-     if (!isHex(current) && !isWhitespace(current))
-     {
-       break;
-     }
-     position++;
-   }
-
-   result.content_end_index = position;
-   assert pdf[result.content_end_index] == PDFNames.DELIMITER_HEXSTRING_CLOSE;
-
-   result.next_index = result.content_end_index + 1;
-   return result;
- }
-
- /**
- * Parses an array.
- *
- * <p>
- * The elements between the array delimiters are parsed with
- * parseUnknownObject and collected in the result's elements list.
- * </p>
- *
- * @param pdf
- * The PDF data.
- * @param index
- * The index of the opening array delimiter.
- * @return Returns the parse result; content_end_index is the index of the
- * closing delimiter and next_index the index behind it.
- */
- public static ArrayParseResult parseArray(final byte[] pdf, final int index)
- {
-   final ArrayParseResult array = new ArrayParseResult();
-   array.start_index = index;
-   assert pdf[array.start_index] == PDFNames.DELIMITER_ARRAY_OPEN;
-
-   array.content_start_index = array.start_index + 1;
-   array.elements = new ArrayList();
-
-   int position = skipWhitespace(pdf, array.content_start_index);
-   while (pdf[position] != PDFNames.DELIMITER_ARRAY_CLOSE)
-   {
-     final ParseResult element = parseUnknownObject(pdf, position);
-     array.elements.add(element);
-
-     position = skipWhitespace(pdf, element.next_index);
-   }
-
-   array.content_end_index = position;
-   assert pdf[array.content_end_index] == PDFNames.DELIMITER_ARRAY_CLOSE;
-
-   array.next_index = array.content_end_index + 1;
-   return array;
- }
-
- /**
- * Parses a PDF Name.
- *
- * <p>
- * The name consists of the name delimiter followed by the run of bytes for
- * which isRegular holds.
- * </p>
- *
- * @param pdf
- * The PDF data.
- * @param index
- * The index of the name delimiter.
- * @return Returns the parse result; name_start_index is the index behind the
- * delimiter and next_index the first byte that is not regular.
- */
- public static NameParseResult parseName(final byte[] pdf, final int index)
- {
-   final NameParseResult name = new NameParseResult();
-   name.start_index = index;
-
-   assert pdf[name.start_index] == PDFNames.DELIMITER_NAME;
-
-   name.name_start_index = name.start_index + 1;
-   assert isRegular(pdf[name.name_start_index]);
-
-   int end_of_name = name.name_start_index;
-   for (; isRegular(pdf[end_of_name]); end_of_name++)
-   {
-     // just advance to the end of the run of regular bytes
-   }
-   assert !isRegular(pdf[end_of_name]);
-
-   name.next_index = end_of_name;
-   return name;
- }
-
- /**
- * Parses a dictionary.
- *
- * <p>
- * Between the dictionary delimiters, pairs of a name and a value are read.
- * The names and the values are stored in two parallel lists, so that the
- * value of names.get(i) is values.get(i).
- * </p>
- *
- * @param pdf
- * The PDF data.
- * @param index
- * The index of the opening dictionary delimiter.
- * @return Returns the parse result; content_end_index is the index of the
- * closing delimiter and next_index the index behind it.
- */
- public static DictionaryParseResult parseDictionary(final byte[] pdf,
-     final int index)
- {
-   final DictionaryParseResult dictionary = new DictionaryParseResult();
-   dictionary.start_index = index;
-
-   assert ByteArrayUtils.compareByteArrays(pdf, index, PDFNames.DICT_START_STR);
-
-   dictionary.content_start_index = dictionary.start_index + PDFNames.DICT_START_STR.length;
-   dictionary.names = new ArrayList();
-   dictionary.values = new ArrayList();
-
-   int position = skipWhitespace(pdf, dictionary.content_start_index);
-   while (!ByteArrayUtils.compareByteArrays(pdf, position, PDFNames.DICT_END_STR))
-   {
-     final NameParseResult key = parseName(pdf, position);
-     dictionary.names.add(key);
-
-     final ParseResult value = parseUnknownObject(pdf, skipWhitespace(pdf, key.next_index));
-     dictionary.values.add(value);
-
-     position = skipWhitespace(pdf, value.next_index);
-   }
-
-   dictionary.content_end_index = position;
-   assert ByteArrayUtils.compareByteArrays(pdf, dictionary.content_end_index, PDFNames.DICT_END_STR);
-   dictionary.next_index = dictionary.content_end_index + PDFNames.DICT_END_STR.length;
-
-   return dictionary;
- }
-
- /**
- * Parses a stream.
- *
- * <p>
- * The length of the stream content is taken from the /Length entry of the
- * stream dictionary. If that entry is an indirect object reference, the
- * content cannot be delimited yet: the result is returned with
- * content_end_index and next_index set to -1 and has to be parsed again
- * later.
- * </p>
- *
- * @param pdf
- * The PDF data.
- * @param index
- * The index of the stream keyword.
- * @param dpr
- * The DictionaryParseResult of the stream's dictionary. This
- * dictionary must precede the stream keyword.
- * @return Returns the result of this parsing operation.
- */
- public static StreamParseResult parseStream(final byte[] pdf,
-     final int index, final DictionaryParseResult dpr)
- {
-   final StreamParseResult stream = new StreamParseResult();
-   stream.stream_dictionary = dpr;
-   stream.start_index = stream.stream_dictionary.start_index;
-   stream.stream_start_index = index;
-   assert ByteArrayUtils.compareByteArrays(pdf, index, PDFNames.STREAM_STR);
-
-   // assert that the provided dictionary really belongs to this stream
-   assert stream.stream_start_index == skipWhitespace(pdf, stream.stream_dictionary.next_index);
-
-   // see PDF Spec 1.4 chapter 3.2.7: the keyword is followed by LF or CR LF
-   final int after_keyword = stream.stream_start_index + PDFNames.STREAM_STR.length;
-   assert pdf[after_keyword] == PDFNames.WHITESPACE_LF || (pdf[after_keyword] == PDFNames.WHITESPACE_CR && pdf[after_keyword + 1] == PDFNames.WHITESPACE_LF);
-   stream.content_start_index = skipNewline(pdf, after_keyword);
-
-   int length = -1;
-   final int length_position = indexOfName(pdf, stream.stream_dictionary.names, PDFNames.LENGTH_STR);
-   if (length_position >= 0)
-   {
-     final ParseResult length_value = (ParseResult) stream.stream_dictionary.values.get(length_position);
-     if (length_value instanceof IndirectObjectReferenceParseResult)
-     {
-       log.debug("An object stream with indirect length - cannot parse this instantly - parse later again.");
-       stream.content_end_index = -1;
-       stream.next_index = -1;
-       return stream;
-     }
-
-     final NumberParseResult length_number = (NumberParseResult) length_value;
-     assert length_number != null;
-     length = length_number.number;
-   }
-   assert length >= 0;
-
-   stream.content_end_index = stream.content_start_index + length;
-
-   // an end-of-line may separate the content from the endstream keyword
-   int endstream_index = stream.content_end_index;
-   if (isNewline(pdf, endstream_index))
-   {
-     endstream_index = skipWhitespace(pdf, endstream_index);
-   }
-   assert ByteArrayUtils.compareByteArrays(pdf, endstream_index, PDFNames.ENDSTREAM_STR);
-
-   stream.next_index = endstream_index + PDFNames.ENDSTREAM_STR.length;
-   return stream;
- }
-
- /**
- * Parses the null keyword at pdf+index.
- *
- * @param pdf
- * The PDF data.
- * @param index
- * The index of the keyword.
- * @return Returns the parse result; next_index is the index behind the
- * keyword.
- */
- public static NullParseResult parseNull(final byte[] pdf, final int index)
- {
-   final NullParseResult result = new NullParseResult();
-   result.start_index = index;
-
-   assert ByteArrayUtils.compareByteArrays(pdf, result.start_index, PDFNames.NULL_STR);
-
-   final int keyword_length = PDFNames.NULL_STR.length;
-   result.next_index = result.start_index + keyword_length;
-   return result;
- }
-
- /**
- * Looks up the byte offset of a referenced object in an xref section.
- *
- * <p>
- * Only the object number is used for the lookup. The first sub-section whose
- * object number range contains it provides the offset.
- * </p>
- *
- * @param xpr
- * The parsed xref section.
- * @param ior
- * The indirect object reference to resolve.
- * @return Returns the object_offset of the matching xref line, or -1 if no
- * sub-section contains the object number.
- */
- public static int getObjectOffsetFromXRefByIndirectObjectReference(
-     XRefSectionParseResult xpr, IndirectObjectReference ior)
- {
-   for (Iterator it = xpr.xref_subsections.iterator(); it.hasNext();)
-   {
-     final XRefSubSectionParseResult section = (XRefSubSectionParseResult) it.next();
-
-     // position of the object's line within this sub-section, if it has one
-     final int line_position = ior.object_number - section.start_obj_number;
-     if (line_position >= 0 && line_position < section.xref_lines.size())
-     {
-       final XRefLineParseResult line = (XRefLineParseResult) section.xref_lines.get(line_position);
-       return line.object_offset;
-     }
-   }
-
-   return -1;
- }
-
- /**
- * Parses the PDF header at pdf+index.
- *
- * <p>
- * The header consists of a comment holding the version ("major", separator,
- * "minor"), followed by whitespace and a second comment line (the binary
- * characters), which is skipped up to and including its newline.
- * </p>
- *
- * @param pdf
- * The PDF data.
- * @param index
- * The index of the header's comment character.
- * @return Returns the parse result holding the version numbers; next_index is
- * the index behind the newline of the binary characters line.
- */
- public static HeaderParseResult parseHeader(final byte[] pdf, final int index)
- {
-   final HeaderParseResult header = new HeaderParseResult();
-   header.start_index = index;
-
-   assert pdf[header.start_index] == PDFNames.COMMENT;
-   assert ByteArrayUtils.compareByteArrays(pdf, header.start_index + 1, PDFNames.PDF_VERSION_STR);
-
-   // major version
-   header.major_index = header.start_index + 1 + PDFNames.PDF_VERSION_STR.length;
-   final IntegerParseResult major = parseUnsignedInteger(pdf, header.major_index);
-   header.major = major.number;
-   assert header.major >= 1;
-
-   assert pdf[major.next_index] == PDFNames.PDF_VERSION_SEPARATOR;
-
-   // minor version
-   header.minor_index = major.next_index + 1;
-   final IntegerParseResult minor = parseUnsignedInteger(pdf, header.minor_index);
-   header.minor = minor.number;
-   assert header.minor >= 0;
-
-   // binary characters comment
-   assert isWhitespace(pdf[minor.next_index]);
-   header.binary_characters_index = skipWhitespace(pdf, minor.next_index);
-   assert pdf[header.binary_characters_index] == PDFNames.COMMENT;
-
-   header.next_index = skipToNewline(pdf, header.binary_characters_index);
-   return header;
- }
-
- /**
- * Parses a PDF footer.
- *
- * <p>
- * A PDF footer is made of four parts that are parsed one after the other,
- * each starting where the previous one ended: the xref section, the trailer,
- * the startxref section and the EOF marker.
- * </p>
- *
- * @param pdf
- * The PDF data.
- * @param index
- * The index of the xref section.
- * @return Returns the parse result holding the four partial results;
- * next_index is the next_index of the EOF marker.
- *
- * @see FooterParseResult
- */
- public static FooterParseResult parseFooter(final byte[] pdf, final int index)
- {
-   final FooterParseResult footer = new FooterParseResult();
-   footer.start_index = index;
-
-   footer.xpr = PDFUtils.parseXRefSection(pdf, footer.start_index);
-   final int trailer_index = footer.xpr.next_index;
-
-   footer.tpr = PDFUtils.parseTrailer(pdf, trailer_index);
-   final int startxref_index = footer.tpr.next_index;
-
-   footer.sxpr = PDFUtils.parseStartXRef(pdf, startxref_index);
-   final int eof_index = footer.sxpr.next_index;
-
-   footer.eofpr = PDFUtils.parseEOF(pdf, eof_index);
-
-   footer.next_index = footer.eofpr.next_index;
-   return footer;
- }
-
-}
diff --git a/src/main/java/at/knowcenter/wag/exactparser/parsing/results/ArrayParseResult.java b/src/main/java/at/knowcenter/wag/exactparser/parsing/results/ArrayParseResult.java deleted file mode 100644 index 9d0a745..0000000 --- a/src/main/java/at/knowcenter/wag/exactparser/parsing/results/ArrayParseResult.java +++ /dev/null @@ -1,42 +0,0 @@ -/**
- * <copyright> Copyright 2006 by Know-Center, Graz, Austria </copyright>
- * PDF-AS has been contracted by the E-Government Innovation Center EGIZ, a
- * joint initiative of the Federal Chancellery Austria and Graz University of
- * Technology.
- *
- * Licensed under the EUPL, Version 1.1 or - as soon they will be approved by
- * the European Commission - subsequent versions of the EUPL (the "Licence");
- * You may not use this work except in compliance with the Licence.
- * You may obtain a copy of the Licence at:
- * http://www.osor.eu/eupl/
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the Licence is distributed on an "AS IS" basis,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the Licence for the specific language governing permissions and
- * limitations under the Licence.
- *
- * This product combines work with different licenses. See the "NOTICE" text
- * file for details on the various modules and licenses.
- * The "NOTICE" text file is part of the distribution. Any derivative works
- * that you distribute must include a readable copy of the "NOTICE" text file.
- *
- * $Id: ArrayParseResult.java,v 1.1 2006/08/25 17:00:59 wprinz Exp $
- */
-package at.knowcenter.wag.exactparser.parsing.results;
-
-import java.util.List;
-
-/**
- * The result of parsing an array.
- *
- * <p>
- * An array is a container (see ContainerParseResult: "arrays include elements
- * as content"); the parsed entries are kept in the elements list.
- * </p>
- *
- * @see at.knowcenter.wag.exactparser.parsing.results.ContainerParseResult
- *
- * @author wprinz
- */
-public class ArrayParseResult extends ContainerParseResult {
-
- /**
- * The elements of the array; <code>null</code> until assigned.
- * NOTE(review): the list is untyped - presumably it holds one ParseResult per
- * array entry in document order; confirm against the array parser.
- */
- public List elements = null;
-
-
-}
diff --git a/src/main/java/at/knowcenter/wag/exactparser/parsing/results/BooleanParseResult.java b/src/main/java/at/knowcenter/wag/exactparser/parsing/results/BooleanParseResult.java deleted file mode 100644 index e0bc276..0000000 --- a/src/main/java/at/knowcenter/wag/exactparser/parsing/results/BooleanParseResult.java +++ /dev/null @@ -1,38 +0,0 @@ -/**
- * <copyright> Copyright 2006 by Know-Center, Graz, Austria </copyright>
- * PDF-AS has been contracted by the E-Government Innovation Center EGIZ, a
- * joint initiative of the Federal Chancellery Austria and Graz University of
- * Technology.
- *
- * Licensed under the EUPL, Version 1.1 or - as soon they will be approved by
- * the European Commission - subsequent versions of the EUPL (the "Licence");
- * You may not use this work except in compliance with the Licence.
- * You may obtain a copy of the Licence at:
- * http://www.osor.eu/eupl/
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the Licence is distributed on an "AS IS" basis,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the Licence for the specific language governing permissions and
- * limitations under the Licence.
- *
- * This product combines work with different licenses. See the "NOTICE" text
- * file for details on the various modules and licenses.
- * The "NOTICE" text file is part of the distribution. Any derivative works
- * that you distribute must include a readable copy of the "NOTICE" text file.
- *
- * $Id: BooleanParseResult.java,v 1.1 2006/08/25 17:00:59 wprinz Exp $
- */
-package at.knowcenter.wag.exactparser.parsing.results;
-
-/**
- * Parse result of parsing a boolean value.
- *
- * @author wprinz
- */
-public class BooleanParseResult extends ParseResult
-{
-
- /**
- * The parsed boolean value. It is initialized to <code>false</code>, so a
- * result whose value was never assigned cannot be told apart from a parsed
- * false.
- */
- public boolean value = false;
-
-}
diff --git a/src/main/java/at/knowcenter/wag/exactparser/parsing/results/ContainerParseResult.java b/src/main/java/at/knowcenter/wag/exactparser/parsing/results/ContainerParseResult.java deleted file mode 100644 index 1974ade..0000000 --- a/src/main/java/at/knowcenter/wag/exactparser/parsing/results/ContainerParseResult.java +++ /dev/null @@ -1,45 +0,0 @@ -/**
- * <copyright> Copyright 2006 by Know-Center, Graz, Austria </copyright>
- * PDF-AS has been contracted by the E-Government Innovation Center EGIZ, a
- * joint initiative of the Federal Chancellery Austria and Graz University of
- * Technology.
- *
- * Licensed under the EUPL, Version 1.1 or - as soon they will be approved by
- * the European Commission - subsequent versions of the EUPL (the "Licence");
- * You may not use this work except in compliance with the Licence.
- * You may obtain a copy of the Licence at:
- * http://www.osor.eu/eupl/
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the Licence is distributed on an "AS IS" basis,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the Licence for the specific language governing permissions and
- * limitations under the Licence.
- *
- * This product combines work with different licenses. See the "NOTICE" text
- * file for details on the various modules and licenses.
- * The "NOTICE" text file is part of the distribution. Any derivative works
- * that you distribute must include a readable copy of the "NOTICE" text file.
- *
- * $Id: ContainerParseResult.java,v 1.1 2006/08/25 17:00:59 wprinz Exp $
- */
-package at.knowcenter.wag.exactparser.parsing.results;
-
-/**
- * Base class of container parse results.
- *
- * <p>
- * Containers are types that include some content.
- * E.g. literal strings include string data as content,
- * arrays include elements as content etc.
- * </p>
- *
- * <p>
- * Both content indices are initialized to -1, which marks them as not yet
- * assigned.
- * </p>
- *
- * @author wprinz
- */
-public class ContainerParseResult extends ParseResult {
-
- /**
- * The index at which the content starts; -1 until assigned.
- * NOTE(review): presumably the first byte after the opening delimiter -
- * confirm against the parsers that fill this in.
- */
- public int content_start_index = -1;
- /**
- * The index at which the content ends; -1 until assigned.
- * NOTE(review): whether this index is inclusive or exclusive cannot be told
- * from this class - confirm against the parsers that fill this in.
- */
- public int content_end_index = -1;
-
-
-}
diff --git a/src/main/java/at/knowcenter/wag/exactparser/parsing/results/DictionaryParseResult.java b/src/main/java/at/knowcenter/wag/exactparser/parsing/results/DictionaryParseResult.java deleted file mode 100644 index 47101e0..0000000 --- a/src/main/java/at/knowcenter/wag/exactparser/parsing/results/DictionaryParseResult.java +++ /dev/null @@ -1,41 +0,0 @@ -/**
- * <copyright> Copyright 2006 by Know-Center, Graz, Austria </copyright>
- * PDF-AS has been contracted by the E-Government Innovation Center EGIZ, a
- * joint initiative of the Federal Chancellery Austria and Graz University of
- * Technology.
- *
- * Licensed under the EUPL, Version 1.1 or - as soon they will be approved by
- * the European Commission - subsequent versions of the EUPL (the "Licence");
- * You may not use this work except in compliance with the Licence.
- * You may obtain a copy of the Licence at:
- * http://www.osor.eu/eupl/
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the Licence is distributed on an "AS IS" basis,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the Licence for the specific language governing permissions and
- * limitations under the Licence.
- *
- * This product combines work with different licenses. See the "NOTICE" text
- * file for details on the various modules and licenses.
- * The "NOTICE" text file is part of the distribution. Any derivative works
- * that you distribute must include a readable copy of the "NOTICE" text file.
- *
- * $Id: DictionaryParseResult.java,v 1.1 2006/08/25 17:00:59 wprinz Exp $
- */
-package at.knowcenter.wag.exactparser.parsing.results;
-
-import java.util.List;
-
-/**
- * The result of parsing a dictionary.
- *
- * <p>
- * The entries are kept in two separate untyped lists, one for the names and
- * one for the values. Both are <code>null</code> until assigned.
- * </p>
- *
- * @author wprinz
- */
-public class DictionaryParseResult extends ContainerParseResult
-{
-
- /**
- * The names (keys) of the dictionary entries; <code>null</code> until
- * assigned.
- * NOTE(review): presumably parallel to values, i.e. names.get(i) belongs to
- * values.get(i) - confirm against the dictionary parser.
- */
- public List names = null;
-
- /**
- * The values of the dictionary entries; <code>null</code> until assigned.
- * NOTE(review): the element type is not visible here - presumably
- * ParseResult instances; confirm against the dictionary parser.
- */
- public List values = null;
-}
diff --git a/src/main/java/at/knowcenter/wag/exactparser/parsing/results/EOFParseResult.java b/src/main/java/at/knowcenter/wag/exactparser/parsing/results/EOFParseResult.java deleted file mode 100644 index dea1d22..0000000 --- a/src/main/java/at/knowcenter/wag/exactparser/parsing/results/EOFParseResult.java +++ /dev/null @@ -1,47 +0,0 @@ -/**
- * <copyright> Copyright 2006 by Know-Center, Graz, Austria </copyright>
- * PDF-AS has been contracted by the E-Government Innovation Center EGIZ, a
- * joint initiative of the Federal Chancellery Austria and Graz University of
- * Technology.
- *
- * Licensed under the EUPL, Version 1.1 or - as soon they will be approved by
- * the European Commission - subsequent versions of the EUPL (the "Licence");
- * You may not use this work except in compliance with the Licence.
- * You may obtain a copy of the Licence at:
- * http://www.osor.eu/eupl/
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the Licence is distributed on an "AS IS" basis,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the Licence for the specific language governing permissions and
- * limitations under the Licence.
- *
- * This product combines work with different licenses. See the "NOTICE" text
- * file for details on the various modules and licenses.
- * The "NOTICE" text file is part of the distribution. Any derivative works
- * that you distribute must include a readable copy of the "NOTICE" text file.
- *
- * $Id: EOFParseResult.java,v 1.1 2006/08/25 17:00:59 wprinz Exp $
- */
-package at.knowcenter.wag.exactparser.parsing.results;
-
-/**
- * The result of parsing the End Of File marker.
- *
- * @author wprinz
- */
-public class EOFParseResult extends ParseResult
-{
-
- /**
- * The index of the byte after the EOF marker; -1 until assigned.
- *
- * <p>
- * A newline is not required after the EOF marker, but if one is present it
- * is considered to be part of the marker. In that case eof_end_index marks
- * this newline.
- * If eof_end_index == next_index, then no newline is present.
- * </p>
- */
- public int eof_end_index = -1;
-}
diff --git a/src/main/java/at/knowcenter/wag/exactparser/parsing/results/FooterParseResult.java b/src/main/java/at/knowcenter/wag/exactparser/parsing/results/FooterParseResult.java deleted file mode 100644 index 2a52aa6..0000000 --- a/src/main/java/at/knowcenter/wag/exactparser/parsing/results/FooterParseResult.java +++ /dev/null @@ -1,53 +0,0 @@ -/**
- * <copyright> Copyright 2006 by Know-Center, Graz, Austria </copyright>
- * PDF-AS has been contracted by the E-Government Innovation Center EGIZ, a
- * joint initiative of the Federal Chancellery Austria and Graz University of
- * Technology.
- *
- * Licensed under the EUPL, Version 1.1 or - as soon they will be approved by
- * the European Commission - subsequent versions of the EUPL (the "Licence");
- * You may not use this work except in compliance with the Licence.
- * You may obtain a copy of the Licence at:
- * http://www.osor.eu/eupl/
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the Licence is distributed on an "AS IS" basis,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the Licence for the specific language governing permissions and
- * limitations under the Licence.
- *
- * This product combines work with different licenses. See the "NOTICE" text
- * file for details on the various modules and licenses.
- * The "NOTICE" text file is part of the distribution. Any derivative works
- * that you distribute must include a readable copy of the "NOTICE" text file.
- *
- * $Id: FooterParseResult.java,v 1.1 2006/08/25 17:00:59 wprinz Exp $
- */
-package at.knowcenter.wag.exactparser.parsing.results;
-
-
-/**
- * The result of parsing a PDF footer block.
- *
- * <p>
- * A PDF footer block starts with the xref table followed by the trailer, the
- * startxref and finally the EOF marker. Usually the footer should be at the end
- * of the file. All object offsets in the footer's xref table should be before
- * the footer itself. Nevertheless, there are PDF Writers (e.g. Microsoft Word)
- * that put the footer at the beginning of the document so that all indirect
- * objects are after the EOF marker.
- * </p>
- *
- * <p>
- * The fields are declared in a different order than the parts appear in the
- * footer; the order in the file is xpr, tpr, sxpr, eofpr. All fields are
- * <code>null</code> until assigned.
- * </p>
- *
- * @author wprinz
- */
-public class FooterParseResult extends ParseResult
-{
-
- /**
- * The result of parsing the startxref part (third part of the footer).
- */
- public StartXRefParseResult sxpr = null;
-
- /**
- * The result of parsing the EOF marker (last part of the footer).
- */
- public EOFParseResult eofpr = null;
-
- /**
- * The result of parsing the xref section (first part of the footer).
- */
- public XRefSectionParseResult xpr = null;
-
- /**
- * The result of parsing the trailer (second part of the footer).
- */
- public TrailerParseResult tpr = null;
-}
diff --git a/src/main/java/at/knowcenter/wag/exactparser/parsing/results/HeaderParseResult.java b/src/main/java/at/knowcenter/wag/exactparser/parsing/results/HeaderParseResult.java deleted file mode 100644 index 3befda3..0000000 --- a/src/main/java/at/knowcenter/wag/exactparser/parsing/results/HeaderParseResult.java +++ /dev/null @@ -1,48 +0,0 @@ -/**
- * <copyright> Copyright 2006 by Know-Center, Graz, Austria </copyright>
- * PDF-AS has been contracted by the E-Government Innovation Center EGIZ, a
- * joint initiative of the Federal Chancellery Austria and Graz University of
- * Technology.
- *
- * Licensed under the EUPL, Version 1.1 or - as soon they will be approved by
- * the European Commission - subsequent versions of the EUPL (the "Licence");
- * You may not use this work except in compliance with the Licence.
- * You may obtain a copy of the Licence at:
- * http://www.osor.eu/eupl/
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the Licence is distributed on an "AS IS" basis,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the Licence for the specific language governing permissions and
- * limitations under the Licence.
- *
- * This product combines work with different licenses. See the "NOTICE" text
- * file for details on the various modules and licenses.
- * The "NOTICE" text file is part of the distribution. Any derivative works
- * that you distribute must include a readable copy of the "NOTICE" text file.
- *
- * $Id: HeaderParseResult.java,v 1.1 2006/08/25 17:00:59 wprinz Exp $
- */
-package at.knowcenter.wag.exactparser.parsing.results;
-
-/**
- * The result of parsing the PDF header.
- *
- * <p>
- * The header contains the PDF version and is usually followed by some binary
- * characters.
- * </p>
- *
- * <p>
- * All fields are initialized to -1, which marks them as not yet assigned.
- * </p>
- *
- * @author wprinz
- */
-public class HeaderParseResult extends ParseResult
-{
- /**
- * The index at which the major version number starts.
- */
- public int major_index = -1;
- /**
- * The index at which the minor version number starts, i.e. the index after
- * the version separator.
- */
- public int minor_index = -1;
-
- /**
- * The major version number.
- */
- public int major = -1;
- /**
- * The minor version number.
- */
- public int minor = -1;
-
- /**
- * The index of the comment that holds the binary characters, found by
- * skipping the whitespace after the minor version number.
- */
- public int binary_characters_index = -1;
-
-}
diff --git a/src/main/java/at/knowcenter/wag/exactparser/parsing/results/HexStringParseResult.java b/src/main/java/at/knowcenter/wag/exactparser/parsing/results/HexStringParseResult.java deleted file mode 100644 index 27dbf70..0000000 --- a/src/main/java/at/knowcenter/wag/exactparser/parsing/results/HexStringParseResult.java +++ /dev/null @@ -1,36 +0,0 @@ -/**
- * <copyright> Copyright 2006 by Know-Center, Graz, Austria </copyright>
- * PDF-AS has been contracted by the E-Government Innovation Center EGIZ, a
- * joint initiative of the Federal Chancellery Austria and Graz University of
- * Technology.
- *
- * Licensed under the EUPL, Version 1.1 or - as soon they will be approved by
- * the European Commission - subsequent versions of the EUPL (the "Licence");
- * You may not use this work except in compliance with the Licence.
- * You may obtain a copy of the Licence at:
- * http://www.osor.eu/eupl/
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the Licence is distributed on an "AS IS" basis,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the Licence for the specific language governing permissions and
- * limitations under the Licence.
- *
- * This product combines work with different licenses. See the "NOTICE" text
- * file for details on the various modules and licenses.
- * The "NOTICE" text file is part of the distribution. Any derivative works
- * that you distribute must include a readable copy of the "NOTICE" text file.
- *
- * $Id: HexStringParseResult.java,v 1.1 2006/08/25 17:00:59 wprinz Exp $
- */
-package at.knowcenter.wag.exactparser.parsing.results;
-
-/**
- * The result of parsing a hex string.
- *
- * <p>
- * This class adds no fields of its own; it only carries the content indices
- * inherited from ContainerParseResult.
- * </p>
- *
- * @see at.knowcenter.wag.exactparser.parsing.results.LiteralStringParseResult
- *
- * @author wprinz
- */
-public class HexStringParseResult extends ContainerParseResult {
-}
diff --git a/src/main/java/at/knowcenter/wag/exactparser/parsing/results/IndirectObjectReferenceParseResult.java b/src/main/java/at/knowcenter/wag/exactparser/parsing/results/IndirectObjectReferenceParseResult.java deleted file mode 100644 index 797678e..0000000 --- a/src/main/java/at/knowcenter/wag/exactparser/parsing/results/IndirectObjectReferenceParseResult.java +++ /dev/null @@ -1,44 +0,0 @@ -/**
- * <copyright> Copyright 2006 by Know-Center, Graz, Austria </copyright>
- * PDF-AS has been contracted by the E-Government Innovation Center EGIZ, a
- * joint initiative of the Federal Chancellery Austria and Graz University of
- * Technology.
- *
- * Licensed under the EUPL, Version 1.1 or - as soon they will be approved by
- * the European Commission - subsequent versions of the EUPL (the "Licence");
- * You may not use this work except in compliance with the Licence.
- * You may obtain a copy of the Licence at:
- * http://www.osor.eu/eupl/
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the Licence is distributed on an "AS IS" basis,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the Licence for the specific language governing permissions and
- * limitations under the Licence.
- *
- * This product combines work with different licenses. See the "NOTICE" text
- * file for details on the various modules and licenses.
- * The "NOTICE" text file is part of the distribution. Any derivative works
- * that you distribute must include a readable copy of the "NOTICE" text file.
- *
- * $Id: IndirectObjectReferenceParseResult.java,v 1.1 2006/08/25 17:00:59 wprinz Exp $
- */
-package at.knowcenter.wag.exactparser.parsing.results;
-
-import at.knowcenter.wag.exactparser.parsing.IndirectObjectReference;
-
-/**
- * The ParseResult of parsing an indirect object reference.
- *
- * @author wprinz
- */
-public class IndirectObjectReferenceParseResult extends ParseResult {
-
-  /**
-   * The parsed reference. It has no initializer, so it is <code>null</code>
-   * until assigned.
-   */
-  public IndirectObjectReference ior;
-
-  /**
-   * Renders the reference as the string form of ior followed by " R".
-   *
-   * @return Returns the textual form of the reference. Throws a
-   *         NullPointerException if ior has not been assigned.
-   */
-  //@Override
-  public String toString()
-  {
-    final String reference = ior.toString();
-    return reference + " R";
-  }
-}
diff --git a/src/main/java/at/knowcenter/wag/exactparser/parsing/results/IntegerParseResult.java b/src/main/java/at/knowcenter/wag/exactparser/parsing/results/IntegerParseResult.java deleted file mode 100644 index 48ea7d2..0000000 --- a/src/main/java/at/knowcenter/wag/exactparser/parsing/results/IntegerParseResult.java +++ /dev/null @@ -1,36 +0,0 @@ -/**
- * <copyright> Copyright 2006 by Know-Center, Graz, Austria </copyright>
- * PDF-AS has been contracted by the E-Government Innovation Center EGIZ, a
- * joint initiative of the Federal Chancellery Austria and Graz University of
- * Technology.
- *
- * Licensed under the EUPL, Version 1.1 or - as soon they will be approved by
- * the European Commission - subsequent versions of the EUPL (the "Licence");
- * You may not use this work except in compliance with the Licence.
- * You may obtain a copy of the Licence at:
- * http://www.osor.eu/eupl/
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the Licence is distributed on an "AS IS" basis,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the Licence for the specific language governing permissions and
- * limitations under the Licence.
- *
- * This product combines work with different licenses. See the "NOTICE" text
- * file for details on the various modules and licenses.
- * The "NOTICE" text file is part of the distribution. Any derivative works
- * that you distribute must include a readable copy of the "NOTICE" text file.
- *
- * $Id: IntegerParseResult.java,v 1.1 2006/08/25 17:00:59 wprinz Exp $
- */
-package at.knowcenter.wag.exactparser.parsing.results;
-
-/**
- * The result of parsing an integer, e.g. as returned by the unsigned integer
- * parser used for the PDF header's version numbers.
- *
- * @author wprinz
- */
-public class IntegerParseResult extends ParseResult
-{
-
- /**
- * The parsed integer value. It has no initializer, so it is 0 until assigned.
- */
- public int number;
-
-}
diff --git a/src/main/java/at/knowcenter/wag/exactparser/parsing/results/LiteralStringParseResult.java b/src/main/java/at/knowcenter/wag/exactparser/parsing/results/LiteralStringParseResult.java deleted file mode 100644 index 60fc277..0000000 --- a/src/main/java/at/knowcenter/wag/exactparser/parsing/results/LiteralStringParseResult.java +++ /dev/null @@ -1,37 +0,0 @@ -/**
- * <copyright> Copyright 2006 by Know-Center, Graz, Austria </copyright>
- * PDF-AS has been contracted by the E-Government Innovation Center EGIZ, a
- * joint initiative of the Federal Chancellery Austria and Graz University of
- * Technology.
- *
- * Licensed under the EUPL, Version 1.1 or - as soon they will be approved by
- * the European Commission - subsequent versions of the EUPL (the "Licence");
- * You may not use this work except in compliance with the Licence.
- * You may obtain a copy of the Licence at:
- * http://www.osor.eu/eupl/
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the Licence is distributed on an "AS IS" basis,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the Licence for the specific language governing permissions and
- * limitations under the Licence.
- *
- * This product combines work with different licenses. See the "NOTICE" text
- * file for details on the various modules and licenses.
- * The "NOTICE" text file is part of the distribution. Any derivative works
- * that you distribute must include a readable copy of the "NOTICE" text file.
- *
- * $Id: LiteralStringParseResult.java,v 1.1 2006/08/25 17:00:59 wprinz Exp $
- */
-package at.knowcenter.wag.exactparser.parsing.results;
-
-/**
- * The result of parsing a simple string (ASCII string).
- *
- * <p>
- * This class adds no fields of its own; it only carries the content indices
- * inherited from ContainerParseResult.
- * </p>
- *
- * @see at.knowcenter.wag.exactparser.parsing.results.HexStringParseResult
- *
- * @author wprinz
- */
-public class LiteralStringParseResult extends ContainerParseResult {
-
-}
diff --git a/src/main/java/at/knowcenter/wag/exactparser/parsing/results/NameParseResult.java b/src/main/java/at/knowcenter/wag/exactparser/parsing/results/NameParseResult.java deleted file mode 100644 index e564285..0000000 --- a/src/main/java/at/knowcenter/wag/exactparser/parsing/results/NameParseResult.java +++ /dev/null @@ -1,35 +0,0 @@ -/**
- * <copyright> Copyright 2006 by Know-Center, Graz, Austria </copyright>
- * PDF-AS has been contracted by the E-Government Innovation Center EGIZ, a
- * joint initiative of the Federal Chancellery Austria and Graz University of
- * Technology.
- *
- * Licensed under the EUPL, Version 1.1 or - as soon they will be approved by
- * the European Commission - subsequent versions of the EUPL (the "Licence");
- * You may not use this work except in compliance with the Licence.
- * You may obtain a copy of the Licence at:
- * http://www.osor.eu/eupl/
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the Licence is distributed on an "AS IS" basis,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the Licence for the specific language governing permissions and
- * limitations under the Licence.
- *
- * This product combines work with different licenses. See the "NOTICE" text
- * file for details on the various modules and licenses.
- * The "NOTICE" text file is part of the distribution. Any derivative works
- * that you distribute must include a readable copy of the "NOTICE" text file.
- *
- * $Id: NameParseResult.java,v 1.1 2006/08/25 17:00:59 wprinz Exp $
- */
-package at.knowcenter.wag.exactparser.parsing.results;
-
-/**
- * The result of parsing a name.
- *
- * @author wprinz
- */
-public class NameParseResult extends ParseResult {
-
- /**
- * The index at which the name starts; -1 until assigned.
- * NOTE(review): presumably the first byte of the name itself, after its
- * leading delimiter - confirm against the name parser.
- */
- public int name_start_index = -1;
-
-}
diff --git a/src/main/java/at/knowcenter/wag/exactparser/parsing/results/NullParseResult.java b/src/main/java/at/knowcenter/wag/exactparser/parsing/results/NullParseResult.java deleted file mode 100644 index 49d9dfb..0000000 --- a/src/main/java/at/knowcenter/wag/exactparser/parsing/results/NullParseResult.java +++ /dev/null @@ -1,34 +0,0 @@ -/**
- * <copyright> Copyright 2006 by Know-Center, Graz, Austria </copyright>
- * PDF-AS has been contracted by the E-Government Innovation Center EGIZ, a
- * joint initiative of the Federal Chancellery Austria and Graz University of
- * Technology.
- *
- * Licensed under the EUPL, Version 1.1 or - as soon they will be approved by
- * the European Commission - subsequent versions of the EUPL (the "Licence");
- * You may not use this work except in compliance with the Licence.
- * You may obtain a copy of the Licence at:
- * http://www.osor.eu/eupl/
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the Licence is distributed on an "AS IS" basis,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the Licence for the specific language governing permissions and
- * limitations under the Licence.
- *
- * This product combines work with different licenses. See the "NOTICE" text
- * file for details on the various modules and licenses.
- * The "NOTICE" text file is part of the distribution. Any derivative works
- * that you distribute must include a readable copy of the "NOTICE" text file.
- *
- * $Id: NullParseResult.java,v 1.1 2006/08/25 17:00:59 wprinz Exp $
- */
-package at.knowcenter.wag.exactparser.parsing.results;
-
-/**
- * The result of parsing a "null".
- *
- * <p>
- * This class adds no fields of its own; the position of the parsed "null" is
- * described by the start_index and next_index the parser sets on the result.
- * </p>
- *
- * @author wprinz
- */
-public class NullParseResult extends ParseResult {
-}
diff --git a/src/main/java/at/knowcenter/wag/exactparser/parsing/results/NumberParseResult.java b/src/main/java/at/knowcenter/wag/exactparser/parsing/results/NumberParseResult.java deleted file mode 100644 index e88596c..0000000 --- a/src/main/java/at/knowcenter/wag/exactparser/parsing/results/NumberParseResult.java +++ /dev/null @@ -1,41 +0,0 @@ -/**
- * <copyright> Copyright 2006 by Know-Center, Graz, Austria </copyright>
- * PDF-AS has been contracted by the E-Government Innovation Center EGIZ, a
- * joint initiative of the Federal Chancellery Austria and Graz University of
- * Technology.
- *
- * Licensed under the EUPL, Version 1.1 or - as soon they will be approved by
- * the European Commission - subsequent versions of the EUPL (the "Licence");
- * You may not use this work except in compliance with the Licence.
- * You may obtain a copy of the Licence at:
- * http://www.osor.eu/eupl/
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the Licence is distributed on an "AS IS" basis,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the Licence for the specific language governing permissions and
- * limitations under the Licence.
- *
- * This product combines work with different licenses. See the "NOTICE" text
- * file for details on the various modules and licenses.
- * The "NOTICE" text file is part of the distribution. Any derivative works
- * that you distribute must include a readable copy of the "NOTICE" text file.
- *
- * $Id: NumberParseResult.java,v 1.1 2006/08/25 17:00:59 wprinz Exp $
- */
-package at.knowcenter.wag.exactparser.parsing.results;
-
-/**
- * The ParseResult of parsing a number.
- *
- * <p>
- * The result carries both an integer field and a floating point field.
- * NOTE(review): which of the two is valid for a given result is not visible
- * in this class - confirm against the number parser.
- * </p>
- *
- * @author wprinz
- */
-public class NumberParseResult extends ParseResult {
- /**
- * The (signed) integer number.
- */
- public int number;
-
- // TODO: make better
- // Floating point counterpart of number; no initializer, so 0.0f until assigned.
- public float floating;
-}
\ No newline at end of file diff --git a/src/main/java/at/knowcenter/wag/exactparser/parsing/results/ObjectHeaderParseResult.java b/src/main/java/at/knowcenter/wag/exactparser/parsing/results/ObjectHeaderParseResult.java deleted file mode 100644 index 0729108..0000000 --- a/src/main/java/at/knowcenter/wag/exactparser/parsing/results/ObjectHeaderParseResult.java +++ /dev/null @@ -1,51 +0,0 @@ -/**
- * <copyright> Copyright 2006 by Know-Center, Graz, Austria </copyright>
- * PDF-AS has been contracted by the E-Government Innovation Center EGIZ, a
- * joint initiative of the Federal Chancellery Austria and Graz University of
- * Technology.
- *
- * Licensed under the EUPL, Version 1.1 or - as soon they will be approved by
- * the European Commission - subsequent versions of the EUPL (the "Licence");
- * You may not use this work except in compliance with the Licence.
- * You may obtain a copy of the Licence at:
- * http://www.osor.eu/eupl/
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the Licence is distributed on an "AS IS" basis,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the Licence for the specific language governing permissions and
- * limitations under the Licence.
- *
- * This product combines work with different licenses. See the "NOTICE" text
- * file for details on the various modules and licenses.
- * The "NOTICE" text file is part of the distribution. Any derivative works
- * that you distribute must include a readable copy of the "NOTICE" text file.
- *
- * $Id: ObjectHeaderParseResult.java,v 1.1 2006/08/25 17:00:59 wprinz Exp $
- */
-package at.knowcenter.wag.exactparser.parsing.results;
-
-/**
- * The ParseResult of parsing an object header.
- *
- * <p>
- * Note that this information regards only the object header and not the
- * contents of the object itself. (meaning: next points to the contents and not
- * to the end of the whole object)
- * </p>
- *
- * @author Administrator
- */
-public class ObjectHeaderParseResult extends ParseResult {
-
- /**
- * The object's object number; -1 until assigned.
- */
- public int object_number = -1;
-
- /**
- * The object's generation number; -1 until assigned.
- */
- public int generation_number = -1;
-
-}
diff --git a/src/main/java/at/knowcenter/wag/exactparser/parsing/results/ObjectParseResult.java b/src/main/java/at/knowcenter/wag/exactparser/parsing/results/ObjectParseResult.java deleted file mode 100644 index 2fdde34..0000000 --- a/src/main/java/at/knowcenter/wag/exactparser/parsing/results/ObjectParseResult.java +++ /dev/null @@ -1,50 +0,0 @@ -/**
- * <copyright> Copyright 2006 by Know-Center, Graz, Austria </copyright>
- * PDF-AS has been contracted by the E-Government Innovation Center EGIZ, a
- * joint initiative of the Federal Chancellery Austria and Graz University of
- * Technology.
- *
- * Licensed under the EUPL, Version 1.1 or - as soon they will be approved by
- * the European Commission - subsequent versions of the EUPL (the "Licence");
- * You may not use this work except in compliance with the Licence.
- * You may obtain a copy of the Licence at:
- * http://www.osor.eu/eupl/
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the Licence is distributed on an "AS IS" basis,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the Licence for the specific language governing permissions and
- * limitations under the Licence.
- *
- * This product combines work with different licenses. See the "NOTICE" text
- * file for details on the various modules and licenses.
- * The "NOTICE" text file is part of the distribution. Any derivative works
- * that you distribute must include a readable copy of the "NOTICE" text file.
- *
- * $Id: ObjectParseResult.java,v 1.1 2006/08/25 17:00:59 wprinz Exp $
- */
-package at.knowcenter.wag.exactparser.parsing.results;
-
-
-/**
- * The ParseResult of parsing an indirect object.
- * @author wprinz
- */
-public class ObjectParseResult extends ParseResult {
-
- public int content_index = -1; // NOTE(review): presumably the index where the object's content begins; initialized to -1; confirm
- public int end_of_content_index = -1; // NOTE(review): presumably the index where the object's content ends; confirm whether inclusive or exclusive
-
- public ObjectHeaderParseResult header = null; // parse result of this object's header; null until assigned
-
-/* enum ObjectType
- {
- UNKNOWN_TO_PARSER,
- OBJ_DICTIONARY
- };
-
- public ObjectType object_type = ObjectType.UNKNOWN_TO_PARSER;
- */
- public ParseResult object = null; // NOTE(review): presumably the parse result of the object's content; null until assigned; confirm
-
-}
diff --git a/src/main/java/at/knowcenter/wag/exactparser/parsing/results/ParseResult.java b/src/main/java/at/knowcenter/wag/exactparser/parsing/results/ParseResult.java deleted file mode 100644 index 12c4b19..0000000 --- a/src/main/java/at/knowcenter/wag/exactparser/parsing/results/ParseResult.java +++ /dev/null @@ -1,50 +0,0 @@ -/**
- * <copyright> Copyright 2006 by Know-Center, Graz, Austria </copyright>
- * PDF-AS has been contracted by the E-Government Innovation Center EGIZ, a
- * joint initiative of the Federal Chancellery Austria and Graz University of
- * Technology.
- *
- * Licensed under the EUPL, Version 1.1 or - as soon they will be approved by
- * the European Commission - subsequent versions of the EUPL (the "Licence");
- * You may not use this work except in compliance with the Licence.
- * You may obtain a copy of the Licence at:
- * http://www.osor.eu/eupl/
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the Licence is distributed on an "AS IS" basis,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the Licence for the specific language governing permissions and
- * limitations under the Licence.
- *
- * This product combines work with different licenses. See the "NOTICE" text
- * file for details on the various modules and licenses.
- * The "NOTICE" text file is part of the distribution. Any derivative works
- * that you distribute must include a readable copy of the "NOTICE" text file.
- *
- * $Id: ParseResult.java,v 1.1 2006/08/25 17:00:59 wprinz Exp $
- */
-package at.knowcenter.wag.exactparser.parsing.results;
-
-/**
- * Base class of all parse results.
- *
- * @author wprinz
- */
-public class ParseResult {
-
- /**
- * The start index, where the parser started its work and where the parsed
- * entity begins (initialized to -1).
- */
- public int start_index = -1;
-
- /**
- * The index of the next entity following the currently parsed entity.
- *
- * <p>
- * This is the index of the first byte not belonging to this entity anymore.
- * </p>
- */
- public int next_index = -1; // initialized to -1, like start_index
-
-}
diff --git a/src/main/java/at/knowcenter/wag/exactparser/parsing/results/StartXRefParseResult.java b/src/main/java/at/knowcenter/wag/exactparser/parsing/results/StartXRefParseResult.java deleted file mode 100644 index a1f6792..0000000 --- a/src/main/java/at/knowcenter/wag/exactparser/parsing/results/StartXRefParseResult.java +++ /dev/null @@ -1,36 +0,0 @@ -/**
- * <copyright> Copyright 2006 by Know-Center, Graz, Austria </copyright>
- * PDF-AS has been contracted by the E-Government Innovation Center EGIZ, a
- * joint initiative of the Federal Chancellery Austria and Graz University of
- * Technology.
- *
- * Licensed under the EUPL, Version 1.1 or - as soon they will be approved by
- * the European Commission - subsequent versions of the EUPL (the "Licence");
- * You may not use this work except in compliance with the Licence.
- * You may obtain a copy of the Licence at:
- * http://www.osor.eu/eupl/
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the Licence is distributed on an "AS IS" basis,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the Licence for the specific language governing permissions and
- * limitations under the Licence.
- *
- * This product combines work with different licenses. See the "NOTICE" text
- * file for details on the various modules and licenses.
- * The "NOTICE" text file is part of the distribution. Any derivative works
- * that you distribute must include a readable copy of the "NOTICE" text file.
- *
- * $Id: StartXRefParseResult.java,v 1.1 2006/08/25 17:00:59 wprinz Exp $
- */
-package at.knowcenter.wag.exactparser.parsing.results;
-
-
-/**
- * The ParseResult of parsing a startxref entry.
- * @author wprinz
- */
-public class StartXRefParseResult extends ParseResult {
-
- public int xref_index; // NOTE(review): presumably the xref position named by the startxref entry; no initializer, so it defaults to 0; confirm
-}
diff --git a/src/main/java/at/knowcenter/wag/exactparser/parsing/results/StreamParseResult.java b/src/main/java/at/knowcenter/wag/exactparser/parsing/results/StreamParseResult.java deleted file mode 100644 index 16da12a..0000000 --- a/src/main/java/at/knowcenter/wag/exactparser/parsing/results/StreamParseResult.java +++ /dev/null @@ -1,41 +0,0 @@ -/**
- * <copyright> Copyright 2006 by Know-Center, Graz, Austria </copyright>
- * PDF-AS has been contracted by the E-Government Innovation Center EGIZ, a
- * joint initiative of the Federal Chancellery Austria and Graz University of
- * Technology.
- *
- * Licensed under the EUPL, Version 1.1 or - as soon they will be approved by
- * the European Commission - subsequent versions of the EUPL (the "Licence");
- * You may not use this work except in compliance with the Licence.
- * You may obtain a copy of the Licence at:
- * http://www.osor.eu/eupl/
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the Licence is distributed on an "AS IS" basis,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the Licence for the specific language governing permissions and
- * limitations under the Licence.
- *
- * This product combines work with different licenses. See the "NOTICE" text
- * file for details on the various modules and licenses.
- * The "NOTICE" text file is part of the distribution. Any derivative works
- * that you distribute must include a readable copy of the "NOTICE" text file.
- *
- * $Id: StreamParseResult.java,v 1.1 2006/08/25 17:00:59 wprinz Exp $
- */
-package at.knowcenter.wag.exactparser.parsing.results;
-
-
-/**
- * The result of parsing a stream.
- *
- * @see at.knowcenter.wag.exactparser.parsing.results.LiteralStringParseResult
- *
- * @author wprinz
- */
-public class StreamParseResult extends ContainerParseResult {
-
- public DictionaryParseResult stream_dictionary = null; // NOTE(review): presumably the parsed dictionary belonging to the stream; null until assigned; confirm
-
- public int stream_start_index = -1; // NOTE(review): presumably the index where the stream data begins; initialized to -1; confirm
-}
diff --git a/src/main/java/at/knowcenter/wag/exactparser/parsing/results/TrailerParseResult.java b/src/main/java/at/knowcenter/wag/exactparser/parsing/results/TrailerParseResult.java deleted file mode 100644 index 4589ee8..0000000 --- a/src/main/java/at/knowcenter/wag/exactparser/parsing/results/TrailerParseResult.java +++ /dev/null @@ -1,84 +0,0 @@ -/**
- * <copyright> Copyright 2006 by Know-Center, Graz, Austria </copyright>
- * PDF-AS has been contracted by the E-Government Innovation Center EGIZ, a
- * joint initiative of the Federal Chancellery Austria and Graz University of
- * Technology.
- *
- * Licensed under the EUPL, Version 1.1 or - as soon they will be approved by
- * the European Commission - subsequent versions of the EUPL (the "Licence");
- * You may not use this work except in compliance with the Licence.
- * You may obtain a copy of the Licence at:
- * http://www.osor.eu/eupl/
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the Licence is distributed on an "AS IS" basis,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the Licence for the specific language governing permissions and
- * limitations under the Licence.
- *
- * This product combines work with different licenses. See the "NOTICE" text
- * file for details on the various modules and licenses.
- * The "NOTICE" text file is part of the distribution. Any derivative works
- * that you distribute must include a readable copy of the "NOTICE" text file.
- *
- * $Id: TrailerParseResult.java,v 1.1 2006/08/25 17:00:59 wprinz Exp $
- */
-package at.knowcenter.wag.exactparser.parsing.results;
-
-/**
- * The ParseResult of parsing the trailer.
- *
- * @author wprinz
- */
-public class TrailerParseResult extends ParseResult {
-
- public int contents_index = -1; // NOTE(review): presumably the index where the trailer contents begin; initialized to -1; confirm
- public int contents_end_index = -1; // NOTE(review): presumably the index where the trailer contents end; confirm whether inclusive or exclusive
-
- public DictionaryParseResult dpr = null; // NOTE(review): presumably the parsed trailer dictionary; null until assigned; confirm
-
- public IndirectObjectReferenceParseResult info; // NOTE(review): presumably the trailer's "/Info" reference; confirm
-
- public IndirectObjectReferenceParseResult root; // NOTE(review): presumably the trailer's "/Root" reference; confirm
-
- /**
- * The content of the "/Size" entry.
- */
- public int size;
-
- /**
- * Tells whether this PDF footer has a predecessor (as specified by
- * the /Prev entry).
- */
- public boolean has_predecessor = false;
-
- /**
- * The index of the predecessor (initialized to -1).
- *
- * <p>
- * Only valid if has_predecessor is true.
- * </p>
- * <p>
- * Use getPrev and setPrev to access this member variable.
- * </p>
- *
- * @see #getPrev()
- * @see #setPrev(int)
- */
- private int prev = -1;
-
- public int getPrev() { // returns prev; callers are expected to have has_predecessor == true
- assert has_predecessor; // only checked when assertions are enabled (java -ea)
- return prev;
- }
-
- public void setPrev(int prev) { // stores prev; has_predecessor must already be true
- assert has_predecessor : "Set has_predecessor to true first.";
- this.prev = prev;
- }
-
-
-
-
-
-}
diff --git a/src/main/java/at/knowcenter/wag/exactparser/parsing/results/XRefLineParseResult.java b/src/main/java/at/knowcenter/wag/exactparser/parsing/results/XRefLineParseResult.java deleted file mode 100644 index 8039153..0000000 --- a/src/main/java/at/knowcenter/wag/exactparser/parsing/results/XRefLineParseResult.java +++ /dev/null @@ -1,40 +0,0 @@ -/**
- * <copyright> Copyright 2006 by Know-Center, Graz, Austria </copyright>
- * PDF-AS has been contracted by the E-Government Innovation Center EGIZ, a
- * joint initiative of the Federal Chancellery Austria and Graz University of
- * Technology.
- *
- * Licensed under the EUPL, Version 1.1 or - as soon they will be approved by
- * the European Commission - subsequent versions of the EUPL (the "Licence");
- * You may not use this work except in compliance with the Licence.
- * You may obtain a copy of the Licence at:
- * http://www.osor.eu/eupl/
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the Licence is distributed on an "AS IS" basis,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the Licence for the specific language governing permissions and
- * limitations under the Licence.
- *
- * This product combines work with different licenses. See the "NOTICE" text
- * file for details on the various modules and licenses.
- * The "NOTICE" text file is part of the distribution. Any derivative works
- * that you distribute must include a readable copy of the "NOTICE" text file.
- *
- * $Id: XRefLineParseResult.java,v 1.1 2006/08/25 17:00:59 wprinz Exp $
- */
-package at.knowcenter.wag.exactparser.parsing.results;
-
-/**
- * The ParseResult of parsing a single xref line.
- *
- * @author wprinz
- */
-public class XRefLineParseResult extends ParseResult {
-
- public int object_offset; // NOTE(review): presumably the offset field of the xref line; confirm
-
- public int generation_number; // NOTE(review): presumably the generation number field of the xref line; confirm
-
- public byte object_usage; // NOTE(review): presumably the in-use/free marker byte of the xref line ('n' or 'f'); confirm
-}
diff --git a/src/main/java/at/knowcenter/wag/exactparser/parsing/results/XRefSectionParseResult.java b/src/main/java/at/knowcenter/wag/exactparser/parsing/results/XRefSectionParseResult.java deleted file mode 100644 index eedea81..0000000 --- a/src/main/java/at/knowcenter/wag/exactparser/parsing/results/XRefSectionParseResult.java +++ /dev/null @@ -1,66 +0,0 @@ -/**
- * <copyright> Copyright 2006 by Know-Center, Graz, Austria </copyright>
- * PDF-AS has been contracted by the E-Government Innovation Center EGIZ, a
- * joint initiative of the Federal Chancellery Austria and Graz University of
- * Technology.
- *
- * Licensed under the EUPL, Version 1.1 or - as soon they will be approved by
- * the European Commission - subsequent versions of the EUPL (the "Licence");
- * You may not use this work except in compliance with the Licence.
- * You may obtain a copy of the Licence at:
- * http://www.osor.eu/eupl/
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the Licence is distributed on an "AS IS" basis,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the Licence for the specific language governing permissions and
- * limitations under the Licence.
- *
- * This product combines work with different licenses. See the "NOTICE" text
- * file for details on the various modules and licenses.
- * The "NOTICE" text file is part of the distribution. Any derivative works
- * that you distribute must include a readable copy of the "NOTICE" text file.
- *
- * $Id: XRefSectionParseResult.java,v 1.1 2006/08/25 17:00:59 wprinz Exp $
- */
-package at.knowcenter.wag.exactparser.parsing.results;
-
-import java.util.ArrayList;
-import java.util.List;
-
-/**
- * The ParseResult of an xref parsing operation.
- *
- * <p>
- * This contains one whole xref table section. An xref section starts with the
- * word xref and contains one or more xref sub-sections.
- * </p>
- * <p>
- * Due to Incremental Updates, there may be more than one xref section in a
- * document. All xref sections together are called the xref table. Using this
- * aggregated xref table, an application has full access to all indirect
- * objects in the document.
- * </p>
- * <p>
- * In many PDF libraries and applications one xref section is also informally
- * called xref table.
- * </p>
- *
- * @author wprinz
- */
-public class XRefSectionParseResult extends ParseResult
-{
-
- public List xref_subsections = new ArrayList(); // raw list; appendXRefSubSection adds XRefSubSectionParseResult elements to it
-
- /**
- * Appends another cross-reference (xref) sub-section to this xref section.
- *
- * @param xref_section
- * The xref sub-section to be appended.
- */
- public void appendXRefSubSection(XRefSubSectionParseResult xref_section)
- {
- xref_subsections.add(xref_section);
- }
-}
diff --git a/src/main/java/at/knowcenter/wag/exactparser/parsing/results/XRefSubSectionParseResult.java b/src/main/java/at/knowcenter/wag/exactparser/parsing/results/XRefSubSectionParseResult.java deleted file mode 100644 index ec19004..0000000 --- a/src/main/java/at/knowcenter/wag/exactparser/parsing/results/XRefSubSectionParseResult.java +++ /dev/null @@ -1,59 +0,0 @@ -/**
- * <copyright> Copyright 2006 by Know-Center, Graz, Austria </copyright>
- * PDF-AS has been contracted by the E-Government Innovation Center EGIZ, a
- * joint initiative of the Federal Chancellery Austria and Graz University of
- * Technology.
- *
- * Licensed under the EUPL, Version 1.1 or - as soon they will be approved by
- * the European Commission - subsequent versions of the EUPL (the "Licence");
- * You may not use this work except in compliance with the Licence.
- * You may obtain a copy of the Licence at:
- * http://www.osor.eu/eupl/
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the Licence is distributed on an "AS IS" basis,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the Licence for the specific language governing permissions and
- * limitations under the Licence.
- *
- * This product combines work with different licenses. See the "NOTICE" text
- * file for details on the various modules and licenses.
- * The "NOTICE" text file is part of the distribution. Any derivative works
- * that you distribute must include a readable copy of the "NOTICE" text file.
- *
- * $Id: XRefSubSectionParseResult.java,v 1.1 2006/08/25 17:00:59 wprinz Exp $
- */
-package at.knowcenter.wag.exactparser.parsing.results;
-
-import java.util.ArrayList;
-import java.util.List;
-
-/**
- * Contains an xref sub-section.
- *
- * <p>
- * An xref sub-section is an ordered list of xref lines. The object numbers of the
- * corresponding objects are numbered incrementally.
- * </p>
- * <p>
- * xref sub-sections are important in Incremental Updates because they allow
- * specifying explicitly which objects (object numbers) are contained in the xref.
- * </p>
- *
- * @author wprinz
- */
-public class XRefSubSectionParseResult extends ParseResult {
-
- public int start_obj_number; // NOTE(review): presumably the object number of the first xref line in this sub-section; confirm
-
- public int num_objects; // number of xref lines expected; appendXRefLine asserts the list stays within this count
-
- public List xref_lines = new ArrayList(); // raw list; appendXRefLine adds XRefLineParseResult elements to it
-
- public void appendXRefLine(XRefLineParseResult xref_line) { // appends one line to xref_lines
- assert xref_lines.size() < num_objects; // set num_objects first; only checked when assertions are enabled (java -ea)
-
- xref_lines.add(xref_line);
- }
-
-}
|