aboutsummaryrefslogtreecommitdiff
path: root/pdf-as-lib/src/main/java/at/knowcenter/wag/exactparser/parsing/PDFNames.java
diff options
context:
space:
mode:
Diffstat (limited to 'pdf-as-lib/src/main/java/at/knowcenter/wag/exactparser/parsing/PDFNames.java')
-rw-r--r--pdf-as-lib/src/main/java/at/knowcenter/wag/exactparser/parsing/PDFNames.java184
1 files changed, 184 insertions, 0 deletions
diff --git a/pdf-as-lib/src/main/java/at/knowcenter/wag/exactparser/parsing/PDFNames.java b/pdf-as-lib/src/main/java/at/knowcenter/wag/exactparser/parsing/PDFNames.java
new file mode 100644
index 0000000..0ee5863
--- /dev/null
+++ b/pdf-as-lib/src/main/java/at/knowcenter/wag/exactparser/parsing/PDFNames.java
@@ -0,0 +1,184 @@
+/**
+ * <copyright> Copyright 2006 by Know-Center, Graz, Austria </copyright>
+ * PDF-AS has been contracted by the E-Government Innovation Center EGIZ, a
+ * joint initiative of the Federal Chancellery Austria and Graz University of
+ * Technology.
+ *
+ * Licensed under the EUPL, Version 1.1 or - as soon they will be approved by
+ * the European Commission - subsequent versions of the EUPL (the "Licence");
+ * You may not use this work except in compliance with the Licence.
+ * You may obtain a copy of the Licence at:
+ * http://www.osor.eu/eupl/
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the Licence is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the Licence for the specific language governing permissions and
+ * limitations under the Licence.
+ *
+ * This product combines work with different licenses. See the "NOTICE" text
+ * file for details on the various modules and licenses.
+ * The "NOTICE" text file is part of the distribution. Any derivative works
+ * that you distribute must include a readable copy of the "NOTICE" text file.
+ *
+ * $Id: PDFNames.java,v 1.1 2006/08/25 17:00:59 wprinz Exp $
+ */
+package at.knowcenter.wag.exactparser.parsing;
+
+/**
+ * Abstract class that contains several frequently used PDF constants.
+ *
+ * <p>
+ * The PDF specification partitions the character set (ASCII) into three groups:
+ * </p>
+ * <ul>
+ * <li>Whitespace characters (space, tab, etc., but also newline and carriage
+ * return) used to separate tokens. Unless otherwise specified a group of
+ * consecutive whitespace characters behaves like a single whitespace character.</li>
+ * <li>Delimiter characters ('(', '&lt;', etc., but also '/', which precedes the
+ * PDF Key Names in dictionaries) that are used to enclose semantic groups.</li>
+ * <li>Regular characters are by definition all remaining characters that are
+ * neither whitespace nor delimiters.</li>
+ * </ul>
+ * <p>
+ * By default a newline consists of CR followed by LF, but LF alone and even CR
+ * alone are also allowed. It seems that all variations of newlines may exist
+ * within a single document.
+ * </p>
+ *
+ * @author wprinz
+ */
+public abstract class PDFNames
+{
+
+ /**
+ * The standard encoding of PDF tokens and names.
+ *
+ * <p>
+ * PDF is usually an 8 bit format. Binary data etc. can be saved just as it
+ * is. Nevertheless all PDF tokens ('xref', 'obj', etc.) and PDF Names
+ * ('/Size', '/Pages', '/Type', etc.) must be in 7 bit US-ASCII encoding.
+ * </p>
+ * <p>
+ * Therefore, whenever using Java Strings to convert e.g. numbers to such PDF
+ * tokens use this encoding constant.
+ * </p>
+ * <p>
+ * The same applies for PDF token/name byte arrays that are converted back to
+ * Java Strings.
+ * </p>
+ */
+ public static final String PDF_STANDARD_ENCODING = "US-ASCII";
+
+ // Whitespace characters (one constant per row of the table below)
+
+ // TABLE 3.1 White-space characters
+ // DECIMAL HEXADECIMAL OCTAL NAME
+ // 0 00 000 Null (NUL)
+ // 9 09 011 Tab (HT)
+ // 10 0A 012 Line feed (LF)
+ // 12 0C 014 Form feed (FF)
+ // 13 0D 015 Carriage return (CR)
+ // 32 20 040 Space (SP)
+
+ public static final byte WHITESPACE_NUL = 0x00;
+
+ public static final byte WHITESPACE_HT = 0x09;
+
+ public static final byte WHITESPACE_LF = 0x0A;
+
+ public static final byte WHITESPACE_FF = 0x0C;
+
+ public static final byte WHITESPACE_CR = 0x0D;
+
+ public static final byte WHITESPACE_SP = 0x20;
+
+ public static final byte[] WHITESPACE_CHARACTERS = { WHITESPACE_NUL,
+ WHITESPACE_HT, WHITESPACE_LF, WHITESPACE_FF, WHITESPACE_CR, WHITESPACE_SP }; // all six whitespace bytes; array contents are mutable, do not modify
+
+ // comment character
+
+ public static final byte COMMENT = '%';
+
+ // PDF-version: the literal bytes "PDF-" and the '.' separator character
+
+ public static final byte[] PDF_VERSION_STR = { 'P', 'D', 'F', '-' };
+
+ public static final byte PDF_VERSION_SEPARATOR = '.';
+
+ // delimiter characters; NOTE(review): '%' is only defined as COMMENT and is not part of DELIMITER_CHARACTERS - confirm this is intended
+
+ public static final byte DELIMITER_STRING_OPEN = '(';
+
+ public static final byte DELIMITER_STRING_CLOSE = ')';
+
+ public static final byte DELIMITER_HEXSTRING_OPEN = '<';
+
+ public static final byte DELIMITER_HEXSTRING_CLOSE = '>';
+
+ public static final byte DELIMITER_ARRAY_OPEN = '[';
+
+ public static final byte DELIMITER_ARRAY_CLOSE = ']';
+
+ public static final byte DELIMITER_CURLY_OPEN = '{';
+
+ public static final byte DELIMITER_CURLY_CLOSE = '}';
+
+ public static final byte DELIMITER_NAME = '/';
+
+ public static final byte[] DELIMITER_CHARACTERS = { DELIMITER_STRING_OPEN,
+ DELIMITER_STRING_CLOSE, DELIMITER_HEXSTRING_OPEN,
+ DELIMITER_HEXSTRING_CLOSE, DELIMITER_ARRAY_OPEN, DELIMITER_ARRAY_CLOSE,
+ DELIMITER_CURLY_OPEN, DELIMITER_CURLY_CLOSE, DELIMITER_NAME }; // the nine DELIMITER_* bytes declared above
+
+ // Footer keywords: 'xref', 'trailer', 'startxref' and the '%%EOF' marker
+
+ public static final byte[] XREF_STR = { 'x', 'r', 'e', 'f' };
+
+ public static final byte[] TRAILER_STR = { 't', 'r', 'a', 'i', 'l', 'e', 'r' };
+
+ public static final byte[] STARTXREF_STR = { 's', 't', 'a', 'r', 't', 'x',
+ 'r', 'e', 'f' };
+
+ public static final byte[] EOF_STR = { '%', '%', 'E', 'O', 'F' };
+
+ // objects: keywords and literal tokens as raw byte sequences
+
+ public static final byte[] OBJ_STR = { 'o', 'b', 'j' };
+
+ public static final byte[] ENDOBJ_STR = { 'e', 'n', 'd', 'o', 'b', 'j' };
+
+ public static final byte[] DICT_START_STR = { DELIMITER_HEXSTRING_OPEN,
+ DELIMITER_HEXSTRING_OPEN }; // i.e. "<<"
+
+ public static final byte[] DICT_END_STR = { DELIMITER_HEXSTRING_CLOSE,
+ DELIMITER_HEXSTRING_CLOSE }; // i.e. ">>"
+
+ public static final byte[] STREAM_STR = { 's', 't', 'r', 'e', 'a', 'm' };
+
+ public static final byte[] ENDSTREAM_STR = { 'e', 'n', 'd', 's', 't', 'r',
+ 'e', 'a', 'm' };
+
+ public static final byte[] NULL_STR = { 'n', 'u', 'l', 'l' };
+
+ public static final byte[] TRUE_STR = { 't', 'r', 'u', 'e' };
+
+ public static final byte[] FALSE_STR = { 'f', 'a', 'l', 's', 'e' };
+
+ // indirect object references
+
+ public static final byte[] REFERENCE_STR = { 'R' };
+
+ // Dictionary keys (stored without the leading DELIMITER_NAME '/')
+
+ public static final byte[] SIZE_STR = { 'S', 'i', 'z', 'e' };
+
+ public static final byte[] PREV_STR = { 'P', 'r', 'e', 'v' };
+
+ public static final byte[] ROOT_STR = { 'R', 'o', 'o', 't' };
+
+ public static final byte[] INFO_STR = { 'I', 'n', 'f', 'o' };
+
+ public static final byte[] LENGTH_STR = { 'L', 'e', 'n', 'g', 't', 'h' };
+
+}