From 6025b6016517c6d898d8957d1d7e03ba71431912 Mon Sep 17 00:00:00 2001 From: tknall Date: Fri, 1 Dec 2006 12:20:24 +0000 Subject: Initial import of release 2.2. git-svn-id: https://joinup.ec.europa.eu/svn/pdf-as/trunk@4 7b5415b0-85f9-ee4d-85bd-d5d0c3b42d1c --- .../java/com/lowagie/text/pdf/PRTokeniser.java | 585 +++++++++++++++++++++ 1 file changed, 585 insertions(+) create mode 100644 src/main/java/com/lowagie/text/pdf/PRTokeniser.java (limited to 'src/main/java/com/lowagie/text/pdf/PRTokeniser.java') diff --git a/src/main/java/com/lowagie/text/pdf/PRTokeniser.java b/src/main/java/com/lowagie/text/pdf/PRTokeniser.java new file mode 100644 index 0000000..9aaa881 --- /dev/null +++ b/src/main/java/com/lowagie/text/pdf/PRTokeniser.java @@ -0,0 +1,585 @@ +/* + * $Id: PRTokeniser.java,v 1.51 2005/12/22 18:29:21 psoares33 Exp $ + * + * Copyright 2001, 2002 by Paulo Soares. + * + * The contents of this file are subject to the Mozilla Public License Version 1.1 + * (the "License"); you may not use this file except in compliance with the License. + * You may obtain a copy of the License at http://www.mozilla.org/MPL/ + * + * Software distributed under the License is distributed on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License + * for the specific language governing rights and limitations under the License. + * + * The Original Code is 'iText, a free JAVA-PDF library'. + * + * The Initial Developer of the Original Code is Bruno Lowagie. Portions created by + * the Initial Developer are Copyright (C) 1999, 2000, 2001, 2002 by Bruno Lowagie. + * All Rights Reserved. + * Co-Developer of the code is Paulo Soares. Portions created by the Co-Developer + * are Copyright (C) 2000, 2001, 2002 by Paulo Soares. All Rights Reserved. + * + * Contributor(s): all the names of the contributors are added in the source code + * where applicable. + * + * Alternatively, the contents of this file may be used under the terms of the + * LGPL license (the "GNU LIBRARY GENERAL PUBLIC LICENSE"), in which case the + * provisions of LGPL are applicable instead of those above. If you wish to + * allow use of your version of this file only under the terms of the LGPL + * License and not to allow others to use your version of this file under + * the MPL, indicate your decision by deleting the provisions above and + * replace them with the notice and other provisions required by the LGPL. + * If you do not delete the provisions above, a recipient may use your version + * of this file under either the MPL or the GNU LIBRARY GENERAL PUBLIC LICENSE. + * + * This library is free software; you can redistribute it and/or modify it + * under the terms of the MPL as stated above or under the terms of the GNU + * Library General Public License as published by the Free Software Foundation; + * either version 2 of the License, or any later version. + * + * This library is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU Library general Public License for more + * details. + * + * If you didn't download this code from the following link, you should check if + * you aren't using an obsolete version: + * http://www.lowagie.com/iText/ + */ + +package com.lowagie.text.pdf; + +import java.io.*; +/** + * + * @author Paulo Soares (psoares@consiste.pt) + */ +public class PRTokeniser { + + public static final int TK_NUMBER = 1; + public static final int TK_STRING = 2; + public static final int TK_NAME = 3; + public static final int TK_COMMENT = 4; + public static final int TK_START_ARRAY = 5; + public static final int TK_END_ARRAY = 6; + public static final int TK_START_DIC = 7; + public static final int TK_END_DIC = 8; + public static final int TK_REF = 9; + public static final int TK_OTHER = 10; + public static final boolean delims[] = { + true, true, false, false, false, false, false, false, false, false, + true, true, false, true, true, false, false, false, false, false, + false, false, false, false, false, false, false, false, false, false, + false, false, false, true, false, false, false, false, true, false, + false, true, true, false, false, false, false, false, true, false, + false, false, false, false, false, false, false, false, false, false, + false, true, false, true, false, false, false, false, false, false, + false, false, false, false, false, false, false, false, false, false, + false, false, false, false, false, false, false, false, false, false, + false, false, true, false, true, false, false, false, false, false, + false, false, false, false, false, false, false, false, false, false, + false, false, false, false, false, false, false, false, false, false, + false, false, false, false, false, false, false, false, false, false, + false, false, false, false, false, false, false, false, false, false, + false, false, false, false, false, false, false, false, false, false, + false, false, false, false, false, false, false, false, false, false, + false, false, false, false, false, false, false, false, false, false, + false, false, false, false, false, false, false, false, false, false, + false, false, false, false, false, false, false, false, false, false, + false, false, false, false, false, false, false, false, false, false, + false, false, false, false, false, false, false, false, false, false, + false, false, false, false, false, false, false, false, false, false, + false, false, false, false, false, false, false, false, false, false, + false, false, false, false, false, false, false, false, false, false, + false, false, false, false, false, false, false, false, false, false, + false, false, false, false, false, false, false}; + + static final String EMPTY = ""; + + + protected RandomAccessFileOrArray file; + protected int type; + protected String stringValue; + protected int reference; + protected int generation; + protected boolean hexString; + + private static final int LINE_SEGMENT_SIZE = 256; + + public PRTokeniser(String filename) throws IOException { + file = new RandomAccessFileOrArray(filename); + } + + public PRTokeniser(byte pdfIn[]) { + file = new RandomAccessFileOrArray(pdfIn); + } + + public PRTokeniser(RandomAccessFileOrArray file) { + this.file = file; + } + + public void seek(int pos) throws IOException { + file.seek(pos); + } + + public int getFilePointer() throws IOException { + return file.getFilePointer(); + } + + public void close() throws IOException { + file.close(); + } + + public int length() throws IOException { + return file.length(); + } + + public int read() throws IOException { + return file.read(); + } + + public RandomAccessFileOrArray getSafeFile() { + return new RandomAccessFileOrArray(file); + } + + public RandomAccessFileOrArray getFile() { + return file; + } + + public String readString(int size) throws IOException { + StringBuffer buf = new StringBuffer(); + int ch; + while ((size--) > 0) { + ch = file.read(); + if (ch == -1) + break; + buf.append((char)ch); + } + return buf.toString(); + } + + public static final boolean isWhitespace(int ch) { + return (ch == 0 || ch == 9 || ch == 10 || ch == 12 || ch == 13 || ch == 32); + } + + public static final boolean isDelimiter(int ch) { + return (ch == '(' || ch == ')' || ch == '<' || ch == '>' || ch == '[' || ch == ']' || ch == '/' || ch == '%'); + } + + public static final boolean isDelimiterWhitespace(int ch) { + return delims[ch + 1]; + } + + public int getTokenType() { + return type; + } + + public String getStringValue() { + return stringValue; + } + + public int getReference() { + return reference; + } + + public int getGeneration() { + return generation; + } + + public void backOnePosition(int ch) throws IOException { + if (ch != -1) + file.pushBack((byte)ch); + } + + public void throwError(String error) throws IOException { + throw new IOException(error + " at file pointer " + file.getFilePointer()); + } + + public char checkPdfHeader() throws IOException { + file.setStartOffset(0); + String str = readString(1024); + int idx = str.indexOf("%PDF-1."); + if (idx < 0) + throw new IOException("PDF header signature not found."); + file.setStartOffset(idx); + return str.charAt(idx + 7); + } + + public void checkFdfHeader() throws IOException { + file.setStartOffset(0); + String str = readString(1024); + int idx = str.indexOf("%FDF-1.2"); + if (idx < 0) + throw new IOException("FDF header signature not found."); + file.setStartOffset(idx); + } + + public int getStartxref() throws IOException { + int size = Math.min(1024, file.length()); + int pos = file.length() - size; + file.seek(pos); + String str = readString(1024); + int idx = str.lastIndexOf("startxref"); + if (idx < 0) + throw new IOException("PDF startxref not found."); + return pos + idx; + } + + public static int getHex(int v) { + if (v >= '0' && v <= '9') + return v - '0'; + if (v >= 'A' && v <= 'F') + return v - 'A' + 10; + if (v >= 'a' && v <= 'f') + return v - 'a' + 10; + return -1; + } + + public void nextValidToken() throws IOException { + int level = 0; + String n1 = null; + String n2 = null; + int ptr = 0; + while (nextToken()) { + if (type == TK_COMMENT) + continue; + switch (level) { + case 0: + { + if (type != TK_NUMBER) + return; + ptr = file.getFilePointer(); + n1 = stringValue; + ++level; + break; + } + case 1: + { + if (type != TK_NUMBER) { + file.seek(ptr); + type = TK_NUMBER; + stringValue = n1; + return; + } + n2 = stringValue; + ++level; + break; + } + default: + { + if (type != TK_OTHER || !stringValue.equals("R")) { + file.seek(ptr); + type = TK_NUMBER; + stringValue = n1; + return; + } + type = TK_REF; + reference = Integer.parseInt(n1); + generation = Integer.parseInt(n2); + return; + } + } + } + throwError("Unexpected end of file"); + } + + public boolean nextToken() throws IOException { + StringBuffer outBuf = null; + stringValue = EMPTY; + int ch = 0; + do { + ch = file.read(); + } while (ch != -1 && isWhitespace(ch)); + if (ch == -1) + return false; + switch (ch) { + case '[': + type = TK_START_ARRAY; + break; + case ']': + type = TK_END_ARRAY; + break; + case '/': + { + outBuf = new StringBuffer(); + type = TK_NAME; + while (true) { + ch = file.read(); + if (delims[ch + 1]) + break; + if (ch == '#') { + ch = (getHex(file.read()) << 4) + getHex(file.read()); + } + outBuf.append((char)ch); + } + backOnePosition(ch); + break; + } + case '>': + ch = file.read(); + if (ch != '>') + throwError("'>' not expected"); + type = TK_END_DIC; + break; + case '<': + { + int v1 = file.read(); + if (v1 == '<') { + type = TK_START_DIC; + break; + } + outBuf = new StringBuffer(); + type = TK_STRING; + hexString = true; + int v2 = 0; + while (true) { + while (isWhitespace(v1)) + v1 = file.read(); + if (v1 == '>') + break; + v1 = getHex(v1); + if (v1 < 0) + break; + v2 = file.read(); + while (isWhitespace(v2)) + v2 = file.read(); + if (v2 == '>') { + ch = v1 << 4; + outBuf.append((char)ch); + break; + } + v2 = getHex(v2); + if (v2 < 0) + break; + ch = (v1 << 4) + v2; + outBuf.append((char)ch); + v1 = file.read(); + } + if (v1 < 0 || v2 < 0) + throwError("Error reading string"); + break; + } + case '%': + type = TK_COMMENT; + do { + ch = file.read(); + } while (ch != -1 && ch != '\r' && ch != '\n'); + break; + case '(': + { + outBuf = new StringBuffer(); + type = TK_STRING; + hexString = false; + int nesting = 0; + while (true) { + ch = file.read(); + if (ch == -1) + break; + if (ch == '(') { + ++nesting; + } + else if (ch == ')') { + --nesting; + } + else if (ch == '\\') { + boolean lineBreak = false; + ch = file.read(); + switch (ch) { + case 'n': + ch = '\n'; + break; + case 'r': + ch = '\r'; + break; + case 't': + ch = '\t'; + break; + case 'b': + ch = '\b'; + break; + case 'f': + ch = '\f'; + break; + case '(': + case ')': + case '\\': + break; + case '\r': + lineBreak = true; + ch = file.read(); + if (ch != '\n') + backOnePosition(ch); + break; + case '\n': + lineBreak = true; + break; + default: + { + if (ch < '0' || ch > '7') { + break; + } + int octal = ch - '0'; + ch = file.read(); + if (ch < '0' || ch > '7') { + backOnePosition(ch); + ch = octal; + break; + } + octal = (octal << 3) + ch - '0'; + ch = file.read(); + if (ch < '0' || ch > '7') { + backOnePosition(ch); + ch = octal; + break; + } + octal = (octal << 3) + ch - '0'; + ch = octal & 0xff; + break; + } + } + if (lineBreak) + continue; + if (ch < 0) + break; + } + else if (ch == '\r') { + ch = file.read(); + if (ch < 0) + break; + if (ch != '\n') { + backOnePosition(ch); + ch = '\n'; + } + } + if (nesting == -1) + break; + outBuf.append((char)ch); + } + if (ch == -1) + throwError("Error reading string"); + break; + } + default: + { + outBuf = new StringBuffer(); + if (ch == '-' || ch == '+' || ch == '.' || (ch >= '0' && ch <= '9')) { + type = TK_NUMBER; + do { + outBuf.append((char)ch); + ch = file.read(); + } while (ch != -1 && ((ch >= '0' && ch <= '9') || ch == '.')); + } + else { + type = TK_OTHER; + do { + outBuf.append((char)ch); + ch = file.read(); + } while (!delims[ch + 1]); + } + backOnePosition(ch); + break; + } + } + if (outBuf != null) + stringValue = outBuf.toString(); + return true; + } + + public int intValue() { + return Integer.parseInt(stringValue); + } + + public boolean readLineSegment(byte input[]) throws IOException { + int c = -1; + boolean eol = false; + int ptr = 0; + int len = input.length; + // ssteward, pdftk-1.10, 040922: + // skip initial whitespace; added this because PdfReader.rebuildXref() + // assumes that line provided by readLineSegment does not have init. whitespace; + if ( ptr < len ) { + while ( isWhitespace( (c = read()) ) ); + } + while ( !eol && ptr < len ) { + switch (c) { + case -1: + case '\n': + eol = true; + break; + case '\r': + eol = true; + int cur = getFilePointer(); + if ((read()) != '\n') { + seek(cur); + } + break; + default: + input[ptr++] = (byte)c; + break; + } + + // break loop? do it before we read() again + if( eol || len <= ptr ) { + break; + } + else { + c = read(); + } + } + if (ptr >= len) { + eol = false; + while (!eol) { + switch (c = read()) { + case -1: + case '\n': + eol = true; + break; + case '\r': + eol = true; + int cur = getFilePointer(); + if ((read()) != '\n') { + seek(cur); + } + break; + } + } + } + + if ((c == -1) && (ptr == 0)) { + return false; + } + if (ptr + 2 <= len) { + input[ptr++] = (byte)' '; + input[ptr] = (byte)'X'; + } + return true; + } + + public static int[] checkObjectStart(byte line[]) { + try { + PRTokeniser tk = new PRTokeniser(line); + int num = 0; + int gen = 0; + if (!tk.nextToken() || tk.getTokenType() != TK_NUMBER) + return null; + num = tk.intValue(); + if (!tk.nextToken() || tk.getTokenType() != TK_NUMBER) + return null; + gen = tk.intValue(); + if (!tk.nextToken()) + return null; + if (!tk.getStringValue().equals("obj")) + return null; + return new int[]{num, gen}; + } + catch (Exception ioe) { + // empty on purpose + } + return null; + } + + public boolean isHexString() { + return this.hexString; + } + +} -- cgit v1.2.3