From c68ad0ec056b37c82debebcecfcde1866d61b4d9 Mon Sep 17 00:00:00 2001 From: tknall Date: Tue, 25 Nov 2008 12:03:13 +0000 Subject: Removing pdfbox from source. git-svn-id: https://joinup.ec.europa.eu/svn/pdf-as/trunk@301 7b5415b0-85f9-ee4d-85bd-d5d0c3b42d1c --- .../java/org/pdfbox/pdfparser/PDFStreamParser.java | 403 --------------------- 1 file changed, 403 deletions(-) delete mode 100644 src/main/java/org/pdfbox/pdfparser/PDFStreamParser.java (limited to 'src/main/java/org/pdfbox/pdfparser/PDFStreamParser.java') diff --git a/src/main/java/org/pdfbox/pdfparser/PDFStreamParser.java b/src/main/java/org/pdfbox/pdfparser/PDFStreamParser.java deleted file mode 100644 index d59c5a4..0000000 --- a/src/main/java/org/pdfbox/pdfparser/PDFStreamParser.java +++ /dev/null @@ -1,403 +0,0 @@ -/** - * Copyright (c) 2003-2004, www.pdfbox.org - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials provided with the distribution. - * 3. Neither the name of pdfbox; nor the names of its - * contributors may be used to endorse or promote products derived from this - * software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * http://www.pdfbox.org
 *
 */
package org.pdfbox.pdfparser;

import java.io.ByteArrayOutputStream;
import java.io.InputStream;
import java.io.IOException;
import java.io.RandomAccessFile;

import java.util.ArrayList;
import java.util.List;

import org.pdfbox.cos.COSBase;
import org.pdfbox.cos.COSBoolean;
import org.pdfbox.cos.COSDictionary;
import org.pdfbox.cos.COSName;
import org.pdfbox.cos.COSNull;
import org.pdfbox.cos.COSNumber;
import org.pdfbox.cos.COSObject;
import org.pdfbox.cos.COSStream;

import org.pdfbox.util.PDFOperator;
import org.pdfbox.util.ImageParameters;

import org.apache.log4j.Logger;

/**
 * This will parse a PDF byte stream and extract operands and such.
 *
 * Tokens are read one at a time from the underlying source and collected in
 * a list: COS objects for operands and {@link PDFOperator} instances for
 * operators. Inline images (BI ... ID ... EI) are folded into the
 * <code>BI</code> operator, which carries both the image parameters and the
 * raw image bytes.
 *
 * @author Ben Litchfield (ben@csh.rit.edu)
 * @version $Revision: 1.29 $
 */
public class PDFStreamParser extends BaseParser
{
    private static final Logger log = Logger.getLogger(PDFStreamParser.class);

    /** Tokens collected by {@link #parse()}, in stream order. */
    private final List streamObjects = new ArrayList( 100 );
    /** Scratch file handed to parseCOSStream when a nested stream is found. */
    private final RandomAccessFile file;
    /** The most recently seen <code>BI</code> (begin inline image) operator. */
    private PDFOperator lastBIToken = null;

    /**
     * Constructor that takes a stream to parse.
     *
     * @param stream The stream to read data from.
     * @param raf The random access file.
     *
     * @throws IOException If there is an error reading from the stream.
     */
    public PDFStreamParser( InputStream stream, RandomAccessFile raf ) throws IOException
    {
        super( stream );
        file = raf;
    }

    /**
     * Constructor.
     *
     * @param stream The stream to parse.
     *
     * @throws IOException If there is an error initializing the stream.
     */
    public PDFStreamParser( COSStream stream ) throws IOException
    {
        this( stream.getUnfilteredStream(), stream.getScratchFile() );
    }

    /**
     * This will parse the tokens in the stream.  This will close the
     * stream when it is finished parsing, even if parsing fails.
     *
     * @throws IOException If there is an error while parsing the stream.
     */
    public void parse() throws IOException
    {
        if( log.isDebugEnabled() )
        {
            log.debug( "parse() start" );
        }

        try
        {
            Object token = null;
            while( (token = parseNextToken()) != null )
            {
                streamObjects.add( token );
            }
        }
        finally
        {
            pdfSource.close();
        }
        if( log.isDebugEnabled() )
        {
            log.debug( "parse() end" );
        }
    }

    /**
     * This will get the tokens that were parsed from the stream.
     *
     * @return All of the tokens in the stream.
     */
    public List getTokens()
    {
        return streamObjects;
    }

    /**
     * This will parse the next token in the stream.
     *
     * The first non-space character decides what kind of token is read.
     * Keywords that share a first letter with operators (null, true, false, R)
     * fall back to an operator when the word read is not the keyword.
     *
     * @return The next token in the stream or null if there are no more tokens in the stream.
     *
     * @throws IOException If an io error occurs while parsing the stream.
     */
    private Object parseNextToken() throws IOException
    {
        if( log.isDebugEnabled() )
        {
            log.debug( "parseNextToken() start" );
        }
        Object retval = null;

        skipSpaces();
        int nextByte = pdfSource.peek();
        // Compare the int itself. Narrowing to byte first would make a literal
        // 0xFF data byte compare equal to -1 and silently end parsing early.
        // NOTE(review): assumes peek() reports end of stream as -1, like
        // InputStream.read() - confirm against the pdfSource implementation.
        if( nextByte == -1 )
        {
            return null;
        }
        char c = (char)nextByte;
        switch(c)
        {
            case '<':
            {
                // "<<" opens a dictionary, a single "<" opens a hex string;
                // look one byte further and then restore the first bracket.
                int leftBracket = pdfSource.read();
                char second = (char)pdfSource.peek();
                pdfSource.unread( leftBracket );
                if( second == '<' )
                {
                    COSDictionary pod = parseCOSDictionary();
                    skipSpaces();
                    // an 's' after the dictionary is taken as the start of a stream
                    if( (char)pdfSource.peek() == 's' )
                    {
                        retval = parseCOSStream( pod, file );
                    }
                    else
                    {
                        retval = pod;
                    }
                }
                else
                {
                    retval = parseCOSString();
                }
                break;
            }
            case '[': // array
            {
                retval = parseCOSArray();
                break;
            }
            case '(': // string
                retval = parseCOSString();
                break;
            case '/': // name
                retval = parseCOSName();
                break;
            case 'n': // null
            {
                String nullString = readString();
                if( nullString.equals( "null") )
                {
                    retval = COSNull.NULL;
                }
                else
                {
                    retval = PDFOperator.getOperator( nullString );
                }
                break;
            }
            case 't':
            case 'f':
            {
                String next = readString();
                if( next.equals( "true" ) )
                {
                    retval = COSBoolean.TRUE;
                }
                else if( next.equals( "false" ) )
                {
                    retval = COSBoolean.FALSE;
                }
                else
                {
                    retval = PDFOperator.getOperator( next );
                }
                break;
            }
            case 'R':
            {
                String line = readString();
                if( line.equals( "R" ) )
                {
                    retval = new COSObject( null );
                }
                else
                {
                    retval = PDFOperator.getOperator( line );
                }
                break;
            }
            case '0':
            case '1':
            case '2':
            case '3':
            case '4':
            case '5':
            case '6':
            case '7':
            case '8':
            case '9':
            case '-':
            case '+':
            case '.':
            {
                retval = parseNumber();
                break;
            }
            case 'B':
            {
                String next = readString();
                retval = PDFOperator.getOperator( next );

                if( next.equals( "BI" ) )
                {
                    readInlineImage( (PDFOperator)retval );
                }
                break;
            }
            case 'I':
            {
                retval = parseInlineImageData();
                break;
            }
            case ']':
            {
                // some ']' around without its previous '['
                // this means a PDF is somewhat corrupt but we will continue to parse.
                pdfSource.read();
                retval = COSNull.NULL;  // must be a better solution than null...
                break;
            }
            default:
            {
                //we must be an operator
                String operator = readOperator();
                if( operator.trim().length() == 0 )
                {
                    //we have a corrupt stream, stop reading here
                    retval = null;
                }
                else
                {
                    retval = PDFOperator.getOperator( operator );
                }
            }

        }
        if( log.isDebugEnabled() )
        {
            log.debug( "parseNextToken() retval=" + retval + " peek=" + (char)pdfSource.peek() + " end" );
        }

        return retval;
    }

    /**
     * Reads a run of digit, sign and decimal point characters and converts it
     * to a number. The caller has already verified that the next character is
     * one of those, so at least one character is always consumed.
     *
     * @return The value produced by COSNumber.get for the characters read.
     *
     * @throws IOException If there is an error reading or converting the number.
     */
    private Object parseNumber() throws IOException
    {
        StringBuffer buf = new StringBuffer();
        char c = (char)pdfSource.peek();
        while( Character.isDigit( c ) || c == '-' || c == '+' || c == '.' )
        {
            buf.append( c );
            pdfSource.read();
            c = (char)pdfSource.peek();
        }
        return COSNumber.get( buf.toString() );
    }

    /**
     * Completes a <code>BI</code> operator: reads the name/value pairs that
     * follow it into an image parameter dictionary, then takes the image bytes
     * from the operator token that ends the pairs.
     *
     * @param beginImage The <code>BI</code> operator that was just read.
     *
     * @throws IOException If the stream ends or holds an unexpected token
     * where a parameter value or the image data operator is required.
     */
    private void readInlineImage( PDFOperator beginImage ) throws IOException
    {
        lastBIToken = beginImage;
        COSDictionary imageParams = new COSDictionary();
        lastBIToken.setImageParameters( new ImageParameters( imageParams ) );
        Object nextToken = null;
        while( (nextToken = parseNextToken()) instanceof COSName )
        {
            Object value = parseNextToken();
            // Report truncated/corrupt parameters as a parse error instead of
            // letting a ClassCastException or NullPointerException escape.
            if( !(value instanceof COSBase) )
            {
                throw new IOException( "Error: Expected a value for inline image parameter " +
                    nextToken + " but found '" + value + "'" );
            }
            imageParams.setItem( (COSName)nextToken, (COSBase)value );
        }
        //final token will be the image data, maybe??
        if( !(nextToken instanceof PDFOperator) )
        {
            throw new IOException( "Error: Expected inline image data after 'BI' but found '" +
                nextToken + "'" );
        }
        PDFOperator imageData = (PDFOperator)nextToken;
        lastBIToken.setImageData( imageData.getImageData() );
    }

    /**
     * Reads an <code>ID</code> operator together with the raw inline image
     * bytes that follow it, up to (not including) the closing <code>EI</code>.
     * The <code>EI</code> is pushed back so that it is parsed as the next token.
     *
     * @return An <code>ID</code> operator carrying the image bytes.
     *
     * @throws IOException If the next two bytes are not "ID" or reading fails.
     */
    private Object parseInlineImageData() throws IOException
    {
        //Special case for ID operator
        String id = "" + (char)pdfSource.read() + (char)pdfSource.read();
        if( !id.equals( "ID" ) )
        {
            throw new IOException( "Error: Expected operator 'ID' actual='" + id + "'" );
        }
        ByteArrayOutputStream imageData = new ByteArrayOutputStream();
        if( this.isWhitespace() )
        {
            //pull off the whitespace character
            pdfSource.read();
        }
        // Three byte sliding window over the data: twoBytesAgo, lastByte, currentByte.
        int twoBytesAgo = 0;
        int lastByte = pdfSource.read();
        int currentByte = pdfSource.read();
        //PDF spec is kinda unclear about this.  Should a whitespace
        //always appear before EI? Not sure, I found a PDF
        //(UnderstandingWebSphereClassLoaders.pdf) which has EI as part
        //of the image data and will stop parsing prematurely if there is
        //not a check for EI.
        //amyuni2_05d__pdf1_3_acro4x.pdf has image data that is compressed,
        //so the byte count implied by the image parameters cannot be used
        //to find the end; only the whitespace-EI-whitespace pattern is.
        while( !(isWhitespace( twoBytesAgo ) &&
                 lastByte == 'E' &&
                 currentByte == 'I' &&
                 isWhitespace()) &&
               !pdfSource.isEOF() )
        {
            imageData.write( lastByte );
            twoBytesAgo = lastByte;
            lastByte = currentByte;
            currentByte = pdfSource.read();
        }
        // NOTE(review): EI is pushed back even when the loop stopped at end of
        // stream without finding it - presumably deliberate leniency; confirm.
        pdfSource.unread( 'I' ); //unread the EI operator
        pdfSource.unread( 'E' );
        PDFOperator operator = PDFOperator.getOperator( "ID" );
        operator.setImageData( imageData.toByteArray() );
        return operator;
    }

    /**
     * This will read an operator from the stream.
     *
     * Reading stops at whitespace, a closing character, end of stream, or a
     * character that starts another kind of token ('[', '&lt;', '(', '/' or a
     * digit). The terminating character is left in the stream.
     *
     * @return The operator that was read from the stream.
     *
     * @throws IOException If there is an error reading from the stream.
     */
    protected String readOperator() throws IOException
    {
        skipSpaces();

        //average string size is around 2 and the normal string buffer size is
        //about 16 so lets save some space.
        StringBuffer buffer = new StringBuffer(4);
        while( !isWhitespace() && !isClosing() && !pdfSource.isEOF() )
        {
            // peek once per character instead of once per comparison
            int next = pdfSource.peek();
            boolean startsOtherToken =
                next == (int)'[' ||
                next == (int)'<' ||
                next == (int)'(' ||
                next == (int)'/' ||
                (next >= (int)'0' && next <= (int)'9');
            if( startsOtherToken )
            {
                break;
            }
            buffer.append( (char)pdfSource.read() );
        }
        return buffer.toString();
    }
}