From 6025b6016517c6d898d8957d1d7e03ba71431912 Mon Sep 17 00:00:00 2001 From: tknall Date: Fri, 1 Dec 2006 12:20:24 +0000 Subject: Initial import of release 2.2. git-svn-id: https://joinup.ec.europa.eu/svn/pdf-as/trunk@4 7b5415b0-85f9-ee4d-85bd-d5d0c3b42d1c --- .../java/org/pdfbox/cmapparser/CMapParser.java | 285 +++++++++++++++++++++ 1 file changed, 285 insertions(+) create mode 100644 src/main/java/org/pdfbox/cmapparser/CMapParser.java (limited to 'src/main/java/org/pdfbox/cmapparser/CMapParser.java') diff --git a/src/main/java/org/pdfbox/cmapparser/CMapParser.java b/src/main/java/org/pdfbox/cmapparser/CMapParser.java new file mode 100644 index 0000000..5434bb7 --- /dev/null +++ b/src/main/java/org/pdfbox/cmapparser/CMapParser.java @@ -0,0 +1,285 @@ +/** + * Copyright (c) 2003-2004, www.pdfbox.org + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * 3. Neither the name of pdfbox; nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * http://www.pdfbox.org + * + */ +package org.pdfbox.cmapparser; + +import java.io.FileInputStream; +import java.io.InputStream; +import java.io.IOException; +import java.io.RandomAccessFile; + +import java.util.List; + +import org.pdfbox.cmaptypes.CMap; +import org.pdfbox.cmaptypes.CodespaceRange; + +import org.pdfbox.cos.COSArray; +import org.pdfbox.cos.COSName; +import org.pdfbox.cos.COSNumber; +import org.pdfbox.cos.COSString; + +import org.pdfbox.pdfparser.PDFStreamParser; + +import org.pdfbox.util.PDFOperator; + +/** + * Parses a CMap stream into a CMap object. The stream is tokenized with + * PDFStreamParser; call parse() and then getResult() to obtain the CMap. + * + * @author Ben Litchfield (ben@csh.rit.edu) + * @version $Revision: 1.10 $ + */ +public class CMapParser +{ + private static final String BEGIN_CODESPACE_RANGE = "begincodespacerange"; + private static final String BEGIN_BASE_FONT_CHAR = "beginbfchar"; + private static final String BEGIN_BASE_FONT_RANGE = "beginbfrange"; + + private InputStream input; + private CMap result; + private RandomAccessFile file; + + /** + * Creates a new instance of CMapParser. Both arguments are only stored here; + * they are handed to the PDFStreamParser when parse() is called. + * + * @param in The input stream to read data from. + * @param raf The random access file from the document (main() in this class passes null). + */ + public CMapParser( InputStream in, RandomAccessFile raf ) + { + input = in; + file = raf; + } + + /** + * This will get the results of the parsing. parse() must be called first; + * until then this returns null because the result field is only assigned in parse(). + * + * @return The parsed CMap file, or null if parse() has not been called. 
+ */ + public CMap getResult() + { + return result; + } + + /** + * This will parse the stream and create a cmap object. Every call starts with a + * fresh CMap, so a previous result is replaced. + * + * NOTE(review): in this copy of the file the token loop below is not valid Java; + * text between the loop header and the range-expansion code appears to be missing, + * so only the tail of the innermost loop is readable here. Consult the original + * source before changing this method. + * + * @throws IOException If there is an error parsing the stream. + */ + public void parse() throws IOException + { + result = new CMap(); + PDFStreamParser parser = new PDFStreamParser( input, file ); + parser.parse(); + List tokens = parser.getTokens(); + for( int i=0; i= 0 ) + { + done = true; + } + value = createStringFromBytes( tokenBytes ); + result.addMapping( startBytes, value ); + increment( startBytes ); + + /* Without an array operand the mapped value bytes are incremented in step with the start code; otherwise the next array element, if one is left, supplies the bytes for the next mapping. */ if( array == null ) + { + increment( tokenBytes ); + } + else + { + if( arrayIndex < array.size() ) + { + tokenBytes = ((COSString)array.getObject( arrayIndex++ )).getBytes(); + } + } + } + } + } + } + } + } + + /** Adds one to the given byte array in place, starting at its last byte (see the two-argument overload for the carry rule). An empty array is not handled: the start position would be -1. */ private void increment( byte[] data ) + { + increment( data, data.length-1 ); + } + + /** Adds one to data[position] in place. If that byte is 0xFF (255 when read as unsigned) and position is greater than 0, the byte is reset to 0 and the carry is applied recursively to position-1. At position 0 there is no further carry: a 0xFF byte simply wraps to 0 through the byte cast. */ private void increment( byte[] data, int position ) + { + if( position > 0 && (data[position]+256)%256 == 255 ) + { + data[position]=0; + increment( data, position-1); + } + else + { + data[position] = (byte)(data[position]+1); + } + } + + /** Converts mapped bytes to a String. A single byte is decoded with new String(byte[]), i.e. the platform default charset; any other length (including zero) is decoded as UTF-16BE. NOTE(review): the single-byte result therefore depends on the runtime environment - confirm whether a fixed charset was intended. */ private String createStringFromBytes( byte[] bytes ) throws IOException + { + String retval = null; + if( bytes.length == 1 ) + { + retval = new String( bytes ); + } + else + { + retval = new String( bytes, "UTF-16BE" ); + } + return retval; + } + + /** NOTE(review): the body of this method is cut off in this copy of the file (the loop header is incomplete and the text runs straight into the remainder of main), so its contract cannot be documented from here. */ private int compare( byte[] first, byte[] second ) + { + int retval = 1; + boolean done = false; + for( int i=0; i" ); + System.exit( -1 ); + } + CMapParser parser = new CMapParser( new FileInputStream( args[0] ), null ); + parser.parse(); + CMap result = parser.getResult(); + System.out.println( "Result:" + result ); + } +} \ No newline at end of file -- cgit v1.2.3