From 6025b6016517c6d898d8957d1d7e03ba71431912 Mon Sep 17 00:00:00 2001 From: tknall Date: Fri, 1 Dec 2006 12:20:24 +0000 Subject: Initial import of release 2.2. git-svn-id: https://joinup.ec.europa.eu/svn/pdf-as/trunk@4 7b5415b0-85f9-ee4d-85bd-d5d0c3b42d1c --- src/main/java/org/pdfbox/encoding/AFMEncoding.java | 76 ++++++ .../org/pdfbox/encoding/DictionaryEncoding.java | 112 ++++++++ src/main/java/org/pdfbox/encoding/Encoding.java | 268 +++++++++++++++++++ .../java/org/pdfbox/encoding/EncodingManager.java | 87 +++++++ .../java/org/pdfbox/encoding/MacRomanEncoding.java | 267 +++++++++++++++++++ .../java/org/pdfbox/encoding/PdfDocEncoding.java | 289 +++++++++++++++++++++ .../java/org/pdfbox/encoding/StandardEncoding.java | 209 +++++++++++++++ .../java/org/pdfbox/encoding/WinAnsiEncoding.java | 281 ++++++++++++++++++++ src/main/java/org/pdfbox/encoding/package.html | 9 + 9 files changed, 1598 insertions(+) create mode 100644 src/main/java/org/pdfbox/encoding/AFMEncoding.java create mode 100644 src/main/java/org/pdfbox/encoding/DictionaryEncoding.java create mode 100644 src/main/java/org/pdfbox/encoding/Encoding.java create mode 100644 src/main/java/org/pdfbox/encoding/EncodingManager.java create mode 100644 src/main/java/org/pdfbox/encoding/MacRomanEncoding.java create mode 100644 src/main/java/org/pdfbox/encoding/PdfDocEncoding.java create mode 100644 src/main/java/org/pdfbox/encoding/StandardEncoding.java create mode 100644 src/main/java/org/pdfbox/encoding/WinAnsiEncoding.java create mode 100644 src/main/java/org/pdfbox/encoding/package.html (limited to 'src/main/java/org/pdfbox/encoding') diff --git a/src/main/java/org/pdfbox/encoding/AFMEncoding.java b/src/main/java/org/pdfbox/encoding/AFMEncoding.java new file mode 100644 index 0000000..badb7bf --- /dev/null +++ b/src/main/java/org/pdfbox/encoding/AFMEncoding.java @@ -0,0 +1,76 @@ +/** + * Copyright (c) 2003, www.pdfbox.org + * All rights reserved. 
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ * 3. Neither the name of pdfbox; nor the names of its
+ *    contributors may be used to endorse or promote products derived from this
+ *    software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * http://www.pdfbox.org
+ *
+ */
+package org.pdfbox.encoding;
+
+import java.util.Iterator;
+
+import org.pdfbox.afmtypes.CharMetric;
+import org.pdfbox.afmtypes.FontMetric;
+
+import org.pdfbox.cos.COSBase;
+import org.pdfbox.cos.COSName;
+
+/**
+ * Encoding whose code/name pairs are read from the character metrics of an AFM font.
+ *
+ * @author Ben Litchfield (ben@csh.rit.edu)
+ * @version $Revision: 1.6 $
+ */
+public class AFMEncoding extends Encoding
+{
+    private FontMetric metric = null;
+
+    /**
+     * Registers every character metric of the given font as a code/name pair.
+     *
+     * @param fontInfo The AFM metrics whose character list populates this encoding.
+     */
+    public AFMEncoding( FontMetric fontInfo )
+    {
+        metric = fontInfo;
+        for( Iterator it = metric.getCharMetrics().iterator(); it.hasNext(); )
+        {
+            CharMetric glyph = (CharMetric)it.next();
+            int glyphCode = glyph.getCharacterCode();
+            addCharacterEncoding( glyphCode, COSName.getPDFName( glyph.getName() ) );
+        }
+    }
+
+    /**
+     * Gives the COS form of this encoding; this implementation has none to offer.
+     *
+     * @return Always null.
+     */
+    public COSBase getCOSObject()
+    {
+        return null;
+    }
+}
\ No newline at end of file
diff --git a/src/main/java/org/pdfbox/encoding/DictionaryEncoding.java b/src/main/java/org/pdfbox/encoding/DictionaryEncoding.java
new file mode 100644
index 0000000..4378898
--- /dev/null
+++ b/src/main/java/org/pdfbox/encoding/DictionaryEncoding.java
@@ -0,0 +1,112 @@
+/**
+ * Copyright (c) 2003, www.pdfbox.org
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ * 3. Neither the name of pdfbox; nor the names of its
+ *    contributors may be used to endorse or promote products derived from this
+ *    software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * http://www.pdfbox.org + * + */ +package org.pdfbox.encoding; + +import java.io.IOException; + +import org.pdfbox.cos.COSArray; +import org.pdfbox.cos.COSBase; +import org.pdfbox.cos.COSDictionary; +import org.pdfbox.cos.COSName; +import org.pdfbox.cos.COSNumber; + +/** + * This will perform the encoding from a dictionary. + * + * @author Ben Litchfield (ben@csh.rit.edu) + * @version $Revision: 1.12 $ + */ +public class DictionaryEncoding extends Encoding +{ + private COSDictionary encoding = null; + + /** + * Constructor. + * + * @param fontEncoding The encoding dictionary. + * + * @throws IOException If there is a problem getting the base font. + */ + public DictionaryEncoding( COSDictionary fontEncoding ) throws IOException + { + encoding = fontEncoding; + + //first set up the base encoding + //The previious value WinAnsiEncoding() has been changed to StandardEnding + //see p 389 of the PDF 1.5 reférence table 5.11 entries in a dictionary encoding + //"If this entry is absent, the Differences entry describes differences from an implicit + //base encoding. For a font program that is embedded in the PDF file, the + //implicit base encoding is the font program’s built-in encoding, as described + //above and further elaborated in the sections on specific font types below. Otherwise, + //for a nonsymbolic font, it is StandardEncoding, and for a symbolic font, it + //is the font’s built-in encoding." 
+ + //so the default base encoding is standardEncoding + Encoding baseEncoding = new StandardEncoding(); + COSName baseEncodingName = (COSName)encoding.getDictionaryObject( COSName.BASE_ENCODING ); + + if( baseEncodingName != null ) + { + EncodingManager manager = new EncodingManager(); + baseEncoding = manager.getEncoding( baseEncodingName ); + } + nameToCode.putAll( baseEncoding.nameToCode ); + codeToName.putAll( baseEncoding.codeToName ); + + + //now replace with the differences. + COSArray differences = (COSArray)encoding.getDictionaryObject( COSName.DIFFERENCES ); + int currentIndex = -1; + for( int i=0; differences != null && i= 0 ) + { + try + { + String characterName = line.substring( 0, semicolonIndex ); + String unicodeValue = line.substring( semicolonIndex+1, line.length() ); + StringTokenizer tokenizer = new StringTokenizer( unicodeValue, " ", false ); + String value = ""; + while(tokenizer.hasMoreTokens()) + { + int characterCode = Integer.parseInt( tokenizer.nextToken(), 16 ); + value += (char)characterCode; + } + + NAME_TO_CHARACTER.put( COSName.getPDFName( characterName ), value ); + } + catch( NumberFormatException nfe ) + { + log.error( "Error parsing line '" + line + "' ", nfe ); + } + } + } + } + } + catch( IOException io ) + { + log.error( "Error reading Resources/glyphlist.txt", io ); + } + finally + { + if( glyphStream != null ) + { + try + { + glyphStream.close(); + } + catch( IOException e ) + { + log.warn( "Error closing stream", e ); + } + + } + } + + + NAME_TO_CHARACTER.put( COSName.getPDFName( ".notdef" ), "" ); + NAME_TO_CHARACTER.put( COSName.getPDFName( "fi" ), "fi" ); + NAME_TO_CHARACTER.put( COSName.getPDFName( "fl" ), "fl" ); + NAME_TO_CHARACTER.put( COSName.getPDFName( "ffi" ), "ffi" ); + NAME_TO_CHARACTER.put( COSName.getPDFName( "ff" ), "ff" ); + NAME_TO_CHARACTER.put( COSName.getPDFName( "pi" ), "pi" ); + + Iterator keys = NAME_TO_CHARACTER.keySet().iterator(); + while( keys.hasNext() ) + { + Object key = keys.next(); + Object 
value = NAME_TO_CHARACTER.get( key );
+            CHARACTER_TO_NAME.put( value, key );
+        }
+    }
+
+
+    /**
+     * This will add a character encoding by storing the pair in both
+     * codeToName and nameToCode.
+     *
+     * @param code The character code that matches the character.
+     * @param name The name of the character.
+     */
+    protected void addCharacterEncoding( int code, COSName name )
+    {
+        Integer intCode = new Integer( code );
+        codeToName.put( intCode, name );
+        nameToCode.put( name, intCode );
+    }
+
+    /**
+     * This will get the character code for the name.
+     *
+     * @param name The name of the character.
+     *
+     * @return The code for the character.
+     *
+     * @throws IOException If there is no character code for the name.
+     */
+    public int getCode( COSName name ) throws IOException
+    {
+        Integer code = (Integer)nameToCode.get( name );
+        if( code == null )
+        {
+            throw new IOException( "No character code for character name '" + name.getName() + "'" );
+        }
+        return code.intValue();
+    }
+
+    /**
+     * This will take a character code and get the name from the code.
+     * A code without a registered name yields the name "space".
+     *
+     * @param code The character code.
+     *
+     * @return The name of the character, or "space" if the code is unmapped.
+     *
+     * @throws IOException Declared, but not thrown by the current lenient implementation.
+     */
+    public COSName getName( int code ) throws IOException
+    {
+        COSName name = (COSName)codeToName.get( new Integer( code ) );
+        if( name == null )
+        {
+            //lets be forgiving for now
+            name = COSName.getPDFName( "space" );
+            //throw new IOException( getClass().getName() +
+            //    ": No name for character code '" + code + "'" );
+        }
+        if( log.isDebugEnabled() )
+        {
+            log.debug( "Encoding.getName( " + code + " )=" + name );
+        }
+        return name;
+    }
+
+    /**
+     * This will take a character and get its name from the static CHARACTER_TO_NAME table.
+     *
+     * @param c The character.
+     *
+     * @return The name of the character.
+     *
+     * @throws IOException If there is no name for the character.
+     */
+    public COSName getNameFromCharacter( char c ) throws IOException
+    {
+        // the table is keyed by String, so the char is widened to a one-character string
+        COSName name = (COSName)CHARACTER_TO_NAME.get( "" + c );
+        if( name == null )
+        {
+            throw new IOException( "No name for character '" + c + "'" );
+        }
+        return name;
+    }
+
+    /**
+     * This will get the character from the code, by resolving the code with
+     * getName( int ) and that name with getCharacter( COSName ).
+     *
+     * @param code The character code.
+     *
+     * @return The printable character for the code.
+     *
+     * @throws IOException If getName( int ) fails for the code.
+     */
+    public String getCharacter( int code ) throws IOException
+    {
+        String character = getCharacter( getName( code ) );
+        if( log.isDebugEnabled() )
+        {
+            log.debug( "Encoding.getCharacter( " + code + " )=" + character );
+        }
+        return character;
+    }
+
+    /**
+     * This will get the character from the name. A name that is missing from
+     * NAME_TO_CHARACTER is returned as its own text.
+     *
+     * @param name The name of the character.
+     *
+     * @return The printable character for the name, or the name itself if it is unknown.
+     */
+    public static String getCharacter( COSName name )
+    {
+        String character = (String)NAME_TO_CHARACTER.get( name );
+        if( character == null )
+        {
+            character = name.getName();
+        }
+        if( log.isDebugEnabled() )
+        {
+            log.debug( "Encoding.getCharacter(" + name + ")=" + character );
+        }
+        return character;
+    }
+}
\ No newline at end of file
diff --git a/src/main/java/org/pdfbox/encoding/EncodingManager.java b/src/main/java/org/pdfbox/encoding/EncodingManager.java
new file mode 100644
index 0000000..9e8e4b3
--- /dev/null
+++ b/src/main/java/org/pdfbox/encoding/EncodingManager.java
@@ -0,0 +1,87 @@
+/**
+ * Copyright (c) 2003, www.pdfbox.org
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimer.
+ * 2. 
Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ * 3. Neither the name of pdfbox; nor the names of its
+ *    contributors may be used to endorse or promote products derived from this
+ *    software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * http://www.pdfbox.org
+ *
+ */
+package org.pdfbox.encoding;
+
+import java.io.IOException;
+
+import java.util.HashMap;
+import java.util.Map;
+
+import org.pdfbox.cos.COSName;
+
+/**
+ * Registry of the predefined encodings, looked up by their COS name.
+ *
+ * @author Ben Litchfield (ben@csh.rit.edu)
+ * @version $Revision: 1.5 $
+ */
+public class EncodingManager
+{
+    private static final Map ENCODINGS = new HashMap();
+
+    static
+    {
+        // one instance per predefined encoding, created once and handed out on lookup
+        ENCODINGS.put( COSName.MAC_ROMAN_ENCODING, new MacRomanEncoding() );
+        ENCODINGS.put( COSName.PDF_DOC_ENCODING, new PdfDocEncoding() );
+        ENCODINGS.put( COSName.STANDARD_ENCODING, new StandardEncoding() );
+        ENCODINGS.put( COSName.WIN_ANSI_ENCODING, new WinAnsiEncoding() );
+    }
+
+    /**
+     * Fetches the entry registered under COSName.STANDARD_ENCODING.
+     * @return The standard encoding.
+     */
+    public Encoding getStandardEncoding()
+    {
+        Object standard = ENCODINGS.get( COSName.STANDARD_ENCODING );
+        return (Encoding)standard;
+    }
+
+    /**
+     * Looks up one of the registered encodings.
+     *
+     * @param name The COS name the encoding was registered under.
+     *
+     * @return The encoding registered for that name.
+     *
+     * @throws IOException If no encoding is registered under that name.
+     */
+    public Encoding getEncoding( COSName name ) throws IOException
+    {
+        Object registered = ENCODINGS.get( name );
+        if( registered != null )
+        {
+            return (Encoding)registered;
+        }
+        throw new IOException( "Unknown encoding for '" + name.getName() + "'" );
+    }
+}
\ No newline at end of file
diff --git a/src/main/java/org/pdfbox/encoding/MacRomanEncoding.java b/src/main/java/org/pdfbox/encoding/MacRomanEncoding.java
new file mode 100644
index 0000000..6a560a2
--- /dev/null
+++ b/src/main/java/org/pdfbox/encoding/MacRomanEncoding.java
@@ -0,0 +1,267 @@
+/**
+ * Copyright (c) 2003, www.pdfbox.org
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ * 3. Neither the name of pdfbox; nor the names of its
+ *    contributors may be used to endorse or promote products derived from this
+ *    software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * http://www.pdfbox.org + * + */ +package org.pdfbox.encoding; + +import org.pdfbox.cos.COSBase; +import org.pdfbox.cos.COSName; + +/** + * This is an interface to a text encoder. + * + * @author Ben Litchfield + * @version $Revision: 1.9 $ + */ +public class MacRomanEncoding extends Encoding +{ + /** + * Constructor. + */ + public MacRomanEncoding() + { + addCharacterEncoding( 0101, COSName.getPDFName( "A" ) ); + addCharacterEncoding( 0256, COSName.getPDFName( "AE" ) ); + addCharacterEncoding( 0347, COSName.getPDFName( "Aacute" ) ); + addCharacterEncoding( 0345, COSName.getPDFName( "Acircumflex" ) ); + addCharacterEncoding( 0200, COSName.getPDFName( "Adieresis" ) ); + addCharacterEncoding( 0313, COSName.getPDFName( "Agrave" ) ); + addCharacterEncoding( 0201, COSName.getPDFName( "Aring" ) ); + addCharacterEncoding( 0314, COSName.getPDFName( "Atilde" ) ); + addCharacterEncoding( 0102, COSName.getPDFName( "B" ) ); + addCharacterEncoding( 0103, COSName.getPDFName( "C" ) ); + addCharacterEncoding( 0202, COSName.getPDFName( "Ccedilla" ) ); + addCharacterEncoding( 0104, COSName.getPDFName( "D" ) ); + addCharacterEncoding( 0105, COSName.getPDFName( "E" ) ); + addCharacterEncoding( 0203, COSName.getPDFName( "Eacute" ) ); + addCharacterEncoding( 0346, COSName.getPDFName( "Ecircumflex" ) ); + addCharacterEncoding( 0350, COSName.getPDFName( "Edieresis" ) ); + addCharacterEncoding( 0351, COSName.getPDFName( "Egrave" ) ); + addCharacterEncoding( 0106, 
COSName.getPDFName( "F" ) ); + addCharacterEncoding( 0107, COSName.getPDFName( "G" ) ); + addCharacterEncoding( 0110, COSName.getPDFName( "H" ) ); + addCharacterEncoding( 0111, COSName.getPDFName( "I" ) ); + addCharacterEncoding( 0352, COSName.getPDFName( "Iacute" ) ); + addCharacterEncoding( 0353, COSName.getPDFName( "Icircumflex" ) ); + addCharacterEncoding( 0354, COSName.getPDFName( "Idieresis" ) ); + addCharacterEncoding( 0355, COSName.getPDFName( "Igrave" ) ); + addCharacterEncoding( 0112, COSName.getPDFName( "J" ) ); + addCharacterEncoding( 0113, COSName.getPDFName( "K" ) ); + addCharacterEncoding( 0114, COSName.getPDFName( "L" ) ); + addCharacterEncoding( 0115, COSName.getPDFName( "M" ) ); + addCharacterEncoding( 0116, COSName.getPDFName( "N" ) ); + addCharacterEncoding( 0204, COSName.getPDFName( "Ntilde" ) ); + addCharacterEncoding( 0117, COSName.getPDFName( "O" ) ); + addCharacterEncoding( 0316, COSName.getPDFName( "OE" ) ); + addCharacterEncoding( 0356, COSName.getPDFName( "Oacute" ) ); + addCharacterEncoding( 0357, COSName.getPDFName( "Ocircumflex" ) ); + addCharacterEncoding( 0205, COSName.getPDFName( "Odieresis" ) ); + addCharacterEncoding( 0361, COSName.getPDFName( "Ograve" ) ); + addCharacterEncoding( 0257, COSName.getPDFName( "Oslash" ) ); + addCharacterEncoding( 0315, COSName.getPDFName( "Otilde" ) ); + addCharacterEncoding( 0120, COSName.getPDFName( "P" ) ); + addCharacterEncoding( 0121, COSName.getPDFName( "Q" ) ); + addCharacterEncoding( 0122, COSName.getPDFName( "R" ) ); + addCharacterEncoding( 0123, COSName.getPDFName( "S" ) ); + addCharacterEncoding( 0124, COSName.getPDFName( "T" ) ); + addCharacterEncoding( 0125, COSName.getPDFName( "U" ) ); + addCharacterEncoding( 0362, COSName.getPDFName( "Uacute" ) ); + addCharacterEncoding( 0363, COSName.getPDFName( "Ucircumflex" ) ); + addCharacterEncoding( 0206, COSName.getPDFName( "Udieresis" ) ); + addCharacterEncoding( 0364, COSName.getPDFName( "Ugrave" ) ); + addCharacterEncoding( 0126, 
COSName.getPDFName( "V" ) );
+        addCharacterEncoding( 0127, COSName.getPDFName( "W" ) );
+        addCharacterEncoding( 0130, COSName.getPDFName( "X" ) );
+        addCharacterEncoding( 0131, COSName.getPDFName( "Y" ) );
+        addCharacterEncoding( 0331, COSName.getPDFName( "Ydieresis" ) );
+        addCharacterEncoding( 0132, COSName.getPDFName( "Z" ) );
+        addCharacterEncoding( 0141, COSName.getPDFName( "a" ) );
+        addCharacterEncoding( 0207, COSName.getPDFName( "aacute" ) );
+        addCharacterEncoding( 0211, COSName.getPDFName( "acircumflex" ) );
+        addCharacterEncoding( 0253, COSName.getPDFName( "acute" ) );
+        addCharacterEncoding( 0212, COSName.getPDFName( "adieresis" ) );
+        addCharacterEncoding( 0276, COSName.getPDFName( "ae" ) );
+        addCharacterEncoding( 0210, COSName.getPDFName( "agrave" ) );
+        addCharacterEncoding( 046, COSName.getPDFName( "ampersand" ) );
+        addCharacterEncoding( 0214, COSName.getPDFName( "aring" ) );
+        addCharacterEncoding( 0136, COSName.getPDFName( "asciicircum" ) );
+        addCharacterEncoding( 0176, COSName.getPDFName( "asciitilde" ) );
+        addCharacterEncoding( 052, COSName.getPDFName( "asterisk" ) );
+        addCharacterEncoding( 0100, COSName.getPDFName( "at" ) );
+        addCharacterEncoding( 0213, COSName.getPDFName( "atilde" ) );
+        addCharacterEncoding( 0142, COSName.getPDFName( "b" ) );
+        addCharacterEncoding( 0134, COSName.getPDFName( "backslash" ) );
+        addCharacterEncoding( 0174, COSName.getPDFName( "bar" ) );
+        addCharacterEncoding( 0173, COSName.getPDFName( "braceleft" ) );
+        addCharacterEncoding( 0175, COSName.getPDFName( "braceright" ) );
+        addCharacterEncoding( 0133, COSName.getPDFName( "bracketleft" ) );
+        addCharacterEncoding( 0135, COSName.getPDFName( "bracketright" ) );
+        addCharacterEncoding( 0371, COSName.getPDFName( "breve" ) );
+        addCharacterEncoding( 0245, COSName.getPDFName( "bullet" ) );
+        addCharacterEncoding( 0143, COSName.getPDFName( "c" ) );
+        addCharacterEncoding( 0377, COSName.getPDFName( "caron" ) );
+        addCharacterEncoding( 0215, COSName.getPDFName( "ccedilla" ) );
+        addCharacterEncoding( 0374, COSName.getPDFName( "cedilla" ) );
+        addCharacterEncoding( 0242, COSName.getPDFName( "cent" ) );
+        addCharacterEncoding( 0366, COSName.getPDFName( "circumflex" ) );
+        addCharacterEncoding( 072, COSName.getPDFName( "colon" ) );
+        addCharacterEncoding( 054, COSName.getPDFName( "comma" ) );
+        addCharacterEncoding( 0251, COSName.getPDFName( "copyright" ) );
+        addCharacterEncoding( 0333, COSName.getPDFName( "currency" ) ); // glyph name is "currency"; the old "currency1" carried a footnote marker and is not a glyph-list name
+        addCharacterEncoding( 0144, COSName.getPDFName( "d" ) );
+        addCharacterEncoding( 0240, COSName.getPDFName( "dagger" ) );
+        addCharacterEncoding( 0340, COSName.getPDFName( "daggerdbl" ) );
+        addCharacterEncoding( 0241, COSName.getPDFName( "degree" ) );
+        addCharacterEncoding( 0254, COSName.getPDFName( "dieresis" ) );
+        addCharacterEncoding( 0326, COSName.getPDFName( "divide" ) );
+        addCharacterEncoding( 044, COSName.getPDFName( "dollar" ) );
+        addCharacterEncoding( 0372, COSName.getPDFName( "dotaccent" ) );
+        addCharacterEncoding( 0365, COSName.getPDFName( "dotlessi" ) );
+        addCharacterEncoding( 0145, COSName.getPDFName( "e" ) );
+        addCharacterEncoding( 0216, COSName.getPDFName( "eacute" ) );
+        addCharacterEncoding( 0220, COSName.getPDFName( "ecircumflex" ) );
+        addCharacterEncoding( 0221, COSName.getPDFName( "edieresis" ) );
+        addCharacterEncoding( 0217, COSName.getPDFName( "egrave" ) );
+        addCharacterEncoding( 070, COSName.getPDFName( "eight" ) );
+        addCharacterEncoding( 0311, COSName.getPDFName( "ellipsis" ) );
+        addCharacterEncoding( 0321, COSName.getPDFName( "emdash" ) );
+        addCharacterEncoding( 0320, COSName.getPDFName( "endash" ) );
+        addCharacterEncoding( 075, COSName.getPDFName( "equal" ) );
+        addCharacterEncoding( 041, COSName.getPDFName( "exclam" ) );
+        addCharacterEncoding( 0301, COSName.getPDFName( "exclamdown" ) );
+        addCharacterEncoding( 0146, COSName.getPDFName( "f" ) );
+        addCharacterEncoding( 0336, COSName.getPDFName( "fi" ) );
+        addCharacterEncoding( 065, COSName.getPDFName( "five" ) 
); + addCharacterEncoding( 0337, COSName.getPDFName( "fl" ) ); + addCharacterEncoding( 0304, COSName.getPDFName( "florin" ) ); + addCharacterEncoding( 064, COSName.getPDFName( "four" ) ); + addCharacterEncoding( 0332, COSName.getPDFName( "fraction" ) ); + addCharacterEncoding( 0147, COSName.getPDFName( "g" ) ); + addCharacterEncoding( 0247, COSName.getPDFName( "germandbls" ) ); + addCharacterEncoding( 0140, COSName.getPDFName( "grave" ) ); + addCharacterEncoding( 076, COSName.getPDFName( "greater" ) ); + addCharacterEncoding( 0307, COSName.getPDFName( "guillemotleft" ) ); + addCharacterEncoding( 0310, COSName.getPDFName( "guillemotright" ) ); + addCharacterEncoding( 0334, COSName.getPDFName( "guilsinglleft" ) ); + addCharacterEncoding( 0335, COSName.getPDFName( "guilsinglright" ) ); + addCharacterEncoding( 0150, COSName.getPDFName( "h" ) ); + addCharacterEncoding( 0375, COSName.getPDFName( "hungarumlaut" ) ); + addCharacterEncoding( 055, COSName.getPDFName( "hyphen" ) ); + addCharacterEncoding( 0151, COSName.getPDFName( "i" ) ); + addCharacterEncoding( 0222, COSName.getPDFName( "iacute" ) ); + addCharacterEncoding( 0224, COSName.getPDFName( "icircumflex" ) ); + addCharacterEncoding( 0225, COSName.getPDFName( "idieresis" ) ); + addCharacterEncoding( 0223, COSName.getPDFName( "igrave" ) ); + addCharacterEncoding( 0152, COSName.getPDFName( "j" ) ); + addCharacterEncoding( 0153, COSName.getPDFName( "k" ) ); + addCharacterEncoding( 0154, COSName.getPDFName( "l" ) ); + addCharacterEncoding( 074, COSName.getPDFName( "less" ) ); + addCharacterEncoding( 0302, COSName.getPDFName( "logicalnot" ) ); + addCharacterEncoding( 0155, COSName.getPDFName( "m" ) ); + addCharacterEncoding( 0370, COSName.getPDFName( "macron" ) ); + addCharacterEncoding( 0265, COSName.getPDFName( "mu" ) ); + addCharacterEncoding( 0156, COSName.getPDFName( "n" ) ); + addCharacterEncoding( 071, COSName.getPDFName( "nine" ) ); + addCharacterEncoding( 0226, COSName.getPDFName( "ntilde" ) ); + 
addCharacterEncoding( 043, COSName.getPDFName( "numbersign" ) ); + addCharacterEncoding( 0157, COSName.getPDFName( "o" ) ); + addCharacterEncoding( 0227, COSName.getPDFName( "oacute" ) ); + addCharacterEncoding( 0231, COSName.getPDFName( "ocircumflex" ) ); + addCharacterEncoding( 0232, COSName.getPDFName( "odieresis" ) ); + addCharacterEncoding( 0317, COSName.getPDFName( "oe" ) ); + addCharacterEncoding( 0376, COSName.getPDFName( "ogonek" ) ); + addCharacterEncoding( 0230, COSName.getPDFName( "ograve" ) ); + addCharacterEncoding( 061, COSName.getPDFName( "one" ) ); + addCharacterEncoding( 0273, COSName.getPDFName( "ordfeminine" ) ); + addCharacterEncoding( 0274, COSName.getPDFName( "ordmasculine" ) ); + addCharacterEncoding( 0277, COSName.getPDFName( "oslash" ) ); + addCharacterEncoding( 0233, COSName.getPDFName( "otilde" ) ); + addCharacterEncoding( 0160, COSName.getPDFName( "p" ) ); + addCharacterEncoding( 0246, COSName.getPDFName( "paragraph" ) ); + addCharacterEncoding( 050, COSName.getPDFName( "parenleft" ) ); + addCharacterEncoding( 051, COSName.getPDFName( "parenright" ) ); + addCharacterEncoding( 045, COSName.getPDFName( "percent" ) ); + addCharacterEncoding( 056, COSName.getPDFName( "period" ) ); + addCharacterEncoding( 0341, COSName.getPDFName( "periodcentered" ) ); + addCharacterEncoding( 0344, COSName.getPDFName( "perthousand" ) ); + addCharacterEncoding( 053, COSName.getPDFName( "plus" ) ); + addCharacterEncoding( 0261, COSName.getPDFName( "plusminus" ) ); + addCharacterEncoding( 0161, COSName.getPDFName( "q" ) ); + addCharacterEncoding( 077, COSName.getPDFName( "question" ) ); + addCharacterEncoding( 0300, COSName.getPDFName( "questiondown" ) ); + addCharacterEncoding( 042, COSName.getPDFName( "quotedbl" ) ); + addCharacterEncoding( 0343, COSName.getPDFName( "quotedblbase" ) ); + addCharacterEncoding( 0322, COSName.getPDFName( "quotedblleft" ) ); + addCharacterEncoding( 0323, COSName.getPDFName( "quotedblright" ) ); + addCharacterEncoding( 0324, 
COSName.getPDFName( "quoteleft" ) ); + addCharacterEncoding( 0325, COSName.getPDFName( "quoteright" ) ); + addCharacterEncoding( 0342, COSName.getPDFName( "quotesinglbase" ) ); + addCharacterEncoding( 047, COSName.getPDFName( "quotesingle" ) ); + addCharacterEncoding( 0162, COSName.getPDFName( "r" ) ); + addCharacterEncoding( 0250, COSName.getPDFName( "registered" ) ); + addCharacterEncoding( 0373, COSName.getPDFName( "ring" ) ); + addCharacterEncoding( 0163, COSName.getPDFName( "s" ) ); + addCharacterEncoding( 0244, COSName.getPDFName( "section" ) ); + addCharacterEncoding( 073, COSName.getPDFName( "semicolon" ) ); + addCharacterEncoding( 067, COSName.getPDFName( "seven" ) ); + addCharacterEncoding( 066, COSName.getPDFName( "six" ) ); + addCharacterEncoding( 057, COSName.getPDFName( "slash" ) ); + addCharacterEncoding( 040, COSName.getPDFName( "space" ) ); + addCharacterEncoding( 0243, COSName.getPDFName( "sterling" ) ); + addCharacterEncoding( 0164, COSName.getPDFName( "t" ) ); + addCharacterEncoding( 063, COSName.getPDFName( "three" ) ); + addCharacterEncoding( 0367, COSName.getPDFName( "tilde" ) ); + addCharacterEncoding( 0252, COSName.getPDFName( "trademark" ) ); + addCharacterEncoding( 062, COSName.getPDFName( "two" ) ); + addCharacterEncoding( 0165, COSName.getPDFName( "u" ) ); + addCharacterEncoding( 0234, COSName.getPDFName( "uacute" ) ); + addCharacterEncoding( 0236, COSName.getPDFName( "ucircumflex" ) ); + addCharacterEncoding( 0237, COSName.getPDFName( "udieresis" ) ); + addCharacterEncoding( 0235, COSName.getPDFName( "ugrave" ) ); + addCharacterEncoding( 0137, COSName.getPDFName( "underscore" ) ); + addCharacterEncoding( 0166, COSName.getPDFName( "v" ) ); + addCharacterEncoding( 0167, COSName.getPDFName( "w" ) ); + addCharacterEncoding( 0170, COSName.getPDFName( "x" ) ); + addCharacterEncoding( 0171, COSName.getPDFName( "y" ) ); + addCharacterEncoding( 0330, COSName.getPDFName( "ydieresis" ) ); + addCharacterEncoding( 0264, COSName.getPDFName( "yen" ) 
); + addCharacterEncoding( 0172, COSName.getPDFName( "z" ) ); + addCharacterEncoding( 060, COSName.getPDFName( "zero" ) ); + } + + /** + * Convert this standard java object to a COS object. + * + * @return The cos object that matches this Java object. + */ + public COSBase getCOSObject() + { + return COSName.MAC_ROMAN_ENCODING; + } +} \ No newline at end of file diff --git a/src/main/java/org/pdfbox/encoding/PdfDocEncoding.java b/src/main/java/org/pdfbox/encoding/PdfDocEncoding.java new file mode 100644 index 0000000..1153002 --- /dev/null +++ b/src/main/java/org/pdfbox/encoding/PdfDocEncoding.java @@ -0,0 +1,289 @@ +/** + * Copyright (c) 2003, www.pdfbox.org + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * 3. Neither the name of pdfbox; nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * http://www.pdfbox.org
+ *
+ */
+package org.pdfbox.encoding;
+
+import org.pdfbox.cos.COSBase;
+import org.pdfbox.cos.COSName;
+
+/**
+ * The PDFDocEncoding table: associates single-byte character codes with glyph names.
+ *
+ * @author Ben Litchfield
+ * @version $Revision: 1.9 $
+ */
+public class PdfDocEncoding extends Encoding
+{
+    /**
+     * Constructor; registers every character code (written in octal) with its glyph name.
+     */
+    public PdfDocEncoding()
+    {
+        addCharacterEncoding( 0101, COSName.getPDFName( "A" ) );
+        addCharacterEncoding( 0306, COSName.getPDFName( "AE" ) );
+        addCharacterEncoding( 0301, COSName.getPDFName( "Aacute" ) );
+        addCharacterEncoding( 0302, COSName.getPDFName( "Acircumflex" ) );
+        addCharacterEncoding( 0304, COSName.getPDFName( "Adieresis" ) );
+        addCharacterEncoding( 0300, COSName.getPDFName( "Agrave" ) );
+        addCharacterEncoding( 0305, COSName.getPDFName( "Aring" ) );
+        addCharacterEncoding( 0303, COSName.getPDFName( "Atilde" ) );
+        addCharacterEncoding( 0102, COSName.getPDFName( "B" ) );
+        addCharacterEncoding( 0103, COSName.getPDFName( "C" ) );
+        addCharacterEncoding( 0307, COSName.getPDFName( "Ccedilla" ) );
+        addCharacterEncoding( 0104, COSName.getPDFName( "D" ) );
+        addCharacterEncoding( 0105, COSName.getPDFName( "E" ) );
+        addCharacterEncoding( 0311, COSName.getPDFName( "Eacute" ) );
+        addCharacterEncoding( 0312, COSName.getPDFName( "Ecircumflex" ) );
+        addCharacterEncoding( 0313, COSName.getPDFName( "Edieresis" ) );
+        addCharacterEncoding( 0310, COSName.getPDFName( "Egrave" ) );
+        addCharacterEncoding( 0320, COSName.getPDFName( "Eth" ) );
+        addCharacterEncoding( 0240, COSName.getPDFName( "Euro" ) );
+        addCharacterEncoding( 0106, COSName.getPDFName( "F" ) );
+        addCharacterEncoding( 0107, COSName.getPDFName( "G" ) );
+        addCharacterEncoding( 0110, COSName.getPDFName( "H" ) );
+        addCharacterEncoding( 0111, COSName.getPDFName( "I" ) );
+        addCharacterEncoding( 0315, COSName.getPDFName( "Iacute" ) );
+        addCharacterEncoding( 0316, COSName.getPDFName( "Icircumflex" ) );
+        addCharacterEncoding( 0317, COSName.getPDFName( "Idieresis" ) );
+        addCharacterEncoding( 0314, COSName.getPDFName( "Igrave" ) );
+        addCharacterEncoding( 0112, COSName.getPDFName( "J" ) );
+        addCharacterEncoding( 0113, COSName.getPDFName( "K" ) );
+        addCharacterEncoding( 0114, COSName.getPDFName( "L" ) );
+        addCharacterEncoding( 0225, COSName.getPDFName( "Lslash" ) );
+        addCharacterEncoding( 0115, COSName.getPDFName( "M" ) );
+        addCharacterEncoding( 0116, COSName.getPDFName( "N" ) );
+        addCharacterEncoding( 0321, COSName.getPDFName( "Ntilde" ) );
+        addCharacterEncoding( 0117, COSName.getPDFName( "O" ) );
+        addCharacterEncoding( 0226, COSName.getPDFName( "OE" ) );
+        addCharacterEncoding( 0323, COSName.getPDFName( "Oacute" ) );
+        addCharacterEncoding( 0324, COSName.getPDFName( "Ocircumflex" ) );
+        addCharacterEncoding( 0326, COSName.getPDFName( "Odieresis" ) );
+        addCharacterEncoding( 0322, COSName.getPDFName( "Ograve" ) );
+        addCharacterEncoding( 0330, COSName.getPDFName( "Oslash" ) );
+        addCharacterEncoding( 0325, COSName.getPDFName( "Otilde" ) );
+        addCharacterEncoding( 0120, COSName.getPDFName( "P" ) );
+        addCharacterEncoding( 0121, COSName.getPDFName( "Q" ) );
+        addCharacterEncoding( 0122, COSName.getPDFName( "R" ) );
+        addCharacterEncoding( 0123, COSName.getPDFName( "S" ) );
+        addCharacterEncoding( 0227, COSName.getPDFName( "Scaron" ) );
+        addCharacterEncoding( 0124, COSName.getPDFName( "T" ) );
+        addCharacterEncoding( 0336, COSName.getPDFName( "Thorn" ) );
+        addCharacterEncoding( 0125, COSName.getPDFName( "U" ) );
+        addCharacterEncoding( 0332, COSName.getPDFName( "Uacute" ) );
+        addCharacterEncoding( 0333, COSName.getPDFName( "Ucircumflex" ) );
+        addCharacterEncoding( 0334, COSName.getPDFName( "Udieresis" ) );
+        addCharacterEncoding( 0331, COSName.getPDFName( "Ugrave" ) );
+        addCharacterEncoding( 0126, COSName.getPDFName( "V" ) );
+        addCharacterEncoding( 0127, COSName.getPDFName( "W" ) );
+        addCharacterEncoding( 0130, COSName.getPDFName( "X" ) );
+        addCharacterEncoding( 0131, COSName.getPDFName( "Y" ) );
+        addCharacterEncoding( 0335, COSName.getPDFName( "Yacute" ) );
+        addCharacterEncoding( 0230, COSName.getPDFName( "Ydieresis" ) );
+        addCharacterEncoding( 0132, COSName.getPDFName( "Z" ) );
+        addCharacterEncoding( 0231, COSName.getPDFName( "Zcaron" ) );
+        addCharacterEncoding( 0141, COSName.getPDFName( "a" ) );
+        addCharacterEncoding( 0341, COSName.getPDFName( "aacute" ) );
+        addCharacterEncoding( 0342, COSName.getPDFName( "acircumflex" ) );
+        addCharacterEncoding( 0264, COSName.getPDFName( "acute" ) );
+        addCharacterEncoding( 0344, COSName.getPDFName( "adieresis" ) );
+        addCharacterEncoding( 0346, COSName.getPDFName( "ae" ) );
+        addCharacterEncoding( 0340, COSName.getPDFName( "agrave" ) );
+        addCharacterEncoding( 046, COSName.getPDFName( "ampersand" ) );
+        addCharacterEncoding( 0345, COSName.getPDFName( "aring" ) );
+        addCharacterEncoding( 0136, COSName.getPDFName( "asciicircum" ) );
+        addCharacterEncoding( 0176, COSName.getPDFName( "asciitilde" ) );
+        addCharacterEncoding( 052, COSName.getPDFName( "asterisk" ) );
+        addCharacterEncoding( 0100, COSName.getPDFName( "at" ) );
+        addCharacterEncoding( 0343, COSName.getPDFName( "atilde" ) );
+        addCharacterEncoding( 0142, COSName.getPDFName( "b" ) );
+        addCharacterEncoding( 0134, COSName.getPDFName( "backslash" ) );
+        addCharacterEncoding( 0174, COSName.getPDFName( "bar" ) );
+        addCharacterEncoding( 0173, COSName.getPDFName( "braceleft" ) );
+        addCharacterEncoding( 0175, COSName.getPDFName( "braceright" ) );
+        addCharacterEncoding( 0133, COSName.getPDFName( "bracketleft" ) );
+        addCharacterEncoding( 0135, COSName.getPDFName( "bracketright" ) );
+        addCharacterEncoding( 030, COSName.getPDFName( "breve" ) );
+        addCharacterEncoding( 0246, COSName.getPDFName( "brokenbar" ) );
+        addCharacterEncoding( 0200, COSName.getPDFName( "bullet" ) );
+        addCharacterEncoding( 0143, COSName.getPDFName( "c" ) );
+        addCharacterEncoding( 031, COSName.getPDFName( "caron" ) );
+        addCharacterEncoding( 0347, COSName.getPDFName( "ccedilla" ) );
+        addCharacterEncoding( 0270, COSName.getPDFName( "cedilla" ) );
+        addCharacterEncoding( 0242, COSName.getPDFName( "cent" ) );
+        addCharacterEncoding( 032, COSName.getPDFName( "circumflex" ) );
+        addCharacterEncoding( 072, COSName.getPDFName( "colon" ) );
+        addCharacterEncoding( 054, COSName.getPDFName( "comma" ) );
+        addCharacterEncoding( 0251, COSName.getPDFName( "copyright" ) );
+        addCharacterEncoding( 0244, COSName.getPDFName( "currency" ) ); // glyph name is "currency", not "currency1"
+        addCharacterEncoding( 0144, COSName.getPDFName( "d" ) );
+        addCharacterEncoding( 0201, COSName.getPDFName( "dagger" ) );
+        addCharacterEncoding( 0202, COSName.getPDFName( "daggerdbl" ) );
+        addCharacterEncoding( 0260, COSName.getPDFName( "degree" ) );
+        addCharacterEncoding( 0250, COSName.getPDFName( "dieresis" ) );
+        addCharacterEncoding( 0367, COSName.getPDFName( "divide" ) );
+        addCharacterEncoding( 044, COSName.getPDFName( "dollar" ) );
+        addCharacterEncoding( 033, COSName.getPDFName( "dotaccent" ) );
+        addCharacterEncoding( 0232, COSName.getPDFName( "dotlessi" ) );
+        addCharacterEncoding( 0145, COSName.getPDFName( "e" ) );
+        addCharacterEncoding( 0351, COSName.getPDFName( "eacute" ) );
+        addCharacterEncoding( 0352, COSName.getPDFName( "ecircumflex" ) );
+        addCharacterEncoding( 0353, COSName.getPDFName( "edieresis" ) );
+        addCharacterEncoding( 0350, COSName.getPDFName( "egrave" ) );
+        addCharacterEncoding( 070, COSName.getPDFName( "eight" ) );
+        addCharacterEncoding( 0203, COSName.getPDFName( "ellipsis" ) );
+        addCharacterEncoding( 0204, COSName.getPDFName( "emdash" ) );
+        addCharacterEncoding( 0205, COSName.getPDFName( "endash" ) );
+        addCharacterEncoding( 075, COSName.getPDFName( "equal" ) );
+        addCharacterEncoding( 0360, COSName.getPDFName( "eth" ) );
+        addCharacterEncoding( 041, COSName.getPDFName( "exclam" ) );
+        addCharacterEncoding( 0241, COSName.getPDFName( "exclamdown" ) );
+        addCharacterEncoding( 0146, COSName.getPDFName( "f" ) );
+        addCharacterEncoding( 0223, COSName.getPDFName( "fi" ) );
+        addCharacterEncoding( 065, COSName.getPDFName( "five" ) );
+        addCharacterEncoding( 0224, COSName.getPDFName( "fl" ) );
+        addCharacterEncoding( 0206, COSName.getPDFName( "florin" ) );
+        addCharacterEncoding( 064, COSName.getPDFName( "four" ) );
+        addCharacterEncoding( 0207, COSName.getPDFName( "fraction" ) );
+        addCharacterEncoding( 0147, COSName.getPDFName( "g" ) );
+        addCharacterEncoding( 0337, COSName.getPDFName( "germandbls" ) );
+        addCharacterEncoding( 0140, COSName.getPDFName( "grave" ) );
+        addCharacterEncoding( 076, COSName.getPDFName( "greater" ) );
+        addCharacterEncoding( 0253, COSName.getPDFName( "guillemotleft" ) );
+        addCharacterEncoding( 0273, COSName.getPDFName( "guillemotright" ) );
+        addCharacterEncoding( 0210, COSName.getPDFName( "guilsinglleft" ) );
+        addCharacterEncoding( 0211, COSName.getPDFName( "guilsinglright" ) );
+        addCharacterEncoding( 0150, COSName.getPDFName( "h" ) );
+        addCharacterEncoding( 034, COSName.getPDFName( "hungarumlaut" ) );
+        addCharacterEncoding( 055, COSName.getPDFName( "hyphen" ) );
+        addCharacterEncoding( 0151, COSName.getPDFName( "i" ) );
+        addCharacterEncoding( 0355, COSName.getPDFName( "iacute" ) );
+        addCharacterEncoding( 0356, COSName.getPDFName( "icircumflex" ) );
+        addCharacterEncoding( 0357, COSName.getPDFName( "idieresis" ) );
+        addCharacterEncoding( 0354, COSName.getPDFName( "igrave" ) );
+        addCharacterEncoding( 0152, COSName.getPDFName( "j" ) );
+        addCharacterEncoding( 0153, COSName.getPDFName( "k" ) );
+        addCharacterEncoding( 0154, COSName.getPDFName( "l" ) );
+        addCharacterEncoding( 074, COSName.getPDFName( "less" ) );
+        addCharacterEncoding( 0254, COSName.getPDFName( "logicalnot" ) );
+        addCharacterEncoding( 0233, COSName.getPDFName( "lslash" ) );
+        addCharacterEncoding( 0155, COSName.getPDFName( "m" ) );
+        addCharacterEncoding( 0257, COSName.getPDFName( "macron" ) );
+        addCharacterEncoding( 0212, COSName.getPDFName( "minus" ) );
+        addCharacterEncoding( 0265, COSName.getPDFName( "mu" ) );
+        addCharacterEncoding( 0327, COSName.getPDFName( "multiply" ) );
+        addCharacterEncoding( 0156, COSName.getPDFName( "n" ) );
+        addCharacterEncoding( 071, COSName.getPDFName( "nine" ) );
+        addCharacterEncoding( 0361, COSName.getPDFName( "ntilde" ) );
+        addCharacterEncoding( 043, COSName.getPDFName( "numbersign" ) );
+        addCharacterEncoding( 0157, COSName.getPDFName( "o" ) );
+        addCharacterEncoding( 0363, COSName.getPDFName( "oacute" ) );
+        addCharacterEncoding( 0364, COSName.getPDFName( "ocircumflex" ) );
+        addCharacterEncoding( 0366, COSName.getPDFName( "odieresis" ) );
+        addCharacterEncoding( 0234, COSName.getPDFName( "oe" ) );
+        addCharacterEncoding( 035, COSName.getPDFName( "ogonek" ) );
+        addCharacterEncoding( 0362, COSName.getPDFName( "ograve" ) );
+        addCharacterEncoding( 061, COSName.getPDFName( "one" ) );
+        addCharacterEncoding( 0275, COSName.getPDFName( "onehalf" ) );
+        addCharacterEncoding( 0274, COSName.getPDFName( "onequarter" ) );
+        addCharacterEncoding( 0271, COSName.getPDFName( "onesuperior" ) );
+        addCharacterEncoding( 0252, COSName.getPDFName( "ordfeminine" ) );
+        addCharacterEncoding( 0272, COSName.getPDFName( "ordmasculine" ) );
+        addCharacterEncoding( 0370, COSName.getPDFName( "oslash" ) );
+        addCharacterEncoding( 0365, COSName.getPDFName( "otilde" ) );
+        addCharacterEncoding( 0160, COSName.getPDFName( "p" ) );
+        addCharacterEncoding( 0266, COSName.getPDFName( "paragraph" ) );
+        addCharacterEncoding( 050, COSName.getPDFName( "parenleft" ) );
+        addCharacterEncoding( 051, COSName.getPDFName( "parenright" ) );
+        addCharacterEncoding( 045, COSName.getPDFName( "percent" ) );
+        addCharacterEncoding( 056, COSName.getPDFName( "period" ) );
+        addCharacterEncoding( 0267, COSName.getPDFName( "periodcentered" ) );
+        addCharacterEncoding( 0213, COSName.getPDFName( "perthousand" ) );
+        addCharacterEncoding( 053, COSName.getPDFName( "plus" ) );
+        addCharacterEncoding( 0261, COSName.getPDFName( "plusminus" ) );
+        addCharacterEncoding( 0161, COSName.getPDFName( "q" ) );
+        addCharacterEncoding( 077, COSName.getPDFName( "question" ) );
+        addCharacterEncoding( 0277, COSName.getPDFName( "questiondown" ) );
+        addCharacterEncoding( 042, COSName.getPDFName( "quotedbl" ) );
+        addCharacterEncoding( 0214, COSName.getPDFName( "quotedblbase" ) );
+        addCharacterEncoding( 0215, COSName.getPDFName( "quotedblleft" ) );
+        addCharacterEncoding( 0216, COSName.getPDFName( "quotedblright" ) );
+        addCharacterEncoding( 0217, COSName.getPDFName( "quoteleft" ) );
+        addCharacterEncoding( 0220, COSName.getPDFName( "quoteright" ) );
+        addCharacterEncoding( 0221, COSName.getPDFName( "quotesinglbase" ) );
+        addCharacterEncoding( 047, COSName.getPDFName( "quotesingle" ) );
+        addCharacterEncoding( 0162, COSName.getPDFName( "r" ) );
+        addCharacterEncoding( 0256, COSName.getPDFName( "registered" ) );
+        addCharacterEncoding( 036, COSName.getPDFName( "ring" ) );
+        addCharacterEncoding( 0163, COSName.getPDFName( "s" ) );
+        addCharacterEncoding( 0235, COSName.getPDFName( "scaron" ) );
+        addCharacterEncoding( 0247, COSName.getPDFName( "section" ) );
+        addCharacterEncoding( 073, COSName.getPDFName( "semicolon" ) );
+        addCharacterEncoding( 067, COSName.getPDFName( "seven" ) );
+        addCharacterEncoding( 066, COSName.getPDFName( "six" ) );
+        addCharacterEncoding( 057, COSName.getPDFName( "slash" ) );
+        addCharacterEncoding( 040, COSName.getPDFName( "space" ) );
+        addCharacterEncoding( 0243, COSName.getPDFName( "sterling" ) );
+        addCharacterEncoding( 0164, COSName.getPDFName( "t" ) );
+        addCharacterEncoding( 0376, COSName.getPDFName( "thorn" ) );
+        addCharacterEncoding( 063, COSName.getPDFName( "three" ) );
+        addCharacterEncoding( 0276, COSName.getPDFName( "threequarters" ) );
+        addCharacterEncoding( 0263, COSName.getPDFName( "threesuperior" ) );
+        addCharacterEncoding( 037, COSName.getPDFName( "tilde" ) );
+        addCharacterEncoding( 0222, COSName.getPDFName( "trademark" ) );
+        addCharacterEncoding( 062, COSName.getPDFName( "two" ) );
+        addCharacterEncoding( 0262, COSName.getPDFName( "twosuperior" ) );
+        addCharacterEncoding( 0165, COSName.getPDFName( "u" ) );
+        addCharacterEncoding( 0372, COSName.getPDFName( "uacute" ) );
+        addCharacterEncoding( 0373, COSName.getPDFName( "ucircumflex" ) );
+        addCharacterEncoding( 0374, COSName.getPDFName( "udieresis" ) );
+        addCharacterEncoding( 0371, COSName.getPDFName( "ugrave" ) );
+        addCharacterEncoding( 0137, COSName.getPDFName( "underscore" ) );
+        addCharacterEncoding( 0166, COSName.getPDFName( "v" ) );
+        addCharacterEncoding( 0167, COSName.getPDFName( "w" ) );
+        addCharacterEncoding( 0170, COSName.getPDFName( "x" ) );
+        addCharacterEncoding( 0171, COSName.getPDFName( "y" ) );
+        addCharacterEncoding( 0375, COSName.getPDFName( "yacute" ) );
+        addCharacterEncoding( 0377, COSName.getPDFName( "ydieresis" ) );
+        addCharacterEncoding( 0245, COSName.getPDFName( "yen" ) );
+        addCharacterEncoding( 0172, COSName.getPDFName( "z" ) );
+        addCharacterEncoding( 0236, COSName.getPDFName( "zcaron" ) );
+        addCharacterEncoding( 060, COSName.getPDFName( "zero" ) );
+    }
+
+    /**
+     * Returns the COS object that identifies this encoding.
+     *
+     * @return The constant COSName.PDF_DOC_ENCODING.
+     */
+    public COSBase getCOSObject()
+    {
+        return COSName.PDF_DOC_ENCODING;
+    }
+}
\ No newline at end of file
diff --git a/src/main/java/org/pdfbox/encoding/StandardEncoding.java b/src/main/java/org/pdfbox/encoding/StandardEncoding.java
new file mode 100644
index 0000000..229adb4
--- /dev/null
+++ b/src/main/java/org/pdfbox/encoding/StandardEncoding.java
@@ -0,0 +1,209 @@
+/**
+ * Copyright (c) 2003-2004, www.pdfbox.org
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ * 3. Neither the name of pdfbox; nor the names of its
+ *    contributors may be used to endorse or promote products derived from this
+ *    software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * http://www.pdfbox.org
+ *
+ */
+package org.pdfbox.encoding;
+
+import org.pdfbox.cos.COSBase;
+import org.pdfbox.cos.COSName;
+
+/**
+ * The Adobe StandardEncoding table: associates single-byte character codes with glyph names.
+ *
+ * @author Ben Litchfield
+ * @version $Revision: 1.9 $
+ */
+public class StandardEncoding extends Encoding
+{
+    /**
+     * Constructor; registers every character code (written in octal) with its glyph name.
+     */
+    public StandardEncoding()
+    {
+        addCharacterEncoding( 0101, COSName.getPDFName( "A" ) );
+        addCharacterEncoding( 0341, COSName.getPDFName( "AE" ) );
+        addCharacterEncoding( 0102, COSName.getPDFName( "B" ) );
+        addCharacterEncoding( 0103, COSName.getPDFName( "C" ) );
+        addCharacterEncoding( 0104, COSName.getPDFName( "D" ) );
+        addCharacterEncoding( 0105, COSName.getPDFName( "E" ) );
+        addCharacterEncoding( 0106, COSName.getPDFName( "F" ) );
+        addCharacterEncoding( 0107, COSName.getPDFName( "G" ) );
+        addCharacterEncoding( 0110, COSName.getPDFName( "H" ) );
+        addCharacterEncoding( 0111, COSName.getPDFName( "I" ) );
+        addCharacterEncoding( 0112, COSName.getPDFName( "J" ) );
+        addCharacterEncoding( 0113, COSName.getPDFName( "K" ) );
+        addCharacterEncoding( 0114, COSName.getPDFName( "L" ) );
+        addCharacterEncoding( 0350, COSName.getPDFName( "Lslash" ) );
+        addCharacterEncoding( 0115, COSName.getPDFName( "M" ) );
+        addCharacterEncoding( 0116, COSName.getPDFName( "N" ) );
+        addCharacterEncoding( 0117, COSName.getPDFName( "O" ) );
+        addCharacterEncoding( 0352, COSName.getPDFName( "OE" ) );
+        addCharacterEncoding( 0351, COSName.getPDFName( "Oslash" ) );
+        addCharacterEncoding( 0120, COSName.getPDFName( "P" ) );
+        addCharacterEncoding( 0121, COSName.getPDFName( "Q" ) );
+        addCharacterEncoding( 0122, COSName.getPDFName( "R" ) );
+        addCharacterEncoding( 0123, COSName.getPDFName( "S" ) );
+        addCharacterEncoding( 0124, COSName.getPDFName( "T" ) );
+        addCharacterEncoding( 0125, COSName.getPDFName( "U" ) );
+        addCharacterEncoding( 0126, COSName.getPDFName( "V" ) );
+        addCharacterEncoding( 0127, COSName.getPDFName( "W" ) );
+        addCharacterEncoding( 0130, COSName.getPDFName( "X" ) );
+        addCharacterEncoding( 0131, COSName.getPDFName( "Y" ) );
+        addCharacterEncoding( 0132, COSName.getPDFName( "Z" ) );
+        addCharacterEncoding( 0141, COSName.getPDFName( "a" ) );
+        addCharacterEncoding( 0302, COSName.getPDFName( "acute" ) );
+        addCharacterEncoding( 0361, COSName.getPDFName( "ae" ) );
+        addCharacterEncoding( 0046, COSName.getPDFName( "ampersand" ) );
+        addCharacterEncoding( 0136, COSName.getPDFName( "asciicircum" ) );
+        addCharacterEncoding( 0176, COSName.getPDFName( "asciitilde" ) );
+        addCharacterEncoding( 0052, COSName.getPDFName( "asterisk" ) );
+        addCharacterEncoding( 0100, COSName.getPDFName( "at" ) );
+        addCharacterEncoding( 0142, COSName.getPDFName( "b" ) );
+        addCharacterEncoding( 0134, COSName.getPDFName( "backslash" ) );
+        addCharacterEncoding( 0174, COSName.getPDFName( "bar" ) );
+        addCharacterEncoding( 0173, COSName.getPDFName( "braceleft" ) );
+        addCharacterEncoding( 0175, COSName.getPDFName( "braceright" ) );
+        addCharacterEncoding( 0133, COSName.getPDFName( "bracketleft" ) );
+        addCharacterEncoding( 0135, COSName.getPDFName( "bracketright" ) );
+        addCharacterEncoding( 0306, COSName.getPDFName( "breve" ) );
+        addCharacterEncoding( 0267, COSName.getPDFName( "bullet" ) );
+        addCharacterEncoding( 0143, COSName.getPDFName( "c" ) );
+        addCharacterEncoding( 0317, COSName.getPDFName( "caron" ) );
+        addCharacterEncoding( 0313, COSName.getPDFName( "cedilla" ) );
+        addCharacterEncoding( 0242, COSName.getPDFName( "cent" ) );
+        addCharacterEncoding( 0303, COSName.getPDFName( "circumflex" ) );
+        addCharacterEncoding( 0072, COSName.getPDFName( "colon" ) );
+        addCharacterEncoding( 0054, COSName.getPDFName( "comma" ) );
+        addCharacterEncoding( 0250, COSName.getPDFName( "currency" ) ); // glyph name is "currency", not "currency1"
+        addCharacterEncoding( 0144, COSName.getPDFName( "d" ) );
+        addCharacterEncoding( 0262, COSName.getPDFName( "dagger" ) );
+        addCharacterEncoding( 0263, COSName.getPDFName( "daggerdbl" ) );
+        addCharacterEncoding( 0310, COSName.getPDFName( "dieresis" ) );
+        addCharacterEncoding( 0044, COSName.getPDFName( "dollar" ) );
+        addCharacterEncoding( 0307, COSName.getPDFName( "dotaccent" ) );
+        addCharacterEncoding( 0365, COSName.getPDFName( "dotlessi" ) );
+        addCharacterEncoding( 0145, COSName.getPDFName( "e" ) );
+        addCharacterEncoding( 0070, COSName.getPDFName( "eight" ) );
+        addCharacterEncoding( 0274, COSName.getPDFName( "ellipsis" ) );
+        addCharacterEncoding( 0320, COSName.getPDFName( "emdash" ) );
+        addCharacterEncoding( 0261, COSName.getPDFName( "endash" ) );
+        addCharacterEncoding( 0075, COSName.getPDFName( "equal" ) );
+        addCharacterEncoding( 0041, COSName.getPDFName( "exclam" ) );
+        addCharacterEncoding( 0241, COSName.getPDFName( "exclamdown" ) );
+        addCharacterEncoding( 0146, COSName.getPDFName( "f" ) );
+        addCharacterEncoding( 0256, COSName.getPDFName( "fi" ) );
+        addCharacterEncoding( 0065, COSName.getPDFName( "five" ) );
+        addCharacterEncoding( 0257, COSName.getPDFName( "fl" ) );
+        addCharacterEncoding( 0246, COSName.getPDFName( "florin" ) );
+        addCharacterEncoding( 0064, COSName.getPDFName( "four" ) );
+        addCharacterEncoding( 0244, COSName.getPDFName( "fraction" ) );
+        addCharacterEncoding( 0147, COSName.getPDFName( "g" ) );
+        addCharacterEncoding( 0373, COSName.getPDFName( "germandbls" ) );
+        addCharacterEncoding( 0301, COSName.getPDFName( "grave" ) );
+        addCharacterEncoding( 0076, COSName.getPDFName( "greater" ) );
+        addCharacterEncoding( 0253, COSName.getPDFName( "guillemotleft" ) );
+        addCharacterEncoding( 0273, COSName.getPDFName( "guillemotright" ) );
+        addCharacterEncoding( 0254, COSName.getPDFName( "guilsinglleft" ) );
+        addCharacterEncoding( 0255, COSName.getPDFName( "guilsinglright" ) );
+        addCharacterEncoding( 0150, COSName.getPDFName( "h" ) );
+        addCharacterEncoding( 0315, COSName.getPDFName( "hungarumlaut" ) );
+        addCharacterEncoding( 0055, COSName.getPDFName( "hyphen" ) );
+        addCharacterEncoding( 0151, COSName.getPDFName( "i" ) );
+        addCharacterEncoding( 0152, COSName.getPDFName( "j" ) );
+        addCharacterEncoding( 0153, COSName.getPDFName( "k" ) );
+        addCharacterEncoding( 0154, COSName.getPDFName( "l" ) );
+        addCharacterEncoding( 0074, COSName.getPDFName( "less" ) );
+        addCharacterEncoding( 0370, COSName.getPDFName( "lslash" ) );
+        addCharacterEncoding( 0155, COSName.getPDFName( "m" ) );
+        addCharacterEncoding( 0305, COSName.getPDFName( "macron" ) );
+        addCharacterEncoding( 0156, COSName.getPDFName( "n" ) );
+        addCharacterEncoding( 0071, COSName.getPDFName( "nine" ) );
+        addCharacterEncoding( 0043, COSName.getPDFName( "numbersign" ) );
+        addCharacterEncoding( 0157, COSName.getPDFName( "o" ) );
+        addCharacterEncoding( 0372, COSName.getPDFName( "oe" ) );
+        addCharacterEncoding( 0316, COSName.getPDFName( "ogonek" ) );
+        addCharacterEncoding( 0061, COSName.getPDFName( "one" ) );
+        addCharacterEncoding( 0343, COSName.getPDFName( "ordfeminine" ) );
+        addCharacterEncoding( 0353, COSName.getPDFName( "ordmasculine" ) );
+        addCharacterEncoding( 0371, COSName.getPDFName( "oslash" ) );
+        addCharacterEncoding( 0160, COSName.getPDFName( "p" ) );
+        addCharacterEncoding( 0266, COSName.getPDFName( "paragraph" ) );
+        addCharacterEncoding( 0050, COSName.getPDFName( "parenleft" ) );
+        addCharacterEncoding( 0051, COSName.getPDFName( "parenright" ) );
+        addCharacterEncoding( 0045, COSName.getPDFName( "percent" ) );
+        addCharacterEncoding( 0056, COSName.getPDFName( "period" ) );
+        addCharacterEncoding( 0264, COSName.getPDFName( "periodcentered" ) );
+        addCharacterEncoding( 0275, COSName.getPDFName( "perthousand" ) );
+        addCharacterEncoding( 0053, COSName.getPDFName( "plus" ) );
+        addCharacterEncoding( 0161, COSName.getPDFName( "q" ) );
+        addCharacterEncoding( 0077, COSName.getPDFName( "question" ) );
+        addCharacterEncoding( 0277, COSName.getPDFName( "questiondown" ) );
+        addCharacterEncoding( 0042, COSName.getPDFName( "quotedbl" ) );
+        addCharacterEncoding( 0271, COSName.getPDFName( "quotedblbase" ) );
+        addCharacterEncoding( 0252, COSName.getPDFName( "quotedblleft" ) );
+        addCharacterEncoding( 0272, COSName.getPDFName( "quotedblright" ) );
+        addCharacterEncoding( 0140, COSName.getPDFName( "quoteleft" ) );
+        addCharacterEncoding( 0047, COSName.getPDFName( "quoteright" ) );
+        addCharacterEncoding( 0270, COSName.getPDFName( "quotesinglbase" ) );
+        addCharacterEncoding( 0251, COSName.getPDFName( "quotesingle" ) );
+        addCharacterEncoding( 0162, COSName.getPDFName( "r" ) );
+        addCharacterEncoding( 0312, COSName.getPDFName( "ring" ) );
+        addCharacterEncoding( 0163, COSName.getPDFName( "s" ) );
+        addCharacterEncoding( 0247, COSName.getPDFName( "section" ) );
+        addCharacterEncoding( 0073, COSName.getPDFName( "semicolon" ) );
+        addCharacterEncoding( 0067, COSName.getPDFName( "seven" ) );
+        addCharacterEncoding( 0066, COSName.getPDFName( "six" ) );
+        addCharacterEncoding( 0057, COSName.getPDFName( "slash" ) );
+        addCharacterEncoding( 0040, COSName.getPDFName( "space" ) );
+        addCharacterEncoding( 0243, COSName.getPDFName( "sterling" ) );
+        addCharacterEncoding( 0164, COSName.getPDFName( "t" ) );
+        addCharacterEncoding( 0063, COSName.getPDFName( "three" ) );
+        addCharacterEncoding( 0304, COSName.getPDFName( "tilde" ) );
+        addCharacterEncoding( 0062, COSName.getPDFName( "two" ) );
+        addCharacterEncoding( 0165, COSName.getPDFName( "u" ) );
+        addCharacterEncoding( 0137, COSName.getPDFName( "underscore" ) );
+        addCharacterEncoding( 0166, COSName.getPDFName( "v" ) );
+        addCharacterEncoding( 0167, COSName.getPDFName( "w" ) );
+        addCharacterEncoding( 0170, COSName.getPDFName( "x" ) );
+        addCharacterEncoding( 0171, COSName.getPDFName( "y" ) );
+        addCharacterEncoding( 0245, COSName.getPDFName( "yen" ) );
+        addCharacterEncoding( 0172, COSName.getPDFName( "z" ) );
+        addCharacterEncoding( 0060, COSName.getPDFName( "zero" ) );
+    }
+
+    /**
+     * Returns the COS object that identifies this encoding.
+     *
+     * @return The constant COSName.STANDARD_ENCODING.
+     */
+    public COSBase getCOSObject()
+    {
+        return COSName.STANDARD_ENCODING;
+    }
+}
\ No newline at end of file
diff --git a/src/main/java/org/pdfbox/encoding/WinAnsiEncoding.java b/src/main/java/org/pdfbox/encoding/WinAnsiEncoding.java
new file mode 100644
index 0000000..dcbe872
--- /dev/null
+++ b/src/main/java/org/pdfbox/encoding/WinAnsiEncoding.java
@@ -0,0 +1,281 @@
+/**
+ * Copyright (c) 2003, www.pdfbox.org
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ * 3. Neither the name of pdfbox; nor the names of its
+ *    contributors may be used to endorse or promote products derived from this
+ *    software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ * + * http://www.pdfbox.org + * + */ +package org.pdfbox.encoding; + +import org.pdfbox.cos.COSBase; +import org.pdfbox.cos.COSName; + +/** + * Encoding table for WinAnsiEncoding. The constructor registers one glyph + * name per character code by calling addCharacterEncoding, which is not + * defined in this class (presumably inherited from Encoding). Codes are + * written as Java octal literals (leading zero), so 0101 is decimal 65 and + * is paired with the glyph name A. + * + * @author Ben Litchfield (ben@csh.rit.edu) + * @version $Revision: 1.9 $ + */ +public class WinAnsiEncoding extends Encoding +{ + /** + * Fills the table. Every entry is one addCharacterEncoding call that takes + * an octal character code and the COSName returned by COSName.getPDFName + * for the glyph name. Entries are ordered by glyph name (uppercase names + * first); the only exception is the circlecopyrt alias, which follows + * copyright. + * NOTE(review): code 0251 is registered twice, so the call order may + * matter - confirm against addCharacterEncoding before reordering. + */ + public WinAnsiEncoding() + { + addCharacterEncoding( 0101, COSName.getPDFName( "A" ) ); + addCharacterEncoding( 0306, COSName.getPDFName( "AE" ) ); + addCharacterEncoding( 0301, COSName.getPDFName( "Aacute" ) ); + addCharacterEncoding( 0302, COSName.getPDFName( "Acircumflex" ) ); + addCharacterEncoding( 0304, COSName.getPDFName( "Adieresis" ) ); + addCharacterEncoding( 0300, COSName.getPDFName( "Agrave" ) ); + addCharacterEncoding( 0305, COSName.getPDFName( "Aring" ) ); + addCharacterEncoding( 0303, COSName.getPDFName( "Atilde" ) ); + addCharacterEncoding( 0102, COSName.getPDFName( "B" ) ); + addCharacterEncoding( 0103, COSName.getPDFName( "C" ) ); + addCharacterEncoding( 0307, COSName.getPDFName( "Ccedilla" ) ); + addCharacterEncoding( 0104, COSName.getPDFName( "D" ) ); + addCharacterEncoding( 0105, COSName.getPDFName( "E" ) ); + addCharacterEncoding( 0311, COSName.getPDFName( "Eacute" ) ); + addCharacterEncoding( 0312, COSName.getPDFName( "Ecircumflex" ) ); + addCharacterEncoding( 0313, COSName.getPDFName( "Edieresis" ) ); + addCharacterEncoding( 0310, COSName.getPDFName( "Egrave" ) ); + addCharacterEncoding( 0320, COSName.getPDFName( "Eth" ) ); + addCharacterEncoding( 0200, COSName.getPDFName( "Euro" ) ); + addCharacterEncoding( 0106, COSName.getPDFName( "F" ) ); + addCharacterEncoding( 0107, COSName.getPDFName( "G" ) ); + addCharacterEncoding( 0110, COSName.getPDFName( "H" ) ); + addCharacterEncoding( 0111, COSName.getPDFName( "I" ) ); + addCharacterEncoding( 0315, COSName.getPDFName( "Iacute" ) ); + addCharacterEncoding( 0316, COSName.getPDFName( "Icircumflex" ) ); + addCharacterEncoding( 0317, COSName.getPDFName( "Idieresis" ) 
); + addCharacterEncoding( 0314, COSName.getPDFName( "Igrave" ) ); + addCharacterEncoding( 0112, COSName.getPDFName( "J" ) ); + addCharacterEncoding( 0113, COSName.getPDFName( "K" ) ); + addCharacterEncoding( 0114, COSName.getPDFName( "L" ) ); + addCharacterEncoding( 0115, COSName.getPDFName( "M" ) ); + addCharacterEncoding( 0116, COSName.getPDFName( "N" ) ); + addCharacterEncoding( 0321, COSName.getPDFName( "Ntilde" ) ); + addCharacterEncoding( 0117, COSName.getPDFName( "O" ) ); + addCharacterEncoding( 0214, COSName.getPDFName( "OE" ) ); + addCharacterEncoding( 0323, COSName.getPDFName( "Oacute" ) ); + addCharacterEncoding( 0324, COSName.getPDFName( "Ocircumflex" ) ); + addCharacterEncoding( 0326, COSName.getPDFName( "Odieresis" ) ); + addCharacterEncoding( 0322, COSName.getPDFName( "Ograve" ) ); + addCharacterEncoding( 0330, COSName.getPDFName( "Oslash" ) ); + addCharacterEncoding( 0325, COSName.getPDFName( "Otilde" ) ); + addCharacterEncoding( 0120, COSName.getPDFName( "P" ) ); + addCharacterEncoding( 0121, COSName.getPDFName( "Q" ) ); + addCharacterEncoding( 0122, COSName.getPDFName( "R" ) ); + addCharacterEncoding( 0123, COSName.getPDFName( "S" ) ); + addCharacterEncoding( 0212, COSName.getPDFName( "Scaron" ) ); + addCharacterEncoding( 0124, COSName.getPDFName( "T" ) ); + addCharacterEncoding( 0336, COSName.getPDFName( "Thorn" ) ); + addCharacterEncoding( 0125, COSName.getPDFName( "U" ) ); + addCharacterEncoding( 0332, COSName.getPDFName( "Uacute" ) ); + addCharacterEncoding( 0333, COSName.getPDFName( "Ucircumflex" ) ); + addCharacterEncoding( 0334, COSName.getPDFName( "Udieresis" ) ); + addCharacterEncoding( 0331, COSName.getPDFName( "Ugrave" ) ); + addCharacterEncoding( 0126, COSName.getPDFName( "V" ) ); + addCharacterEncoding( 0127, COSName.getPDFName( "W" ) ); + addCharacterEncoding( 0130, COSName.getPDFName( "X" ) ); + addCharacterEncoding( 0131, COSName.getPDFName( "Y" ) ); + addCharacterEncoding( 0335, COSName.getPDFName( "Yacute" ) ); + 
addCharacterEncoding( 0237, COSName.getPDFName( "Ydieresis" ) ); + addCharacterEncoding( 0132, COSName.getPDFName( "Z" ) ); + addCharacterEncoding( 0216, COSName.getPDFName( "Zcaron" ) ); + addCharacterEncoding( 0141, COSName.getPDFName( "a" ) ); + addCharacterEncoding( 0341, COSName.getPDFName( "aacute" ) ); + addCharacterEncoding( 0342, COSName.getPDFName( "acircumflex" ) ); + addCharacterEncoding( 0264, COSName.getPDFName( "acute" ) ); + addCharacterEncoding( 0344, COSName.getPDFName( "adieresis" ) ); + addCharacterEncoding( 0346, COSName.getPDFName( "ae" ) ); + addCharacterEncoding( 0340, COSName.getPDFName( "agrave" ) ); + addCharacterEncoding( 046, COSName.getPDFName( "ampersand" ) ); + addCharacterEncoding( 0345, COSName.getPDFName( "aring" ) ); + addCharacterEncoding( 0136, COSName.getPDFName( "asciicircum" ) ); + addCharacterEncoding( 0176, COSName.getPDFName( "asciitilde" ) ); + addCharacterEncoding( 052, COSName.getPDFName( "asterisk" ) ); + addCharacterEncoding( 0100, COSName.getPDFName( "at" ) ); + addCharacterEncoding( 0343, COSName.getPDFName( "atilde" ) ); + addCharacterEncoding( 0142, COSName.getPDFName( "b" ) ); + addCharacterEncoding( 0134, COSName.getPDFName( "backslash" ) ); + addCharacterEncoding( 0174, COSName.getPDFName( "bar" ) ); + addCharacterEncoding( 0173, COSName.getPDFName( "braceleft" ) ); + addCharacterEncoding( 0175, COSName.getPDFName( "braceright" ) ); + addCharacterEncoding( 0133, COSName.getPDFName( "bracketleft" ) ); + addCharacterEncoding( 0135, COSName.getPDFName( "bracketright" ) ); + addCharacterEncoding( 0246, COSName.getPDFName( "brokenbar" ) ); + addCharacterEncoding( 0225, COSName.getPDFName( "bullet" ) ); + addCharacterEncoding( 0143, COSName.getPDFName( "c" ) ); + addCharacterEncoding( 0347, COSName.getPDFName( "ccedilla" ) ); + addCharacterEncoding( 0270, COSName.getPDFName( "cedilla" ) ); + addCharacterEncoding( 0242, COSName.getPDFName( "cent" ) ); + addCharacterEncoding( 0210, COSName.getPDFName( "circumflex" ) 
); + addCharacterEncoding( 072, COSName.getPDFName( "colon" ) ); + addCharacterEncoding( 054, COSName.getPDFName( "comma" ) ); + addCharacterEncoding( 0251, COSName.getPDFName( "copyright" ) ); + + /* + * Added because cweb.pdf uses circlecopyrt as the glyph name. This entry + * reuses code 0251, which the preceding call already paired with + * copyright. + * NOTE(review): which of the two names a lookup by code yields depends on + * addCharacterEncoding, which is not visible here - confirm. + */ + addCharacterEncoding( 0251, COSName.getPDFName( "circlecopyrt" ) ); + addCharacterEncoding( 0244, COSName.getPDFName( "currency" ) ); + addCharacterEncoding( 0144, COSName.getPDFName( "d" ) ); + addCharacterEncoding( 0206, COSName.getPDFName( "dagger" ) ); + addCharacterEncoding( 0207, COSName.getPDFName( "daggerdbl" ) ); + addCharacterEncoding( 0260, COSName.getPDFName( "degree" ) ); + addCharacterEncoding( 0250, COSName.getPDFName( "dieresis" ) ); + addCharacterEncoding( 0367, COSName.getPDFName( "divide" ) ); + addCharacterEncoding( 044, COSName.getPDFName( "dollar" ) ); + addCharacterEncoding( 0145, COSName.getPDFName( "e" ) ); + addCharacterEncoding( 0351, COSName.getPDFName( "eacute" ) ); + addCharacterEncoding( 0352, COSName.getPDFName( "ecircumflex" ) ); + addCharacterEncoding( 0353, COSName.getPDFName( "edieresis" ) ); + addCharacterEncoding( 0350, COSName.getPDFName( "egrave" ) ); + addCharacterEncoding( 070, COSName.getPDFName( "eight" ) ); + addCharacterEncoding( 0205, COSName.getPDFName( "ellipsis" ) ); + addCharacterEncoding( 0227, COSName.getPDFName( "emdash" ) ); + addCharacterEncoding( 0226, COSName.getPDFName( "endash" ) ); + addCharacterEncoding( 075, COSName.getPDFName( "equal" ) ); + addCharacterEncoding( 0360, COSName.getPDFName( "eth" ) ); + addCharacterEncoding( 041, COSName.getPDFName( "exclam" ) ); + addCharacterEncoding( 0241, COSName.getPDFName( "exclamdown" ) ); + addCharacterEncoding( 0146, COSName.getPDFName( "f" ) ); + addCharacterEncoding( 065, COSName.getPDFName( "five" ) ); + addCharacterEncoding( 0203, COSName.getPDFName( "florin" ) ); + addCharacterEncoding( 064, COSName.getPDFName( "four" ) ); + addCharacterEncoding( 0147, COSName.getPDFName( "g" ) ); + addCharacterEncoding( 
0337, COSName.getPDFName( "germandbls" ) ); + addCharacterEncoding( 0140, COSName.getPDFName( "grave" ) ); + addCharacterEncoding( 076, COSName.getPDFName( "greater" ) ); + addCharacterEncoding( 0253, COSName.getPDFName( "guillemotleft" ) ); + addCharacterEncoding( 0273, COSName.getPDFName( "guillemotright" ) ); + addCharacterEncoding( 0213, COSName.getPDFName( "guilsinglleft" ) ); + addCharacterEncoding( 0233, COSName.getPDFName( "guilsinglright" ) ); + addCharacterEncoding( 0150, COSName.getPDFName( "h" ) ); + addCharacterEncoding( 055, COSName.getPDFName( "hyphen" ) ); + addCharacterEncoding( 0151, COSName.getPDFName( "i" ) ); + addCharacterEncoding( 0355, COSName.getPDFName( "iacute" ) ); + addCharacterEncoding( 0356, COSName.getPDFName( "icircumflex" ) ); + addCharacterEncoding( 0357, COSName.getPDFName( "idieresis" ) ); + addCharacterEncoding( 0354, COSName.getPDFName( "igrave" ) ); + addCharacterEncoding( 0152, COSName.getPDFName( "j" ) ); + addCharacterEncoding( 0153, COSName.getPDFName( "k" ) ); + addCharacterEncoding( 0154, COSName.getPDFName( "l" ) ); + addCharacterEncoding( 074, COSName.getPDFName( "less" ) ); + addCharacterEncoding( 0254, COSName.getPDFName( "logicalnot" ) ); + addCharacterEncoding( 0155, COSName.getPDFName( "m" ) ); + addCharacterEncoding( 0257, COSName.getPDFName( "macron" ) ); + addCharacterEncoding( 0265, COSName.getPDFName( "mu" ) ); + addCharacterEncoding( 0327, COSName.getPDFName( "multiply" ) ); + addCharacterEncoding( 0156, COSName.getPDFName( "n" ) ); + addCharacterEncoding( 071, COSName.getPDFName( "nine" ) ); + addCharacterEncoding( 0361, COSName.getPDFName( "ntilde" ) ); + addCharacterEncoding( 043, COSName.getPDFName( "numbersign" ) ); + addCharacterEncoding( 0157, COSName.getPDFName( "o" ) ); + addCharacterEncoding( 0363, COSName.getPDFName( "oacute" ) ); + addCharacterEncoding( 0364, COSName.getPDFName( "ocircumflex" ) ); + addCharacterEncoding( 0366, COSName.getPDFName( "odieresis" ) ); + addCharacterEncoding( 0234, 
COSName.getPDFName( "oe" ) ); + addCharacterEncoding( 0362, COSName.getPDFName( "ograve" ) ); + addCharacterEncoding( 061, COSName.getPDFName( "one" ) ); + addCharacterEncoding( 0275, COSName.getPDFName( "onehalf" ) ); + addCharacterEncoding( 0274, COSName.getPDFName( "onequarter" ) ); + addCharacterEncoding( 0271, COSName.getPDFName( "onesuperior" ) ); + addCharacterEncoding( 0252, COSName.getPDFName( "ordfeminine" ) ); + addCharacterEncoding( 0272, COSName.getPDFName( "ordmasculine" ) ); + addCharacterEncoding( 0370, COSName.getPDFName( "oslash" ) ); + addCharacterEncoding( 0365, COSName.getPDFName( "otilde" ) ); + addCharacterEncoding( 0160, COSName.getPDFName( "p" ) ); + addCharacterEncoding( 0266, COSName.getPDFName( "paragraph" ) ); + addCharacterEncoding( 050, COSName.getPDFName( "parenleft" ) ); + addCharacterEncoding( 051, COSName.getPDFName( "parenright" ) ); + addCharacterEncoding( 045, COSName.getPDFName( "percent" ) ); + addCharacterEncoding( 056, COSName.getPDFName( "period" ) ); + addCharacterEncoding( 0267, COSName.getPDFName( "periodcentered" ) ); + addCharacterEncoding( 0211, COSName.getPDFName( "perthousand" ) ); + addCharacterEncoding( 053, COSName.getPDFName( "plus" ) ); + addCharacterEncoding( 0261, COSName.getPDFName( "plusminus" ) ); + addCharacterEncoding( 0161, COSName.getPDFName( "q" ) ); + addCharacterEncoding( 077, COSName.getPDFName( "question" ) ); + addCharacterEncoding( 0277, COSName.getPDFName( "questiondown" ) ); + addCharacterEncoding( 042, COSName.getPDFName( "quotedbl" ) ); + addCharacterEncoding( 0204, COSName.getPDFName( "quotedblbase" ) ); + addCharacterEncoding( 0223, COSName.getPDFName( "quotedblleft" ) ); + addCharacterEncoding( 0224, COSName.getPDFName( "quotedblright" ) ); + addCharacterEncoding( 0221, COSName.getPDFName( "quoteleft" ) ); + addCharacterEncoding( 0222, COSName.getPDFName( "quoteright" ) ); + addCharacterEncoding( 0202, COSName.getPDFName( "quotesinglbase" ) ); + addCharacterEncoding( 047, 
COSName.getPDFName( "quotesingle" ) ); + addCharacterEncoding( 0162, COSName.getPDFName( "r" ) ); + addCharacterEncoding( 0256, COSName.getPDFName( "registered" ) ); + addCharacterEncoding( 0163, COSName.getPDFName( "s" ) ); + addCharacterEncoding( 0232, COSName.getPDFName( "scaron" ) ); + addCharacterEncoding( 0247, COSName.getPDFName( "section" ) ); + addCharacterEncoding( 073, COSName.getPDFName( "semicolon" ) ); + addCharacterEncoding( 067, COSName.getPDFName( "seven" ) ); + addCharacterEncoding( 066, COSName.getPDFName( "six" ) ); + addCharacterEncoding( 057, COSName.getPDFName( "slash" ) ); + addCharacterEncoding( 040, COSName.getPDFName( "space" ) ); + addCharacterEncoding( 0243, COSName.getPDFName( "sterling" ) ); + addCharacterEncoding( 0164, COSName.getPDFName( "t" ) ); + addCharacterEncoding( 0376, COSName.getPDFName( "thorn" ) ); + addCharacterEncoding( 063, COSName.getPDFName( "three" ) ); + addCharacterEncoding( 0276, COSName.getPDFName( "threequarters" ) ); + addCharacterEncoding( 0263, COSName.getPDFName( "threesuperior" ) ); + addCharacterEncoding( 0230, COSName.getPDFName( "tilde" ) ); + addCharacterEncoding( 0231, COSName.getPDFName( "trademark" ) ); + addCharacterEncoding( 062, COSName.getPDFName( "two" ) ); + addCharacterEncoding( 0262, COSName.getPDFName( "twosuperior" ) ); + addCharacterEncoding( 0165, COSName.getPDFName( "u" ) ); + addCharacterEncoding( 0372, COSName.getPDFName( "uacute" ) ); + addCharacterEncoding( 0373, COSName.getPDFName( "ucircumflex" ) ); + addCharacterEncoding( 0374, COSName.getPDFName( "udieresis" ) ); + addCharacterEncoding( 0371, COSName.getPDFName( "ugrave" ) ); + addCharacterEncoding( 0137, COSName.getPDFName( "underscore" ) ); + addCharacterEncoding( 0166, COSName.getPDFName( "v" ) ); + addCharacterEncoding( 0167, COSName.getPDFName( "w" ) ); + addCharacterEncoding( 0170, COSName.getPDFName( "x" ) ); + addCharacterEncoding( 0171, COSName.getPDFName( "y" ) ); + addCharacterEncoding( 0375, COSName.getPDFName( 
"yacute" ) ); + addCharacterEncoding( 0377, COSName.getPDFName( "ydieresis" ) ); + addCharacterEncoding( 0245, COSName.getPDFName( "yen" ) ); + addCharacterEncoding( 0172, COSName.getPDFName( "z" ) ); + addCharacterEncoding( 0236, COSName.getPDFName( "zcaron" ) ); + addCharacterEncoding( 060, COSName.getPDFName( "zero" ) ); + } + + /** + * Returns the COS object for this encoding, which is always the constant + * COSName.WIN_ANSI_ENCODING. + * + * @return COSName.WIN_ANSI_ENCODING. + */ + public COSBase getCOSObject() + { + return COSName.WIN_ANSI_ENCODING; + } +} \ No newline at end of file diff --git a/src/main/java/org/pdfbox/encoding/package.html b/src/main/java/org/pdfbox/encoding/package.html new file mode 100644 index 0000000..2fcc5c7 --- /dev/null +++ b/src/main/java/org/pdfbox/encoding/package.html @@ -0,0 +1,9 @@ + + + + + + +This package contains the implementations for all of the encodings that are used in PDF documents. + + -- cgit v1.2.3