From 6025b6016517c6d898d8957d1d7e03ba71431912 Mon Sep 17 00:00:00 2001 From: tknall Date: Fri, 1 Dec 2006 12:20:24 +0000 Subject: Initial import of release 2.2. git-svn-id: https://joinup.ec.europa.eu/svn/pdf-as/trunk@4 7b5415b0-85f9-ee4d-85bd-d5d0c3b42d1c --- src/main/java/org/pdfbox/filter/ASCII85Filter.java | 103 +++ .../java/org/pdfbox/filter/ASCIIHexFilter.java | 205 ++++++ .../org/pdfbox/filter/CCITTFaxDecodeFilter.java | 735 +++++++++++++++++++++ src/main/java/org/pdfbox/filter/DCTFilter.java | 77 +++ src/main/java/org/pdfbox/filter/Filter.java | 68 ++ src/main/java/org/pdfbox/filter/FilterManager.java | 135 ++++ src/main/java/org/pdfbox/filter/FlateFilter.java | 303 +++++++++ src/main/java/org/pdfbox/filter/LZWDictionary.java | 215 ++++++ src/main/java/org/pdfbox/filter/LZWFilter.java | 235 +++++++ src/main/java/org/pdfbox/filter/LZWNode.java | 115 ++++ .../org/pdfbox/filter/RunLengthDecodeFilter.java | 126 ++++ src/main/java/org/pdfbox/filter/package.html | 9 + 12 files changed, 2326 insertions(+) create mode 100644 src/main/java/org/pdfbox/filter/ASCII85Filter.java create mode 100644 src/main/java/org/pdfbox/filter/ASCIIHexFilter.java create mode 100644 src/main/java/org/pdfbox/filter/CCITTFaxDecodeFilter.java create mode 100644 src/main/java/org/pdfbox/filter/DCTFilter.java create mode 100644 src/main/java/org/pdfbox/filter/Filter.java create mode 100644 src/main/java/org/pdfbox/filter/FilterManager.java create mode 100644 src/main/java/org/pdfbox/filter/FlateFilter.java create mode 100644 src/main/java/org/pdfbox/filter/LZWDictionary.java create mode 100644 src/main/java/org/pdfbox/filter/LZWFilter.java create mode 100644 src/main/java/org/pdfbox/filter/LZWNode.java create mode 100644 src/main/java/org/pdfbox/filter/RunLengthDecodeFilter.java create mode 100644 src/main/java/org/pdfbox/filter/package.html (limited to 'src/main/java/org/pdfbox/filter') diff --git a/src/main/java/org/pdfbox/filter/ASCII85Filter.java b/src/main/java/org/pdfbox/filter/ASCII85Filter.java new file mode 100644 index 0000000..335208c --- /dev/null +++ b/src/main/java/org/pdfbox/filter/ASCII85Filter.java @@ -0,0 +1,103 @@ +/** + * Copyright (c) 2003, www.pdfbox.org + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * 3. Neither the name of pdfbox; nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * http://www.pdfbox.org + * + */ +package org.pdfbox.filter; + +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; + +import org.pdfbox.io.ASCII85InputStream; +import org.pdfbox.io.ASCII85OutputStream; + +import org.pdfbox.cos.COSDictionary; + +/** + * This is the used for the ASCIIHexDecode filter. + * + * @author Ben Litchfield (ben@csh.rit.edu) + * @version $Revision: 1.6 $ + */ +public class ASCII85Filter implements Filter +{ + /** + * This will decode some compressed data. + * + * @param compressedData The compressed byte stream. + * @param result The place to write the uncompressed byte stream. + * @param options The options to use to encode the data. + * + * @throws IOException If there is an error decompressing the stream. + */ + public void decode( InputStream compressedData, OutputStream result, COSDictionary options ) throws IOException + { + ASCII85InputStream is = null; + try + { + is = new ASCII85InputStream(compressedData); + byte[] buffer = new byte[1024]; + int amountRead = 0; + while( (amountRead = is.read( buffer, 0, 1024) ) != -1 ) + { + result.write(buffer, 0, amountRead); + } + result.flush(); + } + finally + { + if( is != null ) + { + is.close(); + } + } + } + + /** + * This will encode some data. + * + * @param rawData The raw data to encode. + * @param result The place to write to encoded results to. + * @param options The options to use to encode the data. + * + * @throws IOException If there is an error compressing the stream. + */ + public void encode( InputStream rawData, OutputStream result, COSDictionary options ) throws IOException + { + ASCII85OutputStream os = new ASCII85OutputStream(result); + byte[] buffer = new byte[1024]; + int amountRead = 0; + while( (amountRead = rawData.read( buffer, 0, 1024 )) != -1 ) + { + os.write( buffer, 0, amountRead ); + } + os.close(); + result.flush(); + } +} \ No newline at end of file diff --git a/src/main/java/org/pdfbox/filter/ASCIIHexFilter.java b/src/main/java/org/pdfbox/filter/ASCIIHexFilter.java new file mode 100644 index 0000000..b3cf506 --- /dev/null +++ b/src/main/java/org/pdfbox/filter/ASCIIHexFilter.java @@ -0,0 +1,205 @@ +/** + * Copyright (c) 2003, www.pdfbox.org + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * 3. Neither the name of pdfbox; nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * http://www.pdfbox.org + * + */ +package org.pdfbox.filter; + +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; + +import org.pdfbox.cos.COSDictionary; + +import org.pdfbox.persistence.util.COSHEXTable; + +/** + * This is the used for the ASCIIHexDecode filter. + * + * @author Ben Litchfield (ben@csh.rit.edu) + * @version $Revision: 1.7 $ + */ +public class ASCIIHexFilter implements Filter +{ + private static final int ASCII_ZERO = (int)'0'; + + + /** + * This will decode some compressed data. + * + * @param compressedData The compressed byte stream. + * @param result The place to write the uncompressed byte stream. + * @param options The options to use to encode the data. + * + * @throws IOException If there is an error decompressing the stream. + */ + public void decode( InputStream compressedData, OutputStream result, COSDictionary options ) throws IOException + { + int value =0; + int firstByte = 0; + int secondByte = 0; + while( (firstByte = compressedData.read()) != -1 ) + { + value = REVERSE_HEX[firstByte] * 16; + secondByte = compressedData.read(); + if( secondByte >= 0 ) + { + value += REVERSE_HEX[ secondByte ]; + } + result.write( value ); + } + result.flush(); + } + + private static final int[] REVERSE_HEX = + { + -1, //0 + -1, //1 + -1, //2 + -1, //3 + -1, //4 + -1, //5 + -1, //6 + -1, //7 + -1, //8 + -1, //9 + -1, //10 + -1, //11 + -1, //12 + -1, //13 + -1, //14 + -1, //15 + -1, //16 + -1, //17 + -1, //18 + -1, //19 + -1, //20 + -1, //21 + -1, //22 + -1, //23 + -1, //24 + -1, //25 + -1, //26 + -1, //27 + -1, //28 + -1, //29 + -1, //30 + -1, //31 + -1, //32 + -1, //33 + -1, //34 + -1, //35 + -1, //36 + -1, //37 + -1, //38 + -1, //39 + -1, //40 + -1, //41 + -1, //42 + -1, //43 + -1, //44 + -1, //45 + -1, //46 + -1, //47 + 0, //48 + 1, //49 + 2, //50 + 3, //51 + 4, //52 + 5, //53 + 6, //54 + 7, //55 + 8, //56 + 9, //57 + -1, //58 + -1, //59 + -1, //60 + -1, //61 + -1, //62 + -1, //63 + -1, //64 + 10, //65 + 11, //66 + 12, //67 + 13, //68 + 14, //69 + 15, //70 + -1, //71 + -1, //72 + -1, //73 + -1, //74 + -1, //75 + -1, //76 + -1, //77 + -1, //78 + -1, //79 + -1, //80 + -1, //81 + -1, //82 + -1, //83 + -1, //84 + -1, //85 + -1, //86 + -1, //87 + -1, //88 + -1, //89 + -1, //90 + -1, //91 + -1, //92 + -1, //93 + -1, //94 + -1, //95 + -1, //96 + 10, //97 + 11, //98 + 12, //99 + 13, //100 + 14, //101 + 15, //102 + }; + + /** + * This will encode some data. + * + * @param rawData The raw data to encode. + * @param result The place to write to encoded results to. + * @param options The options to use to encode the data. + * + * @throws IOException If there is an error compressing the stream. + */ + public void encode( InputStream rawData, OutputStream result, COSDictionary options ) throws IOException + { + int byteRead = 0; + while( (byteRead = rawData.read()) != -1 ) + { + int value = (byteRead+256)%256; + result.write( COSHEXTable.TABLE[value] ); + } + result.flush(); + } +} \ No newline at end of file diff --git a/src/main/java/org/pdfbox/filter/CCITTFaxDecodeFilter.java b/src/main/java/org/pdfbox/filter/CCITTFaxDecodeFilter.java new file mode 100644 index 0000000..15f4c58 --- /dev/null +++ b/src/main/java/org/pdfbox/filter/CCITTFaxDecodeFilter.java @@ -0,0 +1,735 @@ +/** + * Copyright (c) 2003-2005, www.pdfbox.org + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * 3. Neither the name of pdfbox; nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * http://www.pdfbox.org + * + */ +package org.pdfbox.filter; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.text.SimpleDateFormat; +import java.util.Date; + +import org.apache.log4j.Logger; +import org.pdfbox.cos.COSDictionary; +import org.pdfbox.cos.COSName; + +/** + * This is a filter for the CCITTFax Decoder. + * + * @author Ben Litchfield, Marcel Kammer, Paul King + * @version $Revision: 1.10 $ + */ +public class CCITTFaxDecodeFilter implements Filter +{ + private static Logger log = Logger.getLogger(CCITTFaxDecodeFilter.class); + + // Filter will write 15 TAG's + // If you add or remove TAG's you will have to modify this value + private static final int TAG_COUNT = 15; + + // HEADERLENGTH(fix 8 Bytes) plus ImageLength(variable) + private int offset = 8; + + // Bytecounter for Bytes that will be written after the TAG-DICTIONARY + private int tailingBytesCount = 0; + + // Bytes to write after TAG-DICTIONARY + private final ByteArrayOutputStream tailer = new ByteArrayOutputStream(); + + /** + * Constructor. + */ + public CCITTFaxDecodeFilter() + { + } + + /** + * This will decode some compressed data. + * + * @param compressedData + * The compressed byte stream. + * @param result + * The place to write the uncompressed byte stream. + * @param options + * The options to use to encode the data. + * + * @throws IOException + * If there is an error decompressing the stream. + */ + public void decode(InputStream compressedData, OutputStream result, COSDictionary options) throws IOException + { + // log.warn( "Warning: CCITTFaxDecode.decode is not implemented yet, + // skipping this stream." ); + + + // Get ImageParams from PDF + COSDictionary dict = (COSDictionary) options.getDictionaryObject("DecodeParms"); + int width = options.getInt("Width"); + int height = options.getInt("Height"); + int length = options.getInt(COSName.LENGTH); + int compressionType = dict.getInt("K"); + boolean blackIs1 = dict.getBoolean("BlackIs1", false); + + + // HEADER-INFO and starting point of TAG-DICTIONARY + writeTagHeader(result, length); + + // IMAGE-DATA + int i = 0; + //int sum = 0; + byte[] buffer = new byte[32768]; + int lentoread = length; + + while ((lentoread > 0) && ((i = compressedData.read(buffer, 0, Math.min(lentoread, 32768))) != -1)) + { + //sum += i; + result.write(buffer, 0, i); + lentoread = lentoread - i; + } + + // If lentoread is > 0 then we need to write out some padding to equal the header + // We'll use what we have in the buffer it's just padding after all + while (lentoread > 0) + { + result.write(buffer, 0, Math.min(lentoread, 32768)); + lentoread = lentoread - Math.min(lentoread, 32738); + } + //System.out.println("Gelesen: " + sum); + + // TAG-COUNT + writeTagCount(result); + + // WIDTH 0x0100 + writeTagWidth(result, width); + + // HEIGHT 0x0101 + writeTagHeight(result, height); + + // BITSPERSAMPLE 0x0102 + // Always 1 for CCITTFax + writeTagBitsPerSample(result, 1); + + // COMPRESSION 0x0103 + writeTagCompression(result, compressionType); + + // PHOTOMETRIC 0x0106 + writeTagPhotometric(result, blackIs1); + + // STRIPOFFSET 0x0111 + // HERE ALWAYS 8, because ImageData comes before TAG-DICTIONARY + writeTagStripOffset(result, 8); + + // ORIENTATION 0x0112 + writeTagOrientation(result, 1); + + // SamplesPerPixel 0x0115 + writeTagSamplesPerPixel(result, 1); + + // RowsPerStrip 0x0116 + writeTagRowsPerStrip(result, height); + + // Stripcount 0x0117 + writeTagStripByteCount(result, length); + + // XRESOLUTION 0x011A + // HERE: 200 DPI + writeTagXRes(result, 200, 1); + + // YRESOLITION 0x011B + // HERE: 200 DPI + writeTagYRes(result, 200, 1); + + // ResolutionUnit 0x0128 + // HERE: DPI + writeTagResolutionUnit(result, 2); + + // SOFTWARE 0x0131 + // minimum 4 chars + writeTagSoftware(result, "pdfbox".getBytes()); + + // DATE AND TIME 0x0132 + writeTagDateTime(result, new Date()); + + // END OF TAG-DICT + writeTagTailer(result); + } + + private void writeTagHeader(OutputStream result, int length) throws IOException + { + byte[] header = { 'M', 'M', 0, '*' };// Big-endian + result.write(header); + + + // Add imagelength to offset + offset += length; + + // OFFSET TAG-DICTIONARY + int i1 = offset/16777216;//=value/(256*256*256) + int i2 = (offset-i1*16777216)/65536; + int i3 = (offset-i1*16777216-i2*65536)/256; + int i4 = offset % 256; + result.write(i1); + result.write(i2); + result.write(i3); + result.write(i4); + } + + private void writeTagCount(OutputStream result) throws IOException + { + result.write(TAG_COUNT / 256); + result.write(TAG_COUNT % 256);// tagCount + } + + private void writeTagWidth(OutputStream result, int width) throws IOException + { + // @todo width berechnen + + // TAG-ID 100 + result.write(1); + result.write(0); + + + // TAG-TYPE SHORT=3 + result.write(0); + result.write(3); + + + // TAG-LENGTH = 1 + result.write(0); + result.write(0); + result.write(0); + result.write(1); + + + // TAG-VALUE = width + result.write(width/256); + result.write(width%256); + result.write(0);// SHORT=0 + result.write(0);// SHORT=0 + + } + + private void writeTagHeight(OutputStream result, int height) throws IOException + { + //@todo height berechnen + // TAG-ID 101 + result.write(1); + result.write(1); + + + // TAG-TYPE SHORT=3 + result.write(0); + result.write(3); + + + // TAG-LENGTH = 1 + result.write(0); + result.write(0); + result.write(0); + result.write(1); + + + // TAG-VALUE + result.write(height/256); + result.write(height%256); + result.write(0);// SHORT=0 + result.write(0);// SHORT=0 + + } + + private void writeTagBitsPerSample(OutputStream result, int value) throws IOException + { + // TAG-ID 102 + result.write(1); + result.write(2); + + + // TAG-TYPE SHORT=3 + result.write(0); + result.write(3); + + // TAG-LENGTH = 1 + result.write(0); + result.write(0); + result.write(0); + result.write(1); + + + // TAG-VALUE + result.write(value/256); + result.write(value%256); + result.write(0);//SHORT=0 + result.write(0);//SHORT=0 + + } + + /** + * Write the tag compression. + * + * @param result The stream to write to. + * @param type The type to write. + * @throws IOException If there is an error writing to the stream. + */ + public void writeTagCompression(OutputStream result, int type) throws IOException + { + // TAG-ID 103 + result.write(1); + result.write(3); + + // TAG-TYPE SHORT=3 + result.write(0); + result.write(3); + + + // TAG-LEGNTH = 1 + result.write(0); + result.write(0); + result.write(0); + result.write(1); + + // TAG-VALUE + //@todo typ eintragen; hier immer 4 + result.write(0); + if (type < 0) + { + result.write(4);// G4 + } + else if (type == 0) + { + result.write(3);// G3-1D + } + else + { + result.write(2);// G3-2D + } + result.write(0); + result.write(0); + + } + + private void writeTagPhotometric(OutputStream result, boolean blackIs1) throws IOException + { + // TAG-ID 106 + result.write(1); + result.write(6); + + + // TAG-TYPE SHORT + result.write(0); + result.write(3); + + + // TAG-LENGTH = 1 + result.write(0); + result.write(0); + result.write(0); + result.write(1); + + + // TAG-VALUE + result.write(0); + if (blackIs1) + { + result.write(1); + } + else + { + result.write(0); + } + result.write(0);// SHORT=0 + result.write(0);// SHORT=0 + + } + + private void writeTagStripOffset(OutputStream result, int value) throws IOException + { + // TAG-ID 111 + result.write(1); + result.write(17); + + // TAG-TYPE LONG=4 + result.write(0); + result.write(4); + + + // TAG-LENGTH=1 + result.write(0); + result.write(0); + result.write(0); + result.write(1); + + + // TAG-VALUE = 8 //VOR TAG-DICTIONARY + int i1 = value/16777216;//=value/(256*256*256) + int i2 = (value-i1*16777216)/65536; + int i3 = (value-i1*16777216-i2*65536)/256; + int i4 = value % 256; + result.write(i1); + result.write(i2); + result.write(i3); + result.write(i4); + + } + + private void writeTagSamplesPerPixel(OutputStream result, int value) throws IOException + { + // TAG-ID 115 + result.write(1); + result.write(21); + + + // TAG-TYPE SHORT=3 + result.write(0); + result.write(3); + + + // TAG-LENGTH=1 + result.write(0); + result.write(0); + result.write(0); + result.write(1); + + + // TAG-VALUE + result.write(value / 256); + result.write(value % 256); + result.write(0);// SHORT=0 + result.write(0);// SHORT=0 + + } + + private void writeTagRowsPerStrip(OutputStream result, int value) throws IOException + { + // TAG-ID 116 + result.write(1); + result.write(22); + + + // TAG-TYPE SHORT=3 + result.write(0); + result.write(3); + + + // TAG-LENGTH=1 + result.write(0); + result.write(0); + result.write(0); + result.write(1); + + + // TAG-VALUE + result.write(value / 256); + result.write(value % 256); + result.write(0);// SHORT=0 + result.write(0);// SHORT=0 + + } + + private void writeTagStripByteCount(OutputStream result, int value) throws IOException + { + //@todo value auswerten + // TAG-ID 117 + result.write(1); + result.write(23); + + // TAG-TYPE LONG=4 + result.write(0); + result.write(4); + + + // TAG-LENGTH = 1 + result.write(0); + result.write(0); + result.write(0); + result.write(1); + + // TAG-VALUE + int i1 = value/16777216;//=value/(256*256*256) + int i2 = (value-i1*16777216)/65536; + int i3 = (value-i1*16777216-i2*65536)/256; + int i4 = value % 256; + result.write(i1); + result.write(i2); + result.write(i3); + result.write(i4); + + } + + private void writeTagXRes(OutputStream result, int value1, int value2) throws IOException + { + // TAG-ID 11A + result.write(1); + result.write(26); + + // TAG-TYPE RATIONAL=5 + result.write(0); + result.write(5); + + // TAG-LENGTH=1 + result.write(0); + result.write(0); + result.write(0); + result.write(1); + + + // TAG-VALUE=OFFSET TO RATIONAL + int valueOffset = offset + 6 + 12 * TAG_COUNT + tailer.size(); + int i1 = valueOffset/16777216;//=value/(256*256*256) + int i2 = (valueOffset-i1*16777216)/65536; + int i3 = (valueOffset-i1*16777216-i2*65536)/256; + int i4 = valueOffset % 256; + result.write(i1); + result.write(i2); + result.write(i3); + result.write(i4); + + i1 = value1 /16777216; + i2 = (value1-i1*16777216)/65536; + i3 = (value1-i1*16777216 - i2*65536)/256; + i4 = value1 % 256; + tailer.write(i1); + tailer.write(i2); + tailer.write(i3); + tailer.write(i4); + + i1 = value2 /16777216; + i2 = (value2-i1*16777216)/65536; + i3 = (value2-i1*16777216 - i2*65536)/256; + i4 = value2 % 256; + tailer.write(i1); + tailer.write(i2); + tailer.write(i3); + tailer.write(i4); + + tailingBytesCount += 8; + } + + private void writeTagYRes(OutputStream result, int value1, int value2) throws IOException + { + // TAG-ID 11B + result.write(1); + result.write(27); + + + // TAG-TYPE RATIONAL=5 + result.write(0); + result.write(5); + + // TAG-LENGTH=1 + result.write(0); + result.write(0); + result.write(0); + result.write(1); + + + // TAG-VALUE=OFFSET TO RATIONAL + int valueOffset = offset + 6 + 12 * TAG_COUNT + tailer.size(); + int i1 = valueOffset/16777216;//=value/(256*256*256) + int i2 = (valueOffset-i1*16777216)/65536; + int i3 = (valueOffset-i1*16777216-i2*65536)/256; + int i4 = valueOffset % 256; + result.write(i1); + result.write(i2); + result.write(i3); + result.write(i4); + + i1 = value1 /16777216; + i2 = (value1-i1*16777216)/65536; + i3 = (value1-i1*16777216 - i2*65536)/256; + i4 = value1 % 256; + tailer.write(i1); + tailer.write(i2); + tailer.write(i3); + tailer.write(i4); + + i1 = value2 /16777216; + i2 = (value2-i1*16777216)/65536; + i3 = (value2-i1*16777216 - i2*65536)/256; + i4 = value2 % 256; + tailer.write(i1); + tailer.write(i2); + tailer.write(i3); + tailer.write(i4); + + tailingBytesCount += 8; + } + + private void writeTagResolutionUnit(OutputStream result, int value) throws IOException + { + // TAG-ID 128 + result.write(1); + result.write(40); + + // TAG-TYPE SHORT=3 + result.write(0); + result.write(3); + + // TAG-LENGTH = 1 + result.write(0); + result.write(0); + result.write(0); + result.write(1); + + // TAG-VALUE + result.write(value/256); + result.write(value%256); + result.write(0);// SHORT=0 + result.write(0);// SHORT=0 + + } + + private void writeTagOrientation(OutputStream result, int value) throws IOException + { + // TAG-ID 112 + result.write(1); + result.write(18); + + // TAG-TYPE SHORT = 3 + result.write(0); + result.write(3); + + + // TAG-LENGTH=1 + result.write(0); + result.write(0); + result.write(0); + result.write(1); + + // TAG-VALUE + result.write(value / 256); + result.write(value % 256); + result.write(0);// SHORT=0 + result.write(0);// SHORT=0 + + } + + private void writeTagTailer(OutputStream result) throws IOException + { + // END OF TAG-DICTIONARY + result.write(0); + result.write(0); + result.write(0); + result.write(0); + + // TAILER WITH VALUES OF RATIONALFIELD's + result.write(tailer.toByteArray()); + } + + private void writeTagSoftware(OutputStream result, byte[] text) throws IOException + { + // TAG-ID 131 + result.write(1); + result.write(49); + + // TAG-TYPE ASCII=2 + result.write(0); + result.write(2); + + + // TAG-LENGTH=id.length+1 + result.write(0); + result.write(0); + result.write((text.length + 1) / 256); + result.write((text.length + 1) % 256); + + // TAG-VALUE + int valueOffset = offset + 6 + 12 * TAG_COUNT + tailer.size(); + int i1 = valueOffset/16777216;//=value/(256*256*256) + int i2 = (valueOffset-i1*16777216)/65536; + int i3 = (valueOffset-i1*16777216-i2*65536)/256; + int i4 = valueOffset % 256; + result.write(i1); + result.write(i2); + result.write(i3); + result.write(i4); + + + tailer.write(text); + tailer.write(0); + tailingBytesCount += text.length + 1; + } + + private void writeTagDateTime(OutputStream result, Date date) throws IOException + { + // TAG-ID 132 + result.write(1); + result.write(50); + + + // TAG-TYPE ASCII=2 + result.write(0); + result.write(2); + + + // TAG-LENGTH=20 + result.write(0); + result.write(0); + result.write(0); + result.write(20); + + + // TAG-VALUE + int valueOffset = offset + 6 + 12 * TAG_COUNT + tailer.size(); + int i1 = valueOffset/16777216;//=value/(256*256*256) + int i2 = (valueOffset-i1*16777216)/65536; + int i3 = (valueOffset-i1*16777216-i2*65536)/256; + int i4 = valueOffset % 256; + result.write(i1); + result.write(i2); + result.write(i3); + result.write(i4); + + SimpleDateFormat sdf = new SimpleDateFormat("yyyy:MM:dd HH:mm:ss"); + String datetime = sdf.format(date); + tailer.write(datetime.getBytes()); + tailer.write(0); + + tailingBytesCount += 20; + } + + /** + * This will encode some data. + * + * @param rawData + * The raw data to encode. + * @param result + * The place to write to encoded results to. + * @param options + * The options to use to encode the data. + * + * @throws IOException + * If there is an error compressing the stream. + */ + public void encode(InputStream rawData, OutputStream result, COSDictionary options) throws IOException + { + log.warn("Warning: CCITTFaxDecode.encode is not implemented yet, skipping this stream."); + } +} \ No newline at end of file diff --git a/src/main/java/org/pdfbox/filter/DCTFilter.java b/src/main/java/org/pdfbox/filter/DCTFilter.java new file mode 100644 index 0000000..8f371ea --- /dev/null +++ b/src/main/java/org/pdfbox/filter/DCTFilter.java @@ -0,0 +1,77 @@ +/** + * Copyright (c) 2003, www.pdfbox.org + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * 3. Neither the name of pdfbox; nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * http://www.pdfbox.org + * + */ +package org.pdfbox.filter; + +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; + +import org.apache.log4j.Logger; +import org.pdfbox.cos.COSDictionary; + +/** + * This is the used for the DCTDecode filter. + * + * @author Ben Litchfield (ben@csh.rit.edu) + * @version $Revision: 1.7 $ + */ +public class DCTFilter implements Filter +{ + private static Logger log = Logger.getLogger( DCTFilter.class ); + + /** + * This will decode some compressed data. + * + * @param compressedData The compressed byte stream. + * @param result The place to write the uncompressed byte stream. + * @param options The options to use to encode the data. + * + * @throws IOException If there is an error decompressing the stream. + */ + public void decode( InputStream compressedData, OutputStream result, COSDictionary options ) throws IOException + { + log.warn( "Warning: DCTFilter.decode is not implemented yet, skipping this stream." ); + } + + /** + * This will encode some data. + * + * @param rawData The raw data to encode. + * @param result The place to write to encoded results to. + * @param options The options to use to encode the data. + * + * @throws IOException If there is an error compressing the stream. + */ + public void encode( InputStream rawData, OutputStream result, COSDictionary options ) throws IOException + { + log.warn( "Warning: DCTFilter.encode is not implemented yet, skipping this stream." ); + } +} \ No newline at end of file diff --git a/src/main/java/org/pdfbox/filter/Filter.java b/src/main/java/org/pdfbox/filter/Filter.java new file mode 100644 index 0000000..8de3f81 --- /dev/null +++ b/src/main/java/org/pdfbox/filter/Filter.java @@ -0,0 +1,68 @@ +/** + * Copyright (c) 2003, www.pdfbox.org + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * 3. Neither the name of pdfbox; nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * http://www.pdfbox.org + * + */ +package org.pdfbox.filter; + +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; + +import org.pdfbox.cos.COSDictionary; + +/** + * This is the interface that will be used to apply filters to a byte stream. + * + * @author Ben Litchfield (ben@csh.rit.edu) + * @version $Revision: 1.5 $ + */ +public interface Filter +{ + /** + * This will decode some compressed data. + * + * @param compressedData The compressed byte stream. + * @param result The place to write the uncompressed byte stream. + * @param options The options to use to encode the data. + * + * @throws IOException If there is an error decompressing the stream. + */ + public void decode( InputStream compressedData, OutputStream result, COSDictionary options ) throws IOException; + + /** + * This will encode some data. + * + * @param rawData The raw data to encode. + * @param result The place to write to encoded results to. + * @param options The options to use to encode the data. + * + * @throws IOException If there is an error compressing the stream. + */ + public void encode( InputStream rawData, OutputStream result, COSDictionary options ) throws IOException; +} \ No newline at end of file diff --git a/src/main/java/org/pdfbox/filter/FilterManager.java b/src/main/java/org/pdfbox/filter/FilterManager.java new file mode 100644 index 0000000..87b1ad4 --- /dev/null +++ b/src/main/java/org/pdfbox/filter/FilterManager.java @@ -0,0 +1,135 @@ +/** + * Copyright (c) 2003, www.pdfbox.org + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * 3. Neither the name of pdfbox; nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * http://www.pdfbox.org + * + */ +package org.pdfbox.filter; + +import java.io.IOException; + +import java.util.Collection; +import java.util.HashMap; +import java.util.Map; + +import org.pdfbox.cos.COSName; + +/** + * This will contain manage all the different types of filters that are available. + * + * @author Ben Litchfield + * @version $Revision: 1.12 $ + */ +public class FilterManager +{ + private Map filters = new HashMap(); + + /** + * Constructor. + */ + public FilterManager() + { + Filter flateFilter = new FlateFilter(); + Filter dctFilter = new DCTFilter(); + Filter ccittFaxFilter = new CCITTFaxDecodeFilter(); + Filter lzwFilter = new LZWFilter(); + Filter asciiHexFilter = new ASCIIHexFilter(); + Filter ascii85Filter = new ASCII85Filter(); + Filter runLengthFilter = new RunLengthDecodeFilter(); + + addFilter( COSName.FLATE_DECODE, flateFilter ); + addFilter( COSName.FLATE_DECODE_ABBREVIATION, flateFilter ); + addFilter( COSName.DCT_DECODE, dctFilter ); + addFilter( COSName.DCT_DECODE_ABBREVIATION, dctFilter ); + addFilter( COSName.CCITTFAX_DECODE, ccittFaxFilter ); + addFilter( COSName.CCITTFAX_DECODE_ABBREVIATION, ccittFaxFilter ); + addFilter( COSName.LZW_DECODE, lzwFilter ); + addFilter( COSName.LZW_DECODE_ABBREVIATION, lzwFilter ); + addFilter( COSName.ASCII_HEX_DECODE, asciiHexFilter ); + addFilter( COSName.ASCII_HEX_DECODE_ABBREVIATION, asciiHexFilter ); + addFilter( COSName.ASCII85_DECODE, ascii85Filter ); + addFilter( COSName.ASCII85_DECODE_ABBREVIATION, ascii85Filter ); + addFilter( COSName.RUN_LENGTH_DECODE, runLengthFilter ); + addFilter( COSName.RUN_LENGTH_DECODE_ABBREVIATION, runLengthFilter ); + + } + + /** + * This will get all of the filters that are available in the system. + * + * @return All available filters in the system. + */ + public Collection getFilters() + { + return filters.values(); + } + + /** + * This will add an available filter. + * + * @param filterName The name of the filter. + * @param filter The filter to use. + */ + public void addFilter( COSName filterName, Filter filter ) + { + filters.put( filterName, filter ); + } + + /** + * This will get a filter by name. + * + * @param filterName The name of the filter to retrieve. + * + * @return The filter that matches the name. + * + * @throws IOException If the filter could not be found. + */ + public Filter getFilter( COSName filterName ) throws IOException + { + Filter filter = (Filter)filters.get( filterName ); + if( filter == null ) + { + throw new IOException( "Unknown stream filter:" + filterName ); + } + + return filter; + } + + /** + * This will get a filter by name. + * + * @param filterName The name of the filter to retrieve. + * + * @return The filter that matches the name. + * + * @throws IOException If the filter could not be found. + */ + public Filter getFilter( String filterName ) throws IOException + { + return getFilter( COSName.getPDFName( filterName ) ); + } +} \ No newline at end of file diff --git a/src/main/java/org/pdfbox/filter/FlateFilter.java b/src/main/java/org/pdfbox/filter/FlateFilter.java new file mode 100644 index 0000000..c239686 --- /dev/null +++ b/src/main/java/org/pdfbox/filter/FlateFilter.java @@ -0,0 +1,303 @@ +/** + * Copyright (c) 2003-2005, www.pdfbox.org + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * 3. Neither the name of pdfbox; nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * http://www.pdfbox.org + * + */ +package org.pdfbox.filter; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.util.zip.DeflaterOutputStream; +import java.util.zip.InflaterInputStream; + +import org.pdfbox.cos.COSDictionary; + +/** + * This is the used for the FlateDecode filter. + * + * @author Ben Litchfield (ben@benlitchfield.com) + * @author Marcel Kammer + * @version $Revision: 1.9 $ + */ +public class FlateFilter implements Filter +{ + private static final int BUFFER_SIZE = 2048; + + /** + * This will decode some compressed data. + * + * @param compressedData + * The compressed byte stream. + * @param result + * The place to write the uncompressed byte stream. + * @param options + * The options to use to encode the data. + * + * @throws IOException + * If there is an error decompressing the stream. + */ + + public void decode(InputStream compressedData, OutputStream result, COSDictionary options) throws IOException + { + COSDictionary dict = (COSDictionary) options.getDictionaryObject("DecodeParms"); + int predictor = -1; + int colors = -1; + int bitsPerPixel = -1; + int columns = -1; + InflaterInputStream decompressor = null; + ByteArrayInputStream bais = null; + ByteArrayOutputStream baos = null; + if (dict!=null) + { + predictor = dict.getInt("Predictor"); + colors = dict.getInt("Colors"); + bitsPerPixel = options.getInt("BitsPerComponent"); + columns = dict.getInt("Columns"); + } + + try + { + // Decompress data to temporary ByteArrayOutputStream + decompressor = new InflaterInputStream(compressedData); + byte[] buffer = new byte[BUFFER_SIZE]; + int amountRead; + + // Decode data using given predictor + if (predictor==-1 || predictor == 1 && predictor == 10) + { + // decoding not needed + while ((amountRead = decompressor.read(buffer, 0, BUFFER_SIZE)) != -1) + { + result.write(buffer, 0, amountRead); + } + } + else + { + if (colors==-1 || bitsPerPixel==-1 || columns==-1) + { + throw new IOException("Could not read all parameters to decode image"); + } + + baos = new ByteArrayOutputStream(); + while ((amountRead = decompressor.read(buffer, 0, BUFFER_SIZE)) != -1) + { + baos.write(buffer, 0, amountRead); + } + baos.flush(); + + // Copy data to ByteArrayInputStream for reading + bais = new ByteArrayInputStream(baos.toByteArray()); + baos.close(); + baos = null; + + byte[] decodedData = decodePredictor(predictor, colors, bitsPerPixel, columns, bais); + bais.close(); + bais = new ByteArrayInputStream(decodedData); + + // write decoded data to result + while ((amountRead = bais.read(buffer)) != -1) + { + result.write(buffer, 0, amountRead); + } + bais.close(); + bais = null; + } + + + result.flush(); + } + finally + { + if (decompressor != null) + { + decompressor.close(); + } + if (bais != null) + { + bais.close(); + } + if (baos != null) + { + baos.close(); + } + } + } + + private byte[] decodePredictor(int predictor, int colors, int bitsPerComponent, int columns, InputStream data) + throws IOException + { + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + byte[] buffer = new byte[2048]; + + if (predictor == 1 || predictor == 10) + { + // No prediction or PNG NONE + int i = 0; + while ((i = data.read(buffer)) != -1) + { + baos.write(buffer, 0, i); + } + } + else + { + // calculate sizes + int bpp = (colors * bitsPerComponent + 7) / 8; + int rowlength = (columns * colors * bitsPerComponent + 7) / 8 + bpp; + byte[] actline = new byte[rowlength]; + byte[] lastline = new byte[rowlength];// Initialize lastline with + // Zeros according to + // PNG-specification + boolean done = false; + int linepredictor = predictor; + + while (!done) + { + if (predictor == 15) + { + linepredictor = data.read();// read per line predictor + if (linepredictor == -1) + { + done = true;// reached EOF + break; + } + else + { + linepredictor += 10; // add 10 to tread value 1 as 11 + } + // (instead of PRED NONE) and 2 + // as 12 (instead of PRED TIFF) + } + + // read line + int i = 0; + int offset = bpp; + while (offset < rowlength && ((i = data.read(actline, offset, rowlength - offset)) != -1)) + { + offset += i; + } + + // Do prediction as specified in PNG-Specification 1.2 + switch (linepredictor) + { + case 2:// PRED TIFF SUB + /** + * @todo decode tiff + */ + throw new IOException("TIFF-Predictor not supported"); + case 11:// PRED SUB + for (int p = bpp; p < rowlength; p++) + { + int sub = actline[p] & 0xff; + int left = actline[p - bpp] & 0xff; + actline[p] = (byte) (sub + left); + } + break; + case 12:// PRED UP + for (int p = bpp; p < rowlength; p++) + { + int up = actline[p] & 0xff; + int prior = lastline[p] & 0xff; + actline[p] = (byte) (up + prior); + } + break; + case 13:// PRED AVG + for (int p = bpp; p < rowlength; p++) + { + int avg = actline[p] & 0xff; + int left = actline[p - bpp] & 0xff; + int up = lastline[p] & 0xff; + actline[p] = (byte) (avg + ((left + up) / 2)); + } + break; + case 14:// PRED PAETH + for (int p = bpp; p < rowlength; p++) + { + int paeth = actline[p] & 0xff; + int a = actline[p - bpp] & 0xff;// left + int b = lastline[p] & 0xff;// upper + int c = lastline[p - bpp] & 0xff;// upperleft + int value = a + b - c; + int absa = Math.abs(value - a); + int absb = Math.abs(value - b); + int absc = Math.abs(value - c); + + if (absa <= absb && absa <= absc) + { + actline[p] = (byte) (paeth + absa); + } + else if (absb <= absc) + { + actline[p] += (byte) (paeth + absb); + } + else + { + actline[p] += (byte) (paeth + absc); + } + } + break; + default: + break; + } + + lastline = actline; + baos.write(actline, bpp, actline.length - bpp); + } + } + + return baos.toByteArray(); + } + + /** + * This will encode some data. + * + * @param rawData + * The raw data to encode. + * @param result + * The place to write to encoded results to. + * @param options + * The options to use to encode the data. + * + * @throws IOException + * If there is an error compressing the stream. + */ + public void encode(InputStream rawData, OutputStream result, COSDictionary options) throws IOException + { + DeflaterOutputStream out = new DeflaterOutputStream(result); + byte[] buffer = new byte[BUFFER_SIZE]; + int amountRead = 0; + while ((amountRead = rawData.read(buffer, 0, BUFFER_SIZE)) != -1) + { + out.write(buffer, 0, amountRead); + } + out.close(); + result.flush(); + } +} \ No newline at end of file diff --git a/src/main/java/org/pdfbox/filter/LZWDictionary.java b/src/main/java/org/pdfbox/filter/LZWDictionary.java new file mode 100644 index 0000000..075dab8 --- /dev/null +++ b/src/main/java/org/pdfbox/filter/LZWDictionary.java @@ -0,0 +1,215 @@ +/** + * Copyright (c) 2003, www.pdfbox.org + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * 3. Neither the name of pdfbox; nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * http://www.pdfbox.org + * + */ +package org.pdfbox.filter; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; + +import java.util.HashMap; +import java.util.Map; + +/** + * This is the used for the LZWDecode filter. This represents the dictionary mappings + * between codes and their values. + * + * @author Ben Litchfield (ben@csh.rit.edu) + * @version $Revision: 1.3 $ + */ +class LZWDictionary +{ + private Map codeToData = new HashMap(); + private LZWNode root = new LZWNode(); + + private ByteArrayOutputStream buffer = new ByteArrayOutputStream(); + private long nextCode = 258; + private int codeSize = 9; + + /** + * constructor. + */ + public LZWDictionary() + { + for( long i=0; i<256; i++ ) + { + LZWNode node = new LZWNode(); + node.setCode( i ); + root.setNode( (byte)i, node ); + codeToData.put( new Long( i ), new byte[]{ (byte)i } ); + } + } + + /** + * This will get the value for the code. It will return null if the code is not + * defined. + * + * @param code The key to the data. + * + * @return The data that is mapped to the code. + */ + public byte[] getData( long code ) + { + return (byte[])codeToData.get( new Long( code ) ); + } + + /** + * This will take a visit from a byte[]. This will create new code entries as + * necessary. + * + * @param data The byte to get a visit from. + * + * @throws IOException If there is an error visiting this data. + */ + public void visit( byte[] data ) throws IOException + { + for( int i=0; i= 2048 ) + { + codeSize = 12; + } + else if( nextCode >= 1024 ) + { + codeSize = 11; + } + else if( nextCode >= 512 ) + { + codeSize = 10; + } + else + { + codeSize = 9; + } + } + + /** + * This will crear the internal buffer that the dictionary uses. + */ + public void clear() + { + buffer.reset(); + } + + /** + * This will folow the path to the data node. + * + * @param data The path to the node. + * + * @return The node that resides at that path. + */ + public LZWNode getNode( byte[] data ) + { + return root.getNode( data ); + } +} \ No newline at end of file diff --git a/src/main/java/org/pdfbox/filter/LZWFilter.java b/src/main/java/org/pdfbox/filter/LZWFilter.java new file mode 100644 index 0000000..e8ba003 --- /dev/null +++ b/src/main/java/org/pdfbox/filter/LZWFilter.java @@ -0,0 +1,235 @@ +/** + * Copyright (c) 2003-2005, www.pdfbox.org + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * 3. Neither the name of pdfbox; nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * http://www.pdfbox.org + * + */ +package org.pdfbox.filter; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.io.PushbackInputStream; +import java.io.StreamCorruptedException; + +import org.pdfbox.cos.COSDictionary; + +import org.pdfbox.io.NBitInputStream; +import org.pdfbox.io.NBitOutputStream; + +/** + * This is the used for the LZWDecode filter. + * + * @author Ben Litchfield (ben@benlitchfield.com) + * @version $Revision: 1.13 $ + */ +public class LZWFilter implements Filter +{ + + /** + * The LZW clear table code. + */ + public static final long CLEAR_TABLE = 256; + /** + * The LZW end of data code. + */ + public static final long EOD = 257; + + /** + * This will decode some compressed data. + * + * @param compressedData The compressed byte stream. + * @param result The place to write the uncompressed byte stream. + * @param options The options to use to encode the data. + * + * @throws IOException If there is an error decompressing the stream. + */ + public void decode( InputStream compressedData, OutputStream result, COSDictionary options ) throws IOException + { + //log.debug("decode( )"); + NBitInputStream in = null; + in = new NBitInputStream( compressedData ); + in.setBitsInChunk( 9 ); + LZWDictionary dic = new LZWDictionary(); + byte firstByte = 0; + long nextCommand = 0; + while( (nextCommand = in.read() ) != EOD ) + { + // log.debug( "decode - nextCommand=" + nextCommand + ", bitsInChunk: " + in.getBitsInChunk()); + + if( nextCommand == CLEAR_TABLE ) + { + in.setBitsInChunk( 9 ); + dic = new LZWDictionary(); + } + else + { + byte[] data = dic.getData( nextCommand ); + if( data == null ) + { + dic.visit( firstByte ); + data = dic.getData( nextCommand ); + dic.clear(); + } + if( data == null ) + { + throw new StreamCorruptedException( "Error: data is null" ); + } + dic.visit(data); + + //log.debug( "decode - dic.getNextCode(): " + dic.getNextCode()); + + if( dic.getNextCode() >= 2047 ) + { + in.setBitsInChunk( 12 ); + } + else if( dic.getNextCode() >= 1023 ) + { + in.setBitsInChunk( 11 ); + } + else if( dic.getNextCode() >= 511 ) + { + in.setBitsInChunk( 10 ); + } + else + { + in.setBitsInChunk( 9 ); + } + /** + if( in.getBitsInChunk() != dic.getCodeSize() ) + { + in.unread( nextCommand ); + in.setBitsInChunk( dic.getCodeSize() ); + System.out.print( "Switching " + nextCommand + " to " ); + nextCommand = in.read(); + System.out.println( "" + nextCommand ); + data = dic.getData( nextCommand ); + }**/ + firstByte = data[0]; + result.write( data ); + } + } + result.flush(); + } + + + /** + * This will encode some data. + * + * @param rawData The raw data to encode. + * @param result The place to write to encoded results to. + * @param options The options to use to encode the data. + * + * @throws IOException If there is an error compressing the stream. + */ + public void encode( InputStream rawData, OutputStream result, COSDictionary options ) throws IOException + { + //log.debug("encode( )"); + PushbackInputStream input = new PushbackInputStream( rawData, 4096 ); + LZWDictionary dic = new LZWDictionary(); + NBitOutputStream out = new NBitOutputStream( result ); + out.setBitsInChunk( 9 ); //initially nine + out.write( CLEAR_TABLE ); + ByteArrayOutputStream buffer = new ByteArrayOutputStream(); + int byteRead = 0; + for( int i=0; (byteRead = input.read()) != -1; i++ ) + { + //log.debug( "byteRead = '" + (char)byteRead + "' (0x" + Integer.toHexString(byteRead) + "), i=" + i); + buffer.write( byteRead ); + dic.visit( (byte)byteRead ); + out.setBitsInChunk( dic.getCodeSize() ); + + //log.debug( "Getting node '" + new String( buffer.toByteArray() ) + "', buffer.size = " + buffer.size() ); + LZWNode node = dic.getNode( buffer.toByteArray() ); + int nextByte = input.read(); + if( nextByte != -1 ) + { + //log.debug( "nextByte = '" + (char)nextByte + "' (0x" + Integer.toHexString(nextByte) + ")"); + LZWNode next = node.getNode( (byte)nextByte ); + if( next == null ) + { + //log.debug("encode - No next node, writing node and resetting buffer (" + + // " node.getCode: " + node.getCode() + ")" + + // " bitsInChunk: " + out.getBitsInChunk() + + // ")"); + out.write( node.getCode() ); + buffer.reset(); + } + + input.unread( nextByte ); + } + else + { + //log.debug("encode - EOF on lookahead: writing node, resetting buffer, and terminating read loop (" + + // " node.getCode: " + node.getCode() + ")" + + // " bitsInChunk: " + out.getBitsInChunk() + + // ")"); + out.write( node.getCode() ); + buffer.reset(); + break; + } + + if( dic.getNextCode() == 4096 ) + { + //log.debug("encode - Clearing dictionary and unreading pending buffer data (" + + // " bitsInChunk: " + out.getBitsInChunk() + + // ")"); + out.write( CLEAR_TABLE ); + dic = new LZWDictionary(); + input.unread( buffer.toByteArray() ); + buffer.reset(); + } + } + + // Fix the code size based on the fact that we are writing the EOD + // + if( dic.getNextCode() >= 2047 ) + { + out.setBitsInChunk( 12 ); + } + else if( dic.getNextCode() >= 1023 ) + { + out.setBitsInChunk( 11 ); + } + else if( dic.getNextCode() >= 511 ) + { + out.setBitsInChunk( 10 ); + } + else + { + out.setBitsInChunk( 9 ); + } + + //log.debug("encode - Writing EOD (" + + // " bitsInChunk: " + out.getBitsInChunk() + + // ")"); + out.write( EOD ); + out.close(); + result.flush(); + } +} \ No newline at end of file diff --git a/src/main/java/org/pdfbox/filter/LZWNode.java b/src/main/java/org/pdfbox/filter/LZWNode.java new file mode 100644 index 0000000..563ea5d --- /dev/null +++ b/src/main/java/org/pdfbox/filter/LZWNode.java @@ -0,0 +1,115 @@ +/** + * Copyright (c) 2003, www.pdfbox.org + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * 3. Neither the name of pdfbox; nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * http://www.pdfbox.org + * + */ +package org.pdfbox.filter; + +import java.util.HashMap; +import java.util.Map; + +/** + * This is the used for the LZWDecode filter. + * + * @author Ben Litchfield (ben@csh.rit.edu) + * @version $Revision: 1.3 $ + */ +class LZWNode +{ + private long code; + private Map subNodes = new HashMap(); + + /** + * This will get the number of children. + * + * @return The number of children. + */ + public int childCount() + { + return subNodes.size(); + } + + /** + * This will set the node for a particular byte. + * + * @param b The byte for that node. + * @param node The node to add. + */ + public void setNode( byte b, LZWNode node ) + { + subNodes.put( new Byte( b ), node ); + } + + /** + * This will get the node that is a direct sub node of this node. + * + * @param data The byte code to the node. + * + * @return The node at that value if it exists. + */ + public LZWNode getNode( byte data ) + { + return (LZWNode)subNodes.get( new Byte( data ) ); + } + + + /** + * This will traverse the tree until it gets to the sub node. + * This will return null if the node does not exist. + * + * @param data The path to the node. + * + * @return The node that resides at the data path. + */ + public LZWNode getNode( byte[] data ) + { + LZWNode current = this; + for( int i=0; i + * The RunLengthDecode filter decodes data that has been encoded in a simple + * byte-oriented format based on run length. The encoded data is a sequence of + * runs, where each run consists of a length byte followed by 1 to 128 bytes of data. If + * the length byte is in the range 0 to 127, the following length + 1 (1 to 128) bytes + * are copied literally during decompression. If length is in the range 129 to 255, the + * following single byte is to be copied 257 ? length (2 to 128) times during decompression. + * A length value of 128 denotes EOD. + * + * The compression achieved by run-length encoding depends on the input data. In + * the best case (all zeros), a compression of approximately 64:1 is achieved for long + * files. The worst case (the hexadecimal sequence 00 alternating with FF) results in + * an expansion of 127:128. + * + * + * @author Ben Litchfield + * @version $Revision: 1.3 $ + */ +public class RunLengthDecodeFilter implements Filter +{ + private static Logger log = Logger.getLogger( RunLengthDecodeFilter.class ); + + private static final int RUN_LENGTH_EOD = 128; + + /** + * Constructor. + */ + public RunLengthDecodeFilter() + { + //default constructor + } + + /** + * This will decode some compressed data. + * + * @param compressedData The compressed byte stream. + * @param result The place to write the uncompressed byte stream. + * @param options The options to use to encode the data. + * + * @throws IOException If there is an error decompressing the stream. + */ + public void decode( InputStream compressedData, OutputStream result, COSDictionary options ) throws IOException + { + int dupAmount = -1; + byte[] buffer = new byte[128]; + while( (dupAmount = compressedData.read()) != -1 && dupAmount != RUN_LENGTH_EOD ) + { + if( dupAmount <= 127 ) + { + int amountToCopy = dupAmount+1; + int compressedRead = 0; + while( amountToCopy > 0 ) + { + compressedRead = compressedData.read( buffer, 0, amountToCopy ); + result.write( buffer, 0, compressedRead ); + amountToCopy -= compressedRead; + } + } + else + { + int dupByte = compressedData.read(); + for( int i=0; i<257-dupAmount; i++ ) + { + result.write( dupByte ); + } + } + } + } + + /** + * This will encode some data. + * + * @param rawData The raw data to encode. + * @param result The place to write to encoded results to. + * @param options The options to use to encode the data. + * + * @throws IOException If there is an error compressing the stream. + */ + public void encode( InputStream rawData, OutputStream result, COSDictionary options ) throws IOException + { + log.warn( "Warning: RunLengthDecodeFilter.encode is not implemented yet, skipping this stream." ); + } +} \ No newline at end of file diff --git a/src/main/java/org/pdfbox/filter/package.html b/src/main/java/org/pdfbox/filter/package.html new file mode 100644 index 0000000..36dbb7d --- /dev/null +++ b/src/main/java/org/pdfbox/filter/package.html @@ -0,0 +1,9 @@ + + + + + + +This package will hold the PDFBox implementations of the filters that are used in PDF documents. + + -- cgit v1.2.3