aboutsummaryrefslogtreecommitdiff
path: root/src/main/java/org/pdfbox/pdmodel/graphics/xobject/PDCcitt.java
diff options
context:
space:
mode:
Diffstat (limited to 'src/main/java/org/pdfbox/pdmodel/graphics/xobject/PDCcitt.java')
-rw-r--r--src/main/java/org/pdfbox/pdmodel/graphics/xobject/PDCcitt.java598
1 files changed, 598 insertions, 0 deletions
diff --git a/src/main/java/org/pdfbox/pdmodel/graphics/xobject/PDCcitt.java b/src/main/java/org/pdfbox/pdmodel/graphics/xobject/PDCcitt.java
new file mode 100644
index 0000000..59387a0
--- /dev/null
+++ b/src/main/java/org/pdfbox/pdmodel/graphics/xobject/PDCcitt.java
@@ -0,0 +1,598 @@
+/**
+ * Copyright (c) 2005, www.pdfbox.org
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ * 3. Neither the name of pdfbox; nor the names of its
+ * contributors may be used to endorse or promote products derived from this
+ * software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * http://www.pdfbox.org
+ *
+ */
+package org.pdfbox.pdmodel.graphics.xobject;
+
+import java.awt.image.BufferedImage;
+import java.io.InputStream;
+import java.io.IOException;
+import java.io.OutputStream;
+import java.io.RandomAccessFile;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import javax.imageio.ImageIO;
+
+import org.pdfbox.cos.COSDictionary;
+import org.pdfbox.cos.COSName;
+
+import org.pdfbox.pdmodel.PDDocument;
+import org.pdfbox.pdmodel.common.PDStream;
+import org.pdfbox.pdmodel.graphics.color.PDDeviceGray;
+
+/**
+ * An image class for CCITT Fax.
+ *
+ * @author paul king
+ * @version $Revision: 1.2 $
+ */
+public class PDCcitt extends PDXObjectImage
+{
+    private static final List FAX_FILTERS = new ArrayList();
+
+    static
+    {
+        // Both spellings of the CCITT fax filter name (full and abbreviated) are
+        // registered; the list is handed to getPartiallyFilteredStream() whenever
+        // the fax data of an image is read back.
+        final String[] faxFilterNames =
+        {
+            COSName.CCITTFAX_DECODE.getName(),
+            COSName.CCITTFAX_DECODE_ABBREVIATION.getName()
+        };
+        for( int i = 0; i < faxFilterNames.length; i++ )
+        {
+            FAX_FILTERS.add( faxFilterNames[i] );
+        }
+    }
+
+ /**
+ * Standard constructor.
+ *
+ * @param ccitt The PDStream that already contains all ccitt information.
+ */
+    public PDCcitt( PDStream ccitt )
+    {
+        // Nothing to build here: the stream is used as-is. "tiff" is the second
+        // argument of the PDXObjectImage constructor (presumably the file suffix
+        // for this image type - confirm against PDXObjectImage).
+        super( ccitt, "tiff" );
+    }
+
+ /**
+ * Construct from a tiff file.
+ *
+ * @param doc The document to create the image as part of.
+ * @param raf The random access TIFF file which contains a suitable CCITT compressed image
+ * @throws IOException If there is an error reading the tiff data.
+ */
+
+    public PDCcitt( PDDocument doc, java.io.RandomAccessFile raf ) throws IOException
+    {
+        super( new PDStream( doc ), "tiff" );
+
+        // Copy the encoded fax data out of the TIFF into this object's stream.
+        // extractFromTiff() fills faxParms with the decode parameters it finds
+        // (Columns, Rows, K and, where applicable, BlackIs1).
+        COSDictionary faxParms = new COSDictionary();
+        COSDictionary imageDictionary = getCOSStream();
+        OutputStream encodedData = getCOSStream().createFilteredStream();
+        extractFromTiff( raf, encodedData, faxParms );
+
+        // Describe the stream as a CCITT fax encoded image XObject.
+        imageDictionary.setItem( COSName.FILTER, COSName.CCITTFAX_DECODE );
+        imageDictionary.setItem( COSName.SUBTYPE, COSName.IMAGE );
+        imageDictionary.setItem( COSName.TYPE, COSName.getPDFName( "XObject" ) );
+        imageDictionary.setItem( "DecodeParms", faxParms );
+
+        // One bit per pixel, gray; the dimensions come from the TIFF tags.
+        setBitsPerComponent( 1 );
+        setColorSpace( new PDDeviceGray() );
+        int columns = faxParms.getInt( "Columns" );
+        setWidth( columns );
+        int rows = faxParms.getInt( "Rows" );
+        setHeight( rows );
+    }
+
+ /**
+ * Returns an image of the CCITT Fax, or null if TIFFs are not supported. (Requires additional JAI Image filters )
+ * @see org.pdfbox.pdmodel.graphics.xobject.PDXObjectImage#getRGBImage()
+ */
+    public BufferedImage getRGBImage() throws IOException
+    {
+        // The fax data is prefixed with a generated TIFF header (TiffWrapper) and
+        // handed to ImageIO. ImageIO.read(InputStream) returns null when no
+        // registered reader understands the data, and it never closes the stream
+        // it is given, so the source stream is closed here once decoding is done.
+        InputStream faxData = getPDStream().getPartiallyFilteredStream( FAX_FILTERS );
+        try
+        {
+            return ImageIO.read( new TiffWrapper( faxData, getCOSStream() ) );
+        }
+        finally
+        {
+            if( faxData != null )
+            {
+                faxData.close();
+            }
+        }
+    }
+
+ /**
+ * This writes a tiff to out.
+ * @see org.pdfbox.pdmodel.graphics.xobject.PDXObjectImage#write2OutputStream(java.io.OutputStream)
+ */
+    public void write2OutputStream( OutputStream out ) throws IOException
+    {
+        // Streams a generated TIFF header followed by the fax data to out.
+        // out belongs to the caller and is left open; the source stream is
+        // opened here and is therefore closed here, even if the copy fails.
+        InputStream faxData = getPDStream().getPartiallyFilteredStream( FAX_FILTERS );
+        try
+        {
+            InputStream tiff = new TiffWrapper( faxData, getCOSStream() );
+            byte[] buf = new byte[1024];
+            int amountRead;
+            while( (amountRead = tiff.read( buf )) != -1 )
+            {
+                out.write( buf, 0, amountRead );
+            }
+        }
+        finally
+        {
+            if( faxData != null )
+            {
+                faxData.close();
+            }
+        }
+    }
+
+ /**
+ * Extract the ccitt stream from the tiff file.
+ *
+ * @param raf - TIFF File
+ * @param os - Stream to write raw ccitt data to
+ * @param parms - COSDictionary which the encoding parameters are added to
+ * @throws IOException If there is an error reading/writing to/from the stream
+ */
+    private void extractFromTiff(RandomAccessFile raf, OutputStream os, COSDictionary parms) throws IOException
+    {
+        // Parses the first image file directory (IFD) of the TIFF, records the fax
+        // parameters it finds in parms (Columns, Rows, BlackIs1, K) and copies the
+        // single strip/tile of encoded data to os. os is always closed on exit,
+        // whether or not the extraction succeeded.
+        try
+        {
+            // First check the basic tiff header: two identical byte-order bytes
+            // ('I' = low byte first, 'M' = high byte first) and the magic number 42.
+            raf.seek(0);
+            char endianess = (char) raf.read();
+            if ((char) raf.read() != endianess)
+            {
+                throw new IOException("Not a valid tiff file");
+            }
+            //ensure that endianess is either M or I
+            if (endianess != 'M' && endianess != 'I')
+            {
+                throw new IOException("Not a valid tiff file");
+            }
+            int magicNumber = readshort(endianess, raf);
+            if( magicNumber != 42)
+            {
+                throw new IOException("Not a valid tiff file");
+            }
+
+            // Relocate to the first set of tags
+            raf.seek(readlong(endianess, raf));
+
+            int numtags = readshort(endianess, raf);
+
+            // The number 50 is somewhat arbitrary; it just stops us loading junk
+            // from a corrupt file and trampling on through it.
+            if (numtags > 50)
+            {
+                throw new IOException("Not a valid tiff file");
+            }
+
+            // Loop through the tags. Some convert to items in the parms dictionary,
+            // others point us to where the data stream is. The only parm which
+            // depends on more than one tag is K, so it is collected separately and
+            // stored after the loop.
+
+            int k=-1000; // Sentinel: no CCITT compression seen
+            int dataoffset=0;
+            int datalength=0;
+
+            for (int i=0; i < numtags; i++)
+            {
+                int tag = readshort(endianess, raf);
+                int type = readshort(endianess, raf);
+                int count = readlong(endianess, raf);
+                int val = readlong(endianess, raf); // See note
+
+                // Note: the value field always occupies 4 bytes, but a byte or
+                // short value only uses the first one or two of them. Having read
+                // all four as a long, pick out the bytes that carry the value.
+                // Unsigned shifts/masks are required: an arithmetic shift would
+                // turn any short >= 0x8000 into a negative number, and unmasked
+                // little-endian values would pick up the padding bytes.
+                // All other types are ignored; they are of little interest here.
+                switch (type)
+                {
+                    case 1: // byte value
+                    {
+                        val = (endianess == 'M') ? (val >>> 24) : (val & 0xff);
+                        break;
+                    }
+                    case 3: // short value
+                    {
+                        val = (endianess == 'M') ? (val >>> 16) : (val & 0xffff);
+                        break;
+                    }
+                    default:
+                    {
+                        // long value (type 4) uses all four bytes; nothing to do
+                    }
+                }
+
+                switch (tag)
+                {
+                    case 256: // ImageWidth
+                    {
+                        parms.setInt("Columns",val);
+                        break;
+                    }
+                    case 257: // ImageLength
+                    {
+                        parms.setInt("Rows",val);
+                        break;
+                    }
+                    case 259: // Compression: 3 = T4, 4 = T6
+                    {
+                        if (val == 4)
+                        {
+                            k=-1;
+                        }
+                        if (val == 3)
+                        {
+                            k=0;
+                        }
+                        break;
+                    }
+                    case 262: // PhotometricInterpretation
+                    {
+                        if (val == 1)
+                        {
+                            parms.setBoolean("BlackIs1", true);
+                        }
+                        break;
+                    }
+                    case 273: // StripOffsets
+                    {
+                        if (count == 1)
+                        {
+                            dataoffset=val;
+                        }
+                        break;
+                    }
+                    case 279: // StripByteCounts
+                    {
+                        if (count == 1)
+                        {
+                            datalength=val;
+                        }
+                        break;
+                    }
+                    case 292: // T4Options
+                    {
+                        // Bit 0 selects 2-D coding. Test the bit rather than the
+                        // whole value so that other option bits being set does
+                        // not hide it.
+                        if ((val & 1) != 0)
+                        {
+                            k=50; // T4 2D - arbitrary positive K value
+                        }
+                        break;
+                    }
+                    case 324: // TileOffsets
+                    {
+                        if (count == 1)
+                        {
+                            dataoffset=val;
+                        }
+                        break;
+                    }
+                    case 325: // TileByteCounts
+                    {
+                        if (count == 1)
+                        {
+                            datalength=val;
+                        }
+                        break;
+                    }
+                    default:
+                    {
+                        //do nothing
+                    }
+                }
+            }
+
+            if (k == -1000)
+            {
+                throw new IOException("First image in tiff is not CCITT T4 or T6 compressed");
+            }
+            if (dataoffset == 0)
+            {
+                throw new IOException("First image in tiff is not a single tile/strip");
+            }
+            if (datalength < 0)
+            {
+                // A byte count with the top bit set does not fit an int; reject it
+                // here instead of failing with an unchecked exception in read().
+                throw new IOException("Data length in tiff is out of range");
+            }
+
+            parms.setInt("K",k);
+
+            raf.seek(dataoffset);
+
+            // Copy at most datalength bytes; stop early if the file ends first.
+            byte[] buf = new byte[8192];
+            int amountRead = -1;
+            while( (amountRead = raf.read( buf,0, Math.min(8192,datalength) )) > 0 )
+            {
+                datalength -= amountRead;
+                os.write( buf, 0, amountRead );
+            }
+
+        }
+        finally
+        {
+            os.close();
+        }
+    }
+
+    /**
+     * Reads an unsigned 16 bit value from the current position of the file.
+     *
+     * @param endianess 'I' to read the low byte first; any other value reads the high byte first.
+     * @param raf The file to read from.
+     * @return The value read, in the range 0 to 65535.
+     * @throws IOException If the read fails or the file ends before two bytes could be read.
+     */
+    private int readshort(char endianess, RandomAccessFile raf) throws IOException
+    {
+        // readUnsignedByte() throws an EOFException at end of file, whereas the
+        // -1 returned by read() would be silently folded into the result.
+        int first = raf.readUnsignedByte();
+        int second = raf.readUnsignedByte();
+        if (endianess == 'I')
+        {
+            return first | (second << 8);
+        }
+        return (first << 8) | second;
+    }
+
+    /**
+     * Reads a 32 bit value from the current position of the file.
+     *
+     * @param endianess 'I' to read the low byte first; any other value reads the high byte first.
+     * @param raf The file to read from.
+     * @return The four bytes combined into an int; negative if the top bit of the value is set.
+     * @throws IOException If the read fails or the file ends before four bytes could be read.
+     */
+    private int readlong(char endianess, RandomAccessFile raf) throws IOException
+    {
+        // readUnsignedByte() throws an EOFException at end of file, whereas the
+        // -1 returned by read() would be silently folded into the result.
+        int b0 = raf.readUnsignedByte();
+        int b1 = raf.readUnsignedByte();
+        int b2 = raf.readUnsignedByte();
+        int b3 = raf.readUnsignedByte();
+        if (endianess == 'I')
+        {
+            return b0 | (b1 << 8) | (b2 << 16) | (b3 << 24);
+        }
+        return (b0 << 24) | (b1 << 16) | (b2 << 8) | b3;
+    }
+
+
+ /**
+ * Extends InputStream to wrap the data from the CCITT Fax with a suitable TIFF Header.
+ * For details see www.tiff.org, which contains useful information including pointers to the
+ * TIFF 6.0 Specification
+ *
+ */
+    private static class TiffWrapper extends InputStream
+    {
+        // The wrapper needs nothing from the enclosing PDCcitt instance, so it is a
+        // static nested class. Reading it yields a generated little-endian TIFF
+        // header first and then the bytes of the wrapped stream.
+
+        // File introducer ("II", 42), pointer to the first IFD (offset 8) and the
+        // two byte IFD entry count, which starts at zero and is bumped per tag.
+        private static final byte[] BASIC_HEADER = {
+            'I','I',42,0,8,0,0,0,
+            0,0};
+
+        /* Tiff types 1 = byte, 2=ascii, 3=short, 4=ulong 5=rational */
+        private static final int TYPE_ASCII = 2;
+        private static final int TYPE_SHORT = 3;
+        private static final int TYPE_LONG = 4;
+        private static final int TYPE_RATIONAL = 5;
+
+        private static final int IFD_START = 10;           // Offset of the first IFD entry
+        private static final int ENTRY_SIZE = 12;          // Size of one IFD entry
+        private static final int MAX_TAGS = 10;            // The maximum tags we'll fill
+        private static final int MAX_ADDITIONAL_DATA = 24; // The maximum amount of data outside the IFD (bytes)
+
+        private int currentOffset;      // When reading, where in the tiffheader are we.
+        private byte[] tiffheader;      // Byte array to store tiff header data
+        private InputStream datastream; // Original InputStream
+        private int additionalOffset;   // Offset in header to the next free additional data byte
+
+        /**
+         * Wraps a stream of CCITT fax data.
+         *
+         * @param rawstream The fax data that follows the generated header.
+         * @param options The image dictionary the header values are taken from.
+         */
+        public TiffWrapper(InputStream rawstream, COSDictionary options)
+        {
+            buildHeader(options);
+            currentOffset=0;
+            datastream = rawstream;
+        }
+
+        // Implement basic methods from InputStream
+
+        public boolean markSupported()
+        {
+            return false;
+        }
+
+        public void reset() throws IOException
+        {
+            throw new IOException("reset not supported");
+        }
+
+        // For simple read, take a byte from the tiffheader array or pass through.
+        public int read() throws IOException
+        {
+            if (currentOffset < tiffheader.length)
+            {
+                // Mask to 0..255: returning the signed byte would hand out negative
+                // values, and a header byte of 0xFF would look like end of stream.
+                return tiffheader[currentOffset++] & 0xff;
+            }
+            return datastream.read();
+        }
+
+        // Same contract as read(byte[], int, int) over the whole array.
+        public int read(byte[] data) throws IOException
+        {
+            return read(data, 0, data.length);
+        }
+
+        // Only return as many bytes as we have left in the header; once the header
+        // is exhausted, pass through to the InputStream of the raw CCITT data.
+        public int read(byte[] data, int off, int len) throws IOException
+        {
+            int remaining = tiffheader.length - currentOffset;
+            if (remaining <= 0)
+            {
+                return datastream.read(data,off,len);
+            }
+            int length = Math.min(remaining, len);
+            if (length > 0)
+            {
+                System.arraycopy(tiffheader, currentOffset, data, off, length);
+            }
+            currentOffset += length;
+            return length;
+        }
+
+        // While header data is still unread, only skip within the header;
+        // otherwise just pass through.
+        public long skip(long n) throws IOException
+        {
+            if (n <= 0)
+            {
+                // A negative count must not move the header position backwards.
+                return 0;
+            }
+            int remaining = tiffheader.length - currentOffset;
+            if (remaining <= 0)
+            {
+                return datastream.skip(n);
+            }
+            int skipped = (int) Math.min(remaining, n);
+            currentOffset += skipped;
+            return skipped;
+        }
+
+        // Closing the wrapper closes the wrapped stream.
+        public void close() throws IOException
+        {
+            if (datastream != null)
+            {
+                datastream.close();
+            }
+        }
+
+        // Builds up the tiffheader based on the options passed through.
+        private void buildHeader(COSDictionary options)
+        {
+            // The length of the header is the length of the basic header (10)
+            // plus 12 bytes for each IFD entry, 4 bytes as a pointer to the next
+            // IFD (will be 0) plus the length of the additional data.
+            // A new array is already zero filled, which provides the next IFD
+            // pointer, unused entry bytes and the string terminator.
+            tiffheader = new byte[IFD_START + (ENTRY_SIZE * MAX_TAGS) + 4 + MAX_ADDITIONAL_DATA];
+            System.arraycopy(BASIC_HEADER,0,tiffheader,0,BASIC_HEADER.length);
+
+            // Additional data starts after the IFD entries and the next IFD pointer
+            additionalOffset = IFD_START + (ENTRY_SIZE * MAX_TAGS) + 4;
+
+            // Now work out the variable values from TIFF defaults,
+            // PDF defaults and the dictionary for this XObject
+            int cols = 1728;
+            int rows = 0;
+            short blackis1 = 0;
+            short comptype = 3; // T4 compression
+            long t4options = 0; // Set to 1 for 2d T4
+
+            // Only a dictionary is understood here; any other kind of entry is
+            // treated like a missing one instead of failing with a ClassCastException.
+            Object parmsEntry = options.getDictionaryObject("DecodeParms");
+            if (parmsEntry instanceof COSDictionary)
+            {
+                COSDictionary decodeParms = (COSDictionary) parmsEntry;
+                cols = decodeParms.getInt("Columns", cols);
+                rows = decodeParms.getInt("Rows", rows);
+                if (decodeParms.getBoolean("BlackIs1", false))
+                {
+                    blackis1 = 1;
+                }
+                // K is optional in PDF and defaults to 0 (T4 1D); reading it
+                // without a default would misreport a missing K as T6.
+                int k = decodeParms.getInt("K", 0);
+                if (k < 0)
+                {
+                    //T6
+                    comptype = 4;
+                }
+                else if (k > 0)
+                {
+                    //T4 2D
+                    t4options = 1;
+                }
+                // else k = 0, leave as default T4 1D compression
+            }
+
+            // If we couldn't get the number of rows, use the main item from XObject
+            if (rows == 0)
+            {
+                rows = options.getInt("Height", rows);
+            }
+
+            // Now put the tags into the tiffheader.
+            // These mustn't exceed the maximum set above, and by TIFF spec should be
+            // sorted into numeric sequence. Width and height are written as longs so
+            // that values above 65535 are not truncated.
+
+            addTag(256, (long) cols); // Columns
+            addTag(257, (long) rows); // Rows
+            addTag(259, comptype); // Compression (T4 or T6)
+            addTag(262, blackis1); // Photometric Interpretation
+            addTag(273, (long) tiffheader.length); // Image data starts right after this header
+            addTag(279, (long) options.getInt("Length")); // Length of image data
+            addTag(282, 300, 1); // X Resolution 300 (default unit Inches) This is arbitrary
+            addTag(283, 300, 1); // Y Resolution 300 (default unit Inches) This is arbitrary
+            if (comptype == 3)
+            {
+                addTag(292, t4options);
+            }
+            addTag(305, "PDFBOX"); // Software generating image
+        }
+
+        // Writes tag id, type and value count of the next IFD entry, bumps the
+        // entry count and returns the offset of the entry's 4 byte value field.
+        private int beginTag(int tag, int type, int count)
+        {
+            int index = tiffheader[8]++;
+            int offset = IFD_START + (index * ENTRY_SIZE);
+            tiffheader[offset]=(byte)(tag & 0xff);
+            tiffheader[offset+1]=(byte)((tag>>8) & 0xff);
+            tiffheader[offset+2]=(byte) type;
+            putLong(offset+4, count);
+            return offset+8;
+        }
+
+        // Stores the low 32 bits of value at offset, low byte first.
+        private void putLong(int offset, long value)
+        {
+            tiffheader[offset]=(byte)(value & 0xff);
+            tiffheader[offset+1]=(byte)((value>>8) & 0xff);
+            tiffheader[offset+2]=(byte)((value>>16) & 0xff);
+            tiffheader[offset+3]=(byte)((value>>24) & 0xff);
+        }
+
+        // Adds a tag of type 4 (ulong)
+        private void addTag(int tag,long value)
+        {
+            putLong(beginTag(tag, TYPE_LONG, 1), value);
+        }
+
+        // Adds a tag of type 3 (short)
+        private void addTag(int tag, short value)
+        {
+            int valueOffset = beginTag(tag, TYPE_SHORT, 1);
+            tiffheader[valueOffset]=(byte)(value & 0xff);
+            tiffheader[valueOffset+1]=(byte)((value>>8) & 0xff);
+        }
+
+        // Adds a tag of type 2 (ascii). value is expected to be plain ASCII.
+        private void addTag(int tag, String value)
+        {
+            // The count of an ascii tag is the number of bytes including the
+            // terminating NUL, not the number of strings.
+            int count = value.length() + 1;
+            int valueOffset = beginTag(tag, TYPE_ASCII, count);
+            int target;
+            if (count <= 4)
+            {
+                // Short enough to live in the value field itself
+                target = valueOffset;
+            }
+            else
+            {
+                putLong(valueOffset, additionalOffset);
+                target = additionalOffset;
+                additionalOffset += count;
+            }
+            for (int i=0; i < value.length(); i++)
+            {
+                tiffheader[target+i]=(byte) value.charAt(i);
+            }
+            // The terminating NUL is already there: the array is zero filled
+        }
+
+        // Adds a tag of type 5 (rational): two longs stored in the additional data
+        private void addTag(int tag, long numerator, long denominator)
+        {
+            putLong(beginTag(tag, TYPE_RATIONAL, 1), additionalOffset);
+            putLong(additionalOffset, numerator);
+            putLong(additionalOffset+4, denominator);
+            additionalOffset += 8;
+        }
+    }
+} \ No newline at end of file