/**
* Copyright (c) 2006 by Know-Center, Graz, Austria
*
* This software is the confidential and proprietary information of Know-Center,
* Graz, Austria. You shall not disclose such Confidential Information and shall
* use it only in accordance with the terms of the license agreement you entered
* into with Know-Center.
*
* KNOW-CENTER MAKES NO REPRESENTATIONS OR WARRANTIES ABOUT THE SUITABILITY OF
* THE SOFTWARE, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
* IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, OR
* NON-INFRINGEMENT. KNOW-CENTER SHALL NOT BE LIABLE FOR ANY DAMAGES SUFFERED BY
* LICENSEE AS A RESULT OF USING, MODIFYING OR DISTRIBUTING THIS SOFTWARE OR ITS
* DERIVATIVES.
*
* $Id: PDFPage.java,v 1.5 2006/10/31 08:09:33 wprinz Exp $
*/
package at.knowcenter.wag.egov.egiz.pdf;
import java.io.IOException;
import java.util.List;
import java.util.Map;
import org.apache.log4j.Logger;
import org.pdfbox.cos.COSName;
import org.pdfbox.cos.COSStream;
import org.pdfbox.pdmodel.PDPage;
import org.pdfbox.pdmodel.PDResources;
import org.pdfbox.pdmodel.common.PDStream;
import org.pdfbox.pdmodel.graphics.xobject.PDXObject;
import org.pdfbox.pdmodel.graphics.xobject.PDXObjectForm;
import org.pdfbox.util.Matrix;
import org.pdfbox.util.PDFOperator;
import org.pdfbox.util.PDFTextStripper;
import org.pdfbox.util.TextPosition;
import org.pdfbox.util.operator.OperatorProcessor;
import at.knowcenter.wag.egov.egiz.cfg.ConfigLogger;
/**
* PDFPage is used to calculate the page length of a PDF document page. It
* extends the PDFTextStripper class and implements one method of interest:
* {@link PDFPage#showCharacter(TextPosition)}.
* This method is called while the content stream of a page is processed, i.e.
* when the method
* {@link org.pdfbox.util.PDFStreamEngine#processStream(org.pdfbox.pdmodel.PDPage, org.pdfbox.pdmodel.PDResources, org.pdfbox.cos.COSStream)}
* is called, the implemented method showCharacter is invoked for the shown
* characters. In addition, the images drawn with the "Do" operator are taken
* into account (see {@link PDFPage.MyInvoke}).
*
* @author wlackner
* @see PDFTextStripper
*/
public class PDFPage extends PDFTextStripper
{
/**
 * The logger definition.
 */
private static final Logger logger_ = ConfigLogger.getLogger(PDFPage.class);
/**
 * The maximum (lowest) y position of a character. The value stays at negative
 * infinity as long as no character has been recorded.
 */
protected float max_character_ypos = Float.NEGATIVE_INFINITY;
/**
 * The maximum (lowest) y position of an image. The value stays at negative
 * infinity as long as no image has been recorded.
 */
protected float max_image_ypos = Float.NEGATIVE_INFINITY;
/**
 * The y coordinate of the footer line. PDF elements below this footer line,
 * i.e. elements whose y position is greater than this value, will not be
 * regarded.
 */
protected float footer_line = 0.0f;
/**
 * Creates a new PDFPage that ignores all elements beyond the given footer
 * line.
 * <p>
 * Apart from remembering the footer line, a {@link MyInvoke} processor that is
 * bound to this instance is installed for the "Do" operator.
 *
 * @param footer_line The y coordinate of the footer line. PDF elements below
 *          this footer line will not be regarded.
 *
 * @throws IOException Declared by this constructor; presumably forwarded from
 *           the superclass constructor.
 */
public PDFPage(float footer_line) throws IOException
{
  super();

  // replace the handling of the XObject invocation operator by our own one
  final OperatorProcessor xobject_invoker = new MyInvoke();
  xobject_invoker.setContext(this);
  operators.put("Do", xobject_invoker);

  this.footer_line = footer_line;
}
// /**
// * You should override this method if you want to perform an action when a
// * string is being shown.
// *
// * @param string The string to display.
// *
// * @throws IOException If there is an error showing the string
// */
// public void showString( byte[] string ) throws IOException
// {
// float spaceWidth = 0;
// float spacing = 0;
// StringBuffer stringResult = new StringBuffer(string.length);
//
// float characterDisplacement = 0;
// float spaceDisplacement = 0;
//
// PDGraphicsState graphicsState = getGraphicsState();
// float fontSize = graphicsState.getTextState().getFontSize();
// float horizontalScaling =
// graphicsState.getTextState().getHorizontalScalingPercent()/100f;
// float rise = graphicsState.getTextState().getRise();
// final float wordSpacing = graphicsState.getTextState().getWordSpacing();
// final float characterSpacing =
// graphicsState.getTextState().getCharacterSpacing();
// float wordSpacingDisplacement = 0;
//
// PDFont font = graphicsState.getTextState().getFont();
//
// //This will typically be 1000 but in the case of a type3 font
// //this might be a different number
// float glyphSpaceToTextSpaceFactor = 1f/font.getFontMatrix().getValue( 0, 0
// );
// Float averageWidth = (Float)fontToAverageWidths.get( font );
// if( averageWidth == null )
// {
// averageWidth = new Float( font.getAverageFontWidth() );
// fontToAverageWidths.put( font, averageWidth );
// }
//
// Matrix initialMatrix = new Matrix();
// initialMatrix.setValue(0,0,1);
// initialMatrix.setValue(0,1,0);
// initialMatrix.setValue(0,2,0);
// initialMatrix.setValue(1,0,0);
// initialMatrix.setValue(1,1,1);
// initialMatrix.setValue(1,2,0);
// initialMatrix.setValue(2,0,0);
// initialMatrix.setValue(2,1,rise);
// initialMatrix.setValue(2,2,1);
//
//
// //this
// int codeLength = 1;
// Matrix ctm = graphicsState.getCurrentTransformationMatrix();
//
// //lets see what the space displacement should be
// spaceDisplacement = (font.getFontWidth( SPACE_BYTES, 0, 1
// )/glyphSpaceToTextSpaceFactor);
// if( spaceDisplacement == 0 )
// {
// spaceDisplacement =
// (averageWidth.floatValue()/glyphSpaceToTextSpaceFactor);
// //The average space width appears to be higher than necessary
// //so lets make it a little bit smaller.
// spaceDisplacement *= .80f;
// if( log.isDebugEnabled() )
// {
// log.debug( "Font: Space From Average=" + spaceDisplacement );
// }
// }
// int pageRotation = page.findRotation();
//
// // very strange.... the ctms are multiplied by right, but suddenly the
// textM is multiplied from the left.
// // but: PDF matrices are multiplied from left ==> ctm is wrong
// Matrix trm = initialMatrix.multiply( textMatrix ).multiply( ctm );
// float x = trm.getValue(2,0);
// float y = trm.getValue(2,1);
// float flipped_y = -y + page.findMediaBox().getHeight();
// if( pageRotation == 0 )
// {
// trm.setValue( 2,1, flipped_y );
// }
// else if( pageRotation == 90 )
// {
// trm.setValue( 2,0, y );
// trm.setValue( 2,1, x );
// }
// else if( pageRotation == 270 )
// {
// trm.setValue( 2,0, flipped_y );
// trm.setValue( 2,1, x );
// }
// for( int i=0; i calculate there y position.
*/
protected void showCharacter(TextPosition text)
{
final float current_y = text.getY();
final String character = text.getCharacter();
if (current_y > this.footer_line)
{
//logger_.debug("character is below footer_line. footer_line = " + this.footer_line + ", text.character=" + character + ", y=" + current_y);
return;
}
// store ypos of the char if it is not empty
if (!character.equals(" ") && current_y > this.max_character_ypos)
{
this.max_character_ypos = current_y;
//logger_.debug("text.character=" + character + ", y=" + current_y);
// System.err.println(character + "|" + current_y);
}
//logger_.debug("text.character=" + character + ", y=" + current_y);
// System.err.println(character + "|" + current_y);
}
// use this function to get an unsorted text output
// public void showString(byte[] string) {
// logger_.debug(new String(string));
// }
/**
 * Returns the calculated page length, which is the greater one of the recorded
 * character and image y positions.
 *
 * @return the max page length value; negative infinity if neither a character
 *         nor an image has been recorded
 */
public float getMaxPageLength()
{
  // the character position only wins if it is strictly greater
  return (this.max_character_ypos > this.max_image_ypos)
      ? this.max_character_ypos
      : this.max_image_ypos;
}
public class MyInvoke extends OperatorProcessor
{
public void process(PDFOperator operator, List arguments) throws IOException
{
COSName name = (COSName) arguments.get(0);
//logger_.debug("");
// PDResources res = context.getResources();
Map xobjects = context.getXObjects();
PDXObject xobject = (PDXObject) xobjects.get(name.getName());
PDStream stream = xobject.getPDStream();
COSStream cos_stream = stream.getStream();
COSName subtype = (COSName) cos_stream.getDictionaryObject(COSName.SUBTYPE);
if (subtype.equals(COSName.IMAGE))
{
logger_.debug("XObject Image");
Matrix ctm = context.getGraphicsState().getCurrentTransformationMatrix();
logger_.debug("ctm = " + ctm);
Pos [] coordinates = new Pos [] {
new Pos(0, 0, 1),
new Pos(1, 0, 1),
new Pos(0, 1, 1),
new Pos(1, 1, 1) };
Pos [] transformed_coordinates = transtormCoordinates(coordinates, ctm);
float actual_lowest_point = Float.NaN;
int pageRotation = page.findRotation();
logger_.debug("PageRotation = " + pageRotation);
if (pageRotation == 0)
{
float min_y = findMinY(transformed_coordinates);
logger_.debug("min_y = " + min_y);
float page_height = page.findMediaBox().getHeight();
logger_.debug("page_height = " + page_height);
actual_lowest_point = page_height - min_y;
}
if (pageRotation == 90)
{
float max_x = findMaxX(transformed_coordinates);
logger_.debug("max_x = " + max_x);
// float page_width = page.findMediaBox().getWidth();
// logger_.debug("page_width = " + page_width);
actual_lowest_point = max_x;
}
if (pageRotation == 180)
{
float min_y = findMinY(transformed_coordinates);
logger_.debug("min_y = " + min_y);
actual_lowest_point = min_y;
}
if (pageRotation == 270)
{
float min_x = findMinX(transformed_coordinates);
logger_.debug("min_x = " + min_x);
// float page_width = page.findMediaBox().getWidth();
// logger_.debug("page_width = " + page_width);
actual_lowest_point = min_x;
}
logger_.debug("actual_lowest_point = " + actual_lowest_point);
if (actual_lowest_point > PDFPage.this.footer_line)
{
logger_.debug("image is below footer_line. footer_line = " + PDFPage.this.footer_line);
return;
}
if (actual_lowest_point > PDFPage.this.max_image_ypos)
{
PDFPage.this.max_image_ypos = actual_lowest_point;
}
return;
}
if (xobject instanceof PDXObjectForm)
{
PDXObjectForm form = (PDXObjectForm) xobject;
COSStream invoke = (COSStream) form.getCOSObject();
PDResources pdResources = form.getResources();
PDPage page = context.getCurrentPage();
if (pdResources == null)
{
pdResources = page.findResources();
}
getContext().processSubStream(page, pdResources, invoke);
}
}
}
/**
 * Transforms all given coordinates with the given matrix.
 *
 * @param coordinates The coordinates to be transformed. The array itself is
 *          not modified.
 * @param m The transformation matrix.
 * @return A new array that contains the transformed coordinates in the same
 *         order.
 */
public static Pos [] transtormCoordinates (Pos [] coordinates, Matrix m)
{
  final int count = coordinates.length;
  final Pos [] result = new Pos [count];
  int index = 0;
  while (index < count)
  {
    result[index] = transtormCoordinate(coordinates[index], m);
    index++;
  }
  return result;
}
/**
 * Transforms a single coordinate by multiplying the row vector (x, y, z) with
 * the given matrix. The result is written to the debug log.
 *
 * @param pos The coordinate to be transformed. It is not modified.
 * @param m The transformation matrix.
 * @return A new Pos that holds the transformed coordinate.
 */
public static Pos transtormCoordinate (Pos pos, Matrix m)
{
  final Pos result = new Pos();

  // each component is the product of (x, y, z) and one column of the matrix
  result.x = pos.x * m.getValue(0, 0)
      + pos.y * m.getValue(1, 0)
      + pos.z * m.getValue(2, 0);
  result.y = pos.x * m.getValue(0, 1)
      + pos.y * m.getValue(1, 1)
      + pos.z * m.getValue(2, 1);
  result.z = pos.x * m.getValue(0, 2)
      + pos.y * m.getValue(1, 2)
      + pos.z * m.getValue(2, 2);

  logger_.debug(" transformed " + pos + " --> " + result);
  return result;
}
/**
 * Finds the smallest y value of the given coordinates.
 *
 * @param coordinates The coordinates to be searched.
 * @return The smallest y value, or positive infinity if the array is empty.
 */
public static float findMinY (Pos [] coordinates)
{
  float smallest = Float.POSITIVE_INFINITY;
  int index = 0;
  while (index < coordinates.length)
  {
    final float candidate = coordinates[index].y;
    smallest = (candidate < smallest) ? candidate : smallest;
    index++;
  }
  return smallest;
}
/**
 * Finds the greatest x value of the given coordinates.
 *
 * @param coordinates The coordinates to be searched.
 * @return The greatest x value, or negative infinity if the array is empty.
 */
public static float findMaxX (Pos [] coordinates)
{
  float greatest = Float.NEGATIVE_INFINITY;
  int index = 0;
  while (index < coordinates.length)
  {
    final float candidate = coordinates[index].x;
    greatest = (candidate > greatest) ? candidate : greatest;
    index++;
  }
  return greatest;
}
/**
 * Finds the smallest x value of the given coordinates.
 *
 * @param coordinates The coordinates to be searched.
 * @return The smallest x value, or positive infinity if the array is empty.
 */
public static float findMinX (Pos [] coordinates)
{
  float smallest = Float.POSITIVE_INFINITY;
  int index = 0;
  while (index < coordinates.length)
  {
    final float candidate = coordinates[index].x;
    smallest = (candidate < smallest) ? candidate : smallest;
    index++;
  }
  return smallest;
}
}