/**
 * Copyright (c) 2005, www.pdfbox.org
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 * 3. Neither the name of pdfbox; nor the names of its
 *    contributors may be used to endorse or promote products derived from this
 *    software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * http://www.pdfbox.org
 *
 */
/*
 * Provenance: this source was recovered from the patch
 * c68ad0ec056b37c82debebcecfcde1866d61b4d9 ("Removing pdfbox from source.",
 * tknall, 2008-11-25, pdf-as trunk@301), which deleted
 * src/main/java/org/pdfbox/util/PDFTextStripperByArea.java.
 */
package org.pdfbox.util;

import java.awt.Rectangle;
import java.io.IOException;
import java.io.StringWriter;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Vector;

import org.pdfbox.cos.COSStream;
import org.pdfbox.pdmodel.PDPage;
import org.pdfbox.pdmodel.common.PDStream;

/**
 * This will extract text from a specified region in the PDF.
 *
 * Typical use: register one or more named rectangles with
 * {@link #addRegion(String, Rectangle)}, call {@link #extractRegions(PDPage)}
 * for a page, then read each region's text with
 * {@link #getTextForRegion(String)}.
 *
 * @author Ben Litchfield (ben@benlitchfield.com)
 * @version $Revision: 1.3 $
 */
public class PDFTextStripperByArea extends PDFTextStripper
{
    /** Region names (String) in the order they were first registered. */
    private List regions = new ArrayList();
    /** Region name (String) -> area (Rectangle) supplied to addRegion. */
    private Map regionArea = new HashMap();
    /** Region name (String) -> Vector of character lists collected for that region. */
    private Map regionCharacterList = new HashMap();
    /** Region name (String) -> StringWriter receiving that region's text. */
    private Map regionText = new HashMap();

    /**
     * Constructor.
     * @throws IOException If there is an error loading properties.
     */
    public PDFTextStripperByArea() throws IOException
    {
        super();
    }

    /**
     * Add a new region to group text by. Registering a name that already
     * exists replaces its rectangle; the name is listed only once in
     * {@link #getRegions()}.
     *
     * @param regionName The name of the region.
     * @param rect The rectangle area to retrieve the text from.
     */
    public void addRegion( String regionName, Rectangle rect )
    {
        // Keep the name list free of duplicates so it stays in step with the
        // key set of regionArea, which can only hold one entry per name.
        if( !regions.contains( regionName ) )
        {
            regions.add( regionName );
        }
        regionArea.put( regionName, rect );
    }

    /**
     * Get the list of regions that have been setup.
     *
     * @return A list of java.lang.String objects to identify the region names.
     *         This is the stripper's own list, not a copy.
     */
    public List getRegions()
    {
        return regions;
    }

    /**
     * Get the text for the region, this should be called after extractRegions().
     *
     * @param regionName The name of the region to get the text from.
     * @return The text that was identified in that region.
     * @throws NullPointerException If no text buffer exists for the name, i.e.
     *         the region was never added or extractRegions() has not been
     *         called since it was added.
     */
    public String getTextForRegion( String regionName )
    {
        StringWriter text = (StringWriter)regionText.get( regionName );
        if( text == null )
        {
            // Same exception type as the unchecked dereference it replaces,
            // but with a message that tells the caller what went wrong.
            throw new NullPointerException(
                "No text available for region '" + regionName
                + "'; add the region and call extractRegions() first" );
        }
        return text.toString();
    }

    /**
     * Process the page to extract the region text.
     *
     * @param page The page to extract the regions from.
     * @throws IOException If there is an error while extracting text.
     */
    public void extractRegions( PDPage page ) throws IOException
    {
        Iterator regionIter = regions.iterator();
        while( regionIter.hasNext() )
        {
            //reset the stored text for the region so this class
            //can be reused.
            String regionName = (String)regionIter.next();
            Vector regionCharactersByArticle = new Vector();
            regionCharactersByArticle.add( new ArrayList() );
            regionCharacterList.put( regionName, regionCharactersByArticle );
            regionText.put( regionName, new StringWriter() );
        }

        // A page without a content stream leaves every region empty.
        PDStream contentStream = page.getContents();
        if( contentStream != null )
        {
            COSStream contents = contentStream.getStream();
            processPage( page, contents );
        }
    }

    /**
     * Routes a character to every region whose rectangle contains the
     * character's (x, y) position; a character lying in several overlapping
     * regions is recorded in each of them.
     *
     * @param text The text position to file under the matching regions.
     * @see PDFTextStripper#showCharacter(TextPosition)
     */
    protected void showCharacter( TextPosition text )
    {
        Iterator entryIter = regionArea.entrySet().iterator();
        while( entryIter.hasNext() )
        {
            Map.Entry entry = (Map.Entry)entryIter.next();
            Rectangle rect = (Rectangle)entry.getValue();
            if( rect.contains( text.getX(), text.getY() ) )
            {
                // Point the inherited character store at this region's list
                // before delegating, so the character is recorded there.
                charactersByArticle = (Vector)regionCharacterList.get( entry.getKey() );
                super.showCharacter( text );
            }
        }
    }

    /**
     * This will print the text to the output stream. Each region is flushed
     * in turn with the inherited character store and output redirected to
     * that region's own buffers.
     *
     * @throws IOException If there is an error writing the text.
     */
    protected void flushText() throws IOException
    {
        Iterator regionIter = regionArea.keySet().iterator();
        while( regionIter.hasNext() )
        {
            String region = (String)regionIter.next();
            charactersByArticle = (Vector)regionCharacterList.get( region );
            output = (StringWriter)regionText.get( region );
            super.flushText();
        }
    }
}