From c68ad0ec056b37c82debebcecfcde1866d61b4d9 Mon Sep 17 00:00:00 2001 From: tknall Date: Tue, 25 Nov 2008 12:03:13 +0000 Subject: Removing pdfbox from source. git-svn-id: https://joinup.ec.europa.eu/svn/pdf-as/trunk@301 7b5415b0-85f9-ee4d-85bd-d5d0c3b42d1c --- src/main/java/org/pdfbox/util/PDFHighlighter.java | 213 ---------------------- 1 file changed, 213 deletions(-) delete mode 100644 src/main/java/org/pdfbox/util/PDFHighlighter.java (limited to 'src/main/java/org/pdfbox/util/PDFHighlighter.java') diff --git a/src/main/java/org/pdfbox/util/PDFHighlighter.java b/src/main/java/org/pdfbox/util/PDFHighlighter.java deleted file mode 100644 index 6c27225..0000000 --- a/src/main/java/org/pdfbox/util/PDFHighlighter.java +++ /dev/null @@ -1,213 +0,0 @@ -package org.pdfbox.util; - -import java.io.ByteArrayOutputStream; -import java.io.IOException; -import java.io.OutputStreamWriter; -import java.io.Writer; -import java.util.regex.Matcher; -import java.util.regex.Pattern; - -import org.pdfbox.pdmodel.PDDocument; -import org.pdfbox.pdmodel.PDPage; - - -/** - * Highlighting of words in a PDF document with an XML file. - * - * @author slagraulet (slagraulet@cardiweb.com) - * @author Ben Litchfield (ben@csh.rit.edu) - * @version $Revision: 1.6 $ - * - * @see - * Adobe Highlight File Format - */ -public class PDFHighlighter extends PDFTextStripper -{ - private Writer highlighterOutput = null; - //private Color highlightColor = Color.YELLOW; - - private String[] searchedWords; - private ByteArrayOutputStream textOS = null; - private Writer textWriter = null; - - /** - * Default constructor. - * - * @throws IOException If there is an error constructing this class. - */ - public PDFHighlighter() throws IOException - { - super(); - super.setLineSeparator( "" ); - super.setPageSeparator( "" ); - super.setWordSeparator( "" ); - super.setShouldSeparateByBeads( false ); - super.setSuppressDuplicateOverlappingText( false ); - } - - /** - * Generate an XML highlight string based on the PDF. - * - * @param pdDocument The PDF to find words in. - * @param highlightWord The word to search for. - * @param xmlOutput The resulting output xml file. - * - * @throws IOException If there is an error reading from the PDF, or writing to the XML. - */ - public void generateXMLHighlight(PDDocument pdDocument, String highlightWord, Writer xmlOutput ) throws IOException - { - generateXMLHighlight( pdDocument, new String[] { highlightWord }, xmlOutput ); - } - - /** - * Generate an XML highlight string based on the PDF. - * - * @param pdDocument The PDF to find words in. - * @param sWords The words to search for. - * @param xmlOutput The resulting output xml file. - * - * @throws IOException If there is an error reading from the PDF, or writing to the XML. - */ - public void generateXMLHighlight(PDDocument pdDocument, String[] sWords, Writer xmlOutput ) throws IOException - { - highlighterOutput = xmlOutput; - searchedWords = sWords; - highlighterOutput.write("\n\n\n"); - textOS = new ByteArrayOutputStream(); - textWriter = new OutputStreamWriter( textOS, "UTF-16" ); - writeText(pdDocument, textWriter); - highlighterOutput.write("\n\n"); - highlighterOutput.flush(); - } - - /** - * @see PDFTextStripper#endPage( PDPage ) - */ - protected void endPage( PDPage pdPage ) throws IOException - { - textWriter.flush(); - - String page = new String( textOS.toByteArray(), "UTF-16" ); - textOS.reset(); - //page = page.replaceAll( "\n", "" ); - //page = page.replaceAll( "\r", "" ); - //page = CCRStringUtil.stripChar(page, '\n'); - //page = CCRStringUtil.stripChar(page, '\r'); - - // Traitement des listes à puces (caractères spéciaux) - if (page.indexOf("a") != -1) - { - page = page.replaceAll("a[0-9]{1,3}", "."); - } - - for (int i = 0; i < searchedWords.length; i++) - { - Pattern pattern = Pattern.compile(searchedWords[i], Pattern.CASE_INSENSITIVE); - Matcher matcher = pattern.matcher(page); - while( matcher.find() ) - { - int begin = matcher.start(); - int end = matcher.end(); - highlighterOutput.write(" \n"); - } - } - } - - /** - * Command line application. - * - * @param args The command line arguments to the application. - * - * @throws IOException If there is an error generating the highlight file. - */ - public static void main(String[] args) throws IOException - { - PDFHighlighter xmlExtractor = new PDFHighlighter(); - PDDocument doc = null; - try - { - if( args.length < 2 ) - { - usage(); - } - String[] highlightStrings = new String[ args.length - 1]; - System.arraycopy( args, 1, highlightStrings, 0, highlightStrings.length ); - doc = PDDocument.load( args[0] ); - - xmlExtractor.generateXMLHighlight( - doc, - highlightStrings, - new OutputStreamWriter( System.out ) ); - } - finally - { - if( doc != null ) - { - doc.close(); - } - } - } - - private static void usage() - { - System.err.println( "usage: java " + PDFHighlighter.class.getName() + " word1 word2 word3 ..." ); - System.exit( 1 ); - } - - - /** - * Get the color to highlight the strings with. Default is Color.YELLOW. - * - * @return The color to highlight strings with. - */ - /*public Color getHighlightColor() - { - return highlightColor; - }**/ - - /** - * Get the color to highlight the strings with. Default is Color.YELLOW. - * - * @param color The color to highlight strings with. - */ - /*public void setHighlightColor(Color color) - { - this.highlightColor = color; - }**/ - - /** - * Set the highlight color using HTML like rgb string. The string must be 6 characters long. - * - * @param color The color to use for highlighting. Should be in the format of "FF0000". - */ - /*public void setHighlightColor( String color ) - { - highlightColor = Color.decode( color ); - }**/ - - /** - * Get the highlight color as an HTML like string. This will return a string of six characters. - * - * @return The current highlight color. For example FF0000 - */ - /*public String getHighlightColorAsString() - { - //BJL: kudos to anyone that has a cleaner way of doing this! - String red = Integer.toHexString( highlightColor.getRed() ); - String green = Integer.toHexString( highlightColor.getGreen() ); - String blue = Integer.toHexString( highlightColor.getBlue() ); - - return (red.length() < 2 ? "0" + red : red) + - (green.length() < 2 ? "0" + green : green) + - (blue.length() < 2 ? "0" + blue : blue); - }**/ -} \ No newline at end of file -- cgit v1.2.3