1 files changed, 0 insertions, 271 deletions
diff --git a/src/main/java/org/pdfbox/util/PDFText2HTML.java b/src/main/java/org/pdfbox/util/PDFText2HTML.java
deleted file mode 100644
index 0409eaa..0000000
--- a/src/main/java/org/pdfbox/util/PDFText2HTML.java
+++ /dev/null
@@ -1,271 +0,0 @@
-/**
- * Copyright (c) 2003-2004, www.pdfbox.org
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * 1. Redistributions of source code must retain the above copyright notice,
- *    this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright notice,
- *    this list of conditions and the following disclaimer in the documentation
- *    and/or other materials provided with the distribution.
- * 3. Neither the name of pdfbox; nor the names of its
- *    contributors may be used to endorse or promote products derived from this
- *    software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- * DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
- * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
- * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
- * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * http://www.pdfbox.org
- *
- */
-package org.pdfbox.util;
-
-import java.io.IOException;
-
-import java.util.Iterator;
-import java.util.List;
-
-import org.pdfbox.pdmodel.PDDocument;
-
-import org.apache.log4j.Logger;
-
-/**
- * Wrap stripped text in simple HTML, trying to form HTML paragraphs.
- * Paragraphs broken by pages, columns, or figures are not mended.
- * 
- * 
- * @author jjb - http://www.johnjbarton.com
- * @version  $Revision: 1.1 $
- * 
- */
-public class PDFText2HTML extends PDFTextStripper 
-{
-    private static Logger log = Logger.getLogger(PDFText2HTML.class);
-    private static final int INITIAL_PDF_TO_HTML_BYTES = 8192;
-
-    private TextPosition beginTitle;
-    private TextPosition afterEndTitle;
-    private String titleGuess;
-    private boolean suppressParagraphs;
-    private boolean onFirstPage = true;
-   
-    /**
-     * Constructor.
-     * 
-     * @throws IOException If there is an error during initialization.
-     */
-    public PDFText2HTML() throws IOException 
-    {
-        titleGuess = "";
-        beginTitle = null;
-        afterEndTitle = null;
-        suppressParagraphs = false;
-    }
-
-    /**
-     * Write the header to the output document.
-     * 
-     * @throws IOException If there is a problem writing out the header to the document.
-     */
-    protected void writeHeader() throws IOException 
-    {
-        StringBuffer buf = new StringBuffer(INITIAL_PDF_TO_HTML_BYTES);
-        buf.append("<html><head>");
-        buf.append("<title>");
-        buf.append(getTitleGuess());
-        buf.append("</title>");
-        buf.append("</head>");
-        buf.append("<body>\n");
-        getOutput().write(buf.toString());
-    }
-   
-    /**
-     * The guess to the document title.
-     * 
-     * @return A string that is the title of this document.
-     */
-    protected String getTitleGuess() 
-    {
-        return titleGuess;
-    }
-   
-    /**
-     * @see PDFTextStripper#flushText
-     */
-    protected void flushText() throws IOException 
-    {
-        Iterator textIter = getCharactersByArticle().iterator();
-      
-        if (onFirstPage) 
-        {
-            guessTitle(textIter);
-            writeHeader();
-            onFirstPage = false;
-        }
-        super.flushText();
-    }
-    
-    /**
-     * @see PDFTextStripper#endDocument( PDDocument )
-     */
-    public void endDocument(PDDocument pdf) throws IOException 
-    {
-        output.write("</body></html>");      
-    }
-
-    /**
-     * This method will attempt to guess the title of the document.
-     * 
-     * @param textIter The characters on the first page.
-     * @return The text position that is guessed to be the title.
-     */
-    protected TextPosition guessTitle(Iterator textIter) 
-    {
-        float lastFontSize = -1.0f;
-        int stringsInFont = 0;
-        StringBuffer titleText = new StringBuffer();
-        while (textIter.hasNext()) 
-        {
-            Iterator textByArticle = ((List)textIter.next()).iterator();
-            while( textByArticle.hasNext() )
-            {
-                TextPosition position = (TextPosition) textByArticle.next();
-                float currentFontSize = position.getFontSize();
-                if (currentFontSize != lastFontSize) 
-                {
-                    if (beginTitle != null) 
-                    { // font change in candidate title.
-                        if (stringsInFont == 0) 
-                        {
-                            beginTitle = null; // false alarm
-                            titleText.setLength(0);
-                        } 
-                        else 
-                        {
-                            // had a significant font with some words: call it a title
-                            titleGuess = titleText.toString();
-                            log.debug("Title candidate =" + titleGuess);
-                            afterEndTitle = position;
-                            return beginTitle;
-                        }
-                    } 
-                    else 
-                    { // font change and begin == null
-                        if (currentFontSize > 13.0f) 
-                        { // most body text is 12pt max I guess
-                            beginTitle = position;
-                        }
-                    }
-         
-                    lastFontSize = currentFontSize;
-                    stringsInFont = 0;
-                } 
-                stringsInFont++;
-                if (beginTitle != null)
-                {
-                    titleText.append(position.getCharacter()+" ");
-                }
-            }
-        }
-        return beginTitle; // null
-    }
-    
-    /**
-     * Write out the paragraph separator.
-     * 
-     * @throws IOException If there is an error writing to the stream.
-     */
-    protected void startParagraph() throws IOException 
-    {
-        if (! suppressParagraphs) 
-        {
-            getOutput().write("<p>");
-        }
-    }
-    /**
-     * Write out the paragraph separator.
-     * 
-     * @throws IOException If there is an error writing to the stream.
-     */
-    protected void endParagraph() throws IOException 
-    {
-        if (! suppressParagraphs) 
-        {
-            getOutput().write("</p>");
-        }
-    }
-    
-    /**
-     * @see PDFTextStripper#writeCharacters( TextPosition )
-     */
-    protected void writeCharacters(TextPosition position ) throws IOException 
-    {
-        if (position == beginTitle) 
-        {
-            output.write("<H1>");
-            suppressParagraphs = true;
-        } 
-        if (position == afterEndTitle) 
-        {
-            output.write("</H1>");  // end title and start first paragraph
-            suppressParagraphs = false;
-        }
-      
-        String chars = position.getCharacter();
-
-        for (int i = 0; i < chars.length(); i++) 
-        {
-            char c = chars.charAt(i);
-            if ((c < 32) || (c > 126)) 
-            {
-                int charAsInt = c;
-                output.write("&#" + charAsInt + ";");
-            } 
-            else 
-            {
-                switch (c) 
-                {
-                    case 34:
-                        output.write("&quot;");
-                        break;
-                    case 38:
-                        output.write("&amp;");
-                        break;
-                    case 60:
-                        output.write("&lt;");
-                        break;
-                    case 62:
-                        output.write("&gt;");
-                        break;
-                    default:
-                        output.write(c);
-                }
-            }
-        }
-    }
-    
-    /**
-     * @return Returns the suppressParagraphs.
-     */
-    public boolean isSuppressParagraphs()
-    {
-        return suppressParagraphs;
-    }
-    /**
-     * @param shouldSuppressParagraphs The suppressParagraphs to set.
-     */
-    public void setSuppressParagraphs(boolean shouldSuppressParagraphs)
-    {
-        this.suppressParagraphs = shouldSuppressParagraphs;
-    }
-}
-\ No newline at end of file