1 files changed, 201 insertions, 0 deletions
diff --git a/src/main/java/org/pdfbox/util/Splitter.java b/src/main/java/org/pdfbox/util/Splitter.java
new file mode 100644
index 0000000..c03a989
--- /dev/null
+++ b/src/main/java/org/pdfbox/util/Splitter.java
@@ -0,0 +1,201 @@
+/**
+ * Copyright (c) 2004-2005, www.pdfbox.org
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ * 3. Neither the name of pdfbox; nor the names of its
+ *    contributors may be used to endorse or promote products derived from this
+ *    software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * http://www.pdfbox.org
+ *
+ */
+package org.pdfbox.util;
+
+import org.pdfbox.pdmodel.PDDocument;
+import org.pdfbox.pdmodel.PDPage;
+
+import java.io.IOException;
+
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+
+/**
+ * Split a document into several other documents.
+ *
+ * @author Mario Ivankovits (mario@ops.co.at)
+ * @author Ben Litchfield (ben@benlitchfield.com)
+ * @version $Revision: 1.6 $
+ */
+public class Splitter
+{
+    /**
+     * The source PDF document.
+     */
+    protected PDDocument pdfDocument;
+
+    /**
+     * The current PDF document that receives the split pages.
+     */
+    protected PDDocument currentDocument = null;
+
+    private int splitAtPage=1;
+    private List newDocuments = null;
+
+    /**
+     * The current page number that we are processing, zero based.
+     */
+    protected int pageNumber = 0;
+
+    /**
+     * This will take a document and split into several other documents.  State left
+     * over from an earlier call is discarded, so a Splitter can be reused.
+     *
+     * @param document The document to split.
+     *
+     * @return A list of all the split documents.
+     *
+     * @throws IOException If there is an IOError
+     */
+    public List split( PDDocument document ) throws IOException
+    {
+        newDocuments = new ArrayList();
+        pdfDocument = document;
+        // Reset, or a second call could append to the previous call's last document.
+        currentDocument = null;
+        pageNumber = 0;
+
+        List pages = pdfDocument.getDocumentCatalog().getAllPages();
+        processPages(pages);
+        return newDocuments;
+    }
+
+    /**
+     * This will tell the splitting algorithm where to split the pages.  The default
+     * is 1, so every page will become a new document.  If it was two then each
+     * document would contain 2 pages.  So if the source document had 5 pages it would
+     * split into 3 new documents, 2 containing 2 pages and 1 containing one page.
+     *
+     * @param split The number of pages each split document should contain.
+     *
+     * @throws IllegalArgumentException If split is less than one.
+     */
+    public void setSplitAtPage( int split )
+    {
+        if( split <= 0 )
+        {
+            throw new IllegalArgumentException( "Error split must be at least one page." );
+        }
+        splitAtPage = split;
+    }
+
+    /**
+     * This will return how many pages each split document will contain.
+     *
+     * @return The split parameter.
+     */
+    public int getSplitAtPage()
+    {
+        return splitAtPage;
+    }
+
+    /**
+     * Interface method to handle the start of the page processing.
+     *
+     * @param pages The list of pages from the source document.
+     *
+     * @throws IOException If an IO error occurs.
+     */
+    protected void processPages(List pages) throws IOException
+    {
+        for( Iterator iter = pages.iterator(); iter.hasNext(); )
+        {
+            processNextPage( (PDPage)iter.next() );
+        }
+    }
+
+    /**
+     * Interface method, you can control where a document gets split by implementing
+     * this method.  By default a split occurs at every page.  If you wanted to split
+     * based on some complex logic then you could override this method.  For example.
+     * <code>
+     * protected void createNewDocumentIfNecessary()
+     * {
+     *     if( isPrime( pageNumber ) )
+     *     {
+     *         super.createNewDocumentIfNecessary();
+     *     }
+     * }
+     * </code>
+     *
+     * @throws IOException If there is an error creating the new document.
+     */
+    protected void createNewDocumentIfNecessary() throws IOException
+    {
+        if (isNewDocNecessary())
+        {
+            createNewDocument();
+        }
+    }
+
+    /**
+     * Check if it is necessary to create a new document.
+     *
+     * @return true If a new document should be created.
+     */
+    protected boolean isNewDocNecessary()
+    {
+        return pageNumber % splitAtPage == 0 || currentDocument == null;
+    }
+
+    /**
+     * Create a new document to write the split contents to.
+     *
+     * @throws IOException If there is a problem creating the new document.
+     */
+    protected void createNewDocument() throws IOException
+    {
+        currentDocument = new PDDocument();
+        currentDocument.setDocumentInformation(pdfDocument.getDocumentInformation());
+        currentDocument.getDocumentCatalog().setViewerPreferences(
+            pdfDocument.getDocumentCatalog().getViewerPreferences());
+        newDocuments.add(currentDocument);
+    }
+
+    /**
+     * Interface to start processing a new page.
+     *
+     * @param page The page that is about to get processed.
+     *
+     * @throws IOException If there is an error creating the new document.
+     */
+    protected void processNextPage( PDPage page ) throws IOException
+    {
+        createNewDocumentIfNecessary();
+        PDPage imported = currentDocument.importPage( page );
+        imported.setCropBox( page.findCropBox() );
+        imported.setMediaBox( page.findMediaBox() );
+        imported.setResources( page.findResources() );
+        imported.setRotation( page.findRotation() );
+        pageNumber++;
+    }
+}
+\ No newline at end of file