aboutsummaryrefslogtreecommitdiff
path: root/src/main/java/at/knowcenter/wag/exactparser/ParseDocument.java
diff options
context:
space:
mode:
authortknall <tknall@7b5415b0-85f9-ee4d-85bd-d5d0c3b42d1c>2013-01-09 15:41:29 +0000
committertknall <tknall@7b5415b0-85f9-ee4d-85bd-d5d0c3b42d1c>2013-01-09 15:41:29 +0000
commit535a04fa05f739ec16dd81666e3b0f82dfbd442d (patch)
tree0804f301c1a9ceb303a8441b7b29244fc8eb7ff0 /src/main/java/at/knowcenter/wag/exactparser/ParseDocument.java
parent1efaf6fd5619dfa95c9d7e8c71eda4c2ffba4998 (diff)
downloadpdf-as-3-535a04fa05f739ec16dd81666e3b0f82dfbd442d.tar.gz
pdf-as-3-535a04fa05f739ec16dd81666e3b0f82dfbd442d.tar.bz2
pdf-as-3-535a04fa05f739ec16dd81666e3b0f82dfbd442d.zip
pdf-as-lib maven project files moved to pdf-as-lib
git-svn-id: https://joinup.ec.europa.eu/svn/pdf-as/pdf-as/trunk@926 7b5415b0-85f9-ee4d-85bd-d5d0c3b42d1c
Diffstat (limited to 'src/main/java/at/knowcenter/wag/exactparser/ParseDocument.java')
-rw-r--r--src/main/java/at/knowcenter/wag/exactparser/ParseDocument.java272
1 files changed, 0 insertions, 272 deletions
diff --git a/src/main/java/at/knowcenter/wag/exactparser/ParseDocument.java b/src/main/java/at/knowcenter/wag/exactparser/ParseDocument.java
deleted file mode 100644
index fbaa4de..0000000
--- a/src/main/java/at/knowcenter/wag/exactparser/ParseDocument.java
+++ /dev/null
@@ -1,272 +0,0 @@
-/**
- * <copyright> Copyright 2006 by Know-Center, Graz, Austria </copyright>
- * PDF-AS has been contracted by the E-Government Innovation Center EGIZ, a
- * joint initiative of the Federal Chancellery Austria and Graz University of
- * Technology.
- *
- * Licensed under the EUPL, Version 1.1 or - as soon they will be approved by
- * the European Commission - subsequent versions of the EUPL (the "Licence");
- * You may not use this work except in compliance with the Licence.
- * You may obtain a copy of the Licence at:
- * http://www.osor.eu/eupl/
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the Licence is distributed on an "AS IS" basis,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the Licence for the specific language governing permissions and
- * limitations under the Licence.
- *
- * This product combines work with different licenses. See the "NOTICE" text
- * file for details on the various modules and licenses.
- * The "NOTICE" text file is part of the distribution. Any derivative works
- * that you distribute must include a readable copy of the "NOTICE" text file.
- *
- * $Id: ParseDocument.java,v 1.1 2006/08/25 17:00:59 wprinz Exp $
- */
-package at.knowcenter.wag.exactparser;
-
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Iterator;
-import java.util.List;
-
-import at.knowcenter.wag.exactparser.parsing.PDFUtils;
-import at.knowcenter.wag.exactparser.parsing.results.DictionaryParseResult;
-import at.knowcenter.wag.exactparser.parsing.results.FooterParseResult;
-import at.knowcenter.wag.exactparser.parsing.results.IndirectObjectReferenceParseResult;
-import at.knowcenter.wag.exactparser.parsing.results.NameParseResult;
-import at.knowcenter.wag.exactparser.parsing.results.NumberParseResult;
-import at.knowcenter.wag.exactparser.parsing.results.ObjectParseResult;
-import at.knowcenter.wag.exactparser.parsing.results.StartXRefParseResult;
-import at.knowcenter.wag.exactparser.parsing.results.TrailerParseResult;
-import at.knowcenter.wag.exactparser.parsing.results.XRefSectionParseResult;
-
-
-/**
- * Test class.
- * @author wprinz
- */
-public class ParseDocument
-{
-
- public static final String DOCUMENT = "C:/wprinz/temp.pdf";
-
- public static final byte[] EGIZ_DICT_NAME = { 'E', 'G', 'I', 'Z', 'S', 'i',
- 'g', 'D', 'i', 'c', 't' };
-
- public static final byte[] EGIZ_ODS_NAME = { 'O', 'D', 'S' };
-
- public static final byte[] EGIZ_XOBJ_NAME = { 'S', 'i', 'g', 'X', 'O', 'b',
- 'j', 'e', 'c', 't' };
-
- /**
- * @param args
- */
- public static void main(String[] args)
- {
-
- try
- {
- File in = new File(DOCUMENT);
- FileInputStream fis = new FileInputStream(in);
- byte[] pdf = new byte[(int) in.length()];
- fis.read(pdf);
- fis.close();
- fis = null;
-
- List blocks = parseDocument(pdf);
-
- Iterator it = blocks.iterator();
- while (it.hasNext())
- {
- FooterParseResult bpr = (FooterParseResult) it.next();
-
- System.out.print("block from " + bpr.start_index + " to " + bpr.next_index);
-
- if (bpr.tpr.root != null)
- {
- int root_index = PDFUtils.getObjectOffsetFromXRefByIndirectObjectReference(bpr.xpr, bpr.tpr.root.ior);
- ObjectParseResult root_opr = PDFUtils.parseObject(pdf, root_index);
- DictionaryParseResult root_dpr = (DictionaryParseResult) root_opr.object;
-
- int egiz_index = PDFUtils.indexOfName(pdf, root_dpr.names, EGIZ_DICT_NAME);
- if (egiz_index >= 0)
- {
- System.out.print(" == EGIZDict");
- }
- }
-
- System.out.println();
- }
-
- }
- catch (IOException e)
- {
- e.printStackTrace();
- }
- }
-
- public static List parseDocument(final byte[] pdf) throws IOException
- {
- //HeaderParseResult hpr = PDFUtils.parseHeader(pdf, 0);
- //System.out.println("PDF-version = " + hpr.major + "." + hpr.minor);
-
- List blocks = new ArrayList();
-
- int last_start_xref = PDFUtils.findLastStartXRef(pdf);
- StartXRefParseResult last_sxpr = PDFUtils.parseStartXRef(pdf, last_start_xref);
- int xref_index = last_sxpr.xref_index;
-
- for (;;)
- {
- FooterParseResult fpr = PDFUtils.parseFooter(pdf, xref_index);
- blocks.add(0, fpr);
-
- //System.out.println("tpr.has_predecessor = " + fpr.tpr.has_predecessor);
- if (!fpr.tpr.has_predecessor)
- {
- // eventually parse the PDF header here.
- break;
- }
-
- //System.out.println("tpr.prev = " + fpr.tpr.getPrev());
-
- xref_index = fpr.tpr.getPrev();
- }
-
- return blocks;
- }
-
- // public static void parseEGIZ()
- // {
- //
- // int root_index =
- // PDFUtils.getObjectOffsetFromXRefByIndirectObjectReference(bpr.xpr,
- // bpr.tpr.root.ior);
- // ObjectParseResult root_opr = PDFUtils.parseObject(pdf, root_index);
- // DictionaryParseResult root_dpr = (DictionaryParseResult) root_opr.object;
- //
- // int egiz_index = PDFUtils.indexOfName(pdf, root_dpr.names, EGIZ_DICT_NAME);
- // if (egiz_index >= 0)
- // {
- // IndirectObjectReferenceParseResult egiz_iorpr =
- // (IndirectObjectReferenceParseResult) root_dpr.values.get(egiz_index);
- // System.out.println("EGIZ signature info at = " + egiz_iorpr);
- //
- // int egiz_dict_index =
- // PDFUtils.getObjectOffsetFromXRefByIndirectObjectReference(bpr.xpr,
- // egiz_iorpr.ior);
- // ObjectParseResult opr = PDFUtils.parseObject(pdf, egiz_dict_index);
- // DictionaryParseResult egiz_dict = (DictionaryParseResult) opr.object;
- //
- // for (int i = 0; i < egiz_dict.names.size(); i++)
- // {
- // NameParseResult npr = egiz_dict.names.get(i);
- // int len = npr.next_index - npr.name_start_index;
- // byte[] name = new byte[len];
- // System.arraycopy(pdf, npr.name_start_index, name, 0, len);
- // System.out.print(" " + new String(name, "US-ASCII") + " = ");
- //
- // System.out.println(egiz_dict.values.get(i));
- // }
- //
- // // int key = PDFUtils.indexOfName(pdf, egiz_dict.names, new byte [] { 'K',
- // // 'e', 'y'});
- // // IndirectObjectReferenceParseResult key_iorpr =
- // // (IndirectObjectReferenceParseResult) egiz_dict.values.get(key);
- // // int key_offset =
- // // PDFUtils.getObjectOffsetFromXRefByIndirectObjectReference(xpr,
- // // key_iorpr.ior);
- // // ObjectParseResult key_opr = PDFUtils.parseObject(pdf, key_offset);
- // // StreamParseResult spr = (StreamParseResult) key_opr.object;
- // // System.out.println(" key stream from " + spr.content_start_index + " to
- // // " + spr.content_end_index);
- // //
- // // int data_len = spr.content_end_index - spr.content_start_index;
- // // byte [] data = new byte[data_len];
- // // System.arraycopy(pdf, spr.content_start_index, data, 0, data_len);
- // // System.out.println(new String(data, "US-ASCII"));
- //
- // }
- // else
- // {
- // System.out.println("No EGIZ block found.");
- // }
- //
- // }
-
- public static byte[] getOriginalDocument(final File file_name) throws IOException
- {
- FileInputStream fis = new FileInputStream(file_name);
- byte[] pdf = new byte[(int) file_name.length()];
- fis.read(pdf);
- fis.close();
- fis = null;
-
- int last_start_xref = PDFUtils.findLastStartXRef(pdf);
-
- StartXRefParseResult sxpr = PDFUtils.parseStartXRef(pdf, last_start_xref);
-
- XRefSectionParseResult xpr = PDFUtils.parseXRefSection(pdf, sxpr.xref_index);
-
- TrailerParseResult tpr = PDFUtils.parseTrailer(pdf, xpr.next_index);
-
- System.out.println("tpr.info = " + tpr.info);
- System.out.println("tpr.root = " + tpr.root);
- System.out.println("tpr.size = " + tpr.size);
-
- System.out.println("tpr.has_predecessor = " + tpr.has_predecessor);
- if (tpr.has_predecessor)
- {
- System.out.println("tpr.prev = " + tpr.getPrev());
- }
-
- int root_index = PDFUtils.getObjectOffsetFromXRefByIndirectObjectReference(xpr, tpr.root.ior);
- ObjectParseResult root_opr = PDFUtils.parseObject(pdf, root_index);
- DictionaryParseResult root_dpr = (DictionaryParseResult) root_opr.object;
-
- byte[] EGIZ_TYPE = new String("EGIZSigDict").getBytes("US-ASCII");
- int egiz_index = PDFUtils.indexOfName(pdf, root_dpr.names, EGIZ_TYPE);
- if (egiz_index >= 0)
- {
- System.out.println("The document is EGIZ-signed. ==> extract original document");
-
- IndirectObjectReferenceParseResult egiz_iorpr = (IndirectObjectReferenceParseResult) root_dpr.values.get(egiz_index);
- System.out.println("EGIZ signature info at = " + egiz_iorpr);
-
- int egiz_dict_index = PDFUtils.getObjectOffsetFromXRefByIndirectObjectReference(xpr, egiz_iorpr.ior);
- ObjectParseResult opr = PDFUtils.parseObject(pdf, egiz_dict_index);
- DictionaryParseResult egiz_dict = (DictionaryParseResult) opr.object;
-
- for (int i = 0; i < egiz_dict.names.size(); i++)
- {
- NameParseResult npr = (NameParseResult) egiz_dict.names.get(i);
- int len = npr.next_index - npr.name_start_index;
- byte[] name = new byte[len];
- System.arraycopy(pdf, npr.name_start_index, name, 0, len);
- System.out.print(" " + new String(name, "US-ASCII") + " = ");
-
- System.out.println(egiz_dict.values.get(i));
- }
-
- // Original document size
- int key = PDFUtils.indexOfName(pdf, egiz_dict.names, new byte[] { 'O',
- 'D', 'S' });
- NumberParseResult ods = (NumberParseResult) egiz_dict.values.get(key);
-
- int original_document_size = ods.number;
- System.out.println("Original Document Size = " + original_document_size);
-
- byte[] original = new byte[original_document_size];
- System.arraycopy(pdf, 0, original, 0, original_document_size);
-
- return original;
- }
-
- System.out.println("No EGIZ block found. ==> the whold document is the original document");
- return pdf;
- }
-
-}