From 535a04fa05f739ec16dd81666e3b0f82dfbd442d Mon Sep 17 00:00:00 2001 From: tknall Date: Wed, 9 Jan 2013 15:41:29 +0000 Subject: pdf-as-lib maven project files moved to pdf-as-lib git-svn-id: https://joinup.ec.europa.eu/svn/pdf-as/pdf-as/trunk@926 7b5415b0-85f9-ee4d-85bd-d5d0c3b42d1c --- .../knowcenter/wag/exactparser/ParseDocument.java | 272 --------------------- 1 file changed, 272 deletions(-) delete mode 100644 src/main/java/at/knowcenter/wag/exactparser/ParseDocument.java (limited to 'src/main/java/at/knowcenter/wag/exactparser/ParseDocument.java') diff --git a/src/main/java/at/knowcenter/wag/exactparser/ParseDocument.java b/src/main/java/at/knowcenter/wag/exactparser/ParseDocument.java deleted file mode 100644 index fbaa4de..0000000 --- a/src/main/java/at/knowcenter/wag/exactparser/ParseDocument.java +++ /dev/null @@ -1,272 +0,0 @@ -/** - * Copyright 2006 by Know-Center, Graz, Austria - * PDF-AS has been contracted by the E-Government Innovation Center EGIZ, a - * joint initiative of the Federal Chancellery Austria and Graz University of - * Technology. - * - * Licensed under the EUPL, Version 1.1 or - as soon they will be approved by - * the European Commission - subsequent versions of the EUPL (the "Licence"); - * You may not use this work except in compliance with the Licence. - * You may obtain a copy of the Licence at: - * http://www.osor.eu/eupl/ - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the Licence is distributed on an "AS IS" basis, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the Licence for the specific language governing permissions and - * limitations under the Licence. - * - * This product combines work with different licenses. See the "NOTICE" text - * file for details on the various modules and licenses. - * The "NOTICE" text file is part of the distribution. Any derivative works - * that you distribute must include a readable copy of the "NOTICE" text file. - * - * $Id: ParseDocument.java,v 1.1 2006/08/25 17:00:59 wprinz Exp $ - */ -package at.knowcenter.wag.exactparser; - -import java.io.File; -import java.io.FileInputStream; -import java.io.IOException; -import java.util.ArrayList; -import java.util.Iterator; -import java.util.List; - -import at.knowcenter.wag.exactparser.parsing.PDFUtils; -import at.knowcenter.wag.exactparser.parsing.results.DictionaryParseResult; -import at.knowcenter.wag.exactparser.parsing.results.FooterParseResult; -import at.knowcenter.wag.exactparser.parsing.results.IndirectObjectReferenceParseResult; -import at.knowcenter.wag.exactparser.parsing.results.NameParseResult; -import at.knowcenter.wag.exactparser.parsing.results.NumberParseResult; -import at.knowcenter.wag.exactparser.parsing.results.ObjectParseResult; -import at.knowcenter.wag.exactparser.parsing.results.StartXRefParseResult; -import at.knowcenter.wag.exactparser.parsing.results.TrailerParseResult; -import at.knowcenter.wag.exactparser.parsing.results.XRefSectionParseResult; - - -/** - * Test class. - * @author wprinz - */ -public class ParseDocument -{ - - public static final String DOCUMENT = "C:/wprinz/temp.pdf"; - - public static final byte[] EGIZ_DICT_NAME = { 'E', 'G', 'I', 'Z', 'S', 'i', - 'g', 'D', 'i', 'c', 't' }; - - public static final byte[] EGIZ_ODS_NAME = { 'O', 'D', 'S' }; - - public static final byte[] EGIZ_XOBJ_NAME = { 'S', 'i', 'g', 'X', 'O', 'b', - 'j', 'e', 'c', 't' }; - - /** - * @param args - */ - public static void main(String[] args) - { - - try - { - File in = new File(DOCUMENT); - FileInputStream fis = new FileInputStream(in); - byte[] pdf = new byte[(int) in.length()]; - fis.read(pdf); - fis.close(); - fis = null; - - List blocks = parseDocument(pdf); - - Iterator it = blocks.iterator(); - while (it.hasNext()) - { - FooterParseResult bpr = (FooterParseResult) it.next(); - - System.out.print("block from " + bpr.start_index + " to " + bpr.next_index); - - if (bpr.tpr.root != null) - { - int root_index = PDFUtils.getObjectOffsetFromXRefByIndirectObjectReference(bpr.xpr, bpr.tpr.root.ior); - ObjectParseResult root_opr = PDFUtils.parseObject(pdf, root_index); - DictionaryParseResult root_dpr = (DictionaryParseResult) root_opr.object; - - int egiz_index = PDFUtils.indexOfName(pdf, root_dpr.names, EGIZ_DICT_NAME); - if (egiz_index >= 0) - { - System.out.print(" == EGIZDict"); - } - } - - System.out.println(); - } - - } - catch (IOException e) - { - e.printStackTrace(); - } - } - - public static List parseDocument(final byte[] pdf) throws IOException - { - //HeaderParseResult hpr = PDFUtils.parseHeader(pdf, 0); - //System.out.println("PDF-version = " + hpr.major + "." + hpr.minor); - - List blocks = new ArrayList(); - - int last_start_xref = PDFUtils.findLastStartXRef(pdf); - StartXRefParseResult last_sxpr = PDFUtils.parseStartXRef(pdf, last_start_xref); - int xref_index = last_sxpr.xref_index; - - for (;;) - { - FooterParseResult fpr = PDFUtils.parseFooter(pdf, xref_index); - blocks.add(0, fpr); - - //System.out.println("tpr.has_predecessor = " + fpr.tpr.has_predecessor); - if (!fpr.tpr.has_predecessor) - { - // eventually parse the PDF header here. - break; - } - - //System.out.println("tpr.prev = " + fpr.tpr.getPrev()); - - xref_index = fpr.tpr.getPrev(); - } - - return blocks; - } - - // public static void parseEGIZ() - // { - // - // int root_index = - // PDFUtils.getObjectOffsetFromXRefByIndirectObjectReference(bpr.xpr, - // bpr.tpr.root.ior); - // ObjectParseResult root_opr = PDFUtils.parseObject(pdf, root_index); - // DictionaryParseResult root_dpr = (DictionaryParseResult) root_opr.object; - // - // int egiz_index = PDFUtils.indexOfName(pdf, root_dpr.names, EGIZ_DICT_NAME); - // if (egiz_index >= 0) - // { - // IndirectObjectReferenceParseResult egiz_iorpr = - // (IndirectObjectReferenceParseResult) root_dpr.values.get(egiz_index); - // System.out.println("EGIZ signature info at = " + egiz_iorpr); - // - // int egiz_dict_index = - // PDFUtils.getObjectOffsetFromXRefByIndirectObjectReference(bpr.xpr, - // egiz_iorpr.ior); - // ObjectParseResult opr = PDFUtils.parseObject(pdf, egiz_dict_index); - // DictionaryParseResult egiz_dict = (DictionaryParseResult) opr.object; - // - // for (int i = 0; i < egiz_dict.names.size(); i++) - // { - // NameParseResult npr = egiz_dict.names.get(i); - // int len = npr.next_index - npr.name_start_index; - // byte[] name = new byte[len]; - // System.arraycopy(pdf, npr.name_start_index, name, 0, len); - // System.out.print(" " + new String(name, "US-ASCII") + " = "); - // - // System.out.println(egiz_dict.values.get(i)); - // } - // - // // int key = PDFUtils.indexOfName(pdf, egiz_dict.names, new byte [] { 'K', - // // 'e', 'y'}); - // // IndirectObjectReferenceParseResult key_iorpr = - // // (IndirectObjectReferenceParseResult) egiz_dict.values.get(key); - // // int key_offset = - // // PDFUtils.getObjectOffsetFromXRefByIndirectObjectReference(xpr, - // // key_iorpr.ior); - // // ObjectParseResult key_opr = PDFUtils.parseObject(pdf, key_offset); - // // StreamParseResult spr = (StreamParseResult) key_opr.object; - // // System.out.println(" key stream from " + spr.content_start_index + " to - // // " + spr.content_end_index); - // // - // // int data_len = spr.content_end_index - spr.content_start_index; - // // byte [] data = new byte[data_len]; - // // System.arraycopy(pdf, spr.content_start_index, data, 0, data_len); - // // System.out.println(new String(data, "US-ASCII")); - // - // } - // else - // { - // System.out.println("No EGIZ block found."); - // } - // - // } - - public static byte[] getOriginalDocument(final File file_name) throws IOException - { - FileInputStream fis = new FileInputStream(file_name); - byte[] pdf = new byte[(int) file_name.length()]; - fis.read(pdf); - fis.close(); - fis = null; - - int last_start_xref = PDFUtils.findLastStartXRef(pdf); - - StartXRefParseResult sxpr = PDFUtils.parseStartXRef(pdf, last_start_xref); - - XRefSectionParseResult xpr = PDFUtils.parseXRefSection(pdf, sxpr.xref_index); - - TrailerParseResult tpr = PDFUtils.parseTrailer(pdf, xpr.next_index); - - System.out.println("tpr.info = " + tpr.info); - System.out.println("tpr.root = " + tpr.root); - System.out.println("tpr.size = " + tpr.size); - - System.out.println("tpr.has_predecessor = " + tpr.has_predecessor); - if (tpr.has_predecessor) - { - System.out.println("tpr.prev = " + tpr.getPrev()); - } - - int root_index = PDFUtils.getObjectOffsetFromXRefByIndirectObjectReference(xpr, tpr.root.ior); - ObjectParseResult root_opr = PDFUtils.parseObject(pdf, root_index); - DictionaryParseResult root_dpr = (DictionaryParseResult) root_opr.object; - - byte[] EGIZ_TYPE = new String("EGIZSigDict").getBytes("US-ASCII"); - int egiz_index = PDFUtils.indexOfName(pdf, root_dpr.names, EGIZ_TYPE); - if (egiz_index >= 0) - { - System.out.println("The document is EGIZ-signed. ==> extract original document"); - - IndirectObjectReferenceParseResult egiz_iorpr = (IndirectObjectReferenceParseResult) root_dpr.values.get(egiz_index); - System.out.println("EGIZ signature info at = " + egiz_iorpr); - - int egiz_dict_index = PDFUtils.getObjectOffsetFromXRefByIndirectObjectReference(xpr, egiz_iorpr.ior); - ObjectParseResult opr = PDFUtils.parseObject(pdf, egiz_dict_index); - DictionaryParseResult egiz_dict = (DictionaryParseResult) opr.object; - - for (int i = 0; i < egiz_dict.names.size(); i++) - { - NameParseResult npr = (NameParseResult) egiz_dict.names.get(i); - int len = npr.next_index - npr.name_start_index; - byte[] name = new byte[len]; - System.arraycopy(pdf, npr.name_start_index, name, 0, len); - System.out.print(" " + new String(name, "US-ASCII") + " = "); - - System.out.println(egiz_dict.values.get(i)); - } - - // Original document size - int key = PDFUtils.indexOfName(pdf, egiz_dict.names, new byte[] { 'O', - 'D', 'S' }); - NumberParseResult ods = (NumberParseResult) egiz_dict.values.get(key); - - int original_document_size = ods.number; - System.out.println("Original Document Size = " + original_document_size); - - byte[] original = new byte[original_document_size]; - System.arraycopy(pdf, 0, original, 0, original_document_size); - - return original; - } - - System.out.println("No EGIZ block found. ==> the whold document is the original document"); - return pdf; - } - -} -- cgit v1.2.3