diff options
Diffstat (limited to 'eaaf_core/src/main/java/at/gv/egiz/eaaf/core/impl/utils/DomUtils.java')
-rw-r--r-- | eaaf_core/src/main/java/at/gv/egiz/eaaf/core/impl/utils/DomUtils.java | 1158 |
1 files changed, 1158 insertions, 0 deletions
diff --git a/eaaf_core/src/main/java/at/gv/egiz/eaaf/core/impl/utils/DomUtils.java b/eaaf_core/src/main/java/at/gv/egiz/eaaf/core/impl/utils/DomUtils.java new file mode 100644 index 00000000..818523d0 --- /dev/null +++ b/eaaf_core/src/main/java/at/gv/egiz/eaaf/core/impl/utils/DomUtils.java @@ -0,0 +1,1158 @@ +/* + * Copyright 2017 Graz University of Technology EAAF-Core Components has been developed in a + * cooperation between EGIZ, A-SIT Plus, A-SIT, and Graz University of Technology. + * + * Licensed under the EUPL, Version 1.2 or - as soon they will be approved by the European + * Commission - subsequent versions of the EUPL (the "Licence"); You may not use this work except in + * compliance with the Licence. You may obtain a copy of the Licence at: + * https://joinup.ec.europa.eu/news/understanding-eupl-v12 + * + * Unless required by applicable law or agreed to in writing, software distributed under the Licence + * is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express + * or implied. See the Licence for the specific language governing permissions and limitations under + * the Licence. + * + * This product combines work with different licenses. See the "NOTICE" text file for details on the + * various modules and licenses. The "NOTICE" text file is part of the distribution. Any derivative + * works that you distribute must include a readable copy of the "NOTICE" text file. +*/ + + + +package at.gv.egiz.eaaf.core.impl.utils; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; +import java.util.Set; +import java.util.Vector; +import javax.xml.parsers.DocumentBuilder; +import javax.xml.parsers.DocumentBuilderFactory; +import javax.xml.parsers.ParserConfigurationException; +import javax.xml.transform.OutputKeys; +import javax.xml.transform.Result; +import javax.xml.transform.Source; +import javax.xml.transform.Transformer; +import javax.xml.transform.TransformerException; +import javax.xml.transform.TransformerFactory; +import javax.xml.transform.dom.DOMSource; +import javax.xml.transform.stream.StreamResult; +import at.gv.egiz.eaaf.core.api.data.XMLNamespaceConstants; +import org.apache.commons.io.IOUtils; +import org.apache.commons.lang3.StringUtils; +import org.apache.xerces.parsers.DOMParser; +import org.apache.xerces.parsers.SAXParser; +import org.apache.xerces.parsers.XMLGrammarPreparser; +import org.apache.xerces.util.SymbolTable; +import org.apache.xerces.util.XMLGrammarPoolImpl; +import org.apache.xerces.xni.grammars.XMLGrammarDescription; +import org.apache.xerces.xni.grammars.XMLGrammarPool; +import org.apache.xerces.xni.parser.XMLInputSource; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.w3c.dom.Attr; +import org.w3c.dom.Document; +import org.w3c.dom.DocumentFragment; +import org.w3c.dom.Element; +import org.w3c.dom.NamedNodeMap; +import org.w3c.dom.Node; +import org.w3c.dom.NodeList; +import org.xml.sax.EntityResolver; +import org.xml.sax.ErrorHandler; +import org.xml.sax.InputSource; +import org.xml.sax.SAXException; + +/** + * Various utility functions for handling XML DOM trees. + * + *<p> + * The parsing methods in this class make use of some features internal to the Xerces DOM parser, + * mainly for performance reasons. As soon as JAXP (currently at version 1.2) is better at schema + * handling, it should be used as the parser interface. + *</p> + * + */ +public class DomUtils { + private static final Logger log = LoggerFactory.getLogger(DomUtils.class); + + /** Feature URI for namespace aware parsing. */ + private static final String NAMESPACES_FEATURE = "http://xml.org/sax/features/namespaces"; + /** Feature URI for validating parsing. */ + private static final String VALIDATION_FEATURE = "http://xml.org/sax/features/validation"; + /** Feature URI for schema validating parsing. */ + private static final String SCHEMA_VALIDATION_FEATURE = + "http://apache.org/xml/features/validation/schema"; + /** Feature URI for normalization of element/attribute values. */ + private static final String NORMALIZED_VALUE_FEATURE = + "http://apache.org/xml/features/validation/schema/normalized-value"; + /** Feature URI for parsing ignorable whitespace. */ + private static final String INCLUDE_IGNORABLE_WHITESPACE_FEATURE = + "http://apache.org/xml/features/dom/include-ignorable-whitespace"; + /** Feature URI for creating EntityReference nodes in the DOM tree. */ + private static final String CREATE_ENTITY_REF_NODES_FEATURE = + "http://apache.org/xml/features/dom/create-entity-ref-nodes"; + /** Property URI for providing external schema locations. */ + private static final String EXTERNAL_SCHEMA_LOCATION_PROPERTY = + "http://apache.org/xml/properties/schema/external-schemaLocation"; + /** + * Property URI for providing the external schema location for elements without a namespace. + */ + private static final String EXTERNAL_NO_NAMESPACE_SCHEMA_LOCATION_PROPERTY = + "http://apache.org/xml/properties/schema/external-noNamespaceSchemaLocation"; + + private static final String EXTERNAL_GENERAL_ENTITIES_FEATURE = + "http://xml.org/sax/features/external-general-entities"; + + private static final String EXTERNAL_PARAMETER_ENTITIES_FEATURE = + "http://xml.org/sax/features/external-parameter-entities"; + + public static final String DISALLOW_DOCTYPE_FEATURE = + "http://apache.org/xml/features/disallow-doctype-decl"; + + + + /** Property URI for the Xerces grammar pool. */ + private static final String GRAMMAR_POOL = org.apache.xerces.impl.Constants.XERCES_PROPERTY_PREFIX + + org.apache.xerces.impl.Constants.XMLGRAMMAR_POOL_PROPERTY; + /** A prime number for initializing the symbol table. */ + private static final int BIG_PRIME = 2039; + /** Symbol table for the grammar pool. */ + private static SymbolTable symbolTable = new SymbolTable(BIG_PRIME); + /** Xerces schema grammar pool. */ + private static XMLGrammarPool grammarPool = new XMLGrammarPoolImpl(); + /** + * Set holding the NamespaceURIs of the grammarPool, to prevent multiple entries of same grammars + * to the pool. + */ + private static Set grammarNamespaces; + + static { + grammarPool.lockPool(); + grammarNamespaces = new HashSet(); + } + + /** + * Preparse a schema and add it to the schema pool. The method only adds the schema to the pool if + * a schema having the same <code>systemId</code> (namespace URI) is not already present in the + * pool. + * + * @param inputStream An <code>InputStream</code> providing the contents of the schema. + * @param systemId The systemId (namespace URI) to use for the schema. + * @throws IOException An error occurred reading the schema. + */ + public static void addSchemaToPool(final InputStream inputStream, final String systemId) + throws IOException { + XMLGrammarPreparser preparser; + + if (!grammarNamespaces.contains(systemId)) { + + grammarNamespaces.add(systemId); + + // unlock the pool so that we can add another grammar + grammarPool.unlockPool(); + + // prepare the preparser + preparser = new XMLGrammarPreparser(symbolTable); + preparser.registerPreparser(XMLGrammarDescription.XML_SCHEMA, null); + preparser.setProperty(GRAMMAR_POOL, grammarPool); + preparser.setFeature(NAMESPACES_FEATURE, true); + preparser.setFeature(VALIDATION_FEATURE, true); + + // add the grammar to the pool + preparser.preparseGrammar(XMLGrammarDescription.XML_SCHEMA, + new XMLInputSource(null, systemId, null, inputStream, null)); + + // lock the pool again so that schemas are not added automatically + grammarPool.lockPool(); + } + } + + /** + * Parse an XML document from an <code>InputStream</code>. + * + *<p> + * It uses a <code>MOAEntityResolver</code> as the <code>EntityResolver</code> and a + * <code>MOAErrorHandler</code> as the <code>ErrorHandler</code>. + *</p> + * + * @param inputStream The <code>InputStream</code> containing the XML document. + * @param validating If <code>true</code>, parse validating. + * @param externalSchemaLocations A <code>String</code> containing namespace URI to schema + * location pairs, the same way it is accepted by the <code>xsi: + * schemaLocation</code> attribute. + * @param externalNoNamespaceSchemaLocation The schema location of the schema for elements without + * a namespace, the same way it is accepted by the + * <code>xsi:noNamespaceSchemaLocation</code> attribute. + * @param parserFeatures Map of features that should be set into XML parser + * @return The parsed XML document as a DOM tree. + * @throws SAXException An error occurred parsing the document. + * @throws IOException An error occurred reading the document. + * @throws ParserConfigurationException An error occurred configuring the XML parser. + */ + public static Document parseDocument(final InputStream inputStream, final boolean validating, + final String externalSchemaLocations, final String externalNoNamespaceSchemaLocation, + final Map<String, Object> parserFeatures) + throws SAXException, IOException, ParserConfigurationException { + + return parseDocument(inputStream, validating, externalSchemaLocations, + externalNoNamespaceSchemaLocation, new EaafDomEntityResolver(), null, parserFeatures); + } + + /** + * Parse an XML document from a <code>String</code>. + * + *<p> + * It uses a <code>MOAEntityResolver</code> as the <code>EntityResolver</code> and a + * <code>MOAErrorHandler</code> as the <code>ErrorHandler</code>. + *</p> + * + * @param xmlString The <code>String</code> containing the XML document. + * @param encoding The encoding of the XML document. + * @param validating If <code>true</code>, parse validating. + * @param externalSchemaLocations A <code>String</code> containing namespace URI to schema + * location pairs, the same way it is accepted by the <code>xsi: + * schemaLocation</code> attribute. + * @param externalNoNamespaceSchemaLocation The schema location of the schema for elements without + * a namespace, the same way it is accepted by the + * <code>xsi:noNamespaceSchemaLocation</code> attribute. + * @return The parsed XML document as a DOM tree. + * @throws SAXException An error occurred parsing the document. + * @throws IOException An error occurred reading the document. + * @throws ParserConfigurationException An error occurred configuring the XML parser. + */ + public static Document parseDocument(final String xmlString, final String encoding, + final boolean validating, final String externalSchemaLocations, + final String externalNoNamespaceSchemaLocation, final Map<String, Object> parserFeatures) + throws SAXException, IOException, ParserConfigurationException { + + final InputStream in = new ByteArrayInputStream(xmlString.getBytes(encoding)); + return parseDocument(in, validating, externalSchemaLocations, externalNoNamespaceSchemaLocation, + parserFeatures); + } + + /** + * Parse an XML document from a <code>String</code>. + * + *<p> + * It uses a <code>MOAEntityResolver</code> as the <code>EntityResolver</code> and a + * <code>MOAErrorHandler</code> as the <code>ErrorHandler</code>. + *</p> + * + * @param xmlString The <code>String</code> containing the XML document. + * @param encoding The encoding of the XML document. + * @param validating If <code>true</code>, parse validating. + * @param externalSchemaLocations A <code>String</code> containing namespace URI to schema + * location pairs, the same way it is accepted by the <code>xsi: + * schemaLocation</code> attribute. + * @param externalNoNamespaceSchemaLocation The schema location of the schema for elements without + * a namespace, the same way it is accepted by the + * <code>xsi:noNamespaceSchemaLocation</code> attribute. + * @return The parsed XML document as a DOM tree. + * @throws SAXException An error occurred parsing the document. + * @throws IOException An error occurred reading the document. + * @throws ParserConfigurationException An error occurred configuring the XML parser. + */ + public static Document parseDocument(final String xmlString, final String encoding, + final boolean validating, final String externalSchemaLocations, + final String externalNoNamespaceSchemaLocation) + throws SAXException, IOException, ParserConfigurationException { + + final InputStream in = new ByteArrayInputStream(xmlString.getBytes(encoding)); + return parseDocument(in, validating, externalSchemaLocations, externalNoNamespaceSchemaLocation, + null); + } + + /** + * Parse an UTF-8 encoded XML document from a <code>String</code>. + * + * @param xmlString The <code>String</code> containing the XML document. + * @param validating If <code>true</code>, parse validating. + * @param externalSchemaLocations A <code>String</code> containing namespace URI to schema + * location pairs, the same way it is accepted by the <code>xsi: + * schemaLocation</code> attribute. + * @param externalNoNamespaceSchemaLocation The schema location of the schema for elements without + * a namespace, the same way it is accepted by the + * <code>xsi:noNamespaceSchemaLocation</code> attribute. + * @return The parsed XML document as a DOM tree. + * @throws SAXException An error occurred parsing the document. + * @throws IOException An error occurred reading the document. + * @throws ParserConfigurationException An error occurred configuring the XML parser. + */ + public static Document parseDocument(final String xmlString, final boolean validating, + final String externalSchemaLocations, final String externalNoNamespaceSchemaLocation) + throws SAXException, IOException, ParserConfigurationException { + + return parseDocument(xmlString, "UTF-8", validating, externalSchemaLocations, + externalNoNamespaceSchemaLocation); + } + + /** + * Parse an XML document from an <code>InputStream</code>. + * + * @param inputStream The <code>InputStream</code> containing the XML document. + * @param validating If <code>true</code>, parse validating. + * @param externalSchemaLocations A <code>String</code> containing namespace URI to schema + * location pairs, the same way it is accepted by the <code>xsi: + * schemaLocation</code> attribute. + * @param externalNoNamespaceSchemaLocation The schema location of the schema for elements without + * a namespace, the same way it is accepted by the + * <code>xsi:noNamespaceSchemaLocation</code> attribute. + * @param entityResolver An <code>EntityResolver</code> to resolve external entities (schemas and + * DTDs). If <code>null</code>, it will not be set. + * @param errorHandler An <code>ErrorHandler</code> to decide what to do with parsing errors. If + * <code>null</code>, it will not be set. + * @return The parsed XML document as a DOM tree. + * @throws SAXException An error occurred parsing the document. + * @throws IOException An error occurred reading the document. + * @throws ParserConfigurationException An error occurred configuring the XML parser. + */ + public static Document parseDocument(final InputStream inputStream, final boolean validating, + final String externalSchemaLocations, final String externalNoNamespaceSchemaLocation, + final EntityResolver entityResolver, final ErrorHandler errorHandler, + final Map<String, Object> parserFeatures) + throws SAXException, IOException, ParserConfigurationException { + + DOMParser parser; + + // class MyEntityResolver implements EntityResolver { + // + // public InputSource resolveEntity(String publicId, String systemId) + // throws SAXException, IOException { + // return new InputSource(new ByteArrayInputStream(new byte[0])); + // } + // } + + + // if Debug is enabled make a copy of inputStream to enable debug output in case of SAXException + byte[] buffer = null; + ByteArrayInputStream baStream = null; + if (true == log.isDebugEnabled()) { + buffer = IOUtils.toByteArray(inputStream); + baStream = new ByteArrayInputStream(buffer); + + } + + + + // create the DOM parser + if (symbolTable != null) { + parser = new DOMParser(symbolTable, grammarPool); + } else { + parser = new DOMParser(); + } + + // set parser features and properties + try { + parser.setFeature(NAMESPACES_FEATURE, true); + parser.setFeature(VALIDATION_FEATURE, validating); + parser.setFeature(SCHEMA_VALIDATION_FEATURE, validating); + parser.setFeature(NORMALIZED_VALUE_FEATURE, false); + parser.setFeature(INCLUDE_IGNORABLE_WHITESPACE_FEATURE, true); + parser.setFeature(CREATE_ENTITY_REF_NODES_FEATURE, false); + parser.setFeature(EXTERNAL_GENERAL_ENTITIES_FEATURE, false); + parser.setFeature(EXTERNAL_PARAMETER_ENTITIES_FEATURE, false); + + // set external added parser features + if (parserFeatures != null) { + for (final Entry<String, Object> el : parserFeatures.entrySet()) { + final String key = el.getKey(); + if (StringUtils.isNotEmpty(key)) { + final Object value = el.getValue(); + if (value != null && value instanceof Boolean) { + parser.setFeature(key, (boolean) value); + } else { + log.warn("This XML parser only allows features with 'boolean' values"); + } + + } else { + log.warn("Can not set 'null' feature to XML parser"); + } + } + } + + // fix XXE problem + // parser.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true); + + + if (validating) { + if (externalSchemaLocations != null) { + parser.setProperty(EXTERNAL_SCHEMA_LOCATION_PROPERTY, externalSchemaLocations); + } + if (externalNoNamespaceSchemaLocation != null) { + parser.setProperty(EXTERNAL_NO_NAMESPACE_SCHEMA_LOCATION_PROPERTY, + externalNoNamespaceSchemaLocation); + } + } + + // set entity resolver and error handler + if (entityResolver != null) { + parser.setEntityResolver(entityResolver); + } + if (errorHandler != null) { + parser.setErrorHandler(errorHandler); + } + + // parse the document and return it + // if debug is enabled: use copy of strem (baStream) else use orig stream + if (null != baStream) { + parser.parse(new InputSource(baStream)); + } else { + parser.parse(new InputSource(inputStream)); + } + } catch (final SAXException e) { + if (true == log.isDebugEnabled() && null != buffer) { + final String xmlContent = new String(buffer, "UTF-8"); + log.debug("SAXException in:\n" + xmlContent); + } + throw (e); + } + + return parser.getDocument(); + } + + /** + * Simple document parser. + * + * @param inputStream data to parse + * @return Element + * @throws SAXException In case of an error + * @throws IOException In case of an error + * @throws ParserConfigurationException In case of an error + */ + public static Document parseDocumentSimple(final InputStream inputStream) + throws SAXException, IOException, ParserConfigurationException { + + DOMParser parser; + + parser = new DOMParser(); + // set parser features and properties + parser.setFeature(NAMESPACES_FEATURE, true); + parser.setFeature(VALIDATION_FEATURE, false); + parser.setFeature(SCHEMA_VALIDATION_FEATURE, false); + parser.setFeature(NORMALIZED_VALUE_FEATURE, false); + parser.setFeature(INCLUDE_IGNORABLE_WHITESPACE_FEATURE, true); + parser.setFeature(CREATE_ENTITY_REF_NODES_FEATURE, false); + + parser.parse(new InputSource(inputStream)); + + return parser.getDocument(); + } + + + + + + + + + + + + /** + * A convenience method to parse an XML document validating. + * + * @param inputStream The <code>InputStream</code> containing the XML document. + * @return The root element of the parsed XML document. + * @throws SAXException An error occurred parsing the document. + * @throws IOException An error occurred reading the document. + * @throws ParserConfigurationException An error occurred configuring the XML parser. + */ + public static Element parseXmlValidating(final InputStream inputStream) + throws ParserConfigurationException, SAXException, IOException { + return DomUtils + .parseDocument(inputStream, true, XMLNamespaceConstants.ALL_SCHEMA_LOCATIONS, null, null) + .getDocumentElement(); + } + + /** + * A convenience method to parse an XML document validating. + * + * @param inputStream The <code>InputStream</code> containing the XML document. + * @param parserFeatures Set additional features to XML parser + * @return The root element of the parsed XML document. + * @throws SAXException An error occurred parsing the document. + * @throws IOException An error occurred reading the document. + * @throws ParserConfigurationException An error occurred configuring the XML parser. + */ + public static Element parseXmlValidating(final InputStream inputStream, + final Map<String, Object> parserFeatures) + throws ParserConfigurationException, SAXException, IOException { + return DomUtils.parseDocument(inputStream, true, XMLNamespaceConstants.ALL_SCHEMA_LOCATIONS, + null, parserFeatures).getDocumentElement(); + } + + /** + * A convenience method to parse an XML document non validating. This method disallow DocType + * declarations + * + * @param inputStream The <code>InputStream</code> containing the XML document. + * @return The root element of the parsed XML document. + * @throws SAXException An error occurred parsing the document. + * @throws IOException An error occurred reading the document. + * @throws ParserConfigurationException An error occurred configuring the XML parser. + */ + public static Element parseXmlNonValidating(final InputStream inputStream) + throws ParserConfigurationException, SAXException, IOException { + return DomUtils.parseDocument(inputStream, false, XMLNamespaceConstants.ALL_SCHEMA_LOCATIONS, + null, Collections.unmodifiableMap(new HashMap<String, Object>() { + private static final long serialVersionUID = 1L; + { + put(DomUtils.DISALLOW_DOCTYPE_FEATURE, true); + + } + })).getDocumentElement(); + } + + /** + * Schema validate a given DOM element. + * + * @param element The element to validate. + * @param externalSchemaLocations A <code>String</code> containing namespace URI to schema + * location pairs, the same way it is accepted by the <code>xsi: + * schemaLocation</code> attribute. + * @param externalNoNamespaceSchemaLocation The schema location of the schema for elements without + * a namespace, the same way it is accepted by the + * <code>xsi:noNamespaceSchemaLocation</code> attribute. + * @return <code>true</code>, if the <code>element</code> validates against the schemas declared + * in it. + * @throws SAXException An error occurred parsing the document. + * @throws IOException An error occurred reading the document from its serialized representation. + * @throws ParserConfigurationException An error occurred configuring the XML + * @throws TransformerException An error occurred serializing the element. + */ + public static boolean validateElement(final Element element, final String externalSchemaLocations, + final String externalNoNamespaceSchemaLocation) + throws ParserConfigurationException, IOException, SAXException, TransformerException { + + byte[] docBytes; + SAXParser parser; + + // create the SAX parser + if (symbolTable != null) { + parser = new SAXParser(symbolTable, grammarPool); + } else { + parser = new SAXParser(); + } + + // serialize the document + docBytes = serializeNode(element, "UTF-8"); + + // set up parser features and attributes + parser.setFeature(NAMESPACES_FEATURE, true); + parser.setFeature(VALIDATION_FEATURE, true); + parser.setFeature(SCHEMA_VALIDATION_FEATURE, true); + parser.setFeature(EXTERNAL_GENERAL_ENTITIES_FEATURE, false); + parser.setFeature(DISALLOW_DOCTYPE_FEATURE, true); + + + if (externalSchemaLocations != null) { + parser.setProperty(EXTERNAL_SCHEMA_LOCATION_PROPERTY, externalSchemaLocations); + } + if (externalNoNamespaceSchemaLocation != null) { + parser.setProperty(EXTERNAL_NO_NAMESPACE_SCHEMA_LOCATION_PROPERTY, + "externalNoNamespaceSchemaLocation"); + } + + // set up entity resolver and error handler + parser.setEntityResolver(new EaafDomEntityResolver()); + + // parse validating + parser.parse(new InputSource(new ByteArrayInputStream(docBytes))); + return true; + } + + + /** + * Schema validate a given DOM element. + * + * @param element The element to validate. + * @param externalSchemaLocations A <code>String</code> containing namespace URI to schema + * location pairs, the same way it is accepted by the <code>xsi: + * schemaLocation</code> attribute. + * @param externalNoNamespaceSchemaLocation The schema location of the schema for elements without + * a namespace, the same way it is accepted by the + * <code>xsi:noNamespaceSchemaLocation</code> attribute. + * @return <code>true</code>, if the <code>element</code> validates against the schemas declared + * in it. + * @throws SAXException An error occurred parsing the document. + * @throws IOException An error occurred reading the document from its serialized representation. + * @throws ParserConfigurationException An error occurred configuring the XML + * @throws TransformerException An error occurred serializing the element. + */ + public static boolean validateElement(final Element element, final String externalSchemaLocations, + final String externalNoNamespaceSchemaLocation, final EntityResolver entityResolver) + throws ParserConfigurationException, IOException, SAXException, TransformerException { + + byte[] docBytes; + SAXParser parser; + + // create the SAX parser + if (symbolTable != null) { + parser = new SAXParser(symbolTable, grammarPool); + } else { + parser = new SAXParser(); + } + + // serialize the document + docBytes = serializeNode(element, "UTF-8"); + + // set up parser features and attributes + parser.setFeature(NAMESPACES_FEATURE, true); + parser.setFeature(VALIDATION_FEATURE, true); + parser.setFeature(SCHEMA_VALIDATION_FEATURE, true); + + if (externalSchemaLocations != null) { + parser.setProperty(EXTERNAL_SCHEMA_LOCATION_PROPERTY, externalSchemaLocations); + } + if (externalNoNamespaceSchemaLocation != null) { + parser.setProperty(EXTERNAL_NO_NAMESPACE_SCHEMA_LOCATION_PROPERTY, + "externalNoNamespaceSchemaLocation"); + } + + // set up entity resolver and error handler + parser.setEntityResolver(entityResolver); + + // parse validating + parser.parse(new InputSource(new ByteArrayInputStream(docBytes))); + return true; + } + + /** + * Serialize the given DOM node. + * + *<p> + * The node will be serialized using the UTF-8 encoding. + *</p> + * + * @param node The node to serialize. + * @return String The <code>String</code> representation of the given DOM node. + * @throws TransformerException An error occurred transforming the node to a <code>String</code>. + * @throws IOException An IO error occurred writing the node to a byte array. + */ + public static String serializeNode(final Node node) throws TransformerException, IOException { + return new String(serializeNode(node, "UTF-8", false), "UTF-8"); + } + + + /** + * Serialize the given DOM node. + * + *<p> + * The node will be serialized using the UTF-8 encoding. + *</p> + * + * @param node The node to serialize. + * @param omitXmlDeclaration The boolean value for omitting the XML Declaration. + * @return String The <code>String</code> representation of the given DOM node. + * @throws TransformerException An error occurred transforming the node to a <code>String</code>. + * @throws IOException An IO error occurred writing the node to a byte array. + */ + public static String serializeNode(final Node node, final boolean omitXmlDeclaration) + throws TransformerException, IOException { + return new String(serializeNode(node, "UTF-8", omitXmlDeclaration), "UTF-8"); + } + + /** + * Serialize the given DOM node. + * + *<p> + * The node will be serialized using the UTF-8 encoding. + *</p> + * + * @param node The node to serialize. + * @param omitXmlDeclaration The boolean value for omitting the XML Declaration. + * @param lineSeperator Sets the line seperator String of the parser + * @return String The <code>String</code> representation of the given DOM node. + * @throws TransformerException An error occurred transforming the node to a <code>String</code>. + * @throws IOException An IO error occurred writing the node to a byte array. + */ + public static String serializeNode(final Node node, final boolean omitXmlDeclaration, + final String lineSeperator) throws TransformerException, IOException { + return new String(serializeNode(node, "UTF-8", omitXmlDeclaration, lineSeperator), "UTF-8"); + } + + /** + * Serialize the given DOM node to a byte array. + * + * @param node The node to serialize. + * @param xmlEncoding The XML encoding to use. + * @return The serialized node, as a byte array. Using a compatible encoding this can easily be + * converted into a <code>String</code>. + * @throws TransformerException An error occurred transforming the node to a byte array. + * @throws IOException An IO error occurred writing the node to a byte array. + */ + public static byte[] serializeNode(final Node node, final String xmlEncoding) + throws TransformerException, IOException { + return serializeNode(node, xmlEncoding, false); + } + + /** + * Serialize the given DOM node to a byte array. + * + * @param node The node to serialize. + * @param xmlEncoding The XML encoding to use. + * @param omitDeclaration The boolean value for omitting the XML Declaration. + * @return The serialized node, as a byte array. Using a compatible encoding this can easily be + * converted into a <code>String</code>. + * @throws TransformerException An error occurred transforming the node to a byte array. + * @throws IOException An IO error occurred writing the node to a byte array. + */ + public static byte[] serializeNode(final Node node, final String xmlEncoding, + final boolean omitDeclaration) throws TransformerException, IOException { + return serializeNode(node, xmlEncoding, omitDeclaration, null); + } + + + /** + * Serialize the given DOM node to a byte array. + * + * @param node The node to serialize. + * @param xmlEncoding The XML encoding to use. + * @param omitDeclaration The boolean value for omitting the XML Declaration. + * @param lineSeperator Sets the line seperator String of the parser + * @return The serialized node, as a byte array. Using a compatible encoding this can easily be + * converted into a <code>String</code>. + * @throws TransformerException An error occurred transforming the node to a byte array. + * @throws IOException An IO error occurred writing the node to a byte array. + */ + public static byte[] serializeNode(final Node node, final String xmlEncoding, + final boolean omitDeclaration, final String lineSeperator) + throws TransformerException, IOException { + + final TransformerFactory transformerFactory = TransformerFactory.newInstance(); + final Transformer transformer = transformerFactory.newTransformer(); + final ByteArrayOutputStream bos = new ByteArrayOutputStream(16384); + + transformer.setOutputProperty(OutputKeys.METHOD, "xml"); + transformer.setOutputProperty(OutputKeys.ENCODING, xmlEncoding); + final String omit = omitDeclaration ? "yes" : "no"; + transformer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, omit); + if (null != lineSeperator) { + transformer.setOutputProperty("{http://xml.apache.org/xalan}line-separator", lineSeperator); + // does not work for xalan <= 2.5.1 + } + transformer.transform(new DOMSource(node), new StreamResult(bos)); + + bos.flush(); + bos.close(); + + return bos.toByteArray(); + } + + /** + * Return the text that a node contains. + * + *<p> + * This routine: + * </p> + * <ul> + * <li>Ignores comments and processing instructions.</li> + * <li>Concatenates TEXT nodes, CDATA nodes, and the results recursively processing EntityRef + * nodes.</li> + * <li>Ignores any element nodes in the sublist. (Other possible options are to recurse into + * element sublists or throw an exception.)</li> + * </ul> + * + * @param node A DOM node from which to extract text. + * @return A String representing its contents. + */ + public static String getText(final Node node) { + if (!node.hasChildNodes()) { + return ""; + } + + final StringBuffer result = new StringBuffer(); + final NodeList list = node.getChildNodes(); + + for (int i = 0; i < list.getLength(); i++) { + final Node subnode = list.item(i); + if (subnode.getNodeType() == Node.TEXT_NODE) { + result.append(subnode.getNodeValue()); + } else if (subnode.getNodeType() == Node.CDATA_SECTION_NODE) { + result.append(subnode.getNodeValue()); + } else if (subnode.getNodeType() == Node.ENTITY_REFERENCE_NODE) { + // Recurse into the subtree for text + // (and ignore comments) + result.append(getText(subnode)); + } + } + return result.toString(); + } + + /** + * Build the namespace prefix to namespace URL mapping in effect for a given node. + * + * @param node The context node for which build the map. + * @return The namespace prefix to namespace URL mapping ( a <code>String</code> value to + * <code>String</code> value mapping). + */ + public static Map getNamespaceDeclarations(Node node) { + final Map nsDecls = new HashMap(); + int i; + + do { + if (node.hasAttributes()) { + final NamedNodeMap attrs = node.getAttributes(); + + for (i = 0; i < attrs.getLength(); i++) { + final Attr attr = (Attr) attrs.item(i); + + // add prefix mapping if none exists + if ("xmlns".equals(attr.getPrefix()) || "xmlns".equals(attr.getName())) { + + final String nsPrefix = attr.getPrefix() != null ? attr.getLocalName() : ""; + + if (nsDecls.get(nsPrefix) == null) { + nsDecls.put(nsPrefix, attr.getValue()); + } + } + } + } + } while ((node = node.getParentNode()) != null); + + return nsDecls; + } + + /** + * Add all namespace declarations declared in the parent(s) of a given element and used in the + * subtree of the given element to the given element. + * + * @param context The element to which to add the namespaces. + */ + public static void localizeNamespaceDeclarations(final Element context) { + final Node parent = context.getParentNode(); + + if (parent != null) { + final Map namespaces = getNamespaceDeclarations(context.getParentNode()); + final Set nsUris = collectNamespaceUris(context); + Iterator iter; + + for (iter = namespaces.entrySet().iterator(); iter.hasNext();) { + final Map.Entry e = (Map.Entry) iter.next(); + + if (nsUris.contains(e.getValue())) { + final String prefix = (String) e.getKey(); + final String nsUri = (String) e.getValue(); + final String nsAttrName = "".equals(prefix) ? "xmlns" : "xmlns:" + prefix; + + context.setAttributeNS(XMLNamespaceConstants.XMLNS_NS_URI, nsAttrName, nsUri); + } + } + } + } + + /** + * Collect all the namespace URIs used in the subtree of a given element. + * + * @param context The element that should be searched for namespace URIs. + * @return All namespace URIs used in the subtree of <code>context</code>, including the ones used + * in <code>context</code> itself. + */ + public static Set collectNamespaceUris(final Element context) { + final Set result = new HashSet(); + + collectNamespaceUrisImpl(context, result); + return result; + } + + /** + * A recursive method to do the work of <code>collectNamespaceURIs</code>. + * + * @param context The context element to evaluate. + * @param result The result, passed as a parameter to avoid unnecessary instantiations of + * <code>Set</code>. + */ + private static void collectNamespaceUrisImpl(final Element context, final Set result) { + final NamedNodeMap attrs = context.getAttributes(); + final NodeList childNodes = context.getChildNodes(); + String nsUri; + int i; + + // add the namespace of the context element + nsUri = context.getNamespaceURI(); + if (nsUri != null && nsUri != XMLNamespaceConstants.XMLNS_NS_URI) { + result.add(nsUri); + } + + // add all namespace URIs from attributes + for (i = 0; i < attrs.getLength(); i++) { + nsUri = attrs.item(i).getNamespaceURI(); + if (nsUri != null && nsUri != XMLNamespaceConstants.XMLNS_NS_URI) { + result.add(nsUri); + } + } + + // add all namespaces from subelements + for (i = 0; i < childNodes.getLength(); i++) { + final Node node = childNodes.item(i); + + if (node.getNodeType() == Node.ELEMENT_NODE) { + collectNamespaceUrisImpl((Element) node, result); + } + } + } + + /** + * Check, that each attribute node in the given <code>NodeList</code> has its parent in the + * <code>NodeList</code> as well. + * + * @param nodes The <code>NodeList</code> to check. + * @return <code>true</code>, if each attribute node in <code>nodes</code> has its parent in + * <code>nodes</code> as well. + */ + public static boolean checkAttributeParentsInNodeList(final NodeList nodes) { + final Set nodeSet = new HashSet(); + int i; + + // put the nodes into the nodeSet + for (i = 0; i < nodes.getLength(); i++) { + nodeSet.add(nodes.item(i)); + } + + // check that each attribute node's parent is in the node list + for (i = 0; i < nodes.getLength(); i++) { + final Node n = nodes.item(i); + + if (n.getNodeType() == Node.ATTRIBUTE_NODE) { + final Attr attr = (Attr) n; + final Element owner = attr.getOwnerElement(); + + if (owner == null) { + if (!isNamespaceDeclaration(attr)) { + return false; + } + } + + if (!nodeSet.contains(owner) && !isNamespaceDeclaration(attr)) { + return false; + } + } + } + + return true; + } + + /** + * Convert an unstructured <code>NodeList</code> into a <code>DocumentFragment</code>. + * + * @param nodeList Contains the node list to be converted into a DOM DocumentFragment. + * @return the resulting DocumentFragment. The DocumentFragment will be backed by a new DOM + * Document, i.e. all noded of the node list will be cloned. + * @throws ParserConfigurationException An error occurred creating the DocumentFragment. + * @precondition The nodes in the node list appear in document order + * @precondition for each Attr node in the node list, the owning Element is in the node list as + * well. + * @precondition each Element or Attr node in the node list is namespace aware. + */ + public static DocumentFragment nodeList2DocumentFragment(final NodeList nodeList) + throws ParserConfigurationException { + + final DocumentBuilder builder = DocumentBuilderFactory.newInstance().newDocumentBuilder(); + final Document doc = builder.newDocument(); + final DocumentFragment result = doc.createDocumentFragment(); + + if (null == nodeList || nodeList.getLength() == 0) { + return result; + } + + int currPos = 0; + currPos = nodeList2DocumentFragment(nodeList, currPos, result, null, null) + 1; + + while (currPos < nodeList.getLength()) { + currPos = nodeList2DocumentFragment(nodeList, currPos, result, null, null) + 1; + } + return result; + } + + /** + * Helper method for the <code>nodeList2DocumentFragment</code>. + * + * @param nodeList The <code>NodeList</code> to convert. + * @param currPos The current position in the <code>nodeList</code>. + * @param result The resulting <code>DocumentFragment</code>. + * @param currOrgElem The current original element. + * @param currClonedElem The current cloned element. + * @return The current position. + */ + private static int nodeList2DocumentFragment(final NodeList nodeList, int currPos, + final DocumentFragment result, Element currOrgElem, Element currClonedElem) { + + while (currPos < nodeList.getLength()) { + final Node currentNode = nodeList.item(currPos); + switch (currentNode.getNodeType()) { + case Node.COMMENT_NODE: + case Node.PROCESSING_INSTRUCTION_NODE: + case Node.TEXT_NODE: { + // Append current node either to resulting DocumentFragment or to + // current cloned Element + if (null == currClonedElem) { + result.appendChild(result.getOwnerDocument().importNode(currentNode, false)); + } else { + // Stop processing if current Node is not a descendant of + // current Element + if (!isAncestor(currOrgElem, currentNode)) { + return --currPos; + } + + currClonedElem.appendChild(result.getOwnerDocument().importNode(currentNode, false)); + } + break; + } + + case Node.ELEMENT_NODE: { + final Element nextCurrOrgElem = (Element) currentNode; + final Element nextCurrClonedElem = result.getOwnerDocument() + .createElementNS(nextCurrOrgElem.getNamespaceURI(), nextCurrOrgElem.getNodeName()); + + // Append current Node either to resulting DocumentFragment or to + // current cloned Element + if (null == currClonedElem) { + result.appendChild(nextCurrClonedElem); + currOrgElem = nextCurrOrgElem; + currClonedElem = nextCurrClonedElem; + } else { + // Stop processing if current Node is not a descendant of + // current Element + if (!isAncestor(currOrgElem, currentNode)) { + return --currPos; + } + + currClonedElem.appendChild(nextCurrClonedElem); + } + + // Process current Node (of type Element) recursively + currPos = nodeList2DocumentFragment(nodeList, ++currPos, result, nextCurrOrgElem, + nextCurrClonedElem); + + break; + } + + case Node.ATTRIBUTE_NODE: { + final Attr currAttr = (Attr) currentNode; + + // GK 20030411: Hack to overcome problems with IAIK IXSIL + if (currAttr.getOwnerElement() == null) { + break; + } + if (currClonedElem == null) { + break; + } + + // currClonedElem must be the owner Element of currAttr if + // preconditions are met + currClonedElem.setAttributeNS(currAttr.getNamespaceURI(), currAttr.getNodeName(), + currAttr.getValue()); + break; + } + + default: { + // All other nodes will be ignored + } + } + + currPos++; + } + + return currPos; + } + + /** + * Check, if the given attribute is a namespace declaration. + * + * @param attr The attribute to check. + * @return <code>true</code>, if the attribute is a namespace declaration, <code>false</code> + * otherwise. + */ + private static boolean isNamespaceDeclaration(final Attr attr) { + return XMLNamespaceConstants.XMLNS_NS_URI.equals(attr.getNamespaceURI()); + } + + /** + * Check, if a given DOM element is an ancestor of a given node. + * + * @param candAnc The DOM element to check for being the ancestor. + * @param cand The node to check for being the child. + * @return <code>true</code>, if <code>candAnc</code> is an (indirect) ancestor of + * <code>cand</code>; <code>false</code> otherwise. + */ + public static boolean isAncestor(final Element candAnc, final Node cand) { + Node currPar = cand.getParentNode(); + + while (currPar != null) { + if (candAnc == currPar) { + return true; + } + currPar = currPar.getParentNode(); + } + return false; + } + + /** + * Selects the (first) element from a node list and returns it. + * + * @param nl The NodeList to get the element from. + * @return The (first) element included in the node list or <code>null</code> if the node list is + * <code>null</code> or empty or no element is included in the list. + */ + public static Element getElementFromNodeList(final NodeList nl) { + if ((nl == null) || (nl.getLength() == 0)) { + return null; + } + for (int i = 0; i < nl.getLength(); i++) { + final Node node = nl.item(i); + if (node.getNodeType() == Node.ELEMENT_NODE) { + return (Element) node; + } + } + return null; + } + + /** + * Returns all child elements of the given element. + * + * @param parent The element to get the child elements from. + * + * @return A list including all child elements of the given element. Maybe empty if the parent + * element has no child elements. + */ + public static List getChildElements(final Element parent) { + final Vector v = new Vector(); + final NodeList nl = parent.getChildNodes(); + final int length = nl.getLength(); + for (int i = 0; i < length; i++) { + final Node node = nl.item(i); + if (node.getNodeType() == Node.ELEMENT_NODE) { + v.add(node); + } + } + return v; + } + + /** + * Returns a byte array from given node. + * + * @param node Element node + * @return transformed node + * @throws TransformerException in case of an error + */ + public static byte[] nodeToByteArray(final Node node) throws TransformerException { + final Source source = new DOMSource(node); + final ByteArrayOutputStream out = new ByteArrayOutputStream(); + // StringWriter stringWriter = new StringWriter(); + final Result result = new StreamResult(out); + final TransformerFactory factory = TransformerFactory.newInstance(); + final Transformer transformer = factory.newTransformer(); + transformer.transform(source, result); + return out.toByteArray(); + } + + +} |