package at.gv.egovernment.moa.util;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import org.w3c.dom.Attr;
import org.w3c.dom.Document;
import org.w3c.dom.DocumentFragment;
import org.w3c.dom.Element;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.apache.xerces.parsers.DOMParser;
import org.apache.xerces.parsers.SAXParser;
import org.apache.xerces.parsers.XMLGrammarPreparser;
import org.apache.xerces.util.SymbolTable;
import org.apache.xerces.util.XMLGrammarPoolImpl;
import org.apache.xerces.xni.grammars.XMLGrammarDescription;
import org.apache.xerces.xni.grammars.XMLGrammarPool;
import org.apache.xerces.xni.parser.XMLInputSource;
import org.xml.sax.EntityResolver;
import org.xml.sax.ErrorHandler;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
/**
* Various utility functions for handling XML DOM trees.
*
* The parsing methods in this class make use of some features internal to the
* Xerces DOM parser, mainly for performance reasons. As soon as JAXP
* (currently at version 1.2) is better at schema handling, it should be used as
* the parser interface.
*
* @author Patrick Peck
* @version $Id$
*/
public class DOMUtils {
/** Feature URI for namespace aware parsing. */
private static final String NAMESPACES_FEATURE =
    "http://xml.org/sax/features/namespaces";
/** Feature URI for validating parsing. */
private static final String VALIDATION_FEATURE =
    "http://xml.org/sax/features/validation";
/** Feature URI for schema validating parsing. */
private static final String SCHEMA_VALIDATION_FEATURE =
    "http://apache.org/xml/features/validation/schema";
/** Feature URI for normalization of element/attribute values. */
private static final String NORMALIZED_VALUE_FEATURE =
    "http://apache.org/xml/features/validation/schema/normalized-value";
/** Feature URI for parsing ignorable whitespace. */
private static final String INCLUDE_IGNORABLE_WHITESPACE_FEATURE =
    "http://apache.org/xml/features/dom/include-ignorable-whitespace";
/** Feature URI for creating EntityReference nodes in the DOM tree. */
private static final String CREATE_ENTITY_REF_NODES_FEATURE =
    "http://apache.org/xml/features/dom/create-entity-ref-nodes";
/** Property URI for providing external schema locations. */
private static final String EXTERNAL_SCHEMA_LOCATION_PROPERTY =
    "http://apache.org/xml/properties/schema/external-schemaLocation";
/**
 * Property URI for providing the external schema location for elements
 * without a namespace.
 */
private static final String EXTERNAL_NO_NAMESPACE_SCHEMA_LOCATION_PROPERTY =
    "http://apache.org/xml/properties/schema/external-noNamespaceSchemaLocation";
/** Property URI for the Xerces grammar pool. */
private static final String GRAMMAR_POOL =
    org.apache.xerces.impl.Constants.XERCES_PROPERTY_PREFIX
        + org.apache.xerces.impl.Constants.XMLGRAMMAR_POOL_PROPERTY;
/** A prime number for initializing the symbol table. */
private static final int BIG_PRIME = 2039;
/**
 * Symbol table for the grammar pool. Shared by every parser created in this
 * class; it is never reassigned, hence <code>final</code>.
 */
private static final SymbolTable symbolTable = new SymbolTable(BIG_PRIME);
/**
 * Xerces schema grammar pool. Shared by every parser created in this class;
 * it is never reassigned, hence <code>final</code>.
 */
private static final XMLGrammarPool grammarPool = new XMLGrammarPoolImpl();
/**
 * Preparse a schema and add it to the schema pool.
 *
 * The grammar pool is unlocked only while the schema is being preparsed. It
 * is locked again even if preparsing fails, so that a broken or unreadable
 * schema cannot leave the pool open for automatic additions.
 *
 * @param inputStream An <code>InputStream</code> providing the contents of
 * the schema.
 * @param systemId The systemId to use for the schema.
 * @throws IOException An error occurred reading the schema.
 */
public static void addSchemaToPool(InputStream inputStream, String systemId)
  throws IOException {

  XMLGrammarPreparser preparser;

  // unlock the pool so that we can add another grammar
  grammarPool.unlockPool();
  try {
    // prepare the preparser
    preparser = new XMLGrammarPreparser(symbolTable);
    preparser.registerPreparser(XMLGrammarDescription.XML_SCHEMA, null);
    preparser.setProperty(GRAMMAR_POOL, grammarPool);
    preparser.setFeature(NAMESPACES_FEATURE, true);
    preparser.setFeature(VALIDATION_FEATURE, true);

    // add the grammar to the pool
    preparser.preparseGrammar(
      XMLGrammarDescription.XML_SCHEMA,
      new XMLInputSource(null, systemId, null, inputStream, null));
  } finally {
    // lock the pool again so that schemas are not added automatically;
    // done in finally so a failed preparse does not leave the pool unlocked
    grammarPool.lockPool();
  }
}
/**
 * Parse an XML document from an <code>InputStream</code>.
 *
 * The parser is always namespace aware, keeps ignorable whitespace, does not
 * normalize schema values and does not create entity reference nodes.
 *
 * @param inputStream The <code>InputStream</code> containing the XML
 * document.
 * @param validating If <code>true</code>, parse validating (both the
 * validation and the schema validation feature are switched on).
 * @param externalSchemaLocations A <code>String</code> containing namespace
 * URI to schema location pairs, the same way it is accepted by the
 * <code>xsi:schemaLocation</code> attribute. Only used when validating.
 * @param externalNoNamespaceSchemaLocation The schema location of the
 * schema for elements without a namespace, the same way it is accepted by the
 * <code>xsi:noNamespaceSchemaLocation</code> attribute. Only used when
 * validating.
 * @param entityResolver An <code>EntityResolver</code> to resolve external
 * entities (schemas and DTDs). If <code>null</code>, it will not be set.
 * @param errorHandler An <code>ErrorHandler</code> to decide what to do
 * with parsing errors. If <code>null</code>, it will not be set.
 * @return The parsed XML document as a DOM tree.
 * @throws SAXException An error occurred parsing the document.
 * @throws IOException An error occurred reading the document.
 * @throws ParserConfigurationException An error occurred configuring the XML
 * parser.
 */
public static Document parseDocument(
  InputStream inputStream,
  boolean validating,
  String externalSchemaLocations,
  String externalNoNamespaceSchemaLocation,
  EntityResolver entityResolver,
  ErrorHandler errorHandler)
  throws SAXException, IOException, ParserConfigurationException {

  // create the DOM parser, sharing symbol table and grammar pool if present
  final DOMParser domParser =
    symbolTable == null
      ? new DOMParser()
      : new DOMParser(symbolTable, grammarPool);

  // parser features
  domParser.setFeature(NAMESPACES_FEATURE, true);
  domParser.setFeature(VALIDATION_FEATURE, validating);
  domParser.setFeature(SCHEMA_VALIDATION_FEATURE, validating);
  domParser.setFeature(NORMALIZED_VALUE_FEATURE, false);
  domParser.setFeature(INCLUDE_IGNORABLE_WHITESPACE_FEATURE, true);
  domParser.setFeature(CREATE_ENTITY_REF_NODES_FEATURE, false);

  // schema location hints are only meaningful for a validating parse
  if (validating && externalSchemaLocations != null) {
    domParser.setProperty(
      EXTERNAL_SCHEMA_LOCATION_PROPERTY,
      externalSchemaLocations);
  }
  if (validating && externalNoNamespaceSchemaLocation != null) {
    domParser.setProperty(
      EXTERNAL_NO_NAMESPACE_SCHEMA_LOCATION_PROPERTY,
      externalNoNamespaceSchemaLocation);
  }

  // optional collaborators
  if (entityResolver != null) {
    domParser.setEntityResolver(entityResolver);
  }
  if (errorHandler != null) {
    domParser.setErrorHandler(errorHandler);
  }

  // parse and hand back the resulting tree
  final InputSource source = new InputSource(inputStream);
  domParser.parse(source);
  return domParser.getDocument();
}
/**
 * Parse an XML document from an <code>InputStream</code>.
 *
 * It uses a <code>MOAEntityResolver</code> as the
 * <code>EntityResolver</code> and a <code>MOAErrorHandler</code> as the
 * <code>ErrorHandler</code>.
 *
 * @param inputStream The <code>InputStream</code> containing the XML
 * document.
 * @param validating If <code>true</code>, parse validating.
 * @param externalSchemaLocations A <code>String</code> containing namespace
 * URI to schema location pairs, the same way it is accepted by the
 * <code>xsi:schemaLocation</code> attribute.
 * @param externalNoNamespaceSchemaLocation The schema location of the
 * schema for elements without a namespace, the same way it is accepted by the
 * <code>xsi:noNamespaceSchemaLocation</code> attribute.
 * @return The parsed XML document as a DOM tree.
 * @throws SAXException An error occurred parsing the document.
 * @throws IOException An error occurred reading the document.
 * @throws ParserConfigurationException An error occurred configuring the XML
 * parser.
 */
public static Document parseDocument(
  InputStream inputStream,
  boolean validating,
  String externalSchemaLocations,
  String externalNoNamespaceSchemaLocation)
  throws SAXException, IOException, ParserConfigurationException {

  // default MOA collaborators for entity resolution and error reporting
  final EntityResolver moaResolver = new MOAEntityResolver();
  final ErrorHandler moaHandler = new MOAErrorHandler();

  return parseDocument(
    inputStream,
    validating,
    externalSchemaLocations,
    externalNoNamespaceSchemaLocation,
    moaResolver,
    moaHandler);
}
/**
 * Parse an XML document from a <code>String</code>.
 *
 * The string is converted to bytes using the given encoding and then parsed
 * with a <code>MOAEntityResolver</code> as the <code>EntityResolver</code>
 * and a <code>MOAErrorHandler</code> as the <code>ErrorHandler</code>.
 *
 * @param xmlString The <code>String</code> containing the XML document.
 * @param encoding The encoding of the XML document.
 * @param validating If <code>true</code>, parse validating.
 * @param externalSchemaLocations A <code>String</code> containing namespace
 * URI to schema location pairs, the same way it is accepted by the
 * <code>xsi:schemaLocation</code> attribute.
 * @param externalNoNamespaceSchemaLocation The schema location of the
 * schema for elements without a namespace, the same way it is accepted by the
 * <code>xsi:noNamespaceSchemaLocation</code> attribute.
 * @return The parsed XML document as a DOM tree.
 * @throws SAXException An error occurred parsing the document.
 * @throws IOException An error occurred reading the document.
 * @throws ParserConfigurationException An error occurred configuring the XML
 * parser.
 */
public static Document parseDocument(
  String xmlString,
  String encoding,
  boolean validating,
  String externalSchemaLocations,
  String externalNoNamespaceSchemaLocation)
  throws SAXException, IOException, ParserConfigurationException {

  // encode the text, then delegate to the stream based variant
  final byte[] encodedXml = xmlString.getBytes(encoding);
  final InputStream xmlStream = new ByteArrayInputStream(encodedXml);

  return parseDocument(
    xmlStream,
    validating,
    externalSchemaLocations,
    externalNoNamespaceSchemaLocation);
}
/**
 * Parse an UTF-8 encoded XML document from a <code>String</code>.
 *
 * @param xmlString The <code>String</code> containing the XML document.
 * @param validating If <code>true</code>, parse validating.
 * @param externalSchemaLocations A <code>String</code> containing namespace
 * URI to schema location pairs, the same way it is accepted by the
 * <code>xsi:schemaLocation</code> attribute.
 * @param externalNoNamespaceSchemaLocation The schema location of the
 * schema for elements without a namespace, the same way it is accepted by the
 * <code>xsi:noNamespaceSchemaLocation</code> attribute.
 * @return The parsed XML document as a DOM tree.
 * @throws SAXException An error occurred parsing the document.
 * @throws IOException An error occurred reading the document.
 * @throws ParserConfigurationException An error occurred configuring the XML
 * parser.
 */
public static Document parseDocument(
  String xmlString,
  boolean validating,
  String externalSchemaLocations,
  String externalNoNamespaceSchemaLocation)
  throws SAXException, IOException, ParserConfigurationException {

  // fixed encoding for this convenience variant
  final String utf8 = "UTF-8";

  return parseDocument(
    xmlString,
    utf8,
    validating,
    externalSchemaLocations,
    externalNoNamespaceSchemaLocation);
}
/**
 * A convenience method to parse an XML document validating.
 *
 * The document is validated using <code>Constants.ALL_SCHEMA_LOCATIONS</code>
 * as external schema locations and no schema location for elements without
 * a namespace.
 *
 * @param inputStream The <code>InputStream</code> containing the XML
 * document.
 * @return The root element of the parsed XML document.
 * @throws SAXException An error occurred parsing the document.
 * @throws IOException An error occurred reading the document.
 * @throws ParserConfigurationException An error occurred configuring the XML
 * parser.
 */
public static Element parseXmlValidating(InputStream inputStream)
  throws ParserConfigurationException, SAXException, IOException {

  final Document parsed =
    DOMUtils.parseDocument(
      inputStream,
      true,
      Constants.ALL_SCHEMA_LOCATIONS,
      null);

  return parsed.getDocumentElement();
}
/**
 * Schema validate a given DOM element.
 *
 * The element is serialized to UTF-8 and re-parsed with a validating SAX
 * parser that uses a <code>MOAEntityResolver</code> and a
 * <code>MOAErrorHandler</code>.
 *
 * @param element The element to validate.
 * @param externalSchemaLocations A <code>String</code> containing namespace
 * URI to schema location pairs, the same way it is accepted by the
 * <code>xsi:schemaLocation</code> attribute. May be <code>null</code>.
 * @param externalNoNamespaceSchemaLocation The schema location of the
 * schema for elements without a namespace, the same way it is accepted by the
 * <code>xsi:noNamespaceSchemaLocation</code> attribute. May be
 * <code>null</code>.
 * @return <code>true</code>, if the <code>element</code> was parsed without
 * an exception being raised; this method never returns <code>false</code>,
 * a failed validation is expected to surface as an exception.
 * @throws SAXException An error occurred parsing the document.
 * @throws IOException An error occurred reading the document from its
 * serialized representation.
 * @throws ParserConfigurationException An error occurred configuring the XML
 * parser.
 * @throws TransformerException An error occurred serializing the element.
 */
public static boolean validateElement(
  Element element,
  String externalSchemaLocations,
  String externalNoNamespaceSchemaLocation)
  throws
    ParserConfigurationException,
    IOException,
    SAXException,
    TransformerException {

  byte[] docBytes;
  SAXParser parser;

  // create the SAX parser
  if (symbolTable != null) {
    parser = new SAXParser(symbolTable, grammarPool);
  } else {
    parser = new SAXParser();
  }

  // serialize the document
  docBytes = serializeNode(element, "UTF-8");

  // set up parser features and attributes
  parser.setFeature(NAMESPACES_FEATURE, true);
  parser.setFeature(VALIDATION_FEATURE, true);
  parser.setFeature(SCHEMA_VALIDATION_FEATURE, true);
  if (externalSchemaLocations != null) {
    parser.setProperty(
      EXTERNAL_SCHEMA_LOCATION_PROPERTY,
      externalSchemaLocations);
  }
  if (externalNoNamespaceSchemaLocation != null) {
    // pass the caller supplied location, not the parameter's name as a
    // string literal
    parser.setProperty(
      EXTERNAL_NO_NAMESPACE_SCHEMA_LOCATION_PROPERTY,
      externalNoNamespaceSchemaLocation);
  }

  // set up entity resolver and error handler
  parser.setEntityResolver(new MOAEntityResolver());
  parser.setErrorHandler(new MOAErrorHandler());

  // parse validating
  parser.parse(new InputSource(new ByteArrayInputStream(docBytes)));

  return true;
}
/**
 * Serialize the given DOM node.
 *
 * The node will be serialized using the UTF-8 encoding.
 *
 * @param node The node to serialize.
 * @return The <code>String</code> representation of the given DOM node.
 * @throws TransformerException An error occurred transforming the
 * node to a <code>String</code>.
 * @throws IOException An IO error occurred writing the node to a byte array.
 */
public static String serializeNode(Node node)
  throws TransformerException, IOException {

  // serialize to UTF-8 bytes, then decode with the very same charset
  final byte[] utf8Bytes = serializeNode(node, "UTF-8");
  return new String(utf8Bytes, "UTF-8");
}
/**
 * Serialize the given DOM node to a byte array.
 *
 * The node is written with an identity transformation using the
 * <code>xml</code> output method.
 *
 * @param node The node to serialize.
 * @param xmlEncoding The XML encoding to use.
 * @return The serialized node, as a byte array. Using a compatible encoding
 * this can easily be converted into a <code>String</code>.
 * @throws TransformerException An error occurred transforming the node to a
 * byte array.
 * @throws IOException An IO error occurred writing the node to a byte array.
 */
public static byte[] serializeNode(Node node, String xmlEncoding)
  throws TransformerException, IOException {

  // identity transformer configured for XML output in the requested encoding
  final Transformer serializer =
    TransformerFactory.newInstance().newTransformer();
  serializer.setOutputProperty(OutputKeys.METHOD, "xml");
  serializer.setOutputProperty(OutputKeys.ENCODING, xmlEncoding);

  // write the node into an in-memory buffer
  final ByteArrayOutputStream buffer = new ByteArrayOutputStream(16384);
  final DOMSource input = new DOMSource(node);
  final StreamResult output = new StreamResult(buffer);
  serializer.transform(input, output);

  buffer.flush();
  buffer.close();
  return buffer.toByteArray();
}
/**
 * Collect the namespace declarations visible at a given node.
 *
 * Starting at <code>node</code> and walking up through all its parents,
 * every <code>xmlns</code> and <code>xmlns:prefix</code> attribute is
 * recorded. A declaration found closer to <code>node</code> takes precedence
 * over a declaration of the same prefix found further up the tree.
 *
 * @param node The node at which to start collecting; must not be
 * <code>null</code>.
 * @return The namespace declarations, as a mapping from prefix to namespace
 * URI (<code>String</code> value to <code>String</code> value mapping). The
 * default namespace is stored under the empty string.
 */
public static Map getNamespaceDeclarations(Node node) {
  final Map declarations = new HashMap();
  Node current = node;

  do {
    if (current.hasAttributes()) {
      final NamedNodeMap attributes = current.getAttributes();
      final int attrCount = attributes.getLength();

      for (int idx = 0; idx < attrCount; idx++) {
        final Attr attribute = (Attr) attributes.item(idx);
        final boolean isPrefixDecl = "xmlns".equals(attribute.getPrefix());
        final boolean isDefaultDecl = "xmlns".equals(attribute.getName());

        if (!isPrefixDecl && !isDefaultDecl) {
          // an ordinary attribute, not a namespace declaration
          continue;
        }

        // the default namespace is keyed by the empty string
        final String declaredPrefix;
        if (attribute.getPrefix() != null) {
          declaredPrefix = attribute.getLocalName();
        } else {
          declaredPrefix = "";
        }

        // keep the innermost declaration: add a mapping only if none exists
        if (declarations.get(declaredPrefix) == null) {
          declarations.put(declaredPrefix, attribute.getValue());
        }
      }
    }
    current = current.getParentNode();
  } while (current != null);

  return declarations;
}
/**
 * Add all namespace declarations declared in the parent(s) of a given
 * element and used in the subtree of the given element to the given element.
 *
 * If the element has no parent, nothing is changed.
 *
 * @param context The element to which to add the namespaces.
 */
public static void localizeNamespaceDeclarations(Element context) {
  final Node parent = context.getParentNode();
  if (parent == null) {
    // nothing to inherit from
    return;
  }

  final Map inherited = getNamespaceDeclarations(parent);
  final Set usedUris = collectNamespaceURIs(context);
  final Iterator entries = inherited.entrySet().iterator();

  while (entries.hasNext()) {
    final Map.Entry declaration = (Map.Entry) entries.next();

    // only copy declarations whose URI is actually used below context
    if (!usedUris.contains(declaration.getValue())) {
      continue;
    }

    final String prefix = (String) declaration.getKey();
    final String nsUri = (String) declaration.getValue();
    final String attrName;
    if ("".equals(prefix)) {
      attrName = "xmlns";
    } else {
      attrName = "xmlns:" + prefix;
    }
    context.setAttributeNS(Constants.XMLNS_NS_URI, attrName, nsUri);
  }
}
/**
 * Collect all the namespace URIs used in the subtree of a given element.
 *
 * @param context The element that should be searched for namespace URIs.
 * @return All namespace URIs used in the subtree of <code>context</code>,
 * including the ones used in <code>context</code> itself.
 */
public static Set collectNamespaceURIs(Element context) {
  // the recursive worker fills the set handed to it
  final Set namespaceUris = new HashSet();

  collectNamespaceURIsImpl(context, namespaceUris);

  return namespaceUris;
}
/**
 * A recursive method to do the work of <code>collectNamespaceURIs</code>.
 *
 * Adds the namespace URI of the context element, of each of its attributes
 * and, recursively, of all its descendant elements to <code>result</code>.
 * The namespace of namespace declarations themselves
 * (<code>Constants.XMLNS_NS_URI</code>) is never added.
 *
 * @param context The context element to evaluate.
 * @param result The result, passed as a parameter to avoid unnecessary
 * instantiations of <code>Set</code>.
 */
private static void collectNamespaceURIsImpl(Element context, Set result) {
  NamedNodeMap attrs = context.getAttributes();
  NodeList childNodes = context.getChildNodes();
  String nsUri;
  int i;

  // add the namespace of the context element
  nsUri = context.getNamespaceURI();
  // compare by value: the DOM implementation is not guaranteed to hand out
  // the very same String instance as the constant
  if (nsUri != null && !Constants.XMLNS_NS_URI.equals(nsUri)) {
    result.add(nsUri);
  }

  // add all namespace URIs from attributes
  for (i = 0; i < attrs.getLength(); i++) {
    nsUri = attrs.item(i).getNamespaceURI();
    if (nsUri != null && !Constants.XMLNS_NS_URI.equals(nsUri)) {
      result.add(nsUri);
    }
  }

  // add all namespaces from subelements
  for (i = 0; i < childNodes.getLength(); i++) {
    Node node = childNodes.item(i);

    if (node.getNodeType() == Node.ELEMENT_NODE) {
      collectNamespaceURIsImpl((Element) node, result);
    }
  }
}
/**
 * Check, that each attribute node in the given <code>NodeList</code> has its
 * parent in the <code>NodeList</code> as well.
 *
 * Attributes that are namespace declarations are exempt from this check.
 *
 * @param nodes The <code>NodeList</code> to check.
 * @return <code>true</code>, if each attribute node in <code>nodes</code>
 * has its parent in <code>nodes</code> as well.
 */
public static boolean checkAttributeParentsInNodeList(NodeList nodes) {
  final int count = nodes.getLength();
  final Set members = new HashSet();

  // first pass: remember every node of the list
  for (int pos = 0; pos < count; pos++) {
    members.add(nodes.item(pos));
  }

  // second pass: every non-declaration attribute needs its owner in the list
  for (int pos = 0; pos < count; pos++) {
    final Node candidate = nodes.item(pos);
    if (candidate.getNodeType() != Node.ATTRIBUTE_NODE) {
      continue;
    }

    final Attr attribute = (Attr) candidate;
    if (isNamespaceDeclaration(attribute)) {
      continue;
    }

    final Element owner = attribute.getOwnerElement();
    if (owner == null || !members.contains(owner)) {
      return false;
    }
  }

  return true;
}
/**
 * Convert an unstructured <code>NodeList</code> into a
 * <code>DocumentFragment</code>.
 *
 * @param nodeList Contains the node list to be converted into a DOM
 * DocumentFragment.
 * @return the resulting DocumentFragment. The DocumentFragment will be
 * backed by a new DOM Document, i.e. all nodes of the node list will be
 * cloned. For a <code>null</code> or empty node list an empty
 * DocumentFragment is returned.
 * @throws ParserConfigurationException An error occurred creating the
 * DocumentFragment.
 * @precondition The nodes in the node list appear in document order.
 * @precondition For each Attr node in the node list, the owning Element is
 * in the node list as well.
 * @precondition Each Element or Attr node in the node list is namespace
 * aware.
 */
public static DocumentFragment nodeList2DocumentFragment(NodeList nodeList)
  throws ParserConfigurationException {

  // a fresh document owns the fragment and all cloned nodes
  final DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
  final Document ownerDocument = factory.newDocumentBuilder().newDocument();
  final DocumentFragment fragment = ownerDocument.createDocumentFragment();

  if (nodeList == null || nodeList.getLength() == 0) {
    return fragment;
  }

  // the helper reports the last position it handled; resume right after it
  // until the whole list has been consumed
  int position = 0;
  do {
    position =
      nodeList2DocumentFragment(nodeList, position, fragment, null, null) + 1;
  } while (position < nodeList.getLength());

  return fragment;
}
/**
 * Helper method for <code>nodeList2DocumentFragment</code>.
 *
 * Walks the node list starting at <code>currPos</code> and clones the nodes
 * into the owner document of <code>result</code>. Nodes are appended to
 * <code>currClonedElem</code> if there is one, otherwise to
 * <code>result</code>. Elements are processed recursively so that the nodes
 * following an element in the list end up as its children or attributes, as
 * long as they are descendants of that element in the original tree.
 *
 * @param nodeList The <code>NodeList</code> to convert.
 * @param currPos The current position in the <code>nodeList</code>.
 * @param result The resulting <code>DocumentFragment</code>.
 * @param currOrgElem The current original element, or <code>null</code> if
 * nodes are to be appended directly to <code>result</code>.
 * @param currClonedElem The current cloned element (the clone of
 * <code>currOrgElem</code>), or <code>null</code>.
 * @return The position of the last node handled by this invocation if
 * processing stopped at a node that is not a descendant of
 * <code>currOrgElem</code>; the length of <code>nodeList</code> if the end
 * of the list was reached. Callers continue at the returned position plus one.
 */
private static int nodeList2DocumentFragment(
NodeList nodeList,
int currPos,
DocumentFragment result,
Element currOrgElem,
Element currClonedElem) {
while (currPos < nodeList.getLength()) {
Node currentNode = nodeList.item(currPos);
switch (currentNode.getNodeType()) {
case Node.COMMENT_NODE :
case Node.PROCESSING_INSTRUCTION_NODE :
case Node.TEXT_NODE :
{
// Append current node either to resulting DocumentFragment or to
// current cloned Element
if (null == currClonedElem) {
result.appendChild(
result.getOwnerDocument().importNode(currentNode, false));
} else {
// Stop processing if current Node is not a descendant of
// current Element; step back one position so that the caller,
// which advances by one, looks at this node again
if (!isAncestor(currOrgElem, currentNode)) {
return --currPos;
}
currClonedElem.appendChild(
result.getOwnerDocument().importNode(currentNode, false));
}
break;
}
case Node.ELEMENT_NODE :
{
// Create an empty clone of the element; its attributes and children
// are added while the following nodes of the list are processed
Element nextCurrOrgElem = (Element) currentNode;
Element nextCurrClonedElem =
result.getOwnerDocument().createElementNS(
nextCurrOrgElem.getNamespaceURI(),
nextCurrOrgElem.getNodeName());
// Append current Node either to resulting DocumentFragment or to
// current cloned Element
if (null == currClonedElem) {
result.appendChild(nextCurrClonedElem);
// From now on this invocation only accepts descendants of this
// element; the first other node makes it return
currOrgElem = nextCurrOrgElem;
currClonedElem = nextCurrClonedElem;
} else {
// Stop processing if current Node is not a descendant of
// current Element
if (!isAncestor(currOrgElem, currentNode)) {
return --currPos;
}
currClonedElem.appendChild(nextCurrClonedElem);
}
// Process current Node (of type Element) recursively, starting with
// the node following it; the recursion returns the last position it
// handled, the increment at the end of the loop moves past it
currPos =
nodeList2DocumentFragment(
nodeList,
++currPos,
result,
nextCurrOrgElem,
nextCurrClonedElem);
break;
}
case Node.ATTRIBUTE_NODE :
{
Attr currAttr = (Attr) currentNode;
// GK 20030411: Hack to overcome problems with IAIK IXSIL
// (attributes without an owner element are skipped)
if (currAttr.getOwnerElement() == null)
break;
// An attribute without a current cloned element has nowhere to go
// and is skipped as well
if (currClonedElem == null)
break;
// currClonedElem must be the owner Element of currAttr if
// preconditions are met
currClonedElem.setAttributeNS(
currAttr.getNamespaceURI(),
currAttr.getNodeName(),
currAttr.getValue());
break;
}
default :
{
// All other nodes will be ignored
}
}
currPos++;
}
// End of the node list reached
return currPos;
}
/**
 * Check, if the given attribute is a namespace declaration.
 *
 * An attribute counts as a namespace declaration if its namespace URI equals
 * <code>Constants.XMLNS_NS_URI</code>.
 *
 * @param attr The attribute to check.
 * @return <code>true</code>, if the attribute is a namespace declaration,
 * <code>false</code> otherwise.
 */
private static boolean isNamespaceDeclaration(Attr attr) {
  final String attrNamespace = attr.getNamespaceURI();

  // constant on the left: a null namespace simply yields false
  return Constants.XMLNS_NS_URI.equals(attrNamespace);
}
/**
 * Check, if a given DOM element is an ancestor of a given node.
 *
 * The parent chain of <code>cand</code> is followed up to the root and each
 * parent is compared to <code>candAnc</code> by object identity. A node is
 * not considered to be its own ancestor.
 *
 * @param candAnc The DOM element to check for being the ancestor.
 * @param cand The node to check for being the child.
 * @return <code>true</code>, if <code>candAnc</code> is an (indirect)
 * ancestor of <code>cand</code>; <code>false</code> otherwise.
 */
public static boolean isAncestor(Element candAnc, Node cand) {
  for (Node ancestor = cand.getParentNode();
    ancestor != null;
    ancestor = ancestor.getParentNode()) {

    if (ancestor == candAnc) {
      return true;
    }
  }

  return false;
}
}