//package org.gicentre.utils.io;
import javax.xml.parsers.*; // For Document builder factory.
import org.xml.sax.*; // For SAX Exception handling.
import org.w3c.dom.*; // For document object model (DOM).
import java.io.*; // For file handling.
import java.util.*; // For vector structure.
// ****************************************************************************************
/** Handles DOM processing allowing the reading and writing of hierarchical structures as
* XML files. Uses the Document Object Model (DOM) to store the tree of nodes, therefore
* not suitable for very large structures. For reading very large structures represented as
* XML, use SAX processing instead.
* @author Jo Wood, giCentre, City University London.
* @version 3.1, 18th February, 2011.
*/
// *****************************************************************************************
/* This file is part of giCentre utilities library. gicentre.utils is free software: you can
* redistribute it and/or modify it under the terms of the GNU Lesser General Public License
* as published by the Free Software Foundation, either version 3 of the License, or (at your
* option) any later version.
*
* gicentre.utils is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
* See the GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License along with this
* source code (see COPYING.LESSER included with this source code). If not, see
* http://www.gnu.org/licenses/.
*/
public class DOMProcessor
{
    // ----------------------- Object Variables -----------------------

    private int indent;         // Current indent level used while writing XML.
    private Document dom;       // Document object model (null until created or read).
    private PrintWriter out;    // Destination of the XML currently being written.

    // ------------------------- Constructors -------------------------

    /** Creates a new empty DOM ready for processing.
      */
    public DOMProcessor()
    {
        // Do nothing: the DOM is created lazily by addElement() or readXML().
    }

    /** Wraps the given DOM in this processor allowing it to be written
      * as an XML file, or appended with new nodes.
      * @param dom Document Object Model to use in processor.
      */
    public DOMProcessor(Document dom)
    {
        this.dom = dom;
    }

    /** Reads the given XML file and constructs a DOM from it. Problems are reported
      * on standard error; use <code>isEmpty()</code> to check that a DOM was created.
      * @param fileName Name of XML file to read.
      */
    public DOMProcessor(String fileName)
    {
        readXML(fileName);
    }

    /** Reads XML from the given input stream and constructs a DOM from it. Problems are
      * reported on standard error; use <code>isEmpty()</code> to check that a DOM was created.
      * @param inStream Stream from which to read XML.
      */
    public DOMProcessor(InputStream inStream)
    {
        readXML(inStream);
    }

    // ------------------------- Methods ---------------------------

    /** Reports whether we have an empty DOM.
      * @return True if no DOM has been created or read yet.
      */
    public boolean isEmpty()
    {
        return dom == null;
    }

    /** Adds a new element to the root of the DOM. If the DOM does not yet exist, or exists
      * but has no root element (for example after a failed parse), the new element becomes
      * the root element.
      * @param name Name of the new element
      * @return New element in the DOM, or null if no XML parser could be found.
      */
    public Node addElement(String name)
    {
        if (dom == null)
        {
            DocumentBuilder db = createBuilder();
            if (db == null)
            {
                return null;
            }
            indent = 0;
            dom = db.getDOMImplementation().createDocument(null,name,null);
            return dom.getDocumentElement();
        }

        Element root = dom.getDocumentElement();
        if (root == null)
        {
            // Document exists but is still empty, so the new element has to become the root.
            root = dom.createElement(name);
            dom.appendChild(root);
            return root;
        }
        return addElement(name,null,root);
    }

    /** Adds a new element to the given one within the DOM.
      * @param name Name of the new element
      * @param existingElement Element onto which the new element should be attached.
      * @return New element in the DOM.
      */
    public Node addElement(String name, Node existingElement)
    {
        return addElement(name,null,existingElement);
    }

    /** Adds a new element to the given one within the DOM. A DOM must already exist
      * (see <code>addElement(String)</code>) before this method is called.
      * @param name Name of the new element
      * @param text Text to attach to element or null if none required.
      * @param existingElement Element onto which the new element should be attached.
      * @return New element in the DOM.
      */
    public Node addElement(String name, String text, Node existingElement)
    {
        // Create the new element node and attach it to existing node.
        Node newNode = dom.createElement(name);
        existingElement.appendChild(newNode);

        // Add text if given.
        if (text != null)
        {
            newNode.appendChild(dom.createTextNode(text));
        }
        return newNode;
    }

    /** Renames the given element with the given new name. This is done by replacing the
      * element with a newly created one that takes over its attributes and children, so
      * after the call <code>existingElement</code> is detached from the DOM. The element
      * must have a parent node.
      * @param existingElement Element to rename.
      * @param newName New name to give element.
      */
    public void renameElement(Node existingElement, String newName)
    {
        // Create an element with the new name
        Node newElement = dom.createElement(newName);

        // Copy the attributes to the new element
        NamedNodeMap attrs = existingElement.getAttributes();
        if (attrs != null)
        {
            for (int i=0; i<attrs.getLength(); i++)
            {
                Attr attr2 = (Attr)dom.importNode(attrs.item(i), true);
                newElement.getAttributes().setNamedItem(attr2);
            }
        }

        // Move all the children (appendChild detaches each child from its old parent).
        while (existingElement.hasChildNodes())
        {
            newElement.appendChild(existingElement.getFirstChild());
        }

        // Replace the old node with the new node
        existingElement.getParentNode().replaceChild(newElement, existingElement);
    }

    /** Adds the given attribute to the given node. Nodes that are not elements are ignored.
      * @param name Attribute name.
      * @param value Attribute value.
      * @param node Element to attach attribute.
      */
    public void addAttribute(String name, String value, Node node)
    {
        if (node.getNodeType() == Node.ELEMENT_NODE)
        {
            ((Element)node).setAttribute(name,value);
        }
    }

    /** Adds the given comment to the root of the DOM. Note that this method should only
      * be called once a root node has been created in the DOM.
      * @param comment Comment text.
      */
    public void addComment(String comment)
    {
        addComment(comment,dom.getDocumentElement());
    }

    /** Adds the given comment immediately before the given node, as a child of that
      * node's parent. The node must therefore already be attached to a parent.
      * @param comment Comment text.
      * @param node Element in front of which the comment is placed.
      */
    public void addComment(String comment, Node node)
    {
        node.getParentNode().insertBefore(dom.createComment(comment),node);
    }

    /** Adds text as the child of the given node.
      * @param text Text to add to node.
      * @param node Element to attach text.
      */
    public void addText(String text, Node node)
    {
        node.appendChild(dom.createTextNode(text));
    }

    /** Searches the entire DOM for a given element and returns text associated
      * with it. If more than one element with the given name exists, multiple
      * text values are returned. Element names are matched ignoring case.
      * @param elementName Element to search for.
      * @return Array of strings associated with all occurrences of
      *         the given element. Array will be 0 length if none found.
      */
    public String[] getText(String elementName)
    {
        return getText(elementName,dom);
    }

    /** Returns any text associated found in the given node or its children.
      * This is equivalent to calling <code>getText(null,node)</code>.
      * If more than one element containing text exists, multiple text values
      * are returned.
      * @param node Node from which to start search.
      * @return Array of strings associated with all occurrences of
      *         text in the node or its children. Array will be 0 length if none found.
      */
    public String[] getText(Node node)
    {
        return getText(null,node);
    }

    /** Searches for a given element and returns text associated with it. If more than one
      * element with the given name exists, multiple text values are returned. Text that
      * consists only of whitespace is ignored.
      * @param elementName Element to search for (case is ignored). If elementName is null,
      *                    search will be for all text contained within the given node.
      * @param node Node from which to start search.
      * @return Array of strings associated with all occurrences of the given element.
      *         Array will be 0 length if none found.
      */
    public String[] getText(String elementName, Node node)
    {
        List<Node> found = new ArrayList<Node>();
        searchText(elementName,node,found);
        return nodeValues(found);
    }

    /** Searches for a given node and returns text associated with
      * it. This version does not recurse to the node's children.
      * @param node Node to search.
      * @return Text of the first text or CDATA child of the node, or null if none found.
      */
    public String getNodeText(Node node)
    {
        // Look for text in child (text stored in its own node).
        NodeList children = node.getChildNodes();
        for (int i=0; i<children.getLength(); i++)
        {
            Node child = children.item(i);
            if (isTextNode(child))
            {
                return child.getNodeValue();
            }
        }

        // If we get this far, no text was found.
        return null;
    }

    /** Searches the entire DOM for a given attribute and returns the value associated with it.
      * If there is more than one occurrence of the attribute, multiple text values are returned.
      * Attribute names are matched ignoring case.
      * @param attributeName Attribute to search for.
      * @return Array of strings associated with all occurrences of the given attribute.
      *         Array will be 0 length if none found.
      */
    public String[] getAttributes(String attributeName)
    {
        return getAttributes(attributeName,dom);
    }

    /** Searches the given node and its children for a given attribute and returns the value
      * associated with it. If there is more than one occurrence of the attribute, multiple
      * text values are returned.
      * @param attributeName Attribute to search for (case is ignored).
      * @param node Node from which to start search.
      * @return Array of strings associated with all occurrences of the given attribute.
      *         Array will be 0 length if none found.
      */
    public String[] getAttributes(String attributeName, Node node)
    {
        List<Node> found = new ArrayList<Node>();
        searchAttributes(attributeName,node,found);
        return nodeValues(found);
    }

    /** Searches the given node for a given attribute and returns the value associated with it.
      * This version does not recurse to children of the given node.
      * @param attributeName Attribute to search for (case is ignored).
      * @param node Node from which to start search.
      * @return Value associated with the attribute, or null if not found.
      */
    public String getNodeAttribute(String attributeName, Node node)
    {
        // Only consider document or element nodes.
        if (!isSearchable(node))
        {
            return null;
        }

        // Search attributes associated with the node. Document nodes report a null map.
        NamedNodeMap attributes = node.getAttributes();
        if (attributes != null)
        {
            for (int i=0; i<attributes.getLength(); i++)
            {
                Node attribute = attributes.item(i);
                if (attribute.getNodeName().equalsIgnoreCase(attributeName))
                {
                    return attribute.getNodeValue();
                }
            }
        }

        // If we get this far, the attribute has not been found.
        return null;
    }

    /** Returns a list of the DOM elements with the given name. This can be
      * used to provide the base of sub-trees for searches within nested
      * elements.
      * @param name Element name to search for (case is ignored).
      * @return Array of elements with the given name. Array will be 0 length if none found.
      */
    public Node[] getElements(String name)
    {
        return getElements(name,dom);
    }

    /** Returns a list of the DOM elements with the given name that are
      * nested within the given node. This can be used to provide the
      * base of sub-trees for searches within nested elements. The order of matched elements
      * is depth-first. For breadth-first searches, use <code>getNodeElements()</code> and
      * recursively search for children of returned nodes.
      * @param name Element name to search for (case is ignored).
      * @param node Node from which to start search.
      * @return Array of elements with the given name. Array will be 0 length if none found.
      */
    public Node[] getElements(String name, Node node)
    {
        List<Node> found = new ArrayList<Node>();
        searchNode(name,node,found);
        return found.toArray(new Node[found.size()]);
    }

    /** Returns a DOM element with the given name that is the child of the
      * given node. This is a non-recursive method that only looks for immediate
      * children. Note that unlike <code>getNodeElements()</code> this method only
      * returns the first matched child of the given node.
      * @param name Element name to search for (case is ignored).
      * @param node Node from which to examine children.
      * @return Child node or null if none found.
      */
    public Node getNodeElement(String name, Node node)
    {
        // Only consider document or element nodes.
        if (!isSearchable(node))
        {
            return null;
        }

        NodeList children = node.getChildNodes();
        for (int i=0; i<children.getLength(); i++)
        {
            Node child = children.item(i);

            // Only consider element child nodes.
            if ((child.getNodeType() == Node.ELEMENT_NODE) &&
                (child.getNodeName().equalsIgnoreCase(name)))
            {
                return child;
            }
        }

        // If we get this far, no child node was found.
        return null;
    }

    /** Returns the DOM elements with the given name that are the children of the
      * given node. This is a non-recursive method that only looks for immediate
      * children. Array will be 0 length if none found.
      * @param name Element name to search for (case is ignored).
      * @param node Node from which to examine children.
      * @return Child nodes or empty Node array if none found.
      */
    public Node[] getNodeElements(String name, Node node)
    {
        // Only consider document or element nodes.
        if (!isSearchable(node))
        {
            return new Node[0];
        }

        List<Node> matchedChildren = new ArrayList<Node>();
        NodeList children = node.getChildNodes();
        for (int i=0; i<children.getLength(); i++)
        {
            Node child = children.item(i);

            // Only consider element child nodes.
            if ((child.getNodeType() == Node.ELEMENT_NODE) &&
                (child.getNodeName().equalsIgnoreCase(name)))
            {
                matchedChildren.add(child);
            }
        }
        return matchedChildren.toArray(new Node[matchedChildren.size()]);
    }

    /** Reads the given XML file and converts it into a DOM. If the file cannot be parsed
      * the DOM is replaced by an empty document; if it cannot be read at all, any
      * previously stored DOM is left untouched. Problems are reported on standard error.
      * @param fileName Name of XML file to convert.
      * @return True if converted successfully.
      */
    public boolean readXML(String fileName)
    {
        // NOTE(review): the parser is used with its default settings, so DTDs and external
        // entities are processed. Harden the factory if untrusted XML is ever read here.
        DocumentBuilder db = createBuilder();
        if (db == null)
        {
            return false;
        }
        indent = 0;

        // Try to parse the given file and store XML nodes in the DOM.
        try
        {
            dom = db.parse(new File(fileName));
        }
        catch (SAXException e)
        {
            System.err.println("Problem parsing document: "+e.getMessage());
            dom = db.newDocument();
            return false;
        }
        catch (IOException e)
        {
            System.err.println("Problem reading "+fileName);
            return false;
        }
        return true;
    }

    /** Reads the XML from the given input stream and converts it into a DOM. If the content
      * cannot be parsed the DOM is replaced by an empty document; if the stream cannot be
      * read at all, any previously stored DOM is left untouched. Problems are reported on
      * standard error.
      * @param inStream Input stream containing XML to convert.
      * @return True if converted successfully.
      */
    public boolean readXML(InputStream inStream)
    {
        // NOTE(review): the parser is used with its default settings, so DTDs and external
        // entities are processed. Harden the factory if untrusted XML is ever read here.
        DocumentBuilder db = createBuilder();
        if (db == null)
        {
            return false;
        }
        indent = 0;

        // Try to parse the given stream and store XML nodes in the DOM.
        try
        {
            dom = db.parse(inStream);
        }
        catch (SAXException e)
        {
            System.err.println("Problem parsing document: "+e.getMessage());
            dom = db.newDocument();
            return false;
        }
        catch (IOException e)
        {
            System.err.println("Problem reading from "+inStream);
            return false;
        }
        return true;
    }

    /** Displays the DOM stored within this class as an XML file with default
      * document settings on standard output.
      * @return True unless there is no DOM to display.
      */
    public boolean writeXML()
    {
        return writeXML(null,null,null);
    }

    /** Displays the DOM stored within this class as an XML file with the given document
      * settings on standard output.
      * @param version XML version, or null if default ('1.0') is to be used.
      * @param encoding XML encoding, or null if encoding is not to be specified. This only
      *                 affects the XML declaration; the console's own encoding is used for output.
      * @param standalone XML stand-alone status of XML file or null if not to be specified.
      * @return True unless there is no DOM to display.
      */
    public boolean writeXML(String version, String encoding, Boolean standalone)
    {
        if (dom == null)
        {
            System.err.println("Error: No document object model to process.");
            return false;
        }

        out = new PrintWriter(System.out);
        indent = 0;
        outputNodeAsXML(dom,version,encoding,standalone);

        // NOTE: Closing the writer to standard output closes stdout itself!
        //       So flush output rather than close it.
        out.flush();
        return true;
    }

    /** Converts the DOM stored within this class into an XML file with default document settings.
      * @param fileName Name of file to contain the XML.
      * @return true if successful XML generation.
      */
    public boolean writeXML(String fileName)
    {
        return writeXML(fileName,null,null,null);
    }

    /** Converts the DOM stored within this class into an XML file with default document settings.
      * @param outStream Output stream representing file to contain the XML.
      * @return true if successful XML generation.
      */
    public boolean writeXML(OutputStream outStream)
    {
        return writeXML(outStream,null,null,null);
    }

    /** Converts the DOM stored within this class into an XML file with the given document settings.
      * The file is written using the given encoding so that its content agrees with the XML
      * declaration; UTF-8 (the XML default) is used when no encoding is given.
      * @param fileName Name of file to contain the XML.
      * @param version XML version, or null if default ('1.0') is to be used.
      * @param encoding XML encoding, or null if encoding is not to be specified.
      * @param standalone XML stand-alone status of XML file or null if not to be specified.
      * @return true if successful XML generation.
      */
    public boolean writeXML(String fileName, String version, String encoding, Boolean standalone)
    {
        if (dom == null)
        {
            System.err.println("Error: No document object model to process.");
            return false;
        }

        // Open file for output.
        FileOutputStream fileStream = null;
        try
        {
            fileStream = new FileOutputStream(fileName);
            out = openWriter(fileStream,encoding);
        }
        catch (IOException e)
        {
            // Release the file if it was opened before the encoding turned out to be unusable.
            if (fileStream != null)
            {
                try
                {
                    fileStream.close();
                }
                catch (IOException ignored)
                {
                    // Nothing more can be done; the failure is reported below.
                }
            }
            System.err.println("Error: Problem creating XML file: "+fileName);
            return false;
        }

        return writeDocumentAndClose(version,encoding,standalone);
    }

    /** Converts the DOM stored within this class into an XML file with the given document settings.
      * The XML is written using the given encoding so that its content agrees with the XML
      * declaration; UTF-8 (the XML default) is used when no encoding is given. The stream is
      * closed once the XML has been written.
      * @param outStream Output stream representing file to contain the XML.
      * @param version XML version, or null if default ('1.0') is to be used.
      * @param encoding XML encoding, or null if encoding is not to be specified.
      * @param standalone XML stand-alone status of XML file or null if not to be specified.
      * @return true if successful XML generation.
      */
    public boolean writeXML(OutputStream outStream, String version, String encoding, Boolean standalone)
    {
        if (dom == null)
        {
            System.err.println("Error: No document object model to process.");
            return false;
        }

        // Wrap the stream for character output.
        try
        {
            out = openWriter(outStream,encoding);
        }
        catch (UnsupportedEncodingException e)
        {
            System.err.println("Error: Unsupported XML encoding: "+encoding);
            return false;
        }

        return writeDocumentAndClose(version,encoding,standalone);
    }

    // ---------------------- Private Methods --------------------------

    /** Creates a DOM parser with the default factory settings.
      * @return New document builder, or null (after reporting on standard error) if none is available.
      */
    private static DocumentBuilder createBuilder()
    {
        try
        {
            return DocumentBuilderFactory.newInstance().newDocumentBuilder();
        }
        catch (ParserConfigurationException e)
        {
            System.err.println("Problem finding an XML parser:\n"+e);
            return null;
        }
    }

    /** Wraps the given stream in a buffered character writer.
      * @param stream Stream to receive the encoded characters.
      * @param encoding Name of character encoding, or null to use UTF-8.
      * @return Writer that encodes characters onto the stream.
      * @throws UnsupportedEncodingException If the named encoding is not available.
      */
    private static PrintWriter openWriter(OutputStream stream, String encoding) throws UnsupportedEncodingException
    {
        String charsetName = (encoding == null) ? "UTF-8" : encoding;
        return new PrintWriter(new BufferedWriter(new OutputStreamWriter(stream,charsetName)));
    }

    /** Writes the whole DOM to the current output writer and closes the writer,
      * even if writing fails part way through.
      * @param version XML version, or null if default ('1.0') is to be used.
      * @param encoding XML encoding, or null if encoding is not to be specified.
      * @param standalone XML stand-alone status or null if not to be specified.
      * @return True if the writer reported no output errors.
      */
    private boolean writeDocumentAndClose(String version, String encoding, Boolean standalone)
    {
        try
        {
            // Start recursive output of the whole DOM.
            indent = 0;
            outputNodeAsXML(dom,version,encoding,standalone);

            // PrintWriter never throws on output problems, so ask it explicitly.
            return !out.checkError();
        }
        finally
        {
            out.close();
        }
    }

    /** Reports whether the given node can be searched for children or attributes.
      * @param node Node to test (may be null).
      * @return True if the node is a document or element node.
      */
    private static boolean isSearchable(Node node)
    {
        if (node == null)
        {
            return false;
        }
        int type = node.getNodeType();
        return (type == Node.DOCUMENT_NODE) || (type == Node.ELEMENT_NODE);
    }

    /** Reports whether the given node holds character data.
      * @param node Node to test.
      * @return True if the node is a text or CDATA section node.
      */
    private static boolean isTextNode(Node node)
    {
        int type = node.getNodeType();
        return (type == Node.CDATA_SECTION_NODE) || (type == Node.TEXT_NODE);
    }

    /** Extracts the values of the given nodes.
      * @param nodes Nodes whose values are required.
      * @return Node values in the same order as the given nodes.
      */
    private static String[] nodeValues(List<Node> nodes)
    {
        String[] values = new String[nodes.size()];
        int i=0;
        for (Node node : nodes)
        {
            values[i++] = node.getNodeValue();
        }
        return values;
    }

    /** Searches for a given element in the given node and updates list
      * of text within matched elements. Recursively searches for sub-nodes
      * of the given one.
      * @param element Element to search for. If null, all elements searched.
      * @param node Node to start search from.
      * @param found List to which matching non-blank text nodes are added.
      */
    private void searchText(String element, Node node, List<Node> found)
    {
        // Only consider document or element nodes.
        if (!isSearchable(node))
        {
            return;
        }

        NodeList children = node.getChildNodes();
        if ((element == null) || (node.getNodeName().equalsIgnoreCase(element)))
        {
            // Match found so look for text in children.
            for (int i=0; i<children.getLength(); i++)
            {
                Node child = children.item(i);
                if (isTextNode(child) && (child.getNodeValue().trim().length() > 0))
                {
                    found.add(child);
                }
            }
        }

        // Search child nodes.
        for (int i=0; i<children.getLength(); i++)
        {
            searchText(element,children.item(i),found);
        }
    }

    /** Searches for a given attribute in the given node and updates list
      * of attribute values. Recursively searches for sub-nodes of the given one.
      * @param element Attribute to search for.
      * @param node Node to start search from.
      * @param found List to which matching attribute nodes are added.
      */
    private void searchAttributes(String element, Node node, List<Node> found)
    {
        // Only consider document or element nodes.
        if (!isSearchable(node))
        {
            return;
        }

        // Search attributes associated with current node. Document nodes report a null map.
        NamedNodeMap attributes = node.getAttributes();
        if (attributes != null)
        {
            for (int i=0; i<attributes.getLength(); i++)
            {
                Node attribute = attributes.item(i);
                if (attribute.getNodeName().equalsIgnoreCase(element))
                {
                    found.add(attribute);
                }
            }
        }

        // Search child nodes.
        NodeList children = node.getChildNodes();
        for (int i=0; i<children.getLength(); i++)
        {
            searchAttributes(element,children.item(i),found);
        }
    }

    /** Searches for a given element in the given node and updates list
      * of elements with that name. Recursively searches for sub-nodes of the given one.
      * @param element Element to search for.
      * @param node Node to start search from.
      * @param found List to which matching elements are added.
      */
    private void searchNode(String element, Node node, List<Node> found)
    {
        // Only consider document or element nodes.
        if (!isSearchable(node))
        {
            return;
        }

        // Match found, so add node to list.
        if (node.getNodeName().equalsIgnoreCase(element))
        {
            found.add(node);
        }

        // Search children
        NodeList children = node.getChildNodes();
        for (int i=0; i<children.getLength(); i++)
        {
            searchNode(element,children.item(i),found);
        }
    }

    /** Converts the given DOM node into XML. Recursively converts
      * any child nodes.
      * @param node DOM Node to display.
      */
    private void outputNodeAsXML(Node node)
    {
        outputNodeAsXML(node,null,null,null);
    }

    /** Converts the given DOM node into XML. Recursively converts
      * any child nodes. This version allows the XML version, encoding and stand-alone
      * status to be set.
      * @param node DOM Node to display.
      * @param version XML version, or null if default ('1.0') is to be used.
      * @param encoding XML encoding, or null if encoding is not to be specified.
      * @param standalone XML stand-alone status or null if not to be specified.
      */
    private void outputNodeAsXML(Node node, String version, String encoding, Boolean standalone)
    {
        // Store node name, type and value.
        String name     = node.getNodeName();
        String rawValue = node.getNodeValue();
        String value    = makeFriendly(rawValue);
        int type        = node.getNodeType();

        // Ignore empty nodes (e.g. blank lines etc.)
        if ((value != null) && (value.trim().equals("")))
        {
            return;
        }

        switch (type)
        {
            case Node.DOCUMENT_NODE:                // Start of document.
            {
                out.print("<?xml version=\""+((version == null) ? "1.0" : version)+"\" ");

                if (encoding != null)
                {
                    out.print("encoding=\""+encoding+"\" ");
                }

                if (standalone != null)
                {
                    out.print("standalone=\""+(standalone.booleanValue() ? "yes" : "no")+"\" ");
                }
                out.println("?>");

                // Output the document's child nodes.
                NodeList children = node.getChildNodes();
                for (int i=0; i<children.getLength(); i++)
                {
                    outputNodeAsXML(children.item(i));
                }
                break;
            }

            case Node.ELEMENT_NODE:                 // Document element with attributes.
            {
                // Output opening element tag.
                indent++;
                indent();
                out.print("<"+name);

                // Output any attributes the element might have. Values are escaped so that
                // quotes, ampersands and brackets cannot break the attribute syntax.
                NamedNodeMap attributes = node.getAttributes();
                if (attributes != null)
                {
                    for (int i=0; i<attributes.getLength(); i++)
                    {
                        Node attribute = attributes.item(i);
                        out.print(" "+attribute.getNodeName()+"=\""+makeFriendly(attribute.getNodeValue())+"\"");
                    }
                }
                out.print(">");

                // Output any child nodes that exist.
                NodeList children = node.getChildNodes();
                for (int i=0; i<children.getLength(); i++)
                {
                    outputNodeAsXML(children.item(i));
                }
                break;
            }

            case Node.CDATA_SECTION_NODE:           // Display text.
            case Node.TEXT_NODE:
            {
                out.print(value);
                break;
            }

            case Node.COMMENT_NODE:                 // Comment node.
            {
                indent++;
                indent();
                // Entities are not expanded inside comments, so write the text unescaped.
                out.print("<!--"+rawValue+"-->");
                indent--;
                break;
            }

            case Node.ENTITY_REFERENCE_NODE:        // Entity reference nodes.
            {
                indent++;
                indent();
                out.print("&"+name+";");
                indent--;
                break;
            }

            case Node.PROCESSING_INSTRUCTION_NODE:  // Processing instruction.
            {
                indent++;
                indent();
                out.print("<?"+name);
                if ((rawValue != null) && (rawValue.length() > 0))
                {
                    // Entities are not expanded inside processing instructions either.
                    out.print(" "+rawValue);
                }
                out.println("?>");
                indent--;
                break;
            }

            default:
            {
                // Other node types (e.g. document type declarations) are not written.
                break;
            }
        }

        // Finally output closing tags for each element.
        if (type == Node.ELEMENT_NODE)
        {
            out.print("</"+node.getNodeName()+">");
            indent--;
            if (node.getNextSibling() == null)
            {
                indent();  // Only throw new line if this is the last sibling.
            }
        }
    }

    /** Converts a given string into XML-friendly code by replacing
      * quotes, triangular brackets etc. with their symbolic equivalent.
      * @param text Text to process.
      * @return Processed text with XML friendly symbols, or null if the given text is null.
      */
    private static String makeFriendly(String text)
    {
        if (text == null)
        {
            return null;
        }

        int numCharacters = text.length();
        StringBuilder newText = new StringBuilder(numCharacters+16);

        for (int i=0; i<numCharacters; i++)
        {
            char ch = text.charAt(i);
            switch (ch)
            {
                case '<':
                {
                    newText.append("&lt;");
                    break;
                }
                case '>':
                {
                    newText.append("&gt;");
                    break;
                }
                case '&':
                {
                    newText.append("&amp;");
                    break;
                }
                case '"':
                {
                    newText.append("&quot;");
                    break;
                }
                default:
                {
                    newText.append(ch);
                }
            }
        }
        return newText.toString();
    }

    /** Starts a new output line and indents it to the current tree depth
      * (one space for each level below the first).
      */
    private void indent()
    {
        out.println("");
        for (int i=1; i<indent; i++)
        {
            out.print(" ");
        }
    }
}