1 package de.uni_hamburg.corpora.utilities;
9 import java.io.ByteArrayInputStream;
10 import java.io.IOException;
11 import java.io.UnsupportedEncodingException;
12 import java.util.regex.Pattern;
13 import javax.xml.parsers.DocumentBuilderFactory;
14 import javax.xml.parsers.ParserConfigurationException;
15 import javax.xml.transform.OutputKeys;
16 import javax.xml.transform.TransformerException;
17 import javax.xml.xpath.XPath;
18 import javax.xml.xpath.XPathConstants;
19 import javax.xml.xpath.XPathExpressionException;
20 import javax.xml.xpath.XPathFactory;
21 import org.w3c.dom.Document;
22 import org.w3c.dom.Node;
23 import org.w3c.dom.NodeList;
24 import org.xml.sax.InputSource;
25 import org.xml.sax.SAXException;
// Holds the path string "/xsl/pretty-print-sort-elements.xsl".
// NOTE(review): presumably the location of the default sorting/pretty-print stylesheet;
// the code that reads this value is not visible here -- confirm how it is loaded.
34 String xslLocation =
"/xsl/pretty-print-sort-elements.xsl";
/**
 * Convenience overload of {@code indent}: forwards {@code xml} and
 * {@code suppressedElements} unchanged to the three-argument overload and supplies
 * {@code xslString} as the stylesheet argument.
 *
 * @param xml                the XML document as a string
 * @param suppressedElements passed through to the three-argument overload as is
 * @return whatever the three-argument overload returns
 * @throws TransformerException         propagated from the three-argument overload
 * @throws ParserConfigurationException propagated from the three-argument overload
 * @throws UnsupportedEncodingException propagated from the three-argument overload
 * @throws SAXException                 propagated from the three-argument overload
 * @throws IOException                  propagated from the three-argument overload
 * @throws XPathExpressionException     propagated from the three-argument overload
 */
48 public String
indent(String xml, String suppressedElements)
throws TransformerException, ParserConfigurationException, UnsupportedEncodingException, SAXException, IOException, XPathExpressionException {
// NOTE(review): xslString is not declared in the visible lines (presumably a field
// of the enclosing class) -- confirm where it is initialised.
51 return indent(xml, suppressedElements, xslString);
/**
 * Pretty-prints an XML string.
 *
 * Visible steps: the input is parsed into a DOM document, whitespace-only text
 * nodes are removed, the document is run through the transformer {@code xt}
 * (with or without the stylesheet in {@code xslString}), and the resulting
 * string is post-processed by a fixed sequence of regular-expression rewrites.
 * The rewrites are applied in order and later ones see the output of earlier ones.
 *
 * NOTE(review): the declarations of {@code xt} and {@code xmlString} are not in
 * the visible lines; presumably {@code xmlString} is the serialised, cleaned DOM
 * document -- confirm.
 *
 * @param xml                the XML document as a string; also inspected for an
 *                           XML declaration ("&lt;?xml")
 * @param suppressedElements not referenced in the visible lines -- TODO confirm use
 * @param xslString          stylesheet argument; when it equals the empty string the
 *                           single-argument {@code xt.transform} is called instead
 * @return the transformed and regex-post-processed XML string
 * @throws TransformerException         declared; raising statement not visible here
 * @throws ParserConfigurationException declared; presumably from building the parser
 * @throws UnsupportedEncodingException if the "utf-8" charset name is not supported
 * @throws SAXException                 if the input cannot be parsed
 * @throws IOException                  if reading the in-memory input stream fails
 * @throws XPathExpressionException     if the XPath expression cannot be evaluated
 */
63 public String
indent(String xml, String suppressedElements, String xslString)
throws TransformerException, ParserConfigurationException, UnsupportedEncodingException, SAXException, IOException, XPathExpressionException {
// Parse the input string from its UTF-8 bytes into a DOM document.
67 Document document = DocumentBuilderFactory.newInstance()
69 .parse(
new InputSource(
new ByteArrayInputStream(xml.getBytes(
"utf-8"))));
// Select every text node whose normalized content is empty, i.e. whitespace-only
// text nodes.
73 XPath xPath = XPathFactory.newInstance().newXPath();
74 NodeList nodeList = (NodeList) xPath.evaluate(
"//text()[normalize-space()='']",
76 XPathConstants.NODESET);
// Detach each selected whitespace-only text node from its parent.
78 for (
int i = 0; i < nodeList.getLength(); ++i) {
79 Node node = nodeList.item(i);
80 node.getParentNode().removeChild(node);
// Keep the XML declaration in the output only if the input string contained one:
// "no" (do not omit) when "<?xml" occurs in xml, otherwise "yes" (omit).
94 xt.
setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, xml.indexOf(
"<?xml") >= 0 ?
"no" :
"yes");
100 String prettyXmlString;
// An empty xslString selects the transform call without a stylesheet argument.
101 if(xslString.equals(
"")){
102 prettyXmlString = xt.
transform(xmlString);
// Otherwise the stylesheet string is passed along (presumably the else branch; the
// "else" token itself is not in the visible lines).
105 prettyXmlString = xt.
transform(xmlString, xslString);
// r1: insert a single space before "/>" in self-closing tags where the character
// directly before "/>" is not whitespace.
112 Pattern r1 = Pattern.compile(
"<([^>]+)([^>\\s])/>", Pattern.DOTALL);
113 prettyXmlString = r1.matcher(prettyXmlString).replaceAll(
"<$1$2 />");
// r2: wrap whitespace-only content of <nts> elements in a CDATA section.
116 Pattern r2 = Pattern.compile(
"<nts([^>]*)>([\\s]+)</nts>", Pattern.DOTALL);
117 prettyXmlString = r2.matcher(prettyXmlString).replaceAll(
"<nts$1><![CDATA[$2]]></nts>");
// r2a: same CDATA wrapping for whitespace-only <event> content.
120 Pattern r2a = Pattern.compile(
"<event([^>]*)>([\\s]+)</event>", Pattern.DOTALL);
121 prettyXmlString = r2a.matcher(prettyXmlString).replaceAll(
"<event$1><![CDATA[$2]]></event>");
// r2b: same CDATA wrapping for whitespace-only <ts> content.
124 Pattern r2b = Pattern.compile(
"<ts([^>]*)>([\\s]+)</ts>", Pattern.DOTALL);
125 prettyXmlString = r2b.matcher(prettyXmlString).replaceAll(
"<ts$1><![CDATA[$2]]></ts>");
// r2c: same CDATA wrapping for whitespace-only <ta> content.
128 Pattern r2c = Pattern.compile(
"<ta([^>]*)>([\\s]+)</ta>", Pattern.DOTALL);
129 prettyXmlString = r2c.matcher(prettyXmlString).replaceAll(
"<ta$1><![CDATA[$2]]></ta>");
// r2d: same CDATA wrapping for whitespace-only <ats> content.
132 Pattern r2d = Pattern.compile(
"<ats([^>]*)>([\\s]+)</ats>", Pattern.DOTALL);
133 prettyXmlString = r2d.matcher(prettyXmlString).replaceAll(
"<ats$1><![CDATA[$2]]></ats>");
// r3: for <event> start tags that carry exactly "end" followed by "start", swap the
// two attributes so that "start" comes first.
136 Pattern r3 = Pattern.compile(
"<event\\s*(end=\"[^\">]*\")\\s+(start=\"[^\">]*\")\\s*>", Pattern.DOTALL);
137 prettyXmlString = r3.matcher(prettyXmlString).replaceAll(
"<event $2 $1>");
// r4: for <tier> tags whose attributes appear exactly in the order category,
// display-name, id, speaker, type, reorder them to id, speaker, category, type,
// display-name; an optional trailing "/" (group 6) is kept.
139 Pattern r4 = Pattern.compile(
"<tier\\s+(category=\"[^\">]*\")\\s+(display\\-name=\"[^\">]*\")\\s+(id=\"[^\">]*\")\\s+(speaker=\"[^\">]*\")\\s+(type=\"[^\">]*\")\\s*(/?)>", Pattern.DOTALL);
140 prettyXmlString = r4.matcher(prettyXmlString).replaceAll(
"<tier $3 $4 $1 $5 $2 $6>");
// r5: expand self-closing tier, event, ud-meta-information, languages-used and
// ud-speaker-information tags into an explicit open/close pair. Whitespace before
// "/>" is dropped; because of [^/>], tags whose attribute text contains "/" are
// left unchanged.
143 Pattern r5 = Pattern.compile(
"<(tier|event|ud\\-meta\\-information|languages\\-used|ud\\-speaker\\-information)([^/>]*?)\\s*/>", Pattern.DOTALL);
144 prettyXmlString = r5.matcher(prettyXmlString).replaceAll(
"<$1$2></$1>");
146 return prettyXmlString;
String indent(String xml, String suppressedElements)
static String InputStream2String(InputStream is)
String indent(String xml, String suppressedElements, String xslString)
static String W3cDocument2String(org.w3c.dom.Document doc)