6 package de.uni_hamburg.corpora.validation;
15 import java.io.ByteArrayInputStream;
16 import java.io.IOException;
17 import java.io.StringWriter;
18 import java.io.UnsupportedEncodingException;
19 import java.net.URISyntaxException;
20 import java.util.Collection;
21 import javax.xml.parsers.DocumentBuilderFactory;
22 import javax.xml.parsers.ParserConfigurationException;
23 import javax.xml.transform.OutputKeys;
24 import javax.xml.transform.Transformer;
25 import javax.xml.transform.TransformerException;
26 import javax.xml.transform.TransformerFactory;
27 import javax.xml.transform.dom.DOMSource;
28 import javax.xml.transform.stream.StreamResult;
29 import javax.xml.xpath.XPath;
30 import javax.xml.xpath.XPathConstants;
31 import javax.xml.xpath.XPathExpressionException;
32 import javax.xml.xpath.XPathFactory;
33 import org.exmaralda.partitureditor.jexmaralda.JexmaraldaException;
34 import org.jdom.JDOMException;
35 import org.w3c.dom.Document;
36 import org.w3c.dom.DocumentType;
37 import org.w3c.dom.Node;
38 import org.w3c.dom.NodeList;
39 import org.xml.sax.InputSource;
40 import org.xml.sax.SAXException;
// Most recently computed indented form of a corpus document; starts out empty.
// It is assigned in CorpusDataIsAlreadyPretty and later handed to cio.write and
// cd.updateUnformattedString in function(CorpusData, Boolean).
51 String prettyCorpusData =
"";
/**
 * Checks a single corpus document for pretty printing and records the outcome in a report.
 *
 * Four outcomes are visible in this method: a "correct" entry when the document is already
 * pretty printed, a critical entry when no unformatted string can be obtained, a "fix" entry
 * after the indented text has been written back, and a critical entry stating that pretty
 * printing is still needed.
 *
 * NOTE(review): apart from the null check, the conditions that select between these outcomes
 * are not visible in this excerpt; presumably CorpusDataIsAlreadyPretty and the fix flag
 * decide - confirm against the full file.
 *
 * @param cd  the corpus document to check
 * @param fix presumably whether the indented text should be written back - TODO confirm
 * @return the report holding the recorded outcome (return statement not visible here)
 */
58 public Report function(
CorpusData cd, Boolean fix)
throws IOException, TransformerException, ParserConfigurationException, SAXException, XPathExpressionException {
// Outcome: the document is already in its pretty-printed form.
61 report.
addCorrect(
function, cd,
"Already pretty printed.");
// Outcome: the document yields no unformatted string at all.
64 if (cd.toUnformattedString() == null) {
65 report.
addCritical(
function, cd,
"Could not create the unformatted String!");
// Outcome: write the indented text held in prettyCorpusData to the document's URL,
// pass the same text to cd.updateUnformattedString, and record the fix.
69 cio.
write(prettyCorpusData, cd.getURL());
70 cd.updateUnformattedString(prettyCorpusData);
71 report.
addFix(
function, cd,
"CorpusData was pretty printed and saved.");
// Outcome: the document differs from its pretty-printed form and was not rewritten.
75 report.
addCritical(
function, cd,
"Needs to be pretty printed.");
// Looks up four CorpusData implementation classes by their fully qualified names.
// NOTE(review): the enclosing method header is not part of this excerpt; presumably this
// is the body of getIsUsableFor and the classes are collected into its result - confirm.
// NOTE(review): the locals use the raw type Class; Class<?> would avoid raw-type warnings.
84 Class cl = Class.forName(
"de.uni_hamburg.corpora.BasicTranscriptionData");
86 Class cl2 = Class.forName(
"de.uni_hamburg.corpora.UnspecifiedXMLData");
88 Class cl3 = Class.forName(
"de.uni_hamburg.corpora.ComaData");
90 Class cl4 = Class.forName(
"de.uni_hamburg.corpora.SegmentedTranscriptionData");
92 }
// Class.forName throws ClassNotFoundException when a name cannot be resolved;
// what the handler does with it is outside this excerpt.
catch (ClassNotFoundException ex) {
/**
 * Tells whether a corpus document's unformatted string already equals its indented form.
 *
 * Side effect: the indented text produced by pp.indent is stored in the prettyCorpusData
 * field, so it is available to callers after this method returns.
 *
 * NOTE(review): what is returned when toUnformattedString() is null is not visible in this
 * excerpt. UnsupportedEncodingException is a subclass of IOException, so listing both in
 * the throws clause is redundant.
 *
 * @param cd the corpus document to compare
 * @return true when the unformatted string equals the indented text
 */
98 public boolean CorpusDataIsAlreadyPretty(
CorpusData cd)
throws TransformerException, ParserConfigurationException, SAXException, IOException, XPathExpressionException, UnsupportedEncodingException {
// Only attempt indentation when the document provides an unformatted string.
104 if (cd.toUnformattedString() != null) {
// Indent the text; "event" is passed as the second argument of pp.indent
// (presumably the elements whose content must not be re-indented - confirm).
109 prettyCorpusData = pp.
indent(cd.toUnformattedString(),
"event");
// Pretty means: indenting changes nothing.
111 return cd.toUnformattedString().equals(prettyCorpusData);
// Human-readable summary of what this class does.
// NOTE(review): the enclosing method is not visible here; presumably this value is
// returned to the caller as the checker's description - confirm.
127 String description =
"This class takes XML corpusdata and formats it in the same way to avoid merge conflicts. ";
// Re-serializes an XML string with indentation using DOM, XPath and a Transformer.
// NOTE(review): the method header is not part of this excerpt; presumably this is the body
// of toPrettyString(String xml, int indent) - confirm.
// Step 1: parse the XML text (encoded as UTF-8 bytes) into a DOM document.
135 Document document = DocumentBuilderFactory.newInstance()
136 .newDocumentBuilder()
137 .parse(
new InputSource(
new ByteArrayInputStream(xml.getBytes(
"utf-8"))));
// Step 2: normalize the tree, then select every text node whose normalized content
// is empty, i.e. text consisting only of whitespace.
140 document.normalize();
141 XPath xPath = XPathFactory.newInstance().newXPath();
142 NodeList nodeList = (NodeList) xPath.evaluate(
"//text()[normalize-space()='']",
144 XPathConstants.NODESET);
// Step 3: detach each selected whitespace-only text node from its parent
// (presumably so old indentation does not mix with the new one - confirm).
146 for (
int i = 0; i < nodeList.getLength(); ++i) {
147 Node node = nodeList.item(i);
148 node.getParentNode().removeChild(node);
// Step 4: configure a transformer for UTF-8 output that keeps the XML declaration
// and indents the result.
152 TransformerFactory transformerFactory = TransformerFactory.newInstance();
154 Transformer transformer = transformerFactory.newTransformer();
155 transformer.setOutputProperty(OutputKeys.ENCODING,
"UTF-8");
156 transformer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION,
"no");
157 transformer.setOutputProperty(OutputKeys.INDENT,
"yes");
// Step 5: carry the original DOCTYPE identifiers over to the output, when present.
160 DocumentType doctype = document.getDoctype();
// NOTE(review): this prints the doctype to standard output on every call; it looks like
// leftover debugging output - confirm whether it should go to a logger or be removed.
161 System.out.println(doctype);
162 if (doctype != null && doctype.getSystemId() != null) {
163 transformer.setOutputProperty(OutputKeys.DOCTYPE_SYSTEM, doctype.getSystemId());
165 if (doctype != null && doctype.getPublicId() != null) {
166 transformer.setOutputProperty(OutputKeys.DOCTYPE_PUBLIC, doctype.getPublicId());
// Step 6: serialize the DOM into a string and return it.
169 StringWriter stringWriter =
new StringWriter();
170 transformer.transform(
new DOMSource(document),
new StreamResult(stringWriter));
171 return stringWriter.toString();
172 }
// Any failure above is rethrown unchecked with the original exception kept as the cause.
// NOTE(review): catching Exception is broad; narrower types would make failures clearer.
catch (Exception e) {
173 throw new RuntimeException(e);
/**
 * Corpus-level entry point: runs the per-document check and merges its report into stats.
 *
 * NOTE(review): the iteration that binds cdata is not visible in this excerpt; presumably
 * it loops over the corpus documents this checker is usable for - confirm.
 *
 * @param c   the corpus to check
 * @param fix passed through unchanged to function(CorpusData, Boolean)
 * @return presumably the merged report held in stats (return statement not visible here)
 */
178 public Report function(
Corpus c, Boolean fix)
throws SAXException, IOException, ParserConfigurationException, URISyntaxException, JDOMException, TransformerException, XPathExpressionException, JexmaraldaException {
// Fold the single-document report into the accumulated corpus report.
181 stats.
merge(
function(cdata, fix));
boolean CorpusDataIsAlreadyPretty(CorpusData cd)
String indent(String xml, String suppressedElements)
void addCritical(String description)
Collection< Class<?extends CorpusData > > getIsUsableFor()
static String toPrettyString(String xml, int indent)
void addCorrect(String statId, String description)
void addException(Throwable e, String description)
void write(CorpusData cd, URL url)
void addFix(String statId, CorpusData cd, String description)