6 package de.uni_hamburg.corpora.validation;
13 import java.io.IOException;
14 import java.util.Collection;
15 import java.util.regex.Pattern;
16 import org.jdom.Document;
17 import org.jdom.JDOMException;
18 import org.jdom.xpath.XPath;
19 import org.xml.sax.SAXException;
21 import java.net.URISyntaxException;
22 import java.util.List;
23 import javax.xml.parsers.ParserConfigurationException;
24 import javax.xml.transform.TransformerException;
25 import javax.xml.xpath.XPathExpressionException;
26 import org.jdom.Element;
27 import static org.apache.commons.lang3.StringEscapeUtils.escapeHtml4;
// Starts out false. Presumably records that an event containing a line break
// was seen, but no assignment to it is visible in this excerpt -- TODO confirm.
40 boolean linebreak =
false;
// XPath expression handed to XPath.newInstance(...) by the checking code;
// "//event" matches every element named "event" at any depth of the document.
41 String xpathContext =
"//event";
// NOTE(review): the method signature and several interior lines (declaration
// of s, doc, stats, cio and the branch structure) are not part of this excerpt.
57 throws SAXException, IOException, ParserConfigurationException, URISyntaxException, JDOMException, TransformerException, XPathExpressionException {
// Matches a single carriage-return or line-feed character.
60 Pattern replacePattern = Pattern.compile(
"[\r\n]");
// Compile the XPath expression and collect every node it selects in doc.
61 context = XPath.newInstance(xpathContext);
62 List allContextInstances = context.selectNodes(doc);
// Only walk the result list when the query produced at least one hit.
65 if (!allContextInstances.isEmpty()) {
66 for (
int i = 0; i < allContextInstances.size(); i++) {
67 Object o = allContextInstances.get(i);
// The result list is untyped, so anything that is not an Element is skipped.
68 if (o instanceof Element) {
69 Element e = (Element) o;
// s is assigned outside this excerpt; presumably the text of e -- TODO confirm.
71 if (replacePattern.matcher(s).find()) {
// Strip every carriage return and line feed from s.
// NOTE(review): this is the same character class as replacePattern;
// replacePattern.matcher(s).replaceAll("") would reuse the compiled pattern.
74 String snew = s.replaceAll(
"[\r\n]",
"");
// NOTE(review): org.jdom.Document.toString() returns a short debugging
// description, not the serialized XML -- confirm that this is really the
// string that should be stored (XMLOutputter.outputString serializes).
77 cd.updateUnformattedString(doc.toString());
// Write cd back to the URL it reports for itself.
78 cio.
write(cd, cd.getURL());
// Record the fix, quoting the HTML-escaped text before and after stripping.
79 stats.
addFix(
function, cd,
"Removed line ending in an event: " + escapeHtml4(s) +
" with " + escapeHtml4(snew));
// Report the offending (HTML-escaped) text on stdout and as a critical entry.
// Presumably the report-only path taken when fix is false -- the enclosing
// else is not visible here, TODO confirm.
81 System.out.println(
"Exb is containing line ending in an event: " + escapeHtml4(s));
82 stats.
addCritical(
function, cd,
"Exb is containing line ending in an event: " + escapeHtml4(s));
// Positive entry for the case that no line ending was found; the condition
// guarding it is not visible in this excerpt.
88 stats.
addCorrect(
function, cd,
"CorpusData file does not contain line ending in an event");
// Positive entry for the case that the XPath query matched nothing;
// presumably the else branch of the isEmpty() check above -- TODO confirm.
91 stats.
addCorrect(
function, cd,
"CorpusData file does not contain any event");
// Look the BasicTranscriptionData class up by its fully qualified name at
// runtime. What is done with cl afterwards is not visible in this excerpt.
104 Class cl = Class.forName(
"de.uni_hamburg.corpora.BasicTranscriptionData");
106 }
// Class.forName throws ClassNotFoundException when the named class cannot be
// located; the handler body lies outside this excerpt.
catch (ClassNotFoundException ex) {
// Human-readable summary of this checker, assembled from three literals.
// Where the string is consumed is not visible in this excerpt.
118 String description =
"This class issues warnings if the exb file contains " 119 +
"linebreaks or fixes linebreaks in the events and adds those " 120 +
"warnings to the report which it returns.";
/**
 * Runs the per-file check on every basic transcription of a corpus.
 *
 * Iterates over {@code c.getBasicTranscriptionData()} and merges the result
 * of {@code function(cdata, fix)} for each item into {@code stats}, which is
 * declared outside this excerpt.
 *
 * @param c corpus whose basic transcription data is iterated
 * @param fix passed through unchanged to the per-file check
 * @return a Report; the return statement lies outside this excerpt
 * @throws SAXException declared; raised by code outside this excerpt
 * @throws IOException declared; raised by code outside this excerpt
 * @throws ParserConfigurationException declared; raised by code outside this excerpt
 * @throws URISyntaxException declared; raised by code outside this excerpt
 * @throws JDOMException declared; raised by code outside this excerpt
 * @throws TransformerException declared; raised by code outside this excerpt
 * @throws XPathExpressionException declared; raised by code outside this excerpt
 */
125 public Report function(
Corpus c, Boolean fix)
throws SAXException, IOException, ParserConfigurationException, URISyntaxException, JDOMException, TransformerException, XPathExpressionException {
// One per-file check per basic transcription; each result is merged into stats.
127 for (
CorpusData cdata : c.getBasicTranscriptionData()) {
128 stats.
merge(
function(cdata, fix));
Collection< Class<?extends CorpusData > > getIsUsableFor()
void addCritical(String description)
ExbEventLinebreaksChecker()
void addCorrect(String statId, String description)
static org.jdom.Document String2JdomDocument(String stringRespresentingDocument)
void addException(Throwable e, String description)
void write(CorpusData cd, URL url)
void addFix(String statId, CorpusData cd, String description)