6 package de.uni_hamburg.corpora.validation;
15 import java.io.IOException;
16 import java.net.URISyntaxException;
17 import java.util.Collection;
18 import java.util.List;
19 import java.util.regex.Pattern;
20 import javax.xml.parsers.ParserConfigurationException;
21 import javax.xml.transform.TransformerException;
22 import javax.xml.xpath.XPathExpressionException;
23 import org.jdom.Attribute;
24 import org.jdom.Document;
25 import org.jdom.Element;
26 import org.jdom.JDOMException;
27 import org.jdom.xpath.XPath;
28 import org.xml.sax.SAXException;
29 import static org.apache.commons.lang3.StringEscapeUtils.escapeHtml4;
// Boolean option of this checker; starts out as false.
// NOTE(review): no read or write of this flag is visible in this excerpt. Presumably it
// records whether the search pattern was found in the data - confirm against the full class.
38 boolean containsRegEx =
false;
// Default text substituted for every match of the search pattern; it is passed as the
// second argument of String.replaceAll when a fix is applied.
// The value is the acute accent U+00B4. It is written as a Unicode escape on purpose:
// the literal had been stored as the two characters U+0E22 U+0E14, which is exactly what
// the UTF-8 bytes of U+00B4 (0xC2 0xB4) turn into when decoded as TIS-620/windows-874.
// The escape keeps the character independent of the source-file encoding.
40 String replacement =
"\u00B4";
// XPath expression selecting the nodes that are checked. It is compiled with
// XPath.newInstance(...) and evaluated against the document via selectNodes(...).
// The default "//*" matches every element of the document.
42 String xpathContext =
"//*";
57 throws SAXException, IOException, ParserConfigurationException, URISyntaxException, JDOMException, TransformerException, XPathExpressionException {
61 Pattern replacePattern = Pattern.compile(replace);
62 context = XPath.newInstance(xpathContext);
63 List allContextInstances = context.selectNodes(doc);
66 if (!allContextInstances.isEmpty()) {
67 for (
int i = 0; i < allContextInstances.size(); i++) {
68 Object o = allContextInstances.get(i);
69 if (o instanceof Element) {
70 Element e = (Element) o;
72 if (replacePattern.matcher(s).find()) {
75 String snew = s.replaceAll(replace, replacement);
78 stats.
addFix(
function, cd,
"Replaced " + escapeHtml4(replace) +
" with " + escapeHtml4(replacement) +
" at " + escapeHtml4(xpathContext) +
" here: " + escapeHtml4(s) +
" with " + escapeHtml4(snew));
80 System.out.println(
"CorpusData file is containing " + escapeHtml4(replace) +
" at " + escapeHtml4(xpathContext) +
": " + escapeHtml4(s));
81 stats.
addCritical(
function, cd,
"CorpusData file is containing " + escapeHtml4(replace) +
" at " + escapeHtml4(xpathContext) +
": " + escapeHtml4(s));
84 }
else if (o instanceof Attribute) {
85 Attribute a = (Attribute) o;
88 System.out.println(
"Attributes cannot be replaced yet at " + escapeHtml4(xpathContext));
89 stats.
addCritical(
function, cd,
"Attributes cannot be replaced yet at " + escapeHtml4(xpathContext));
92 stats.
addWarning(
function, cd,
"Xpath " + escapeHtml4(xpathContext) +
" does not lead to Element or Attribute");
98 stats.
addCorrect(
function, cd,
"CorpusData file does not contain " + escapeHtml4(replace) +
" at " + escapeHtml4(xpathContext));
102 cio.
write(cd, cd.getURL());
104 stats.
addCorrect(
function, cd,
"CorpusData file does not contain anything at " + escapeHtml4(xpathContext));
114 Class cl3 = Class.forName(
"de.uni_hamburg.corpora.ComaData");
115 IsUsableFor.add(cl3);
117 Class cl = Class.forName(
"de.uni_hamburg.corpora.BasicTranscriptionData");
120 }
catch (ClassNotFoundException ex) {
139 if (s.equalsIgnoreCase(
"true") || s.equalsIgnoreCase(
"wahr") || s.equalsIgnoreCase(
"ja")) {
141 }
else if (s.equalsIgnoreCase(
"false") || s.equalsIgnoreCase(
"falsch") || s.equalsIgnoreCase(
"nein")) {
144 report.
addCritical(
function, cd,
"Parameter coma not recognized: " + escapeHtml4(s));
154 String description =
"This class issues warnings if a file contains a certain RegEx and can also replace";
/**
 * Corpus-level entry point: applies the per-file check to the data of a whole corpus.
 *
 * The corpus' Coma data is fetched with {@code c.getComaData()} and processed by
 * {@code function(cd, fix)}; the report of {@code function(btd, fix)} is then merged
 * into that result.
 *
 * NOTE(review): only part of this method is visible in this excerpt. Where {@code btd}
 * comes from and which object is finally returned should be confirmed against the
 * full source.
 *
 * @param c   the corpus whose data is processed
 * @param fix passed through unchanged to each {@code function(..., fix)} call
 */
159 public Report function(
Corpus c, Boolean fix)
throws SAXException, IOException, ParserConfigurationException, URISyntaxException, JDOMException, TransformerException, XPathExpressionException {
162 cd = c.getComaData();
163 stats =
// start the overall report with the result for the Coma file
function(cd, fix);
167 stats.
merge(
// fold the result for btd into the overall report
function(btd, fix));
Collection< Class<?extends CorpusData > > getIsUsableFor()
void setReplacement(String s)
void addCritical(String description)
CorpusDataRegexReplacer()
void addWarning(String statId, String description)
void addCorrect(String statId, String description)
static org.jdom.Document String2JdomDocument(String stringRespresentingDocument)
void setReplace(String s)
static String JdomDocument2String(org.jdom.Document jdomDocument)
void addException(Throwable e, String description)
void write(CorpusData cd, URL url)
void addFix(String statId, CorpusData cd, String description)
void setXpathContext(String s)