1 package de.uni_hamburg.corpora.validation;
11 import java.io.IOException;
12 import java.io.UnsupportedEncodingException;
13 import java.net.MalformedURLException;
14 import java.net.URISyntaxException;
16 import java.nio.file.Paths;
17 import java.security.NoSuchAlgorithmException;
18 import java.util.Collection;
19 import java.util.List;
20 import java.util.Scanner;
21 import javax.xml.
transform.TransformerException;
22 import org.exmaralda.partitureditor.jexmaralda.JexmaraldaException;
23 import org.jdom.JDOMException;
24 import org.xml.sax.SAXException;
25 import javax.xml.parsers.ParserConfigurationException;
26 import javax.xml.xpath.XPathExpressionException;
27 import org.exmaralda.partitureditor.fsm.FSMException;
28 import org.jdom.Document;
29 import org.jdom.Element;
30 import org.jdom.xpath.XPath;
// Resource path of the XSLT stylesheet that defines the checks.
// NOTE(review): presumably replaceable via setXSLresource(String); the setter body is not visible here.
42 String xslresource =
"/xsl/nslc-checks.xsl";
// Bracketed set of utterance end symbols. It is handed to the stylesheet as the
// "UTTERANCEENDSYMBOL" parameter and is rebuilt by setUtteranceEndSymbols(String).
44 String UTTERANCEENDSYMBOLS =
"[.!?…:]";
/**
 * Runs the stylesheet-defined checks on a single corpus file and turns every
 * line of the transformation output into a report entry.
 *
 * NOTE(review): this listing is missing several source lines (among them the
 * declarations of r, xt, xsl and message, the case labels of the switch, the
 * return statement and all closing braces). The comments below describe only
 * the statements that are visible.
 *
 * @param cd  the corpus data; its saveable string form is what gets transformed
 * @param fix not referenced in any visible line; TODO confirm whether it is used
 * @return presumably the report r that the visible lines add entries to; verify
 *         against the complete source
 */
53 public Report function(
CorpusData cd, Boolean fix)
throws SAXException, JexmaraldaException, TransformerException, ParserConfigurationException, IOException, XPathExpressionException, MalformedURLException, JDOMException, URISyntaxException {
// Base name of the file: the text between the last '/' and the last '.' of the URL's file part.
// NOTE(review): a file part without '.' makes lastIndexOf return -1; confirm that cannot happen.
56 filename = cd.getURL().getFile().subSequence(cd.getURL().getFile().lastIndexOf(
'/') + 1, cd.getURL().getFile().lastIndexOf(
'.')).toString();
// Entered only when a non-empty FSM path is set; the guarded statements are not visible here.
59 if (!FSMpath.equals(
"")) {
// Hand the current set of utterance end symbols to the stylesheet.
69 xt.
setParameter(
"UTTERANCEENDSYMBOL", UTTERANCEENDSYMBOLS);
// Apply the stylesheet to the serialized corpus data; the output is kept as a String.
71 String result = xt.
transform(cd.toSaveableString(), xsl);
// The output is consumed line by line; each line is split into ';'-separated fields.
74 Scanner scanner =
new Scanner(result);
77 while (scanner.hasNextLine()) {
78 String line = scanner.nextLine();
// Limit -1 keeps trailing empty fields, so they count towards the expected five.
81 String[] lineParts = line.split(
";", -1);
// Any other field count cannot be interpreted: report it as critical, with all
// parts concatenated (without separator) into the message.
82 if (lineParts.length != 5) {
84 for (String s : lineParts) {
85 message = message + s;
87 r.
addCritical(lineParts[0], cd,
"There was an exception while creating the error probably because of a semicolon or newline in an event: " + message);
// Field 1 selects the kind of report entry (upper-cased before matching);
// the case labels themselves are missing from this listing.
89 switch (lineParts[1].toUpperCase()) {
// For .exb files the finding is additionally added to exmaError, passing
// fields 0, 3, 4 and 2 together with the file part of the URL.
98 if (cd.getFilename().endsWith(
".exb")) {
99 exmaError.addError(lineParts[0], cd.getURL().getFile(), lineParts[3], lineParts[4],
false, lineParts[2]);
// Field 0 and field 2 are passed as the first and last argument of the report entry.
104 r.
addNote(lineParts[0], cd, lineParts[2]);
108 if (cd.getFilename().endsWith(
".exb")) {
109 exmaError.addError(lineParts[0], cd.getURL().getFile(), lineParts[3], lineParts[4],
false, lineParts[2]);
// Presumably the default branch: an unknown value in field 1 is reported as critical.
113 r.
addCritical(lineParts[0], cd,
"(Unrecognized report type): " + lineParts[2]);
114 if (cd.getFilename().endsWith(
".exb")) {
115 exmaError.addError(lineParts[0], cd.getURL().getFile(), lineParts[3], lineParts[4],
false, lineParts[2]);
// NOTE(review): the enclosing method header is not part of this listing; this is
// presumably the body of getIsUsableFor(). Confirm against the complete source.
// The data classes are looked up by their fully qualified names at run time.
135 Class cl = Class.forName(
"de.uni_hamburg.corpora.BasicTranscriptionData");
137 Class cl1 = Class.forName(
"de.uni_hamburg.corpora.ComaData");
// Adds the ComaData class to IsUsableFor (a matching add for cl is not visible here).
138 IsUsableFor.add(cl1);
141 }
catch (ClassNotFoundException ex) {
// A failed lookup is recorded in the report; no rethrow is visible in this fragment.
142 report.
addException(ex,
"unknown class not found error");
/**
 * Rebuilds UTTERANCEENDSYMBOLS from the char elements of the char-set with id
 * 'UtteranceEndSymbols' found in an FSM XML document.
 *
 * NOTE(review): the line that creates fsmdoc and all closing braces are missing
 * from this listing; fsmdoc is presumably parsed from url. Confirm.
 * NOTE(review): the symbols are concatenated verbatim between '[' and ']'. If
 * the value is consumed as a regular-expression character class, symbols such
 * as ']' or '^' would need escaping. Verify against the consumer.
 *
 * @param fsmPath file system path of the FSM file
 */
147 public void setUtteranceEndSymbols(String fsmPath)
throws MalformedURLException, JDOMException, IOException, URISyntaxException {
// Start from an empty string; every symbol found below is appended to it.
150 UTTERANCEENDSYMBOLS =
"";
// Turn the file system path into a URL.
152 URL url = Paths.get(fsmPath).toUri().toURL();
// Select every char element below the char-set whose id is 'UtteranceEndSymbols'.
155 XPath xpath = XPath.newInstance(
"//fsm/char-set[@id='UtteranceEndSymbols']/char");
156 List allContextInstances = xpath.selectNodes(fsmdoc);
157 if (!allContextInstances.isEmpty()) {
158 for (
int i = 0; i < allContextInstances.size(); i++) {
159 Object o = allContextInstances.get(i);
// The list is untyped, so each hit is checked before it is cast to Element.
160 if (o instanceof Element) {
161 Element e = (Element) o;
162 String symbol = e.getText();
// Prints each symbol to standard out.
163 System.out.println(symbol);
164 UTTERANCEENDSYMBOLS = UTTERANCEENDSYMBOLS + symbol;
// Wrap the collected symbols in brackets, the same form as the field's initial value.
169 UTTERANCEENDSYMBOLS =
"[" + UTTERANCEENDSYMBOLS +
"]";
170 System.out.println(UTTERANCEENDSYMBOLS);
// Human-readable summary of what this checker does.
// NOTE(review): the enclosing method is not visible in this listing.
183 String description =
"This class runs many little checks specified" 184 +
" in a XSLT stylesheet and adds them to the report. ";
/**
 * Runs the checks on a whole corpus by applying function(CorpusData, Boolean)
 * to its parts and merging the resulting reports into stats.
 *
 * NOTE(review): the declarations of stats and cdata, the return statement and
 * the closing braces are missing from this listing.
 *
 * @param c   the corpus whose basic transcription data is iterated
 * @param fix passed through unchanged to every per-file call
 * @return presumably stats; verify against the complete source
 */
189 public Report function(
Corpus c, Boolean fix)
throws SAXException, JDOMException, IOException, JexmaraldaException, TransformerException, ParserConfigurationException, UnsupportedEncodingException, XPathExpressionException, NoSuchAlgorithmException, ClassNotFoundException, FSMException, URISyntaxException {
// The first report comes from cdata, which is declared outside the visible lines
// (presumably the corpus' Coma data; confirm).
192 stats =
function(cdata, fix);
// Every basic transcription of the corpus is then checked and its report merged into stats.
193 for (
CorpusData bdata : c.getBasicTranscriptionData()) {
194 stats.
merge(
function(bdata, fix));
void addMissing(String statId, String description)
void addNote(String statId, String description)
static ExmaErrorList exmaError
String readExternalResourceAsString(String path2resource)
void setFSMpath(String s)
void addCritical(String description)
static String InputStream2String(InputStream is)
void setUtteranceEndSymbols(String fsmPath)
void addWarning(String statId, String description)
void setXSLresource(String s)
Collection< Class<?extends CorpusData > > getIsUsableFor()
static org.jdom.Document String2JdomDocument(String stringRespresentingDocument)
void addException(Throwable e, String description)