corpus-services  1.0
XSLTChecker.java
Go to the documentation of this file.
1 package de.uni_hamburg.corpora.validation;
2 
11 import java.io.IOException;
12 import java.io.UnsupportedEncodingException;
13 import java.net.MalformedURLException;
14 import java.net.URISyntaxException;
15 import java.net.URL;
16 import java.nio.file.Paths;
17 import java.security.NoSuchAlgorithmException;
18 import java.util.Collection;
19 import java.util.List;
20 import java.util.Scanner;
21 import javax.xml.transform.TransformerException;
22 import org.exmaralda.partitureditor.jexmaralda.JexmaraldaException;
23 import org.jdom.JDOMException;
24 import org.xml.sax.SAXException;
25 import javax.xml.parsers.ParserConfigurationException;
26 import javax.xml.xpath.XPathExpressionException;
27 import org.exmaralda.partitureditor.fsm.FSMException;
28 import org.jdom.Document;
29 import org.jdom.Element;
30 import org.jdom.xpath.XPath;
31 
40 public class XSLTChecker extends Checker implements CorpusFunction {
41 
42  String xslresource = "/xsl/nslc-checks.xsl";
43  String filename = "";
44  String UTTERANCEENDSYMBOLS = "[.!?…:]";
45  String FSMpath = "";
46 
47  public XSLTChecker() {
48  //fixing is not possible
49  super(false);
50  }
51 
52  @Override
53  public Report function(CorpusData cd, Boolean fix) throws SAXException, JexmaraldaException, TransformerException, ParserConfigurationException, IOException, XPathExpressionException, MalformedURLException, JDOMException, URISyntaxException {
54 
55  Report r = new Report();
56  filename = cd.getURL().getFile().subSequence(cd.getURL().getFile().lastIndexOf('/') + 1, cd.getURL().getFile().lastIndexOf('.')).toString();
57 
58  //get UtteranceEndSymbols form FSM if supplied
59  if (!FSMpath.equals("")) {
60  setUtteranceEndSymbols(FSMpath);
61  }
62  // get the XSLT stylesheet
63  String xsl = TypeConverter.InputStream2String(getClass().getResourceAsStream(xslresource));
64 
65  // create XSLTransformer and set the parameters
66  XSLTransformer xt = new XSLTransformer();
67 
68  xt.setParameter("filename", filename);
69  xt.setParameter("UTTERANCEENDSYMBOL", UTTERANCEENDSYMBOLS);
70  // perform XSLT transformation
71  String result = xt.transform(cd.toSaveableString(), xsl);
72 
73  //read lines and add to Report
74  Scanner scanner = new Scanner(result);
75 
76  int i = 1;
77  while (scanner.hasNextLine()) {
78  String line = scanner.nextLine();
79 
80  //split line by ;
81  String[] lineParts = line.split(";", -1);
82  if (lineParts.length != 5) {
83  String message = "";
84  for (String s : lineParts) {
85  message = message + s;
86  }
87  r.addCritical(lineParts[0], cd, "There was an exception while creating the error probably because of a semicolon or newline in an event: " + message);
88  } else {
89  switch (lineParts[1].toUpperCase()) {
90  case "WARNING":
91  r.addWarning(lineParts[0], cd, lineParts[2]);
92  /* if (cd.getFilename().endsWith(".exb")) {
93  exmaError.addError("XSLTChecker", cd.getURL().getFile(), lineParts[2], lineParts[3], false, lineParts[1]);
94  } */
95  break;
96  case "CRITICAL":
97  r.addCritical(lineParts[0], cd, lineParts[2]);
98  if (cd.getFilename().endsWith(".exb")) {
99  exmaError.addError(lineParts[0], cd.getURL().getFile(), lineParts[3], lineParts[4], false, lineParts[2]);
100 
101  }
102  break;
103  case "NOTE":
104  r.addNote(lineParts[0], cd, lineParts[2]);
105  break;
106  case "MISSING":
107  r.addMissing(lineParts[0], cd, lineParts[2]);
108  if (cd.getFilename().endsWith(".exb")) {
109  exmaError.addError(lineParts[0], cd.getURL().getFile(), lineParts[3], lineParts[4], false, lineParts[2]);
110  }
111  break;
112  default:
113  r.addCritical(lineParts[0], cd, "(Unrecognized report type): " + lineParts[2]);
114  if (cd.getFilename().endsWith(".exb")) {
115  exmaError.addError(lineParts[0], cd.getURL().getFile(), lineParts[3], lineParts[4], false, lineParts[2]);
116  }
117  }
118  }
119 
120  i++;
121  }
122 
123  scanner.close();
124  return r;
125 
126  }
127 
128  public void setXSLresource(String s) {
129  xslresource = s;
130  }
131 
132  @Override
133  public Collection<Class<? extends CorpusData>> getIsUsableFor() {
134  try {
135  Class cl = Class.forName("de.uni_hamburg.corpora.BasicTranscriptionData");
136  IsUsableFor.add(cl);
137  Class cl1 = Class.forName("de.uni_hamburg.corpora.ComaData");
138  IsUsableFor.add(cl1);
139  //Class cl2 = Class.forName("de.uni_hamburg.corpora.UnspecifiedXMLData");
140  //IsUsableFor.add(cl2);
141  } catch (ClassNotFoundException ex) {
142  report.addException(ex, "unknown class not found error");
143  }
144  return IsUsableFor;
145  }
146 
147  public void setUtteranceEndSymbols(String fsmPath) throws MalformedURLException, JDOMException, IOException, URISyntaxException {
148  //now get the UtteranceEndSymbols from the FSM XML file
149  //XPath: "//fsm/char-set[@id='UtteranceEndSymbols']/char"
150  UTTERANCEENDSYMBOLS = "";
151  CorpusIO cio = new CorpusIO();
152  URL url = Paths.get(fsmPath).toUri().toURL();
153  String fsmstring = cio.readExternalResourceAsString(url.toString());
154  Document fsmdoc = de.uni_hamburg.corpora.utilities.TypeConverter.String2JdomDocument(fsmstring);
155  XPath xpath = XPath.newInstance("//fsm/char-set[@id='UtteranceEndSymbols']/char");
156  List allContextInstances = xpath.selectNodes(fsmdoc);
157  if (!allContextInstances.isEmpty()) {
158  for (int i = 0; i < allContextInstances.size(); i++) {
159  Object o = allContextInstances.get(i);
160  if (o instanceof Element) {
161  Element e = (Element) o;
162  String symbol = e.getText();
163  System.out.println(symbol);
164  UTTERANCEENDSYMBOLS = UTTERANCEENDSYMBOLS + symbol;
165  }
166  }
167  }
168  //needs to be a RegEx (set)
169  UTTERANCEENDSYMBOLS = "[" + UTTERANCEENDSYMBOLS + "]";
170  System.out.println(UTTERANCEENDSYMBOLS);
171  }
172 
173  public void setFSMpath(String s) {
174  FSMpath = s;
175  }
176 
181  @Override
182  public String getDescription() {
183  String description = "This class runs many little checks specified"
184  + " in a XSLT stylesheet and adds them to the report. ";
185  return description;
186  }
187 
188  @Override
189  public Report function(Corpus c, Boolean fix) throws SAXException, JDOMException, IOException, JexmaraldaException, TransformerException, ParserConfigurationException, UnsupportedEncodingException, XPathExpressionException, NoSuchAlgorithmException, ClassNotFoundException, FSMException, URISyntaxException {
190  Report stats = new Report();
191  CorpusData cdata = c.getComaData();
192  stats = function(cdata, fix);
193  for (CorpusData bdata : c.getBasicTranscriptionData()) {
194  stats.merge(function(bdata, fix));
195  }
196  return stats;
197  }
198 }
void setParameter(String parameterName, Object parameterValue)
void addMissing(String statId, String description)
Definition: Report.java:199
void addNote(String statId, String description)
Definition: Report.java:245
void merge(Report sr)
Definition: Report.java:73
String readExternalResourceAsString(String path2resource)
Definition: CorpusIO.java:201
void addCritical(String description)
Definition: Report.java:104
static String InputStream2String(InputStream is)
void addWarning(String statId, String description)
Definition: Report.java:164
Collection< Class<?extends CorpusData > > getIsUsableFor()
static org.jdom.Document String2JdomDocument(String stringRespresentingDocument)
void addException(Throwable e, String description)
Definition: Report.java:287