10 package de.uni_hamburg.corpora.validation;
19 import java.io.IOException;
20 import java.net.URISyntaxException;
21 import java.util.List;
22 import java.util.ArrayList;
23 import java.util.Collection;
24 import javax.xml.parsers.ParserConfigurationException;
25 import javax.xml.transform.TransformerException;
26 import javax.xml.xpath.XPathExpressionException;
27 import org.xml.sax.SAXException;
28 import org.w3c.dom.Document;
29 import org.w3c.dom.Element;
30 import org.w3c.dom.Node;
31 import org.w3c.dom.NodeList;
32 import org.w3c.dom.Text;
34 import org.exmaralda.partitureditor.jexmaralda.BasicTranscription;
35 import org.exmaralda.partitureditor.jexmaralda.JexmaraldaException;
36 import org.jdom.JDOMException;
38 import org.languagetool.rules.RuleMatch;
39 import org.languagetool.JLanguageTool;
40 import org.languagetool.language.GermanyGerman;
/** Static, so a single value is shared by all instances; it is neither assigned nor read in the visible part of this file. */
49 static String filename;
/** EXMARaLDA basic transcription handle; not assigned in the visible part of this file. */
50 BasicTranscription bt;
/** Starts out as an empty list; no use is visible here. NOTE(review): possibly unused, confirm against the full file. */
53 List<String> conventions =
new ArrayList<String>();
/** Starts out as an empty list; no use is visible here. NOTE(review): possibly unused, confirm against the full file. */
54 List<String> problems =
new ArrayList<String>();
/** Value that each tier's "category" attribute is compared against while iterating over the tiers; presumably tiers with another category are skipped (that branch is not visible, confirm). */
55 String tierToCheck =
"fg";
/** Language code that selects the LanguageTool language; "de" makes the checker create a GermanyGerman JLanguageTool. */
56 String language =
"de";
/** LanguageTool instance; created according to {@code language} and then used to check the text of each event. */
57 JLanguageTool langTool;
73 throws SAXException, IOException, ParserConfigurationException, JexmaraldaException {
76 if (language.equals(
"de")) {
77 langTool =
new JLanguageTool(
new GermanyGerman());
78 System.out.println(
"Language set to German");
91 report.
addCritical(
function, cd,
"Missing languagetool resource for language " 96 NodeList tiers = doc.getElementsByTagName(
"tier");
97 List<RuleMatch> matches =
new ArrayList<RuleMatch>();
99 for (
int k = 0; k < tiers.getLength(); k++) {
100 Element tier = (Element) tiers.item(k);
101 if (!tier.getAttribute(
"category").equals(tierToCheck)) {
104 NodeList events = tier.getElementsByTagName(
"event");
105 for (
int i = 0; i < events.getLength(); i++) {
106 Element
event = (Element) events.item(i);
107 NodeList eventTexts =
event.getChildNodes();
108 for (
int j = 0; j < eventTexts.getLength(); j++) {
109 Node maybeText = eventTexts.item(j);
110 if (maybeText.getNodeType() != Node.TEXT_NODE) {
111 if (maybeText.getNodeType() == Node.ELEMENT_NODE
112 && maybeText.getNodeName().equals(
"ud-information")) {
116 System.out.println(
"This is not a text node: " 120 Text eventText = (Text) maybeText;
121 String text = eventText.getWholeText();
122 matches = langTool.check(text);
123 for (RuleMatch match : matches) {
124 String message =
"Potential error at characters " 125 + match.getFromPos() +
"-" + match.getToPos() +
": " 126 + match.getMessage() +
": \"" 127 + text.substring(match.getFromPos(),
128 match.getToPos()) +
"\" " 129 +
"Suggested correction(s): " 130 + match.getSuggestedReplacements();
141 exmaError.addError(
function, cd.getURL().getFile(), tier.getAttribute(
"id"),
event.getAttribute(
"start"),
false, message);
143 if (!matches.isEmpty()) {
151 stats.
addCorrect(
function, cd,
"No spelling errors found.");
165 Class cl = Class.forName(
"de.uni_hamburg.corpora.BasicTranscriptionData");
167 }
catch (ClassNotFoundException ex) {
168 report.
addException(ex,
"unknown class not found error");
175 String description =
"This class takes a CorpusDataObject that is an Exb, " 176 +
"checks if there are spell or grammar errors in German, English or Russian using LanguageTool and" 177 +
" returns the errors in the Report and in the ExmaErrors.";
190 public Report function(
Corpus c, Boolean fix)
throws SAXException, IOException, ParserConfigurationException, URISyntaxException, JDOMException, TransformerException, XPathExpressionException, JexmaraldaException {
192 for (
CorpusData cdata : c.getBasicTranscriptionData()) {
193 stats.
merge(
function(cdata, fix));
static ExmaErrorList exmaError
void addCritical(String description)
static org.w3c.dom.Document JdomDocument2W3cDocument(org.jdom.Document jdomDoc)
void addWarning(String statId, String description)
void addCorrect(String statId, String description)
void addException(Throwable e, String description)