11 package de.uni_hamburg.corpora.validation;
14 import java.io.IOException;
16 import java.util.List;
17 import java.util.ArrayList;
18 import java.util.regex.Pattern;
19 import java.util.regex.Matcher;
20 import javax.xml.parsers.DocumentBuilder;
21 import javax.xml.parsers.DocumentBuilderFactory;
22 import javax.xml.parsers.ParserConfigurationException;
24 import org.apache.commons.cli.Option;
25 import org.apache.commons.cli.CommandLine;
26 import org.xml.sax.SAXException;
27 import org.w3c.dom.Document;
28 import org.w3c.dom.Element;
29 import org.w3c.dom.Node;
30 import org.w3c.dom.NodeList;
31 import org.w3c.dom.Text;
33 import org.exmaralda.partitureditor.jexmaralda.BasicTranscription;
34 import org.exmaralda.partitureditor.jexmaralda.JexmaraldaException;
// Transcription object; the only visible assignment is in
// tryLoadBasicTranscription(String).
41 BasicTranscription bt;
// Regular expressions (as strings) that event text is expected to match.
// Filled from the "accept" command-line option and compiled with
// Pattern.compile before a file is checked.
43 List<String> conventions =
new ArrayList<String>();
// Regular expressions (as strings) that event text must NOT match.
// Filled from the "disallow" command-line option and compiled with
// Pattern.compile before a file is checked.
44 List<String> problems =
new ArrayList<String>();
// Fixed identifier string of this checker.
// NOTE(review): presumably passed to the reporting calls as the check name;
// the call sites are not visible in this listing -- confirm.
46 final String
function =
"exb-patterns";
// Builds a BasicTranscription from the given file name and stores it in the
// bt field. Declared to throw SAXException and JexmaraldaException.
// NOTE(review): the line between the signature and the assignment is missing
// from this listing, so the assignment may be guarded by a condition
// (e.g. only when bt is still unset) -- confirm against the full file.
48 private void tryLoadBasicTranscription(String filename)
49 throws SAXException, JexmaraldaException {
51 bt =
new BasicTranscription(filename);
// Tail of a try statement whose header and body are not part of this listing.
// Three checked exceptions are caught separately: ParserConfigurationException,
// SAXException and IOException.
// NOTE(review): the handler bodies are missing here; presumably each records
// the exception in a report rather than swallowing it -- confirm.
59 }
catch (ParserConfigurationException pce) {
61 }
catch (SAXException saxe) {
63 }
catch (IOException ioe) {
// Continuation of a method signature whose first line is not part of this
// listing (presumably exceptionalCheck(File f) -- confirm). The declared
// exceptions are the ones the XML parsing calls below can raise.
70 throws SAXException, IOException, ParserConfigurationException {
// Compile every accepted-convention regex once, before walking the document.
72 List<Pattern> correctPatterns =
new ArrayList<Pattern>();
73 for (String convention : conventions) {
74 correctPatterns.add(Pattern.compile(convention));
// Likewise compile every disallowed regex.
76 List<Pattern> errorPatterns =
new ArrayList<Pattern>();
77 for (String problem : problems) {
78 errorPatterns.add(Pattern.compile(problem));
// Parse the input (f; presumably the File parameter) into a DOM tree.
// NOTE(review): the factory is used with its default settings in the visible
// lines; if input files can come from untrusted sources, DTDs and external
// entities should be disabled on it (XXE).
80 DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
81 DocumentBuilder db = dbf.newDocumentBuilder();
82 Document doc = db.parse(f);
// Collect all <event> elements of the document.
83 NodeList events = doc.getElementsByTagName(
"event");
85 for (
int i = 0; i < events.getLength(); i++) {
// The cast assumes every item is an Element, which holds for results of
// getElementsByTagName.
86 Element
event = (Element)events.item(i);
// Walk the direct children of the event looking for its text content.
87 NodeList eventTexts =
event.getChildNodes();
88 for (
int j = 0; j < eventTexts.getLength(); j++) {
89 Node maybeText = eventTexts.item(j);
// Non-text children are handled separately from text nodes.
90 if (maybeText.getNodeType() != Node.TEXT_NODE) {
// An element child named "ud-information" is special-cased; what is done
// with it is in lines missing from this listing.
91 if (maybeText.getNodeType() == Node.ELEMENT_NODE &&
92 maybeText.getNodeName().equals(
"ud-information")) {
// A diagnostic about a non-text node is written to stderr.
// NOTE(review): the exact branch this belongs to (presumably the else of the
// ud-information test) is not visible in this listing -- confirm.
96 System.err.println(
"This is not a text node: " +
// NOTE(review): presumably only reached for text nodes; the skip for non-text
// nodes would be in the lines missing above this cast -- confirm.
100 Text eventText = (Text) maybeText;
101 String text = eventText.getWholeText();
// Accepted conventions: Matcher.matches() succeeds only if the ENTIRE text
// matches the pattern, so a convention that matches just part of the text
// still counts as a violation.
103 for (Pattern pattern : correctPatterns) {
104 Matcher matcher = pattern.matcher(text);
105 if (!matcher.matches()) {
// The strings below are arguments of a reporting call whose first line is
// missing from this listing.
107 "Text: " + text +
" does not fit to the " +
108 "conventions given.",
"Expression was: " +
// Disallowed patterns: flagged when the ENTIRE text matches the pattern.
113 for (Pattern pattern : errorPatterns) {
114 Matcher matcher = pattern.matcher(text);
115 if (matcher.matches()) {
// NOTE(review): this message repeats the "does not fit to the conventions"
// wording of the accepted-pattern case although here a disallowed pattern
// matched -- looks like a copy/paste message; confirm intended wording.
117 "Text: " + text +
" does not fit to the " +
118 "conventions given.",
"Expression was: " +
// NOTE(review): k is not declared in any visible line (presumably a counter
// maintained in the missing lines); since the loop variable `pattern` is the
// same element, using it directly would avoid the index -- confirm.
119 errorPatterns.get(k));
// Fragment of the command-line driver; the enclosing method header is not part
// of this listing (presumably doMain(String[] args) -- confirm).
// The string literals below are the tool description / usage text passed to a
// call whose first line is missing.
129 "Checks Exmaralda .exb file annotations for conventions using " +
130 "patterns",
// NOTE(review): the stray number before the '+' on the next line is an
// artifact of the listing extraction, not part of the program text.
"If input is a directory, performs recursive check " 131 +
"from that directory, otherwise checks input file\n" +
132 "Patterns are given as regular expressions to match against " +
133 "(regular expression is compiled with java.util.regex)");
// Checker-specific options: -a/--accept and -d/--disallow, each taking one
// argument (hasArg = true).
135 List<Option> patternOptions =
new ArrayList<Option>();
136 patternOptions.add(
new Option(
"a",
"accept",
true,
// NOTE(review): the listing fuses the end of this description string with the
// start of the next statement; the remainder of the description is missing.
"add an acceptable " 138 patternOptions.add(
new Option(
"d",
"disallow",
true,
// NOTE(review): same extraction damage as above -- description truncated and
// fused with the following if statement.
"add an illegal " 144 if (!cmd.hasOption(
"accept") && !cmd.hasOption(
"disallow")) {
// With neither option given there is nothing to check; a message is printed
// to stderr (what happens afterwards is not visible in this listing).
145 System.err.println(
"Nothing to accept or disallow, " +
// Copy the option values into the pattern lists used by the file check.
// NOTE(review): getOptionValue returns only the FIRST value of an option, so
// repeating -a / -d on the command line would silently drop the later
// patterns; getOptionValues would be needed to collect all of them -- confirm
// whether repeated options are meant to be supported.
149 if (cmd.hasOption(
"accept")) {
150 conventions.add(cmd.getOptionValue(
"accept"));
152 if (cmd.hasOption(
"disallow")) {
153 problems.add(cmd.getOptionValue(
"disallow"));
// Progress output on stdout: a heading, then one bullet per input file
// (the loop over the files is in lines missing from this listing).
156 System.out.println(
"Checking exb files for unconventional " +
162 System.out.println(
" * " + f.getName());
169 public static void main(String[] args) {
CommandLine handleCommandLine(String[] args, List< Option > extraOptions)
Report doMain(String[] args)
Collection< File > getInputFiles()
void addCritical(String description)
Report exceptionalCheck(File f)
void addException(Throwable e, String description)
static void main(String[] args)