9 package de.uni_hamburg.corpora.validation;
16 import java.io.IOException;
17 import java.net.URISyntaxException;
18 import java.util.ArrayList;
19 import java.util.Collection;
20 import java.util.List;
21 import java.util.regex.Matcher;
22 import java.util.regex.Pattern;
23 import javax.xml.parsers.ParserConfigurationException;
25 import javax.xml.transform.TransformerException;
26 import javax.xml.xpath.XPathExpressionException;
27 import org.apache.commons.cli.Option;
28 import org.apache.commons.cli.CommandLine;
29 import org.exmaralda.partitureditor.jexmaralda.JexmaraldaException;
30 import org.jdom.JDOMException;
31 import org.xml.sax.SAXException;
56 throws SAXException, IOException, ParserConfigurationException, URISyntaxException {
57 File f =
new File(cd.getURL().toString());
58 String filename = f.getName();
59 File fp = f.getParentFile().getParentFile();
60 String[] path =
new String[1];
61 path[0] = fp.getPath().substring(6);
63 List<Option> patternOptions =
new ArrayList<Option>();
64 patternOptions.add(
new Option(
"a",
"accept",
true,
"add an acceptable " 66 patternOptions.add(
new Option(
"d",
"disallow",
true,
"add an illegal " 72 if (cmd.hasOption(
"accept")) {
73 acceptable = Pattern.compile(cmd.getOptionValue(
"accept"));
75 acceptable = Pattern.compile(
"^[A-Za-z0-9_.-]*$");
77 if (cmd.hasOption(
"disallow")) {
78 unacceptable = Pattern.compile(cmd.getOptionValue(
"disallow"));
80 unacceptable = Pattern.compile(
"[ üäöÜÄÖ]");
83 System.out.println(
"Checking coma file against directory...");
87 Matcher matchAccepting = acceptable.matcher(filename);
88 boolean allesGut =
true;
89 if (!matchAccepting.matches()) {
91 filename +
" does not follow " 92 +
"filename conventions for HZSK corpora");
93 exmaError.addError(
function, cd.getURL().getFile(),
"",
"",
false,
"Error: " + filename +
" does not follow " 94 +
"filename conventions for HZSK corpora");
97 Matcher matchUnaccepting = unacceptable.matcher(filename);
98 if (matchUnaccepting.find()) {
100 filename +
" contains " 101 +
"characters that may break in HZSK repository");
102 exmaError.addError(
function, cd.getURL().getFile(),
"",
"",
false,
"Error: " + filename +
" contains " 103 +
"characters that may break in HZSK repository");
109 filename +
" is OK by HZSK standards.");
122 Class clThird = Class.forName(
"de.uni_hamburg.corpora.ComaData");
123 IsUsableFor.add(clThird);
124 }
catch (ClassNotFoundException ex) {
135 String description =
"This class checks if all file names linked in the coma file" 136 +
" to be deposited in HZSK repository; checks if there is a file" 137 +
" which is not named according to coma file.";
142 public Report function(
Corpus c, Boolean fix)
throws SAXException, IOException, ParserConfigurationException, URISyntaxException {
144 cd = c.getComaData();
145 stats =
function(cd, fix);
CommandLine handleCommandLine(String[] args, List< Option > extraOptions)
Collection< Class<?extends CorpusData > > getIsUsableFor()
static ExmaErrorList exmaError
void addWarning(String statId, String description)
void addCorrect(String statId, String description)
void addException(Throwable e, String description)