corpus-services  1.0
ComaFilenameChecker.java
Go to the documentation of this file.
1 
9 package de.uni_hamburg.corpora.validation;
10 
15 import java.io.File;
16 import java.io.IOException;
17 import java.net.URISyntaxException;
18 import java.util.ArrayList;
19 import java.util.Collection;
20 import java.util.List;
21 import java.util.regex.Matcher;
22 import java.util.regex.Pattern;
23 import javax.xml.parsers.ParserConfigurationException;
25 import javax.xml.transform.TransformerException;
26 import javax.xml.xpath.XPathExpressionException;
27 import org.apache.commons.cli.Option;
28 import org.apache.commons.cli.CommandLine;
29 import org.exmaralda.partitureditor.jexmaralda.JexmaraldaException;
30 import org.jdom.JDOMException;
31 import org.xml.sax.SAXException;
32 
37 public class ComaFilenameChecker extends Checker implements CorpusFunction {
38 
39  Pattern acceptable;
40  Pattern unacceptable;
41  String fileLoc = "";
42  ValidatorSettings settings;
43 
45  //fixing is not possible
46  super(false);
47  }
48 
54  @Override
55  public Report function(CorpusData cd, Boolean fix)
56  throws SAXException, IOException, ParserConfigurationException, URISyntaxException {
57  File f = new File(cd.getURL().toString());
58  String filename = f.getName();
59  File fp = f.getParentFile().getParentFile();
60  String[] path = new String[1];
61  path[0] = fp.getPath().substring(6);
62 
63  List<Option> patternOptions = new ArrayList<Option>();
64  patternOptions.add(new Option("a", "accept", true, "add an acceptable "
65  + "pattern"));
66  patternOptions.add(new Option("d", "disallow", true, "add an illegal "
67  + "pattern"));
68  CommandLine cmd = settings.handleCommandLine(path, patternOptions);
69  if (cmd == null) {
70  System.exit(0);
71  }
72  if (cmd.hasOption("accept")) {
73  acceptable = Pattern.compile(cmd.getOptionValue("accept"));
74  } else {
75  acceptable = Pattern.compile("^[A-Za-z0-9_.-]*$");
76  }
77  if (cmd.hasOption("disallow")) {
78  unacceptable = Pattern.compile(cmd.getOptionValue("disallow"));
79  } else {
80  unacceptable = Pattern.compile("[ üäöÜÄÖ]");
81  }
82  if (settings.isVerbose()) {
83  System.out.println("Checking coma file against directory...");
84  }
85  Report stats = new Report();
86 
87  Matcher matchAccepting = acceptable.matcher(filename);
88  boolean allesGut = true;
89  if (!matchAccepting.matches()) {
90  stats.addWarning(function,
91  filename + " does not follow "
92  + "filename conventions for HZSK corpora");
93  exmaError.addError(function, cd.getURL().getFile(), "", "", false, "Error: " + filename + " does not follow "
94  + "filename conventions for HZSK corpora");
95  allesGut = false;
96  }
97  Matcher matchUnaccepting = unacceptable.matcher(filename);
98  if (matchUnaccepting.find()) {
99  stats.addWarning(function,
100  filename + " contains "
101  + "characters that may break in HZSK repository");
102  exmaError.addError(function, cd.getURL().getFile(), "", "", false, "Error: " + filename + " contains "
103  + "characters that may break in HZSK repository");
104  allesGut = false;
105  }
106 
107  if (allesGut) {
108  stats.addCorrect(function,
109  filename + " is OK by HZSK standards.");
110  }
111  return stats;
112  }
113 
119  @Override
120  public Collection<Class<? extends CorpusData>> getIsUsableFor() {
121  try {
122  Class clThird = Class.forName("de.uni_hamburg.corpora.ComaData");
123  IsUsableFor.add(clThird);
124  } catch (ClassNotFoundException ex) {
125  report.addException(ex, " usable class not found");
126  }
127  return IsUsableFor;
128  }
129 
133  @Override
134  public String getDescription() {
135  String description = "This class checks if all file names linked in the coma file"
136  + " to be deposited in HZSK repository; checks if there is a file"
137  + " which is not named according to coma file.";
138  return description;
139  }
140 
141  @Override
142  public Report function(Corpus c, Boolean fix) throws SAXException, IOException, ParserConfigurationException, URISyntaxException {
143  Report stats;
144  cd = c.getComaData();
145  stats = function(cd, fix);
146  return stats;
147  }
148 
149 }
CommandLine handleCommandLine(String[] args, List< Option > extraOptions)
Collection< Class<?extends CorpusData > > getIsUsableFor()
void addWarning(String statId, String description)
Definition: Report.java:164
void addCorrect(String statId, String description)
Definition: Report.java:217
void addException(Throwable e, String description)
Definition: Report.java:287