corpus-services  1.0
NgTierCheckerWithAnnotation.java
Go to the documentation of this file.
1 package de.uni_hamburg.corpora.validation;
2 
8 import java.io.IOException;
9 import java.net.URISyntaxException;
10 import java.security.NoSuchAlgorithmException;
11 import java.util.ArrayList;
12 import java.util.Collection;
13 import java.util.HashMap;
14 import java.util.Map;
15 import javax.xml.parsers.DocumentBuilder;
16 import javax.xml.parsers.DocumentBuilderFactory;
17 import javax.xml.parsers.ParserConfigurationException;
18 import org.w3c.dom.Document;
19 import org.w3c.dom.Element;
20 import org.w3c.dom.NodeList;
21 import org.xml.sax.SAXException;
22 import javax.xml.transform.TransformerException;
23 import javax.xml.xpath.XPathExpressionException;
24 import org.exmaralda.partitureditor.fsm.FSMException;
25 import org.exmaralda.partitureditor.jexmaralda.JexmaraldaException;
26 import org.jdom.JDOMException;
27 
33 public class NgTierCheckerWithAnnotation extends Checker implements CorpusFunction {
34 
35  String comaLoc = "";
36  HashMap<String, Collection<String>> annotationsInComa; // list for holding annotations of coma file
37  ArrayList<String> annotations; // list for holding annotations of annotation spec file
38  int counter = 0; // counter for controlling whether we are on coma or annotation spec file
39 
41  super(false);
42  }
43 
48  public void addAnnotations(CorpusData cd)
49  throws SAXException, IOException, ParserConfigurationException, URISyntaxException, TransformerException, XPathExpressionException {
50  DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
51  DocumentBuilder db = dbf.newDocumentBuilder();
52  Document doc = db.parse(TypeConverter.String2InputStream(cd.toSaveableString())); // get the file as a document
53  if (cd.getURL().toString().endsWith(".coma")) {
54  NodeList communications = doc.getElementsByTagName("Communication"); // divide by Communication tags
55  annotationsInComa = new HashMap<String, Collection<String>>();
56  for (int i = 0; i < communications.getLength(); i++) { //iterate through communications
57  Element communication = (Element) communications.item(i);
58  String name = communication.getAttribute("Name"); //get the name of the file that has the description
59  NodeList descriptions = communication.getElementsByTagName("Description"); // get descriptions of current communication
60  for (int j = 0; j < descriptions.getLength(); j++) { // iterate through descriptions
61  Element description = (Element) descriptions.item(j);
62  NodeList keys = description.getElementsByTagName("Key"); // get keys of current description
63  for (int k = 0; k < keys.getLength(); k++) { // look for the key with "annotation" attribute
64  Element key = (Element) keys.item(k);
65  if (key.getAttribute("Name").contains("Annotation")) {
66  int spaceIndex = key.getAttribute("Name").lastIndexOf(' ');
67  if (annotationsInComa.containsKey(name)) {
68  if (!annotationsInComa.get(name).contains(key.getAttribute("Name").substring(spaceIndex + 1))) {
69  Collection<String> c = annotationsInComa.get(name);
70  c.add(key.getAttribute("Name").substring(spaceIndex + 1));
71  annotationsInComa.put(name, c);
72  }
73  } else {
74  Collection<String> c = new ArrayList<String>();
75  c.add(key.getAttribute("Name").substring(spaceIndex + 1));
76  annotationsInComa.put(name, c);
77  }
78  }
79  }
80  }
81  }
82  } else {
83  annotations = new ArrayList<String>();
84  NodeList annotationSets = doc.getElementsByTagName("annotation-set"); // divide by tags
85  for (int i = 0; i < annotationSets.getLength(); i++) { //iterate through tags
86  Element annotationSet = (Element) annotationSets.item(i);
87  annotations.add(annotationSet.getAttribute("exmaralda-tier-category"));
88  }
89  }
90  }
91 
97  public Report check(CorpusData cd) {
98  Report stats = new Report();
99  try {
100  if (counter < 1) { //first add annotations from coma or annotation spec file depending on which is read first
101  addAnnotations(cd);
102  counter++;
103  } else { //then add the second annotations and check them against the first ones
104  addAnnotations(cd);
105  stats = exceptionalCheck(cd);
106  }
107  } catch (ParserConfigurationException pce) {
108  stats.addException(pce, comaLoc + ": Unknown parsing error");
109  } catch (SAXException saxe) {
110  stats.addException(saxe, comaLoc + ": Unknown parsing error");
111  } catch (IOException ioe) {
112  stats.addException(ioe, comaLoc + ": Unknown file reading error");
113  } catch (URISyntaxException ex) {
114  stats.addException(ex, comaLoc + ": Unknown file reading error");
115  } catch (TransformerException ex) {
116  stats.addException(ex, comaLoc + ": Unknown file reading error");
117  } catch (XPathExpressionException ex) {
118  stats.addException(ex, comaLoc + ": Unknown file reading error");
119  }
120  return stats;
121  }
122 
128  private Report exceptionalCheck(CorpusData cd)
129  throws SAXException, IOException, ParserConfigurationException, URISyntaxException {
130  Report stats = new Report(); //create a new report
131  if (annotationsInComa != null){
132  for (Map.Entry<String, Collection<String>> entry : annotationsInComa.entrySet()) {
133  String name = entry.getKey();
134  Collection<String> annotTypes = entry.getValue();
135  for (String annotType : annotTypes) { // iterate through annotations in the coma file
136  if (!annotations.contains(annotType)) { // check if annotations not present in annotation spec file
137  System.out.println("Coma file is containing annotation (" + annotType
138  + ") for " + name + " not specified by annotation spec file!");
139  stats.addWarning("tier-checker-with-annotation", "annotation error: annotation ("
140  + annotType + ") for " + name + " not specified in the annotation spec file!");
141  int index = cd.getURL().getFile().lastIndexOf("/");
142  String nameExtension = name.substring(name.lastIndexOf('_'));
143  String filePath;
144  switch (nameExtension) {
145  case "_conv":
146  filePath = cd.getURL().getFile().substring(0, index) + "/conversation/" + name + "/" + name + ".exb";
147  break;
148  case "_nar":
149  filePath = cd.getURL().getFile().substring(0, index) + "/narrative/" + name + "/" + name + ".exb";
150  break;
151  case "_song":
152  filePath = cd.getURL().getFile().substring(0, index) + "/songs/" + name + "/" + name + ".exb";
153  break;
154  default:
155  filePath = cd.getURL().getFile().substring(0, index) + "/" + nameExtension.substring(1) + "/" + name + "/" + name + ".exb";
156  }
157  }
158  }
159  }
160  }
161 
162  return stats; // return the report with warnings
163  }
164 
170  @Override
171  public Collection<Class<? extends CorpusData>> getIsUsableFor() {
172  try {
173  Class cl = Class.forName("de.uni_hamburg.corpora.ComaData");
174  Class clSecond = Class.forName("de.uni_hamburg.corpora.AnnotationSpecification");
175  IsUsableFor.add(cl);
176  IsUsableFor.add(clSecond);
177  } catch (ClassNotFoundException ex) {
178  report.addException(ex, " usable class not found");
179  }
180  return IsUsableFor;
181  }
182 
186  @Override
187  public String getDescription() {
188  String description = "This class checks out if all annotations for Nganasan"
189  + " Corpus are from the annotation specification file and there are"
190  + " no annotations in the coma file not present in the annotation"
191  + " specification file.";
192  return description;
193  }
194 
195  @Override
196  public Report function(CorpusData cd, Boolean fix) throws NoSuchAlgorithmException, ClassNotFoundException, FSMException, URISyntaxException, SAXException, IOException, ParserConfigurationException, JexmaraldaException, TransformerException, XPathExpressionException, JDOMException {
197  throw new UnsupportedOperationException("Not supported yet."); //To change body of generated methods, choose Tools | Templates.
198  }
199 
200  @Override
201  public Report function(Corpus c, Boolean fix) throws NoSuchAlgorithmException, ClassNotFoundException, FSMException, URISyntaxException, SAXException, IOException, ParserConfigurationException, JexmaraldaException, TransformerException, XPathExpressionException, JDOMException {
202  throw new UnsupportedOperationException("Not supported yet."); //To change body of generated methods, choose Tools | Templates.
203  }
204 
205 }
void addWarning(String statId, String description)
Definition: Report.java:164
static InputStream String2InputStream(String s)
void addException(Throwable e, String description)
Definition: Report.java:287