corpus-services  1.0
ComaTiersDescriptionAnnotationPanelChecker.java
Go to the documentation of this file.
1 package de.uni_hamburg.corpora.validation;
2 
9 import java.io.IOException;
10 import java.net.URISyntaxException;
11 import java.util.ArrayList;
12 import java.util.Collection;
13 import java.util.HashMap;
14 import java.util.Map;
15 import javax.xml.parsers.DocumentBuilder;
16 import javax.xml.parsers.DocumentBuilderFactory;
17 import javax.xml.parsers.ParserConfigurationException;
18 import javax.xml.transform.TransformerException;
19 import javax.xml.xpath.XPathExpressionException;
20 import org.exmaralda.partitureditor.jexmaralda.JexmaraldaException;
21 import org.w3c.dom.Document;
22 import org.w3c.dom.Element;
23 import org.w3c.dom.NodeList;
24 import org.xml.sax.SAXException;
25 
32 
33  String comaLoc = "";
34  HashMap<String, Collection<String>> annotationsInComa; // list for holding annotations of coma file
35  ArrayList<String> annotations; // list for holding annotations of annotation spec file
36  int counter = 0; // counter for controlling whether we are on coma or annotation spec file
37 
39  //no fixing available
40  super(false);
41  }
42 
48  public Report function(CorpusData cd, Boolean fix)
49  throws SAXException, IOException, ParserConfigurationException, URISyntaxException {
50  Report stats = new Report(); //create a new report
51  if (annotationsInComa != null) {
52  for (Map.Entry<String, Collection<String>> entry : annotationsInComa.entrySet()) {
53  String name = entry.getKey();
54  Collection<String> annotTypes = entry.getValue();
55  for (String annotType : annotTypes) { // iterate through annotations in the coma file
56  if (!annotations.contains(annotType)) { // check if annotations not present in annotation spec file
57  System.out.println("Coma file is containing annotation (" + annotType
58  + ") for " + name + " not specified by annotation spec file!");
59  stats.addWarning(function, cd, "annotation error: annotation in annotation panel ("
60  + annotType + ") in communication " + name + " not specified!");
61  int index = cd.getURL().getFile().lastIndexOf("/");
62  String filePath = cd.getURL().getFile().substring(0, index) + "/" + name + "/" + name + ".exb";
63  exmaError.addError("tier-checker-with-annotation", filePath, "", "", false, "annotation error: annotation in annotation panel("
64  + annotType + ") for communication " + name + " not specified in the annotation specification file!");
65  } else {
66  stats.addCorrect(function, cd, "annotation in annotation panel ("
67  + annotType + ") in communication " + name + " was found.");
68  }
69  }
70  }
71  } else {
72  stats.addNote(function, cd, "No annotations found in coma.");
73  }
74 
75  return stats; // return the report with warnings
76  }
77 
78 
83  public void addAnnotations(CorpusData cd)
84  throws SAXException, IOException, ParserConfigurationException, URISyntaxException, TransformerException, XPathExpressionException {
85  DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
86  DocumentBuilder db = dbf.newDocumentBuilder();
87  Document doc = db.parse(TypeConverter.String2InputStream(cd.toSaveableString())); // get the file as a document
88  if (cd.getURL().toString().endsWith(".coma")) {
89  NodeList communications = doc.getElementsByTagName("Communication"); // divide by Communication tags
90  annotationsInComa = new HashMap<String, Collection<String>>();
91  for (int i = 0; i < communications.getLength(); i++) { //iterate through communications
92  Element communication = (Element) communications.item(i);
93  NodeList transcriptions = communication.getElementsByTagName("Transcription"); // get transcriptions of current communication
94  for (int j = 0; j < transcriptions.getLength(); j++) { // iterate through transcriptions
95  Element transcription = (Element) transcriptions.item(j);
96  //Element name = (Element) transcription.getElementsByTagName("Name").item(0); //get the name of the file that has the transcription
97  String name = ((Element) transcription.getElementsByTagName("Name").item(0)).getTextContent(); //get the name of the file that has the transcription
98  NodeList keys = transcription.getElementsByTagName("Key"); // get keys of current transcription
99  boolean segmented = false; // flag for distinguishing basic file from segmented file
100  for (int k = 0; k < keys.getLength(); k++) { // look for the key with "segmented" attribute
101  Element key = (Element) keys.item(k);
102  if (key.getAttribute("Name").equals("segmented")) {
103  String seg = key.getTextContent();
104  if (seg.equals("true")) // check if transcription is segmented or not
105  {
106  segmented = true; // if segmented transcription then turn the flag true
107  }
108  break;
109  }
110  }
111  if (segmented) { // get the names of the segmentation algorithms in the coma file
112  for (int k = 0; k < keys.getLength(); k++) { // look for the keys with algorithm
113  Element key = (Element) keys.item(k);
114  if (key.getAttribute("Name").contains("Annotation type:")) {
115  int colonIndex = key.getAttribute("Name").lastIndexOf(':');
116  if (annotationsInComa.containsKey(name)) {
117  if (!annotationsInComa.get(name).contains(key.getAttribute("Name").substring(colonIndex + 2))) {
118  Collection<String> c = annotationsInComa.get(name);
119  c.add(key.getAttribute("Name").substring(colonIndex + 2));
120  annotationsInComa.put(name, c);
121  }
122  } else {
123  Collection<String> c = new ArrayList<String>();
124  c.add(key.getAttribute("Name").substring(colonIndex + 2));
125  annotationsInComa.put(name, c);
126  }
127  }
128  }
129  }
130  }
131  }
132  } else {
133  annotations = new ArrayList<String>();
134  NodeList tags = doc.getElementsByTagName("tag"); // divide by tags
135  for (int i = 0; i < tags.getLength(); i++) { //iterate through tags
136  Element tag = (Element) tags.item(i);
137  annotations.add(tag.getAttribute("name"));
138  }
139  }
140  }
141 
147  @Override
148  public Collection<Class<? extends CorpusData>> getIsUsableFor() {
149  try {
150  Class cl = Class.forName("de.uni_hamburg.corpora.ComaData");
151  Class clSecond = Class.forName("de.uni_hamburg.corpora.AnnotationSpecification");
152  IsUsableFor.add(cl);
153  IsUsableFor.add(clSecond);
154  } catch (ClassNotFoundException ex) {
155  report.addException(ex, " usable class not found");
156  }
157  return IsUsableFor;
158  }
159 
164  @Override
165  public String getDescription() {
166  String description = "This class checks out that all annotations are from"
167  + " the annotation specification file and that there are no annotations"
168  + " in the coma file not existing in the annotation specification file.";
169  return description;
170  }
171 
172  @Override
173  public Report function(Corpus c, Boolean fix) throws SAXException, IOException, ParserConfigurationException, URISyntaxException {
174  Report stats;
175  cd = c.getComaData();
176  stats = function(cd, fix);
177  return stats;
178  }
179 }
void addNote(String statId, String description)
Definition: Report.java:245
void addWarning(String statId, String description)
Definition: Report.java:164
void addCorrect(String statId, String description)
Definition: Report.java:217
static InputStream String2InputStream(String s)
void addException(Throwable e, String description)
Definition: Report.java:287