corpus-services  1.0
GenerateAnnotationPanel.java
Go to the documentation of this file.
1 package de.uni_hamburg.corpora.validation;
2 
9 import java.io.File;
10 import java.io.IOException;
11 import java.net.URI;
12 import java.net.URISyntaxException;
13 import java.util.ArrayList;
14 import java.util.Collection;
15 import java.util.HashMap;
16 import java.util.List;
17 import java.util.Map;
18 import javax.xml.parsers.DocumentBuilder;
19 import javax.xml.parsers.DocumentBuilderFactory;
20 import javax.xml.parsers.ParserConfigurationException;
21 import javax.xml.transform.Transformer;
22 import javax.xml.transform.TransformerConfigurationException;
23 import javax.xml.transform.TransformerException;
24 import javax.xml.transform.TransformerFactory;
25 import javax.xml.transform.dom.DOMSource;
26 import javax.xml.transform.stream.StreamResult;
27 import javax.xml.xpath.XPathExpressionException;
28 import org.exmaralda.partitureditor.jexmaralda.JexmaraldaException;
29 import org.jdom.JDOMException;
30 import org.w3c.dom.Attr;
31 import org.w3c.dom.Document;
32 import org.w3c.dom.Element;
33 import org.w3c.dom.NodeList;
34 import org.xml.sax.SAXException;
35 
40 public class GenerateAnnotationPanel extends Checker implements CorpusFunction {
41 
42  String genLoc = "";
43  static Map<String, Collection<String>> annotationsInExbs = new HashMap<String, Collection<String>>(); // list for holding annotations in exbs
44  boolean generateDoc = true; // flag for whether the file created or not
45  int iterateExbs = 0;
46 
/**
 * Creates the function and passes {@code false} to the superclass constructor,
 * because fixing is not available here.
 *
 * NOTE(review): the declaration line of this constructor was missing from the
 * listing; it is restored as a public no-argument constructor — confirm the
 * original modifiers.
 */
public GenerateAnnotationPanel() {
    // fixing not available
    super(false);
}
51 
55  public Report generateAnnotation(CorpusData cd) throws ParserConfigurationException, TransformerConfigurationException, TransformerException {
56  Report stats = new Report();
57  DocumentBuilderFactory docFactory = DocumentBuilderFactory.newInstance();
58  DocumentBuilder docBuilder = docFactory.newDocumentBuilder();
59  Document doc = docBuilder.newDocument();
60  Element rootElement;
61  rootElement = doc.createElement("annotation-specification");
62  doc.appendChild(rootElement);
63  for (String key : annotationsInExbs.keySet()) {
64  if (!key.equals("en") && !key.equals("de") && !key.equals("ita") && !key.equals("fe")
65  && !key.isEmpty() && annotationsInExbs.get(key).size() <= 60) {
66  Element annotationSet = doc.createElement("annotation-set");
67  rootElement.appendChild(annotationSet);
68  Attr attr = doc.createAttribute("exmaralda-tier-category");
69  attr.setValue(key);
70  annotationSet.setAttributeNode(attr);
71  Element category = doc.createElement("category");
72  category.setAttribute("name", key + "-tags");
73  annotationSet.appendChild(category);
74  Element higherTag = doc.createElement("tag");
75  higherTag.setAttribute("name", key);
76  category.appendChild(higherTag);
77  Element description = doc.createElement("description");
78  category.appendChild(description);
79  List<String> sortedTags = (List<String>) annotationsInExbs.get(key);
80  java.util.Collections.sort(sortedTags, String.CASE_INSENSITIVE_ORDER);
81  annotationsInExbs.replace(key, sortedTags);
82  for (String tag : annotationsInExbs.get(key)) {
83  if (!tag.isEmpty()) {
84  Element lowerCategory = doc.createElement("category");
85  lowerCategory.setAttribute("name", tag);
86  Element lowerTag = doc.createElement("tag");
87  lowerTag.setAttribute("name", tag);
88  lowerCategory.appendChild(lowerTag);
89  Element lowerDescription = doc.createElement("description");
90  lowerCategory.appendChild(lowerDescription);
91  stats.addCorrect(function, cd,
92  "Annotation added to the file annotation panel: "
93  + tag);
94  category.appendChild(lowerCategory);
95  }
96  }
97  }
98  }
99 
100  TransformerFactory transformerFactory = TransformerFactory.newInstance();
101  Transformer transformer = transformerFactory.newTransformer();
102  DOMSource source = new DOMSource(doc);
103  File f = new File(new File(cd.getURL().getFile()).getParentFile() + "\\AnnotationSpecFromExbs.xml");
104  URI u = f.toURI();
105  StreamResult result = new StreamResult(new File(u));
106  transformer.transform(source, result);
107  return stats;
108  }
109 
113  @Override
114  public Report function(CorpusData cd, Boolean fix)
115  throws SAXException, IOException, ParserConfigurationException, TransformerConfigurationException, TransformerException, XPathExpressionException {
116  DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
117  DocumentBuilder db = dbf.newDocumentBuilder();
118  Document doc = db.parse(TypeConverter.String2InputStream(cd.toSaveableString())); // get the file as a document
119  Report stats = new Report();
120  //TODO!!
121  if (cd.getURL().getFile().endsWith(".exb")) {
122  NodeList tiers = doc.getElementsByTagName("tier"); // get all tiers of the transcript
123  for (int i = 0; i < tiers.getLength(); i++) { // loop for dealing with each tier
124  Element tier = (Element) tiers.item(i);
125  String category = tier.getAttribute("category"); // get category
126  String type = tier.getAttribute("type"); // get type
127  // check if it is an annotation tier category that is already in the map
128  if (annotationsInExbs.containsKey(category) && type.equals("a")) {
129  Collection<String> tags = annotationsInExbs.get(category);
130  NodeList events = tier.getElementsByTagName("event");
131  for (int j = 0; j < events.getLength(); j++) {
132  Element event = (Element) events.item(j);
133  String tag = event.getTextContent();
134  // check and fix irregularities (e.g. if there is a space at the end) in the tags
135  if (tag.endsWith(" ")) {
136  System.out.println("Exb file " + cd.getURL().getFile().substring(cd.getURL().getFile().lastIndexOf("/") + 1) + " is containing a tag ("
137  + tag + ") in its tier " + tier.getAttribute("display-name") + " with an extra space in the end!");
138  stats.addWarning(function, cd, "Exb file is containing a tag ("
139  + tag + ") in its tier " + tier.getAttribute("display-name") + " with an extra space in the end!");
140  exmaError.addError("generate-annotation-panel", cd.getURL().getFile(), tier.getAttribute("id"), event.getAttribute("start"), false,
141  "Exb file " + cd.getURL().getFile().substring(cd.getURL().getFile().lastIndexOf("/") + 1) + " is containing a tag ("
142  + tag + ") in its tier " + tier.getAttribute("display-name") + " with an extra space in the end!");
143  //tag = tag.substring(0, tag.length() - 1);
144  }
145  if (!tags.contains(tag)) {
146  tags.add(tag);
147  }
148  }
149  //annotationsInExbs.remove(category);
150  annotationsInExbs.put(category, tags); // add annotations to the map
151  } // check if it is an annotation tier category that is not in the map
152  else if (!annotationsInExbs.containsKey(category) && type.equals("a")) {
153  Collection<String> tags = new ArrayList<String>();
154  NodeList events = tier.getElementsByTagName("event");
155  for (int j = 0; j < events.getLength(); j++) {
156  Element event = (Element) events.item(j);
157  String tag = event.getTextContent();
158  // check and fix irregularities (e.g. if there is a space at the end) in the tags
159  if (tag.endsWith(" ")) {
160  System.out.println("Exb file " + cd.getURL().getFile().substring(cd.getURL().getFile().lastIndexOf("/") + 1) + " is containing a tag ("
161  + tag + ") in its tier " + tier.getAttribute("display-name") + " with an extra space in the end!");
162  stats.addWarning(function, cd, "Exb file is containing a tag ("
163  + tag + ") in its tier " + tier.getAttribute("display-name") + " with an extra space in the end!");
164  exmaError.addError("generate-annotation-panel", cd.getURL().getFile(), tier.getAttribute("id"), event.getAttribute("start"), false,
165  "Exb file " + cd.getURL().getFile().substring(cd.getURL().getFile().lastIndexOf("/") + 1) + " is containing a tag ("
166  + tag + ") in its tier " + tier.getAttribute("display-name") + " with an extra space in the end!");
167  tag = tag.substring(0, tag.length() - 1);
168  }
169  if (!tags.contains(tag)) {
170  tags.add(tag);
171  }
172  }
173  annotationsInExbs.put(category, tags); // add annotations to the map
174  }
175  }
176  }else {
177  stats = generateAnnotation(cd); // call the necessary method to create the annotation panel
178  }
179  return stats;
180  }
181 
182 
188  @Override
189  public Collection<Class<? extends CorpusData>> getIsUsableFor() {
190  try {
191  Class cl = Class.forName("de.uni_hamburg.corpora.BasicTranscriptionData");
192  Class clSecond = Class.forName("de.uni_hamburg.corpora.ComaData");
193  IsUsableFor.add(cl);
194  IsUsableFor.add(clSecond);
195  } catch (ClassNotFoundException ex) {
196  report.addException(ex, "Usable class not found.");
197  }
198  return IsUsableFor;
199  }
200 
204  @Override
205  public String getDescription() {
206  String description = "This class generates an annotation specification panel"
207  + " from the basic transcription files (exb).";
208  return description;
209  }
210 
211  @Override
212  public Report function(Corpus c, Boolean fix) throws SAXException, IOException, ParserConfigurationException, JexmaraldaException, TransformerException, XPathExpressionException {
213  Report stats = new Report();
214  for (CorpusData cdata : c.getBasicTranscriptionData()) {
215  stats.merge(function(cdata, fix));
216  }
217  for (CorpusData adata : c.getAnnotationspecification()) {
218  stats.merge(function(adata, fix));
219  }
220  return stats;
221  }
222 
223 
224 }
void merge(Report sr)
Definition: Report.java:73
void addWarning(String statId, String description)
Definition: Report.java:164
Collection< Class<?extends CorpusData > > getIsUsableFor()
void addCorrect(String statId, String description)
Definition: Report.java:217
static InputStream String2InputStream(String s)
void addException(Throwable e, String description)
Definition: Report.java:287