corpus-services  1.0
ExbCalculateAnnotatedTime.java
Go to the documentation of this file.
1 package de.uni_hamburg.corpora.validation;
2 
8 import java.io.IOException;
9 import java.util.Collection;
10 import java.util.HashMap;
11 import java.util.Set;
12 import javax.xml.parsers.DocumentBuilder;
13 import javax.xml.parsers.DocumentBuilderFactory;
14 import javax.xml.parsers.ParserConfigurationException;
15 import javax.xml.transform.TransformerException;
16 import javax.xml.xpath.XPathExpressionException;
17 import org.exmaralda.partitureditor.jexmaralda.JexmaraldaException;
18 import org.w3c.dom.Document;
19 import org.w3c.dom.Element;
20 import org.w3c.dom.NodeList;
21 import org.xml.sax.SAXException;
22 
29 public class ExbCalculateAnnotatedTime extends Checker implements CorpusFunction {
30 
31  //HashMap<String, HashMap<String, String>> eventMap; // hash map for holding events of annotation tiers
32  HashMap<String, HashMap<String, String>> tierMap; // all the annotation tiers of all the exb files of the corpus
33 
35  //has no fixing option
36  super(false);
37  }
38 
39  @Override
40  public Report function(Corpus c, Boolean fix) throws SAXException, IOException, ParserConfigurationException, JexmaraldaException, TransformerException, XPathExpressionException {
41  Report stats = new Report();
42  for (CorpusData cdata : c.getBasicTranscriptionData()) {
43  stats.merge(function(cdata, fix));
44  }
45  return stats;
46  }
47 
53  public Report function(CorpusData cd, Boolean fix)
54  throws SAXException, IOException, ParserConfigurationException, JexmaraldaException, TransformerException, XPathExpressionException {
55  Report stats = new Report(); //create a new report
56  DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
57  DocumentBuilder db = dbf.newDocumentBuilder();
58  Document doc = db.parse(TypeConverter.String2InputStream(cd.toSaveableString())); // get the file as a document
59  // get the name of the transcription
60  String transcriptName;
61  if (doc.getElementsByTagName("transcription-name").getLength() > 0) { // check if transcript name exists for the exb file
62  transcriptName = doc.getElementsByTagName("transcription-name").item(0).getTextContent(); // get transcript name
63  } else {
64  transcriptName = "No Name Transcript";
65  }
66  // add the title as a note to the report
67  stats.addNote("calculate-annotated-time", "Annotation Tiers of " + transcriptName);
68  HashMap<String, HashMap<String, String>> eventMap = new HashMap<>();
69  //initialise the hash map only for the first time when an exb file is encountered
70  if (tierMap == null) {
71  tierMap = new HashMap<>();
72  }
73  NodeList tiers = doc.getElementsByTagName("tier"); // get all tiers of the transcript
74  NodeList items = doc.getElementsByTagName("tli"); // get all timeline items of the transcript
75  HashMap<String, Float> timelineItems = getTimelineItems(items); // container for the tl items
76  HashMap<String, String> tierH = new HashMap<>();
77  for (int i = 0; i < tiers.getLength(); i++) { // loop for dealing with each tier
78  Element tier = (Element) tiers.item(i); // get one tier at a time
79  if (tier.getAttribute("type").equals("a") && !(tier.getAttribute("category").equals("en")
80  || tier.getAttribute("category").equals("de"))) { // handle annotation tiers exclusively
81  HashMap<String, String> eventH = new HashMap<>(); // hashmap for dealing with events
82  String tierDisplay = tier.getAttribute("display-name"); // get tier name
83  float tierDuration = 0; // time the tier duration
84  NodeList events = tier.getElementsByTagName("event"); // get all events for the tier
85  boolean notAnnotation = false; // in case the tier is yet not an annotation
86  for (int j = 0; j < events.getLength(); j++) { // handle each event
87  float eventDuration = 0; // time the event duration
88  Element event = (Element) events.item(j);
89  String eventLabel = event.getTextContent(); // acquire the content of the event
90  String eventStart = event.getAttribute("start"); // acquire the starting tl item for the event
91  String eventEnd = event.getAttribute("end"); // acquire the ending tl item for the event
92  if (eventLabel.length() > 20) { //if an event in the tier is suspiciously lengthy
93  notAnnotation = true;
94  break;
95  }
96  if (timelineItems.get(eventEnd) - timelineItems.get(eventStart) >= 0) { // make sure eventEnd is after the eventStart
97  eventDuration = timelineItems.get(eventEnd) - timelineItems.get(eventStart); // calculate the event duration
98  tierDuration += timelineItems.get(eventEnd) - timelineItems.get(eventStart); // add it up to the total tier duration
99  }
100  // sort the format out for putting it on the report
101  float secondsLeft = eventDuration % 60;
102  int minutes = (int) Math.floor(eventDuration / 60);
103  String MM = (String) (minutes < 10 ? "0" + Integer.toString(minutes) : Integer.toString(minutes));
104  String SS = (String) (secondsLeft < 10 ? "0" + Float.toString(secondsLeft) : Float.toString(secondsLeft));
105  if (SS.length() > 5) {
106  SS = SS.substring(0, 5);
107  }
108  if (eventH.containsKey(eventLabel)) { // in case the label has already been found in the tier
109  String durOfEvent = eventH.get(eventLabel);
110  int minute = Integer.parseInt(durOfEvent.substring(0, durOfEvent.indexOf(":")));
111  float second = Float.parseFloat(durOfEvent.substring(durOfEvent.indexOf(":") + 1));
112  float totalSecond = (secondsLeft + second) % 60;
113  if ((secondsLeft + second) / 60 >= 1.0) {
114  minute++;
115  }
116  int totalMin = minute + minutes;
117  String totalMM = (String) (totalMin < 10 ? "0" + Integer.toString(totalMin) : Integer.toString(totalMin));
118  String totalSS = (String) (totalSecond < 10 ? "0" + Float.toString(totalSecond) : Float.toString(totalSecond));
119  if (totalSS.length() > 5) {
120  totalSS = totalSS.substring(0, 5);
121  }
122  eventH.put(eventLabel, totalMM + ":" + totalSS);
123  } else {
124  eventH.put(eventLabel, MM + ":" + SS);
125  }
126  }
127  if (notAnnotation) {// if the tier is not an annotation
128  continue; // then do not save this tier or its events
129  }
130  // put the events for each tier in the hashmap so long as there is an event under that tier
131  if (!eventH.isEmpty()) {
132  eventMap.put(tierDisplay, eventH);
133  }
134  // formatting the duration of the annotation for the report
135  float secondsLeft = tierDuration % 60;
136  int minutes = (int) Math.floor(tierDuration / 60);
137  String MM = (String) (minutes < 10 ? "0" + Integer.toString(minutes) : Integer.toString(minutes));
138  String SS = (String) (secondsLeft < 10 ? "0" + Float.toString(secondsLeft) : Float.toString(secondsLeft));
139  if (SS.length() > 5) {
140  SS = SS.substring(0, 5);
141  }
142  tierH.put(tierDisplay, MM + ":" + SS); // add total duration of each tier into the hash map
143  stats.addNote(function, cd, tierDisplay + " " + MM + ":" + SS); // display it on the report
144  }
145  }
146  // show the annotation time for each label in every tier
147  stats.addNote(function, cd, "Labels per Tier");
148  Set perTier = eventMap.keySet();
149  for (Object per : perTier) {
150  String tierName = (String) per;
151  stats.addNote(function, cd, tierName);
152  HashMap map = new HashMap(eventMap.get(tierName));
153  Set perMap = map.keySet();
154  for (Object obj : perMap) {
155  String label = (String) obj;
156  stats.addNote(function, cd, label + " " + map.get(label));
157  System.out.println(label + " " + map.get(label));
158  }
159  }
160  tierMap.put(transcriptName, tierH); // finally add the annotations of the transcript
161  return stats;
162  }
163 
167  public HashMap<String, Float> getTimelineItems(NodeList items) {
168  HashMap<String, Float> h = new HashMap<>();
169  for (int i = 0; i < items.getLength(); i++) { // loop for dealing with each timeline item
170  Element item = (Element) items.item(i);
171  String itemID = item.getAttribute("id");
172  Float time = null;
173  if (h.get("T" + Integer.toString(Integer.valueOf(itemID.substring(1)) - 1)) != null) {
174  time = h.get("T" + Integer.toString(Integer.valueOf(itemID.substring(1)) - 1));
175  } else {
176  time = new Float(0.0);
177  }
178  if (!item.getAttribute("time").equals("")) {
179  time = new Float(item.getAttribute("time"));
180  }
181  h.put(itemID, time);
182  }
183  return h;
184  }
185 
191  @Override
192  public Collection<Class<? extends CorpusData>> getIsUsableFor(){
193  try {
194  Class cl = Class.forName("de.uni_hamburg.corpora.BasicTranscriptionData");
195  IsUsableFor.add(cl);
196  } catch (ClassNotFoundException ex) {
197  report.addException(ex, " usable class not found");
198  }
199  return IsUsableFor;
200  }
201 
206  @Override
207  public String getDescription() {
208  String description = "This class calculates annotated time for an exb file and computes the duration of each annotation in the exb.";
209  return description;
210  }
211 
212 }
void addNote(String statId, String description)
Definition: Report.java:245
void merge(Report sr)
Definition: Report.java:73
static InputStream String2InputStream(String s)
void addException(Throwable e, String description)
Definition: Report.java:287