hzsk-corpus-services  0.2
CalculateAnnotatedTime.java
Go to the documentation of this file.
1 package de.uni_hamburg.corpora.validation;
2 
7 import java.io.IOException;
8 import java.util.Collection;
9 import java.util.HashMap;
10 import java.util.Set;
11 import java.util.logging.Level;
12 import java.util.logging.Logger;
13 import javax.xml.parsers.DocumentBuilder;
14 import javax.xml.parsers.DocumentBuilderFactory;
15 import javax.xml.parsers.ParserConfigurationException;
16 import javax.xml.transform.TransformerException;
17 import javax.xml.xpath.XPathExpressionException;
18 import org.exmaralda.partitureditor.jexmaralda.JexmaraldaException;
19 import org.jdom.JDOMException;
20 import org.w3c.dom.Document;
21 import org.w3c.dom.Element;
22 import org.w3c.dom.NodeList;
23 import org.xml.sax.SAXException;
24 
30 public class CalculateAnnotatedTime extends Checker implements CorpusFunction {
31 
32  String annotLoc = "";
33  //HashMap<String, HashMap<String, String>> eventMap; // hash map for holding events of annotation tiers
34  HashMap<String, HashMap<String, String>> tierMap; // all the annotation tiers of all the exb files of the corpus
35 
41  public Report check(CorpusData cd) throws JexmaraldaException {
42  Report stats = new Report();
43  try {
44  stats = exceptionalCheck(cd);
45  } catch (ParserConfigurationException pce) {
46  stats.addException(pce, annotLoc + ": Unknown parsing error");
47  } catch (SAXException saxe) {
48  stats.addException(saxe, annotLoc + ": Unknown parsing error");
49  } catch (IOException ioe) {
50  stats.addException(ioe, annotLoc + ": Unknown file reading error");
51  } catch (TransformerException ex) {
52  Logger.getLogger(CalculateAnnotatedTime.class.getName()).log(Level.SEVERE, null, ex);
53  } catch (XPathExpressionException ex) {
54  Logger.getLogger(CalculateAnnotatedTime.class.getName()).log(Level.SEVERE, null, ex);
55  }
56  return stats;
57  }
58 
64  private Report exceptionalCheck(CorpusData cd)
65  throws SAXException, IOException, ParserConfigurationException, JexmaraldaException, TransformerException, XPathExpressionException {
66  Report stats = new Report(); //create a new report
67  DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
68  DocumentBuilder db = dbf.newDocumentBuilder();
69  Document doc = db.parse(TypeConverter.String2InputStream(cd.toSaveableString())); // get the file as a document
70  // get the name of the transcription
71  String transcriptName;
72  if (doc.getElementsByTagName("transcription-name").getLength() > 0) { // check if transcript name exists for the exb file
73  transcriptName = doc.getElementsByTagName("transcription-name").item(0).getTextContent(); // get transcript name
74  } else {
75  transcriptName = "No Name Transcript";
76  }
77  // add the title as a note to the report
78  stats.addNote("calculate-annotated-time", "Annotation Tiers of " + transcriptName);
79  HashMap<String, HashMap<String, String>> eventMap = new HashMap<>();
80  //initialise the hash map only for the first time when an exb file is encountered
81  if (tierMap == null) {
82  tierMap = new HashMap<>();
83  }
84  NodeList tiers = doc.getElementsByTagName("tier"); // get all tiers of the transcript
85  NodeList items = doc.getElementsByTagName("tli"); // get all timeline items of the transcript
86  HashMap<String, Float> timelineItems = getTimelineItems(items); // container for the tl items
87  HashMap<String, String> tierH = new HashMap<>();
88  for (int i = 0; i < tiers.getLength(); i++) { // loop for dealing with each tier
89  Element tier = (Element) tiers.item(i); // get one tier at a time
90  if (tier.getAttribute("type").equals("a") && !(tier.getAttribute("category").equals("en")
91  || tier.getAttribute("category").equals("de"))) { // handle annotation tiers exclusively
92  HashMap<String, String> eventH = new HashMap<>(); // hashmap for dealing with events
93  String tierDisplay = tier.getAttribute("display-name"); // get tier name
94  float tierDuration = 0; // time the tier duration
95  NodeList events = tier.getElementsByTagName("event"); // get all events for the tier
96  boolean notAnnotation = false; // in case the tier is yet not an annotation
97  for (int j = 0; j < events.getLength(); j++) { // handle each event
98  float eventDuration = 0; // time the event duration
99  Element event = (Element) events.item(j);
100  String eventLabel = event.getTextContent(); // acquire the content of the event
101  String eventStart = event.getAttribute("start"); // acquire the starting tl item for the event
102  String eventEnd = event.getAttribute("end"); // acquire the ending tl item for the event
103  if (eventLabel.length() > 20) { //if an event in the tier is suspiciously lengthy
104  notAnnotation = true;
105  break;
106  }
107  if (timelineItems.get(eventEnd) - timelineItems.get(eventStart) >= 0) { // make sure eventEnd is after the eventStart
108  eventDuration = timelineItems.get(eventEnd) - timelineItems.get(eventStart); // calculate the event duration
109  tierDuration += timelineItems.get(eventEnd) - timelineItems.get(eventStart); // add it up to the total tier duration
110  }
111  // sort the format out for putting it on the report
112  float secondsLeft = eventDuration % 60;
113  int minutes = (int) Math.floor(eventDuration / 60);
114  String MM = (String) (minutes < 10 ? "0" + Integer.toString(minutes) : Integer.toString(minutes));
115  String SS = (String) (secondsLeft < 10 ? "0" + Float.toString(secondsLeft) : Float.toString(secondsLeft));
116  if (SS.length() > 5) {
117  SS = SS.substring(0, 5);
118  }
119  if (eventH.containsKey(eventLabel)) { // in case the label has already been found in the tier
120  String durOfEvent = eventH.get(eventLabel);
121  int minute = Integer.parseInt(durOfEvent.substring(0, durOfEvent.indexOf(":")));
122  float second = Float.parseFloat(durOfEvent.substring(durOfEvent.indexOf(":") + 1));
123  float totalSecond = (secondsLeft + second) % 60;
124  if ((secondsLeft + second) / 60 >= 1.0) {
125  minute++;
126  }
127  int totalMin = minute + minutes;
128  String totalMM = (String) (totalMin < 10 ? "0" + Integer.toString(totalMin) : Integer.toString(totalMin));
129  String totalSS = (String) (totalSecond < 10 ? "0" + Float.toString(totalSecond) : Float.toString(totalSecond));
130  if (totalSS.length() > 5) {
131  totalSS = totalSS.substring(0, 5);
132  }
133  eventH.put(eventLabel, totalMM + ":" + totalSS);
134  } else {
135  eventH.put(eventLabel, MM + ":" + SS);
136  }
137  }
138  if (notAnnotation) {// if the tier is not an annotation
139  continue; // then do not save this tier or its events
140  }
141  // put the events for each tier in the hashmap so long as there is an event under that tier
142  if (!eventH.isEmpty()) {
143  eventMap.put(tierDisplay, eventH);
144  }
145  // formatting the duration of the annotation for the report
146  float secondsLeft = tierDuration % 60;
147  int minutes = (int) Math.floor(tierDuration / 60);
148  String MM = (String) (minutes < 10 ? "0" + Integer.toString(minutes) : Integer.toString(minutes));
149  String SS = (String) (secondsLeft < 10 ? "0" + Float.toString(secondsLeft) : Float.toString(secondsLeft));
150  if (SS.length() > 5) {
151  SS = SS.substring(0, 5);
152  }
153  tierH.put(tierDisplay, MM + ":" + SS); // add total duration of each tier into the hash map
154  stats.addNote("calculate-annotated-time", tierDisplay + " " + MM + ":" + SS); // display it on the report
155  }
156  }
157  // show the annotation time for each label in every tier
158  stats.addNote("calculate-annotated-time", "Labels per Tier");
159  Set perTier = eventMap.keySet();
160  for (Object per : perTier) {
161  String tierName = (String) per;
162  stats.addNote("calculate-annotated-time", tierName);
163  HashMap map = new HashMap(eventMap.get(tierName));
164  Set perMap = map.keySet();
165  for (Object obj : perMap) {
166  String label = (String) obj;
167  stats.addNote("calculate-annotated-time", label + " " + map.get(label));
168  System.out.println(label + " " + map.get(label));
169  }
170  }
171  tierMap.put(transcriptName, tierH); // finally add the annotations of the transcript
172  return stats;
173  }
174 
178  public HashMap<String, Float> getTimelineItems(NodeList items) {
179  HashMap<String, Float> h = new HashMap<>();
180  for (int i = 0; i < items.getLength(); i++) { // loop for dealing with each timeline item
181  Element item = (Element) items.item(i);
182  String itemID = item.getAttribute("id");
183  Float time = null;
184  if (h.get("T" + Integer.toString(Integer.valueOf(itemID.substring(1)) - 1)) != null) {
185  time = h.get("T" + Integer.toString(Integer.valueOf(itemID.substring(1)) - 1));
186  } else {
187  time = new Float(0.0);
188  }
189  if (!item.getAttribute("time").equals("")) {
190  time = new Float(item.getAttribute("time"));
191  }
192  h.put(itemID, time);
193  }
194  return h;
195  }
196 
200  @Override
201  public Report fix(CorpusData cd) throws SAXException, JDOMException, IOException, JexmaraldaException {
202  throw new UnsupportedOperationException("Not supported yet."); //To change body of generated methods, choose Tools | Templates.
203  }
204 
210  @Override
211  public Collection<Class<? extends CorpusData>> getIsUsableFor() {
212  try {
213  Class cl = Class.forName("de.uni_hamburg.corpora.BasicTranscriptionData");
214  IsUsableFor.add(cl);
215  } catch (ClassNotFoundException ex) {
216  Logger.getLogger(IAAFunctionality.class.getName()).log(Level.SEVERE, null, ex);
217  }
218  return IsUsableFor;
219  }
220 
221 }
void addNote(String statId, String description)
Definition: Report.java:233
Collection< Class<?extends CorpusData > > getIsUsableFor()
static InputStream String2InputStream(String s)
void addException(Throwable e, String description)
Definition: Report.java:275