1 package de.uni_hamburg.corpora.validation;
8 import java.io.IOException;
9 import java.util.Collection;
10 import java.util.HashMap;
12 import javax.xml.parsers.DocumentBuilder;
13 import javax.xml.parsers.DocumentBuilderFactory;
14 import javax.xml.parsers.ParserConfigurationException;
15 import javax.xml.transform.TransformerException;
16 import javax.xml.xpath.XPathExpressionException;
17 import org.exmaralda.partitureditor.jexmaralda.JexmaraldaException;
18 import org.w3c.dom.Document;
19 import org.w3c.dom.Element;
20 import org.w3c.dom.NodeList;
21 import org.xml.sax.SAXException;
// Per-transcript summary: transcript name -> (tier display name -> total annotated
// duration of that tier as an "MM:SS" string). Created lazily on first use and filled
// once per processed transcription.
32 HashMap<String, HashMap<String, String>> tierMap;
/**
 * Runs the annotated-time calculation over a whole corpus.
 *
 * Iterates over every item returned by c.getBasicTranscriptionData(), calls the
 * per-transcription function(cdata, fix) overload on it, and merges each result
 * into stats.
 *
 * @param c   corpus whose basic transcriptions are processed
 * @param fix passed through unchanged to the per-transcription overload
 * @return a Report; presumably the accumulated stats (the return statement is not
 *         visible in this excerpt — TODO confirm)
 */
40 public Report function(
Corpus c, Boolean fix)
throws SAXException, IOException, ParserConfigurationException, JexmaraldaException, TransformerException, XPathExpressionException {
42 for (
CorpusData cdata : c.getBasicTranscriptionData()) {
// Fold the report of this single transcription into the running report.
43 stats.
merge(
function(cdata, fix));
// Per-transcription pass: for every annotation tier of the document it sums the
// durations of the tier's events (overall and per distinct event label), formats
// them as "MM:SS" strings and reports them as notes on stats.
// NOTE(review): the method signature and the statement that parses the document
// into `doc` are not visible in this excerpt; `cd`, `function`, `doc` and
// `timelineItems` are defined on lines not shown here.
54 throws SAXException, IOException, ParserConfigurationException, JexmaraldaException, TransformerException, XPathExpressionException {
56 DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
57 DocumentBuilder db = dbf.newDocumentBuilder();
// Use the text of the first <transcription-name> element when there is one,
// otherwise fall back to a fixed placeholder name.
60 String transcriptName;
61 if (doc.getElementsByTagName(
"transcription-name").getLength() > 0) {
62 transcriptName = doc.getElementsByTagName(
"transcription-name").item(0).getTextContent();
64 transcriptName =
"No Name Transcript";
67 stats.
addNote(
"calculate-annotated-time",
"Annotation Tiers of " + transcriptName);
// tier display name -> (event label -> accumulated duration "MM:SS")
68 HashMap<String, HashMap<String, String>> eventMap =
new HashMap<>();
// The shared per-transcript map is created on first use only.
70 if (tierMap == null) {
71 tierMap =
new HashMap<>();
73 NodeList tiers = doc.getElementsByTagName(
"tier");
74 NodeList items = doc.getElementsByTagName(
"tli");
// tier display name -> total duration of the tier "MM:SS"
76 HashMap<String, String> tierH =
new HashMap<>();
77 for (
int i = 0; i < tiers.getLength(); i++) {
78 Element tier = (Element) tiers.item(i);
// Only tiers with type "a" are considered, and among those the ones whose
// category is "en" or "de" are skipped.
79 if (tier.getAttribute(
"type").equals(
"a") && !(tier.getAttribute(
"category").equals(
"en")
80 || tier.getAttribute(
"category").equals(
"de"))) {
// event label -> accumulated duration within this tier
81 HashMap<String, String> eventH =
new HashMap<>();
82 String tierDisplay = tier.getAttribute(
"display-name");
83 float tierDuration = 0;
84 NodeList events = tier.getElementsByTagName(
"event");
85 boolean notAnnotation =
false;
86 for (
int j = 0; j < events.getLength(); j++) {
87 float eventDuration = 0;
88 Element
event = (Element) events.item(j);
89 String eventLabel =
event.getTextContent();
90 String eventStart =
event.getAttribute(
"start");
91 String eventEnd =
event.getAttribute(
"end");
// Labels longer than 20 characters get special handling; the branch body is not
// visible here (presumably it sets notAnnotation — TODO confirm).
92 if (eventLabel.length() > 20) {
// Duration = time at the event's end id minus time at its start id, looked up in
// timelineItems; only non-negative differences are counted.
96 if (timelineItems.get(eventEnd) - timelineItems.get(eventStart) >= 0) {
97 eventDuration = timelineItems.get(eventEnd) - timelineItems.get(eventStart);
98 tierDuration += timelineItems.get(eventEnd) - timelineItems.get(eventStart);
// Split the event duration into whole minutes and remaining seconds, zero-pad both
// below 10 and cut the seconds string to at most 5 characters.
// NOTE(review): Float.toString switches to exponent notation for values below 1e-3
// (e.g. "5.0E-4"); after padding/truncation such a string would not be parseable by
// the Float.parseFloat call further down — confirm whether such tiny remainders occur.
101 float secondsLeft = eventDuration % 60;
102 int minutes = (int) Math.floor(eventDuration / 60);
103 String MM = (String) (minutes < 10 ?
"0" + Integer.toString(minutes) : Integer.toString(minutes));
104 String SS = (String) (secondsLeft < 10 ?
"0" + Float.toString(secondsLeft) : Float.toString(secondsLeft));
105 if (SS.length() > 5) {
106 SS = SS.substring(0, 5);
// Label already seen in this tier: parse the stored "MM:SS" string back into
// minutes and seconds and add the current event's duration to it.
108 if (eventH.containsKey(eventLabel)) {
109 String durOfEvent = eventH.get(eventLabel);
110 int minute = Integer.parseInt(durOfEvent.substring(0, durOfEvent.indexOf(
":")));
111 float second = Float.parseFloat(durOfEvent.substring(durOfEvent.indexOf(
":") + 1));
112 float totalSecond = (secondsLeft + second) % 60;
// Seconds overflowed a full minute; the carry handling itself is on lines not
// visible here (presumably it increments the minute count — TODO confirm).
113 if ((secondsLeft + second) / 60 >= 1.0) {
116 int totalMin = minute + minutes;
117 String totalMM = (String) (totalMin < 10 ?
"0" + Integer.toString(totalMin) : Integer.toString(totalMin));
118 String totalSS = (String) (totalSecond < 10 ?
"0" + Float.toString(totalSecond) : Float.toString(totalSecond));
119 if (totalSS.length() > 5) {
120 totalSS = totalSS.substring(0, 5);
122 eventH.put(eventLabel, totalMM +
":" + totalSS);
// Stores the duration of the current event alone (the enclosing else line is not
// visible; presumably this is the first-occurrence branch).
124 eventH.put(eventLabel, MM +
":" + SS);
// Tiers that produced no label entries are left out of the per-label report.
131 if (!eventH.isEmpty()) {
132 eventMap.put(tierDisplay, eventH);
// Same minutes/seconds formatting as above, applied to the tier total.
135 float secondsLeft = tierDuration % 60;
136 int minutes = (int) Math.floor(tierDuration / 60);
137 String MM = (String) (minutes < 10 ?
"0" + Integer.toString(minutes) : Integer.toString(minutes));
138 String SS = (String) (secondsLeft < 10 ?
"0" + Float.toString(secondsLeft) : Float.toString(secondsLeft));
139 if (SS.length() > 5) {
140 SS = SS.substring(0, 5);
142 tierH.put(tierDisplay, MM +
":" + SS);
143 stats.
addNote(
function, cd, tierDisplay +
" " + MM +
":" + SS);
// Second report section: for every tier, one note with the tier name followed by
// one note per label with its accumulated duration.
147 stats.
addNote(
function, cd,
"Labels per Tier");
148 Set perTier = eventMap.keySet();
149 for (Object per : perTier) {
150 String tierName = (String) per;
151 stats.
addNote(
function, cd, tierName);
// Iterates over a copy of the tier's label map.
152 HashMap map =
new HashMap(eventMap.get(tierName));
153 Set perMap = map.keySet();
154 for (Object obj : perMap) {
155 String label = (String) obj;
156 stats.
addNote(
function, cd, label +
" " + map.get(label));
// The same line is also written to standard output.
157 System.out.println(label +
" " + map.get(label));
// Record the tier totals of this transcription under its name.
160 tierMap.put(transcriptName, tierH);
// Builds a map from timeline item id to a time value for the given <tli> nodes.
// NOTE(review): the method header, the declaration of `time`, the else line and the
// statements that store `time` in `h` and return it are not visible in this excerpt.
168 HashMap<String, Float> h =
new HashMap<>();
169 for (
int i = 0; i < items.getLength(); i++) {
170 Element item = (Element) items.item(i);
171 String itemID = item.getAttribute(
"id");
// Default for an item: the value stored for the id "T<n-1>", where n is the integer
// parsed from the current id with its first character removed. This requires ids to
// be one character followed by digits; Integer.valueOf throws otherwise.
173 if (h.get(
"T" + Integer.toString(Integer.valueOf(itemID.substring(1)) - 1)) != null) {
174 time = h.get(
"T" + Integer.toString(Integer.valueOf(itemID.substring(1)) - 1));
// No value stored for the predecessor id: start from zero.
176 time =
new Float(0.0);
// A non-empty "time" attribute on the item overrides the default chosen above.
178 if (!item.getAttribute(
"time").equals(
"")) {
179 time =
new Float(item.getAttribute(
"time"));
// Looks up the BasicTranscriptionData class by its fully qualified name at runtime.
// NOTE(review): what is done with `cl` and how the exception is handled is on lines
// not visible in this excerpt.
194 Class cl = Class.forName(
"de.uni_hamburg.corpora.BasicTranscriptionData");
196 }
catch (ClassNotFoundException ex) {
// Human-readable summary of what this checker does.
208 String description =
"This class calculates annotated time for an exb file and computes the duration of each annotation in the exb.";
ExbCalculateAnnotatedTime()
void addNote(String statId, String description)
Collection< Class<?extends CorpusData > > getIsUsableFor()
HashMap< String, Float > getTimelineItems(NodeList items)
static InputStream String2InputStream(String s)
void addException(Throwable e, String description)