corpus-services  1.0
ExbTierDisplayNameChecker.java
Go to the documentation of this file.
1 package de.uni_hamburg.corpora.validation;
2 
9 import java.io.IOException;
10 import java.net.URISyntaxException;
11 import java.util.Collection;
12 import java.util.HashMap;
13 import javax.xml.parsers.DocumentBuilder;
14 import javax.xml.parsers.DocumentBuilderFactory;
15 import javax.xml.parsers.ParserConfigurationException;
16 import javax.xml.transform.TransformerException;
17 import javax.xml.xpath.XPathExpressionException;
18 import org.exmaralda.partitureditor.jexmaralda.JexmaraldaException;
19 import org.jdom.JDOMException;
20 import org.w3c.dom.Document;
21 import org.w3c.dom.Element;
22 import org.w3c.dom.NodeList;
23 import org.xml.sax.SAXException;
24 
29 public class ExbTierDisplayNameChecker extends Checker implements CorpusFunction {
30 
31  String tierLoc = "";
32 
34  //fixing not possible
35  super(false);
36  }
37 
44  @Override
45  public Report function(CorpusData cd, Boolean fix)
46  throws SAXException, IOException, ParserConfigurationException, TransformerException, XPathExpressionException {
47  DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
48  DocumentBuilder db = dbf.newDocumentBuilder();
49  Document doc = db.parse(TypeConverter.String2InputStream(cd.toSaveableString())); // get the file as a document
50  String transcriptName;
51  if (doc.getElementsByTagName("transcription-name").getLength() > 0) { // check if transcript name exists for the exb file
52  transcriptName = doc.getElementsByTagName("transcription-name").item(0).getTextContent(); // get transcript name
53  } else {
54  transcriptName = "No Name Transcript";
55  }
56  NodeList tiers = doc.getElementsByTagName("tier"); // get all tiers of the transcript
57  NodeList speakers = doc.getElementsByTagName("speaker"); // get all speakers of the transcript
58  HashMap<String, String> speakerMap = new HashMap<String, String>(); // map for each speaker and its corresponding abbreviation
59  Report stats = new Report(); // create a new report for the transcript
60  for (int i = 0; i < speakers.getLength(); i++) { // put speakers and their abbreviations into the map
61  Element speaker = (Element) speakers.item(i);
62  speakerMap.put(speaker.getAttribute("id"), speaker.getElementsByTagName("abbreviation").item(0).getTextContent());
63  }
64  for (int i = 0; i < tiers.getLength(); i++) { // loop for dealing with each tier
65  Element tier = (Element) tiers.item(i);
66  String category = tier.getAttribute("category"); // get category
67  String speakerName = tier.getAttribute("speaker"); // get speaker name
68  String displayName = tier.getAttribute("display-name"); // get display name
69  String displayNameCategory = displayName;
70 
71  String displayNameSpeaker = "";
72  int openingPar = -1;
73  int closingPar = -1;
74  if (!displayName.isEmpty()) { // if display name exists compare it with other attributes
75  if (displayName.contains("[") && displayName.contains("]")) { // check if display name contains brackets
76  openingPar = displayName.indexOf("[");
77  closingPar = displayName.indexOf("]");
78  displayNameCategory = displayName.substring(openingPar + 1, closingPar);
79  displayNameSpeaker = displayName.substring(0, openingPar - 1);
80  } else if (displayName.contains("-")){
81  openingPar = displayName.lastIndexOf("-");
82  closingPar = displayName.length();
83  //Could also be that the category has a dash!
84  displayNameSpeaker = displayName.substring(openingPar + 1, closingPar);
85  //Could also be that the category has a dash!
86  displayNameCategory = displayName.substring(0, openingPar);
87  }
88  //System.out.println("Tier DisplayName " + displayName + " category " + category + " displaycategory " + displayNameCategory + " and speaker name " + speakerName + " displayspeaker " + displayNameSpeaker);
89  if (!speakerName.isEmpty() && !category.isEmpty()) { // if speaker name exists check if it complies with tier display name
90  if (((category.equals(displayNameCategory)) && (speakerName.equals(displayNameSpeaker))) || (category.equals(displayName))) {
91  //everything is correct
92  System.out.println("Tier DisplayName " + displayName + " matches category " + category + " and speaker name " + speakerName);
93  stats.addCorrect(function, cd, "Tier DisplayName " + displayName + " matches category " + category + " and speaker name " + speakerName);
94  } else {
95  System.out.println("Speaker abbreviation and display name for tier do not match"
96  + "for speaker " + speakerName + ", tier: displayname " + displayName + " and id " + tier.getAttribute("id")
97  + " in transcription of " + transcriptName);
98  stats.addCritical(function, cd, "Tier mismatch "
99  + "for speaker " + speakerName + ", tier category " + category
100  +", tier: displayname " + displayName
101  + " id " + tier.getAttribute("id")
102  + " in transcription of " + transcriptName);
103  exmaError.addError(function, cd.getURL().getFile(), tier.getAttribute("id"), "", false, "Error: Speaker abbreviation and display name for tier does not match"
104  + "for speaker " + speakerName + ", tier category " + category
105  + ", tier id " + tier.getAttribute("id")
106  + " in transcription of " + transcriptName);
107  }
108  }
109  }
110  else{
111  stats.addWarning(function, cd, "Display name is empty "
112  + "for speaker " + speakerName + ", tier category " + category
113  + ", tier id " + tier.getAttribute("id"));
114  exmaError.addError(function, cd.getURL().getFile(), tier.getAttribute("id"), "", false, "Error: Display name for tier is empty"
115  + "for speaker " + speakerName + ", tier category " + category
116  + ", tier id " + tier.getAttribute("id"));
117  }
118  }
119  return stats; // return all the warnings
120  }
121 
122 
128  @Override
129  public Collection<Class<? extends CorpusData>> getIsUsableFor() {
130  try {
131  Class cl = Class.forName("de.uni_hamburg.corpora.BasicTranscriptionData");
132  //Class clSecond = Class.forName("de.uni_hamburg.corpora.UnspecifiedXMLData");
133  IsUsableFor.add(cl);
134  //IsUsableFor.add(clSecond);
135  } catch (ClassNotFoundException ex) {
136  report.addException(ex, "unknown class not found error");
137  }
138  return IsUsableFor;
139  }
140 
144  @Override
145  public String getDescription() {
146  String description = "This class checks exb tiers and finds out if there"
147  + " is a mismatch between category, speaker abbreviation and display"
148  + " name for each tier.";
149  return description;
150  }
151 
152  @Override
153  public Report function(Corpus c, Boolean fix) throws SAXException, IOException, ParserConfigurationException, URISyntaxException, JDOMException, TransformerException, XPathExpressionException {
154  Report stats = new Report();
155  for (CorpusData cdata : c.getBasicTranscriptionData()) {
156  stats.merge(function(cdata, fix));
157  }
158  return stats;
159  }
160 
161 }
void merge(Report sr)
Definition: Report.java:73
static InputStream String2InputStream(String s)
void addException(Throwable e, String description)
Definition: Report.java:287