1 package de.uni_hamburg.corpora.validation;
9 import java.io.IOException;
10 import java.net.URISyntaxException;
11 import java.util.Collection;
12 import java.util.HashMap;
13 import javax.xml.parsers.DocumentBuilder;
14 import javax.xml.parsers.DocumentBuilderFactory;
15 import javax.xml.parsers.ParserConfigurationException;
16 import javax.xml.transform.TransformerException;
17 import javax.xml.xpath.XPathExpressionException;
18 import org.exmaralda.partitureditor.jexmaralda.JexmaraldaException;
19 import org.jdom.JDOMException;
20 import org.w3c.dom.Document;
21 import org.w3c.dom.Element;
22 import org.w3c.dom.NodeList;
23 import org.xml.sax.SAXException;
46 throws SAXException, IOException, ParserConfigurationException, TransformerException, XPathExpressionException {
// Body of the per-tier display-name check. The method header is not part of this excerpt;
// presumably this is the per-file overload that function(Corpus, Boolean) calls - TODO confirm.
// For every "tier" element of the parsed document it compares the tier's "category" and
// "speaker" attributes with the parts encoded in its "display-name" attribute and records
// the outcome on stats and exmaError.
// NOTE(review): no DTD / external-entity hardening is configured on the factory in the
// visible lines - confirm whether the parsed input is trusted.
47 DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
48 DocumentBuilder db = dbf.newDocumentBuilder();
// doc is assigned on a line that is not shown in this excerpt.
50 String transcriptName;
// Use the text of the first "transcription-name" element when the document has one ...
51 if (doc.getElementsByTagName(
"transcription-name").getLength() > 0) {
52 transcriptName = doc.getElementsByTagName(
"transcription-name").item(0).getTextContent();
// ... presumably the fallback when it has none (the else keyword is not visible here).
54 transcriptName =
"No Name Transcript";
// All "tier" and "speaker" elements of the document.
56 NodeList tiers = doc.getElementsByTagName(
"tier");
57 NodeList speakers = doc.getElementsByTagName(
"speaker");
// Speaker id -> text of the speaker's first "abbreviation" child.
// NOTE(review): this map is filled below but not read anywhere in the visible lines.
58 HashMap<String, String> speakerMap =
new HashMap<String, String>();
60 for (
int i = 0; i < speakers.getLength(); i++) {
61 Element speaker = (Element) speakers.item(i);
// NOTE(review): item(0) returns null for a speaker without an "abbreviation" child,
// which would raise a NullPointerException on getTextContent().
62 speakerMap.put(speaker.getAttribute(
"id"), speaker.getElementsByTagName(
"abbreviation").item(0).getTextContent());
// Check each tier in document order.
64 for (
int i = 0; i < tiers.getLength(); i++) {
65 Element tier = (Element) tiers.item(i);
66 String category = tier.getAttribute(
"category");
// Note: speakerName is the raw value of the tier's "speaker" attribute, not a speakerMap lookup.
67 String speakerName = tier.getAttribute(
"speaker");
68 String displayName = tier.getAttribute(
"display-name");
// Until a separator is recognised below, the whole display name counts as the category part
// and the speaker part is empty.
69 String displayNameCategory = displayName;
71 String displayNameSpeaker =
"";
// Only a non-empty display name is split and compared.
74 if (!displayName.isEmpty()) {
// Bracket form: the text between the first "[" and the first "]" is the category part.
75 if (displayName.contains(
"[") && displayName.contains(
"]")) {
76 openingPar = displayName.indexOf(
"[");
77 closingPar = displayName.indexOf(
"]");
78 displayNameCategory = displayName.substring(openingPar + 1, closingPar);
// The speaker part is everything before "[" minus one character (presumably a separating space).
// NOTE(review): substring throws StringIndexOutOfBoundsException when "[" is at index 0
// (end index becomes -1); the substring on the previous statement throws when "]" occurs before "[".
79 displayNameSpeaker = displayName.substring(0, openingPar - 1);
80 }
// Hyphen form: split at the last "-"; the text after it is the speaker part and the text
// before it is the category part.
else if (displayName.contains(
"-")){
81 openingPar = displayName.lastIndexOf(
"-");
82 closingPar = displayName.length();
84 displayNameSpeaker = displayName.substring(openingPar + 1, closingPar);
86 displayNameCategory = displayName.substring(0, openingPar);
// The comparison is only made when both the speaker and the category attribute are non-empty.
89 if (!speakerName.isEmpty() && !category.isEmpty()) {
// A tier matches when both parts agree with its attributes, or when the category equals
// the entire display name.
90 if (((category.equals(displayNameCategory)) && (speakerName.equals(displayNameSpeaker))) || (category.equals(displayName))) {
// Match: print to stdout and record a correct entry on stats.
92 System.out.println(
"Tier DisplayName " + displayName +
" matches category " + category +
" and speaker name " + speakerName);
93 stats.addCorrect(
function, cd,
"Tier DisplayName " + displayName +
" matches category " + category +
" and speaker name " + speakerName);
// Presumably the non-matching branch (the else keyword is not visible here): print to stdout,
// record a critical entry on stats, and add an entry to exmaError.
// NOTE(review): the first two literals are joined without a separating space, so the
// printed text reads "...do not matchfor speaker ...".
95 System.out.println(
"Speaker abbreviation and display name for tier do not match" 96 +
"for speaker " + speakerName +
", tier: displayname " + displayName +
" and id " + tier.getAttribute(
"id")
97 +
" in transcription of " + transcriptName);
98 stats.addCritical(
function, cd,
"Tier mismatch " 99 +
"for speaker " + speakerName +
", tier category " + category
100 +
", tier: displayname " + displayName
101 +
" id " + tier.getAttribute(
"id")
102 +
" in transcription of " + transcriptName);
// The error-list entry receives the file part of cd's URL and the tier id.
// NOTE(review): same missing space as above ("...does not matchfor speaker ...").
103 exmaError.addError(
function, cd.getURL().getFile(), tier.getAttribute(
"id"),
"",
false,
"Error: Speaker abbreviation and display name for tier does not match" 104 +
"for speaker " + speakerName +
", tier category " + category
105 +
", tier id " + tier.getAttribute(
"id")
106 +
" in transcription of " + transcriptName);
// Judging by the message text this handles an empty display name; the branch that guards it
// is not visible in this excerpt. A warning is recorded on stats and an entry added to exmaError.
111 stats.addWarning(
function, cd,
"Display name is empty " 112 +
"for speaker " + speakerName +
", tier category " + category
113 +
", tier id " + tier.getAttribute(
"id"));
// NOTE(review): the first two literals are joined without a space ("...is emptyfor speaker ...").
114 exmaError.addError(
function, cd.getURL().getFile(), tier.getAttribute(
"id"),
"",
false,
"Error: Display name for tier is empty" 115 +
"for speaker " + speakerName +
", tier category " + category
116 +
", tier id " + tier.getAttribute(
"id"));
// Fragment whose enclosing method header is not visible; presumably part of getIsUsableFor() - TODO confirm.
// Looks up the BasicTranscriptionData class by its fully qualified name. What is done with cl
// afterwards is on lines that are not part of this excerpt.
// NOTE(review): Class is used as a raw type here.
131 Class cl = Class.forName(
"de.uni_hamburg.corpora.BasicTranscriptionData");
135 }
// If the class cannot be found, the exception is recorded on report with a generic description.
catch (ClassNotFoundException ex) {
136 report.
addException(ex,
"unknown class not found error");
// Human-readable summary of what this checker does, built from three concatenated literals.
// How the string is returned or used is not visible in this excerpt.
146 String description =
"This class checks exb tiers and finds out if there" 147 +
" is a mismatch between category, speaker abbreviation and display" 148 +
" name for each tier.";
/**
 * Runs the per-file check on every element returned by
 * {@code c.getBasicTranscriptionData()} and merges each result into {@code stats}.
 *
 * The end of this method, including its return statement, is not visible in this
 * excerpt; presumably the merged {@code stats} report is returned - TODO confirm.
 *
 * @param c   corpus whose basic transcription data is iterated
 * @param fix passed through unchanged to the per-file {@code function(cdata, fix)} call
 * @return a {@code Report}; which object is returned is not shown in the visible lines
 */
153 public Report function(
Corpus c, Boolean fix)
throws SAXException, IOException, ParserConfigurationException, URISyntaxException, JDOMException, TransformerException, XPathExpressionException {
// One per-file check per transcription; results accumulate in stats.
155 for (
CorpusData cdata : c.getBasicTranscriptionData()) {
156 stats.
merge(
function(cdata, fix));
static ExmaErrorList exmaError
Collection< Class<?extends CorpusData > > getIsUsableFor()
static InputStream String2InputStream(String s)
ExbTierDisplayNameChecker()
void addException(Throwable e, String description)