1 package de.uni_hamburg.corpora.validation;
10 import java.io.IOException;
12 import java.net.URISyntaxException;
13 import java.util.ArrayList;
14 import java.util.Collection;
15 import java.util.HashMap;
16 import java.util.List;
18 import javax.xml.parsers.DocumentBuilder;
19 import javax.xml.parsers.DocumentBuilderFactory;
20 import javax.xml.parsers.ParserConfigurationException;
21 import javax.xml.transform.Transformer;
22 import javax.xml.transform.TransformerConfigurationException;
23 import javax.xml.transform.TransformerException;
24 import javax.xml.transform.TransformerFactory;
25 import javax.xml.transform.dom.DOMSource;
26 import javax.xml.transform.stream.StreamResult;
27 import javax.xml.xpath.XPathExpressionException;
28 import org.exmaralda.partitureditor.jexmaralda.JexmaraldaException;
29 import org.jdom.JDOMException;
30 import org.w3c.dom.Attr;
31 import org.w3c.dom.Document;
32 import org.w3c.dom.Element;
33 import org.w3c.dom.NodeList;
34 import org.xml.sax.SAXException;
// Tier category -> annotation tags collected for that category. Filled while
// scanning the type-"a" tiers of .exb files and read back when the
// annotation specification document is built. Static, so shared class-wide.
43 static Map<String, Collection<String>> annotationsInExbs =
new HashMap<String, Collection<String>>();
// Flag initialised to true; where it is read is not visible in this excerpt.
44 boolean generateDoc =
true;
// Create an empty DOM document that will hold the annotation specification.
57 DocumentBuilderFactory docFactory = DocumentBuilderFactory.newInstance();
58 DocumentBuilder docBuilder = docFactory.newDocumentBuilder();
59 Document doc = docBuilder.newDocument();
// Root element of the output: <annotation-specification>.
61 rootElement = doc.createElement(
"annotation-specification");
62 doc.appendChild(rootElement);
63 for (String key : annotationsInExbs.keySet()) {
// Skip the categories "en", "de", "ita" and "fe", the empty category, and any
// category holding more than 60 tags.
// NOTE(review): presumably these are translation / free-text tiers rather
// than closed tag sets - confirm the intent of the names and the 60 limit.
64 if (!key.equals(
"en") && !key.equals(
"de") && !key.equals(
"ita") && !key.equals(
"fe")
65 && !key.isEmpty() && annotationsInExbs.get(key).size() <= 60) {
// One <annotation-set> per accepted category.
66 Element annotationSet = doc.createElement(
"annotation-set");
67 rootElement.appendChild(annotationSet);
// The attribute is created and attached; no statement setting its value is
// visible in this excerpt (original line 69 is not shown).
68 Attr attr = doc.createAttribute(
"exmaralda-tier-category");
70 annotationSet.setAttributeNode(attr);
// Top-level <category name="<key>-tags"> holding a <tag name="<key>"> and an
// empty <description>.
71 Element category = doc.createElement(
"category");
72 category.setAttribute(
"name", key +
"-tags");
73 annotationSet.appendChild(category);
74 Element higherTag = doc.createElement(
"tag");
75 higherTag.setAttribute(
"name", key);
76 category.appendChild(higherTag);
77 Element description = doc.createElement(
"description");
78 category.appendChild(description);
// The cast relies on the stored collection being a List; the only construction
// visible in this excerpt is an ArrayList. The sort is in place and
// case-insensitive, so replace() stores the same, now sorted, list back.
79 List<String> sortedTags = (List<String>) annotationsInExbs.get(key);
80 java.util.Collections.sort(sortedTags, String.CASE_INSENSITIVE_ORDER);
81 annotationsInExbs.replace(key, sortedTags);
// Each tag becomes a nested <category name="<tag>"> with its own
// <tag name="<tag>"> and an empty <description>.
82 for (String tag : annotationsInExbs.get(key)) {
84 Element lowerCategory = doc.createElement(
"category");
85 lowerCategory.setAttribute(
"name", tag);
86 Element lowerTag = doc.createElement(
"tag");
87 lowerTag.setAttribute(
"name", tag);
88 lowerCategory.appendChild(lowerTag);
89 Element lowerDescription = doc.createElement(
"description");
90 lowerCategory.appendChild(lowerDescription);
92 "Annotation added to the file annotation panel: " 94 category.appendChild(lowerCategory);
// Serialise the DOM document to XML using a Transformer created without a
// stylesheet.
100 TransformerFactory transformerFactory = TransformerFactory.newInstance();
101 Transformer transformer = transformerFactory.newTransformer();
102 DOMSource source =
new DOMSource(doc);
// Target path: "AnnotationSpecFromExbs.xml" appended to the parent directory of
// the file cd points to.
// NOTE(review): the separator is a hard-coded backslash, which looks
// Windows-specific - confirm whether other platforms must be supported.
103 File f =
new File(
new File(cd.
getURL().getFile()).getParentFile() +
"\\AnnotationSpecFromExbs.xml");
// NOTE(review): `u` is defined outside the lines visible here; presumably it
// is derived from `f` - confirm.
105 StreamResult result =
new StreamResult(
new File(u));
106 transformer.transform(source, result);
115 throws SAXException, IOException, ParserConfigurationException, TransformerConfigurationException, TransformerException, XPathExpressionException {
// A DOM parser is created here; the statement that produces `doc` from it is
// not visible in this excerpt.
116 DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
117 DocumentBuilder db = dbf.newDocumentBuilder();
// Only files whose path ends in ".exb" are inspected.
121 if (cd.getURL().getFile().endsWith(
".exb")) {
// Walk every <tier> element and read its "category" and "type" attributes.
122 NodeList tiers = doc.getElementsByTagName(
"tier");
123 for (
int i = 0; i < tiers.getLength(); i++) {
124 Element tier = (Element) tiers.item(i);
125 String category = tier.getAttribute(
"category");
126 String type = tier.getAttribute(
"type");
// Case 1: tier of type "a" whose category is already a key in the map -
// continue with the tag collection stored for that category.
128 if (annotationsInExbs.containsKey(category) && type.equals(
"a")) {
129 Collection<String> tags = annotationsInExbs.get(category);
130 NodeList events = tier.getElementsByTagName(
"event");
131 for (
int j = 0; j < events.getLength(); j++) {
132 Element
event = (Element) events.item(j);
// The text content of the <event> element is taken as the tag.
133 String tag =
event.getTextContent();
// A tag ending in a space is reported three ways: on the console, as a
// warning on `stats`, and as an entry in `exmaError` carrying the tier id and
// the event's "start" attribute.
135 if (tag.endsWith(
" ")) {
136 System.out.println(
"Exb file " + cd.getURL().getFile().substring(cd.getURL().getFile().lastIndexOf(
"/") + 1) +
" is containing a tag (" 137 + tag +
") in its tier " + tier.getAttribute(
"display-name") +
" with an extra space in the end!");
138 stats.
addWarning(
function, cd,
"Exb file is containing a tag (" 139 + tag +
") in its tier " + tier.getAttribute(
"display-name") +
" with an extra space in the end!");
140 exmaError.addError(
"generate-annotation-panel", cd.getURL().getFile(), tier.getAttribute(
"id"),
event.getAttribute(
"start"),
false,
141 "Exb file " + cd.getURL().getFile().substring(cd.getURL().getFile().lastIndexOf(
"/") + 1) +
" is containing a tag (" 142 + tag +
") in its tier " + tier.getAttribute(
"display-name") +
" with an extra space in the end!");
// NOTE(review): no statement stripping the trailing space is visible in this
// branch (original line 143 is not shown), unlike the second branch below -
// confirm both branches normalise the tag the same way.
// Only tags not yet in the collection pass this check; the body of the check
// is not visible in this excerpt.
145 if (!tags.contains(tag)) {
// Store the collection for this category back into the map.
150 annotationsInExbs.put(category, tags);
// Case 2: tier of type "a" whose category is not yet a key in the map -
// start a fresh, empty tag list for it.
152 else if (!annotationsInExbs.containsKey(category) && type.equals(
"a")) {
153 Collection<String> tags =
new ArrayList<String>();
154 NodeList events = tier.getElementsByTagName(
"event");
155 for (
int j = 0; j < events.getLength(); j++) {
156 Element
event = (Element) events.item(j);
157 String tag =
event.getTextContent();
// Same three-way trailing-space report as in the first branch.
159 if (tag.endsWith(
" ")) {
160 System.out.println(
"Exb file " + cd.getURL().getFile().substring(cd.getURL().getFile().lastIndexOf(
"/") + 1) +
" is containing a tag (" 161 + tag +
") in its tier " + tier.getAttribute(
"display-name") +
" with an extra space in the end!");
162 stats.
addWarning(
function, cd,
"Exb file is containing a tag (" 163 + tag +
") in its tier " + tier.getAttribute(
"display-name") +
" with an extra space in the end!");
164 exmaError.addError(
"generate-annotation-panel", cd.getURL().getFile(), tier.getAttribute(
"id"),
event.getAttribute(
"start"),
false,
165 "Exb file " + cd.getURL().getFile().substring(cd.getURL().getFile().lastIndexOf(
"/") + 1) +
" is containing a tag (" 166 + tag +
") in its tier " + tier.getAttribute(
"display-name") +
" with an extra space in the end!");
// Drop the last character of the tag, i.e. the single trailing space detected
// above; any further trailing spaces remain.
167 tag = tag.substring(0, tag.length() - 1);
// Only tags not yet in the list pass this check; the body of the check is not
// visible in this excerpt.
169 if (!tags.contains(tag)) {
// Register the new category together with its tag list.
173 annotationsInExbs.put(category, tags);
// Resolve, by fully qualified name, the two data classes referenced here:
// BasicTranscriptionData and ComaData.
191 Class cl = Class.forName(
"de.uni_hamburg.corpora.BasicTranscriptionData");
192 Class clSecond = Class.forName(
"de.uni_hamburg.corpora.ComaData");
// clSecond is added to IsUsableFor; a matching statement for cl is not visible
// in this excerpt (original line 193 is not shown).
194 IsUsableFor.add(clSecond);
195 }
// Class.forName throws ClassNotFoundException when a name cannot be resolved;
// the handler body is outside the visible lines.
catch (ClassNotFoundException ex) {
// Summary text for this class: it generates an annotation specification panel
// from basic transcription (exb) files. How the string is used afterwards is
// not visible in this excerpt.
206 String description =
"This class generates an annotation specification panel" 207 +
" from the basic transcription files (exb).";
/**
 * Applies the per-file {@code function(CorpusData, Boolean)} to every basic
 * transcription and then to every annotation specification of the corpus,
 * merging each per-file result into {@code stats}.
 *
 * @param c   corpus supplying the basic transcription and annotation
 *            specification data
 * @param fix passed through unchanged to each per-file call
 * @return the return statement is outside the visible lines; presumably the
 *         merged {@code stats} report - confirm
 */
212 public Report function(
Corpus c, Boolean fix)
throws SAXException, IOException, ParserConfigurationException, JexmaraldaException, TransformerException, XPathExpressionException {
// First pass: basic transcription files.
214 for (
CorpusData cdata : c.getBasicTranscriptionData()) {
215 stats.
merge(
function(cdata, fix));
// Second pass: annotation specification files.
217 for (
CorpusData adata : c.getAnnotationspecification()) {
218 stats.
merge(
function(adata, fix));
GenerateAnnotationPanel()
Report generateAnnotation(CorpusData cd)
static ExmaErrorList exmaError
void addWarning(String statId, String description)
Collection< Class<?extends CorpusData > > getIsUsableFor()
void addCorrect(String statId, String description)
static InputStream String2InputStream(String s)
void addException(Throwable e, String description)