1 package de.uni_hamburg.corpora.validation;
12 import java.util.ArrayList;
13 import java.io.IOException;
15 import java.util.Collection;
16 import java.util.List;
17 import java.util.regex.Pattern;
18 import javax.xml.parsers.ParserConfigurationException;
19 import javax.xml.transform.TransformerException;
20 import javax.xml.xpath.XPathExpressionException;
21 import org.exmaralda.partitureditor.jexmaralda.JexmaraldaException;
22 import org.jdom.JDOMException;
23 import org.jdom.xpath.XPath;
24 import org.w3c.dom.Document;
25 import org.w3c.dom.Element;
26 import org.w3c.dom.NodeList;
27 import org.xml.sax.SAXException;
/**
 * Checks (and, judging by the reporting calls below, optionally updates) the
 * segment-count keys stored in a Coma document.
 *
 * NOTE(review): this view of the method is fragmentary -- closing braces and
 * several intermediate statements are not visible -- so the comments below
 * describe only the statements that can actually be seen.
 *
 * Visible steps:
 *  1. Take the JDOM document from comad, convert it to a W3C DOM document and
 *     collect all "Communication" elements.
 *  2. Select, via XPath on the JDOM document, every Transcription whose
 *     Description has a Key named 'segmented' with text 'true'; for each one,
 *     inspect Description keys whose Name matches the regex "#(..).*", resolve
 *     the NSLink child against cd.getParentURL(), and add one new Key element
 *     per entry of segmentCounts, reporting each via report.addFix.
 *  3. Write the data back through cio.write and report success or failure.
 *  4. Walk Communication / Transcription / Description / Key in the W3C
 *     document and collect an algorithm name from every Key whose Name
 *     contains both '#' and ':'.
 *  5. Compare all collected names with the first one and report the outcome
 *     on stats.
 *
 * @param cd  corpus data whose parent URL and URL are used below
 * @param fix not referenced in the visible lines; presumably gates step 2/3 --
 *            TODO confirm against the full file
 * @return a Report; the return statement itself is not visible here
 */
48 public Report function(
CorpusData cd, Boolean fix)
throws ClassNotFoundException, SAXException, IOException, ParserConfigurationException, JexmaraldaException, TransformerException, XPathExpressionException, JDOMException {
// comad is not declared in the visible lines; presumably derived from cd -- TODO confirm.
51 org.jdom.Document comaDoc = comad.
getJdom();
// Same document as W3C DOM, used for the tag-name based traversal further down.
52 Document doc = JdomDocument2W3cDocument(comaDoc);
53 NodeList communications = doc.getElementsByTagName(
"Communication");
// Collects one algorithm name per matching Key; duplicates are kept.
54 ArrayList<String> algorithmNames =
new ArrayList<>();
58 List<org.jdom.Element> toRemove =
new ArrayList<org.jdom.Element>();
// Transcriptions that are flagged as segmented in their Description.
60 context = XPath.newInstance(
"//Transcription[Description/Key[@Name='segmented']/text()='true']");
62 List allContextInstances = context.selectNodes(comaDoc);
63 if (!allContextInstances.isEmpty()) {
64 for (
int i = 0; i < allContextInstances.size(); i++) {
65 Object o = allContextInstances.get(i);
// selectNodes returns an untyped list, hence the instanceof check before the cast.
66 if (o instanceof org.jdom.Element) {
67 org.jdom.Element e = (org.jdom.Element) o;
68 List<org.jdom.Element> descKeys;
// The remainder of this statement is cut off in this view.
70 descKeys = e.getChild(
"Description")
72 for (org.jdom.Element ke : (List<org.jdom.Element>) descKeys) {
// Regex: '#', then any two characters, then anything.
73 if (Pattern.matches(
"#(..).*", ke.getAttributeValue(
"Name"))) {
// Loop body not visible here; presumably detaches the collected elements -- TODO confirm.
77 for (org.jdom.Element re : toRemove) {
81 String s = e.getChildText(
"NSLink");
// Plain string concatenation of the parent URL and the NSLink text.
83 url =
new URL(cd.getParentURL() + s);
// The origin of segmentCounts is not visible in this view.
86 for (Object segmentCount : segmentCounts) {
87 if (segmentCount instanceof org.jdom.Element) {
88 org.jdom.Element segmentCountEl = (org.jdom.Element) segmentCount;
90 Object key = segmentCountEl.getAttributeValue(
"attribute-name");
91 Object value = segmentCountEl.getValue();
// New Key element: Name = the "attribute-name" attribute, text = the element value.
93 org.jdom.Element newKey =
new org.jdom.Element(
"Key");
94 newKey.setAttribute(
"Name", (String) key);
95 newKey.setText(value.toString());
// The argument of addContent is cut off in this view.
96 e.getChild(
"Description").addContent(
// NOTE(review): value is concatenated directly with "for transcription ",
// so the resulting message has no space between them.
98 report.
addFix(
function, cd,
"Updated segment count " + key.toString() +
":" + value.toString() +
"for transcription " + e.getAttributeValue(
"Name"));
// NOTE(review): comaDoc has already been passed to the converter and to
// selectNodes above, so this null check comes after its first use.
105 if (comaDoc != null) {
107 cio.write(cd, cd.getURL());
108 report.
addCorrect(
function, cd,
"Updated the segment counts!");
// Presumably the else branch of the null check; the 'else' itself is not visible.
110 report.
addCritical(
function, cd,
"Updating the segment counts was not possible!");
// Second pass, on the W3C document: Communication -> Transcription -> Description -> Key.
113 for (
int i = 0; i < communications.getLength(); i++) {
114 Element communication = (Element) communications.item(i);
115 NodeList transcriptions = communication.getElementsByTagName(
"Transcription");
116 for (
int j = 0; j < transcriptions.getLength(); j++) {
117 Element transcription = (Element) transcriptions.item(j);
119 NodeList descriptions = transcription.getElementsByTagName(
"Description");
120 for (
int d = 0; d < descriptions.getLength(); d++) {
121 Element description = (Element) descriptions.item(d);
122 NodeList keys = description.getElementsByTagName(
"Key");
125 for (
int k = 0; k < keys.getLength(); k++) {
126 Element key = (Element) keys.item(k);
// Only Keys whose Name contains both '#' and ':' are considered.
130 if (key.getAttribute(
"Name").contains(
"#") && key.getAttribute(
"Name").contains(
":")) {
// text is not used again in the visible lines.
131 String text = key.getAttribute(
"Name");
133 int colonIndex = key.getAttribute(
"Name").lastIndexOf(
':');
134 int hashIndex = key.getAttribute(
"Name").indexOf(
'#');
// Takes the characters from two positions after the first '#' up to (not including)
// the last ':'.
// NOTE(review): the guard above only checks that both characters occur, not their
// order; if hashIndex + 2 > colonIndex, substring throws -- confirm the Name format.
135 algorithmNames.add(key.getAttribute(
"Name").substring(hashIndex + 2, colonIndex));
144 String algorithmName =
"";
145 if (!algorithmNames.isEmpty()) {
// The first collected name is the reference every other name is compared to.
146 algorithmName = algorithmNames.get(0);
// error is not read or written again in the visible lines.
147 boolean error =
false;
148 for (
int i = 1; i < algorithmNames.size(); i++) {
149 if (!algorithmName.equals(algorithmNames.get(i))) {
151 System.out.println(
"Coma file contains different segmentation algorithms: " + algorithmNames.get(i));
152 stats.
addCritical(
function, cd,
"More than one segmentation algorithm: " + algorithmNames.get(i) +
" and " + algorithmName);
// NOTE(review): this reads index 1 although the reference name is element 0; if this
// call can be reached with a single collected name it throws
// IndexOutOfBoundsException -- confirm against the elided control flow.
157 stats.
addCorrect(
function, cd,
"Only segmentation " + algorithmNames.get(1));
// Presumably the branch for an empty algorithmNames list; the 'else' is not visible.
160 stats.
addWarning(
function, cd,
"No segment counts added yet. Use Coma > Maintenance > Update segment counts to add them. ");
// Fragment of a method whose header is not visible in this view.
// Looks up the ComaData class by its fully qualified name at run time;
// the handling inside the catch block is not visible here.
173 Class cl = Class.forName(
"de.uni_hamburg.corpora.ComaData");
175 }
catch (ClassNotFoundException ex) {
// Fragment of a method whose header is not visible in this view.
// Builds the human-readable description of this checker.
// NOTE(review): the text contains the typo "ihas" and speaks of "warnings", while the
// visible check reports mismatching algorithms via addCritical -- confirm the wording.
187 String description =
"This class checks whether there are more than one " 188 +
"segmentation algorithms used in the coma file. If that is the case" 189 +
", it issues warnings. If it ihas the fix option, it updates the segment counts from the exbs. ";
/**
 * Corpus-level entry point: takes the Coma data of the given corpus and
 * delegates to function(CorpusData, Boolean), storing the result in stats.
 *
 * NOTE(review): the return statement and closing brace are not visible in
 * this view; presumably stats is returned -- TODO confirm.
 *
 * @param c   corpus whose Coma data is checked
 * @param fix passed through unchanged to the CorpusData overload
 */
194 public Report function(
Corpus c, Boolean fix)
throws ClassNotFoundException, SAXException, IOException, ParserConfigurationException, JexmaraldaException, TransformerException, XPathExpressionException, JDOMException {
196 cd = c.getComaData();
197 stats =
function(cd, fix);
ComaSegmentCountChecker()
void addCritical(String description)
static org.w3c.dom.Document JdomDocument2W3cDocument(org.jdom.Document jdomDoc)
void addWarning(String statId, String description)
Collection< Class<?extends CorpusData > > getIsUsableFor()
void addCorrect(String statId, String description)
static String JdomDocument2String(org.jdom.Document jdomDocument)
void addException(Throwable e, String description)
void addFix(String statId, CorpusData cd, String description)