hzsk-corpus-services  0.2
ComaUpdateSegmentCounts.java
Go to the documentation of this file.
1 package de.uni_hamburg.corpora.validation;
2 
10 import java.io.IOException;
11 import java.net.URL;
12 import java.util.ArrayList;
13 import java.util.Collection;
14 import java.util.List;
15 import java.util.regex.Pattern;
16 import javax.xml.parsers.ParserConfigurationException;
17 import javax.xml.transform.TransformerException;
18 import javax.xml.xpath.XPathExpressionException;
19 import org.exmaralda.partitureditor.jexmaralda.JexmaraldaException;
20 import org.xml.sax.SAXException;
21 import org.jdom.Document;
22 import org.jdom.Element;
23 import org.jdom.JDOMException;
24 import org.jdom.xpath.XPath;
25 
30 public class ComaUpdateSegmentCounts extends Checker implements CorpusFunction {
31 
// Static file-name holder; it is neither read nor written in the visible part of this class.
32  static String filename;
// Static settings holder; it is neither read nor written in the visible part of this class.
33  static ValidatorSettings settings;
// Starts out as the empty string and is not used in the visible part of this class.
// NOTE(review): the name suggests a path to an external FSM file - confirm against callers.
34  String path2ExternalFSM = "";
35 
37  }
38 
44  @Override
45  public Report check(CorpusData cd) {
46  report.addCritical(function, cd, "Checking option is not available");
47  return report;
48  }
49 
/**
 * Replaces the segment-count keys of every segmented transcription in a coma document.
 *
 * <p>The coma document is parsed from {@code cd.toSaveableString()}. For each
 * {@code Transcription} element whose {@code Description} holds a {@code Key} named
 * {@code segmented} with text {@code true}, the method
 * <ol>
 *   <li>removes the {@code Description} children whose {@code Name} attribute matches
 *       {@code #(..).*},</li>
 *   <li>reads the file found at {@code cd.getParentURL()} plus the text of the {@code NSLink}
 *       child and asks it for its segment counts,</li>
 *   <li>adds one new {@code Key} per returned count element to the {@code Description}, using
 *       the count's {@code attribute-name} as {@code Name} and its value as text, and records a
 *       fix entry on {@code report}.</li>
 * </ol>
 * Afterwards {@code cd} is written to {@code cd.getURL()} and a "correct" entry is recorded.
 * Exceptions caught inside the method are recorded on {@code report} rather than rethrown.
 *
 * <p>NOTE(review): this listing has gaps inside the method (its inner numbering jumps from 57
 * to 59 and from 108 to 110), so two statements are not visible here. See the notes at those
 * places.
 *
 * @param cd the coma corpus data to update
 * @return the locally created {@code stats} report. NOTE(review): nothing in the visible code
 *         adds to {@code stats}; every entry goes to {@code report} - confirm which of the two
 *         callers are meant to receive.
 * @throws SAXException declared, but caught and recorded inside the visible body
 * @throws IOException declared, but caught and recorded inside the visible body
 * @throws JexmaraldaException declared and not caught here; presumably raised while reading the
 *         linked transcription - TODO confirm
 */
54  @Override
55  public Report fix(CorpusData cd) throws SAXException, IOException, JexmaraldaException {
56  Report stats = new Report();
57  CorpusIO cio = new CorpusIO();
    // NOTE(review): `exs` (assigned further down) has no declaration in the visible lines; it
    // was presumably declared on the line missing from this listing right here - confirm.
    // NOTE(review): toRemove is created once for the whole run and never cleared, so elements
    // collected for earlier transcriptions are offered for removal again on later ones.
59  List<Element> toRemove = new ArrayList<Element>();
60  try {
61  Document comaDoc = TypeConverter.String2JdomDocument(cd.toSaveableString());
62  XPath context;
    // Only transcriptions flagged as segmented in their description are processed.
63  context = XPath.newInstance("//Transcription[Description/Key[@Name='segmented']/text()='true']");
64  URL url;
65  List allContextInstances = context.selectNodes(comaDoc);
66  if (!allContextInstances.isEmpty()) {
67  for (int i = 0; i < allContextInstances.size(); i++) {
68  Object o = allContextInstances.get(i);
69  if (o instanceof Element) {
70  Element e = (Element) o;
71  List<Element> descKeys;
72  //first remove the old statistics keys from the coma description
73  descKeys = e.getChild("Description")
74  .getChildren();
    // Matching keys are collected first and removed in a second pass, not while iterating.
75  for (Element ke : (List<Element>) descKeys) {
    // NOTE(review): if a child has no Name attribute, a null would presumably be handed to
    // Pattern.matches here - confirm that cannot happen for coma files.
76  if (Pattern.matches("#(..).*", ke.getAttributeValue("Name"))) {
77  toRemove.add(ke);
78  }
79  }
80  for (Element re : toRemove) {
81  descKeys.remove(re);
82  }
83  //now get the new segment counts and add them instead
84  String s = e.getChildText("NSLink");
85  //System.out.println("NSLink:" + s);
    // The NSLink text is appended to the parent URL by plain string concatenation.
86  url = new URL(cd.getParentURL() + s);
87  exs = (SegmentedTranscriptionData) cio.readFileURL(url);
88  List segmentCounts = exs.getSegmentCounts();
89  for (Object segmentCount : segmentCounts) {
90  if (segmentCount instanceof Element) {
91  Element segmentCountEl = (Element) segmentCount;
92  //Object key = segmentCountEl.getAttributeValue("attribute-name").substring(2);
93  Object key = segmentCountEl.getAttributeValue("attribute-name");
94  Object value = segmentCountEl.getValue();
95  //System.out.println("Value:" + value);
96  Element newKey = new Element("Key");
97  newKey.setAttribute("Name", (String) key);
98  newKey.setText(value.toString());
99  e.getChild("Description").addContent(
100  newKey);
    // NOTE(review): the message has no space between the value and "for transcription".
101  report.addFix(function, cd, "Updated segment count " + key.toString() + ":" + value.toString() + "for transcription " + e.getAttributeValue("Name"));
102  }
103  }
104 
105  }
106  }
107  }
    // NOTE(review): comaDoc has already been dereferenced above, so the else branch below looks
    // unreachable. The statement missing from this listing right after the `if` presumably
    // copies the modified document back into cd before it is written - confirm.
108  if (comaDoc != null) {
110  cio.write(cd, cd.getURL());
111  report.addCorrect(function, cd, "Updated the segment counts!");
112  } else {
113  report.addCritical(function, cd, "Updating the segment counts was not possible!");
114  }
    // Every failure below is recorded on `report`; none of them is rethrown.
115  } catch (IOException ex) {
116  report.addException(ex, function, cd, "unknown IO exception");
117  } catch (TransformerException ex) {
118  report.addException(ex, function, cd, "unknown xml exception");
119  } catch (ParserConfigurationException ex) {
120  report.addException(ex, function, cd, "unknown xml exception");
121  } catch (SAXException ex) {
122  report.addException(ex, function, cd, "unknown xml exception");
123  } catch (XPathExpressionException ex) {
124  report.addException(ex, function, cd, "unknown xml exception");
125  } catch (JDOMException ex) {
126  report.addException(ex, function, cd, "unknown xml exception");
127  } catch (ClassNotFoundException ex) {
128  report.addException(ex, function, cd, "class not found exception");
129  }
130  return stats;
131  }
132 
138  @Override
139  public Collection<Class<? extends CorpusData>> getIsUsableFor() {
140  try {
141  Class cl = Class.forName("de.uni_hamburg.corpora.ComaData");
142  IsUsableFor.add(cl);
143  } catch (ClassNotFoundException ex) {
144  report.addException(ex, " usable class not found");
145  }
146  return IsUsableFor;
147  }
148 
153  @Override
154  public String getDescription() {
155  String description = "This class takes a coma file, updates the info using"
156  + " the linked exbs and saves the coma file afterwards without changing"
157  + " exbs.";
158  return description;
159  }
160 
161  @Override
162  public Report check(Corpus c) {
163  throw new UnsupportedOperationException("Not supported yet."); //To change body of generated methods, choose Tools | Templates.
164  }
165 
166  @Override
167  public Report function(CorpusData cd, Boolean fix) throws SAXException, IOException, ParserConfigurationException, JexmaraldaException, TransformerException, XPathExpressionException {
168  throw new UnsupportedOperationException("Not supported yet."); //To change body of generated methods, choose Tools | Templates.
169  }
170 
171 }
Collection< Class<?extends CorpusData > > getIsUsableFor()
CorpusData readFileURL(URL url, Collection< Class<?extends CorpusData >> clcds)
Definition: CorpusIO.java:123
void addCritical(String description)
Definition: Report.java:104
void addCorrect(String statId, String description)
Definition: Report.java:217
static org.jdom.Document String2JdomDocument(String stringRespresentingDocument)
static String JdomDocument2String(org.jdom.Document jdomDocument)
void addException(Throwable e, String description)
Definition: Report.java:287
void updateUnformattedString(String newUnformattedString)
void write(CorpusData cd, URL url)
Definition: CorpusIO.java:63
void addFix(String statId, CorpusData cd, String description)
Definition: Report.java:155