corpus-services  1.0
ExbAnnotationPanelCheck.java
Go to the documentation of this file.
1 package de.uni_hamburg.corpora.validation;
2 
9 import java.io.IOException;
10 import java.util.ArrayList;
11 import java.util.Collection;
12 import javax.xml.parsers.DocumentBuilder;
13 import javax.xml.parsers.DocumentBuilderFactory;
14 import javax.xml.parsers.ParserConfigurationException;
15 import javax.xml.transform.TransformerException;
16 import javax.xml.xpath.XPathExpressionException;
17 import org.exmaralda.partitureditor.jexmaralda.BasicTranscription;
18 import org.exmaralda.partitureditor.jexmaralda.Event;
19 import org.exmaralda.partitureditor.jexmaralda.JexmaraldaException;
20 import org.exmaralda.partitureditor.jexmaralda.Tier;
21 import org.w3c.dom.NodeList;
22 import org.xml.sax.SAXException;
23 
31 public class ExbAnnotationPanelCheck extends Checker implements CorpusFunction {
32 
33  ArrayList<String> allTagStrings;
34  String tierLoc = "";
35 
37  //no fixing option available
38  super(false);
39  }
40 
45  @Override
46  public Report function(CorpusData cd, Boolean fix)
47  throws SAXException, IOException, ParserConfigurationException, JexmaraldaException, TransformerException, XPathExpressionException {
48  Report stats = new Report(); //create a new report
49  DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
50  DocumentBuilder db = dbf.newDocumentBuilder();
51  org.w3c.dom.Document doc = db.parse(TypeConverter.String2InputStream(cd.toSaveableString())); // get the file as a document
52 
53  if (cd.getURL().toString().endsWith(".xml")) { // when the annotation spec file is read
54  allTagStrings = new ArrayList<String>();
55  NodeList tags = doc.getElementsByTagName("tag"); // divide by tags
56  for (int i = 0; i < tags.getLength(); i++) { //iterate through tags
57  org.w3c.dom.Element tag = (org.w3c.dom.Element) tags.item(i);
58  allTagStrings.add(tag.getAttribute("name"));
59  }
60  } else { // when a basic transcription file is read
61  BasicTranscription basictranscription = new BasicTranscription();
62  basictranscription.BasicTranscriptionFromString(cd.toSaveableString());
63  for (int pos = 0; pos < basictranscription.getBody().getNumberOfTiers(); pos++) {
64  Tier tier = basictranscription.getBody().getTierAt(pos);
65  //single out only the annotation tiers
66  if (tier.getType().equals("a") && !tier.getCategory().equals("de") && !tier.getCategory().equals("en")
67  && !tier.getCategory().equals("ita") && !tier.getCategory().equals("fe")) {
68  //go through every event of that tier
69  for (int pos2 = 0; pos2 < tier.getNumberOfEvents(); pos2++) {
70  //get the event
71  Event event = tier.getEventAt(pos2);
72  //convert the event content to a string
73  //System.out.println(content);
74  String content = event.getDescription();
75  if (!content.isEmpty()) {
76  if (content.endsWith(" ")) { // scrap extra space at the end of the tag
77  content = content.substring(0, content.length() - 1);
78  }
79  // check if the content is contained in the possible tags
80  if (!(allTagStrings.contains(content))) {
81  System.out.println("Exb file " + cd.getURL().getFile().substring(cd.getURL().getFile().lastIndexOf("/") + 1) + " is containing annotation with incompatible tag ("
82  + content + ") in its tier " + tier.getID() + " for the event " + event.getStart() + " not specified by annotation spec file!");
83  stats.addWarning("exb-annotation-panel-check", "Exb file " + cd.getURL().getFile().substring(cd.getURL().getFile().lastIndexOf("/") + 1)
84  + " is containing annotation with incompatible tag (" + content
85  + ") in its tier " + tier.getID() + " for the event " + event.getStart() + " not specified by annotation spec file!");
86  exmaError.addError("exb-annotation-panel-check", cd.getURL().getFile(), tier.getID(), event.getStart(), false,
87  "Exb file " + cd.getURL().getFile().substring(cd.getURL().getFile().lastIndexOf("/") + 1) + " is containing annotation with incompatible tag (" + content
88  + ") in its tier " + tier.getID() + " for the event " + event.getStart() + " not specified by annotation spec file!");
89  }
90  }
91  }
92  }
93  }
94  }
95  return stats;
96  }
97 
103  @Override
104  public Collection<Class<? extends CorpusData>> getIsUsableFor() {
105  try {
106  Class cl = Class.forName("de.uni_hamburg.corpora.AnnotationSpecification");
107  Class clSecond = Class.forName("de.uni_hamburg.corpora.BasicTranscriptionData");
108  IsUsableFor.add(cl);
109  IsUsableFor.add(clSecond);
110  } catch (ClassNotFoundException ex) {
111  report.addException(ex, " usable class not found");
112  }
113  return IsUsableFor;
114  }
115 
120  @Override
121  public String getDescription() {
122  String description = "This class checks whether the annotations in exb "
123  + "files comply with the annotation specification panel. ";
124  return description;
125  }
126 
127  @Override
128  public Report function(Corpus c, Boolean fix) throws SAXException, IOException, ParserConfigurationException, JexmaraldaException, TransformerException, XPathExpressionException {
129  Report stats = new Report();
130  for (CorpusData cdata : c.getBasicTranscriptionData()) {
131  stats.merge(function(cdata, fix));
132  }
133  for (CorpusData adata : c.getAnnotationspecification()) {
134  stats.merge(function(adata, fix));
135  }
136  return stats;
137  }
138 
139 }
void merge(Report sr)
Definition: Report.java:73
Collection< Class<?extends CorpusData > > getIsUsableFor()
void addWarning(String statId, String description)
Definition: Report.java:164
static InputStream String2InputStream(String s)
void addException(Throwable e, String description)
Definition: Report.java:287