corpus-services  1.0
ComaTierOverviewCreator.java
Go to the documentation of this file.
1 package de.uni_hamburg.corpora.validation;
2 
11 import java.util.ArrayList;
12 import java.io.IOException;
13 import java.net.URISyntaxException;
14 import java.net.URL;
15 import java.sql.Timestamp;
16 import java.util.Collection;
17 import java.util.Collections;
18 import java.util.TreeSet;
19 import java.util.List;
20 import java.util.Set;
21 import javax.xml.parsers.ParserConfigurationException;
22 import javax.xml.transform.TransformerException;
23 import javax.xml.xpath.XPathExpressionException;
24 import org.exmaralda.partitureditor.jexmaralda.JexmaraldaException;
25 import org.exmaralda.partitureditor.jexmaralda.Tier;
26 import org.xml.sax.SAXException;
27 
33 public class ComaTierOverviewCreator extends Checker implements CorpusFunction {
34 
35  String comaLoc = "";
36 
38  //no fixing available
39  super(false);
40  }
41 
47  public Report function(CorpusData cd, Boolean fix)
48  throws SAXException, IOException, ParserConfigurationException, URISyntaxException, TransformerException, XPathExpressionException, JexmaraldaException, ClassNotFoundException {
49  Report stats = new Report();
50  ComaData ccd = (ComaData) cd;
51  CorpusIO cio = new CorpusIO();
52  Collection<URL> resulturls;
53  ArrayList<Tier> tiers = new ArrayList<>();
54  ArrayList<BasicTranscriptionData> btds = new ArrayList<>();
55  String htmltemplate = TypeConverter.InputStream2String(getClass().getResourceAsStream("/xsl/tier_overview_datatable_template.html"));
56  String overviewTable = "";
57  String communicationsTable = "";
58  resulturls = ccd.getAllBasicTranscriptionURLs();
59  for (URL resulturl : resulturls) {
60  CorpusData cdexb = cio.readFileURL(resulturl);
61  if (cdexb!=null) {
63 
64  btds.add(btexb);
65  Tier t;
66  for (int i = 0; i < btexb.getEXMARaLDAbt().getBody().getNumberOfTiers(); i++) {
67  t = btexb.getEXMARaLDAbt().getBody().getTierAt(i);
68  tiers.add(t);
69  }
70  } else {
71  stats.addCritical(function, cd, "The linked basic transcription " + resulturl + " cannot be opened.");
72  }
73  }
74  List<String> stringtiers = new ArrayList<String>();
75  for (Tier tier : tiers) {
76  //stringtiers.add(tier.getCategory() + "-" + tier.getType() + "-" + tier.getDisplayName());
77  stringtiers.add(tier.getCategory() + " (type: " + tier.getType() + ")");
78  }
79  Set<String> hash_Set = new TreeSet<String>(stringtiers);
80  //System.out.println(tiers);
81  //now we have all the existing tiers from the exbs, we need to make a table out of it
82  //use the html template and add the content into id
83  if (!tiers.isEmpty()) {
84  // get the HTML stylesheet
85 
86  String h1 = "<h1> Tier Overview over Whole Corpus (" + resulturls.size() + " exbs) </h1>";
87  String header = "<table id=\"\" class=\"compact\">\n"
88  + " <thead>\n"
89  + " <tr>\n"
90  + " <th class=\"compact\">Category-Type-DisplayName</th>\n"
91  + " <th class=\"compact\">Number of Tiers</th>\n"
92  + " </tr>\n"
93  + " </thead>\n"
94  + " <tbody>\n";
95  /* for (Tier tier : tiers) {
96  //stringtiers.add(tier.getCategory() + "-" + tier.getType() + "-" + tier.getDisplayName());
97  stringtiers.add(tier.getCategory() + "-" + tier.getType());
98  } */
99  // add the tables to the html
100  //first table: one column with categories, one with count
101  // add the overviewTable to the html
102  //first table: one column with categories, one with count
103  String content = "";
104 
105  for (String s : hash_Set) {
106  content = content + "<tr><td class=\"compact\">" + s + "</td><td class=\"compact\">" + Collections.frequency(stringtiers, s) + "</td></tr>";
107  }
108  String footer = " </tbody>\n"
109  + "</table>";
110 
111  overviewTable = h1 + header + content + footer;
112 
113  } else {
114  stats.addWarning(function, cd, "No tiers found in the linked exbs. ");
115  }
116  //now each exb linked in the coma file
117  //TODO
118  if (!btds.isEmpty()) {
119  String h1 = "<h1> Tiers in each exb </h1>";
120  communicationsTable = h1;
121  //first is the column for filename, then all the tier category/type combinations
122  String header = "<table id=\"\" class=\"compact\">\n"
123  + " <thead>\n"
124  + "<th class=\"compact\"> Exb Filename </th>";
125  for (String s : hash_Set) {
126  header = header + "<th class=\"compact\">" + s + "</th>";
127  }
128  header = header + "</tr>"
129  + " </thead>\n"
130  + " <tbody>\n";
131  String content = "";
132  for (BasicTranscriptionData btd : btds) {
133  //first is the column for filename, then all the tier category/type combinations
134  content = content + "<tr><td class=\"compact\">" + btd.getFilename() + "</td>";
135  for (String s : hash_Set) {
136  //TO DO
137  String[] catType = s.split("type: ");
138  String category = catType[0].substring(0, catType[0].length() - 2);
139  String type = catType[1].substring(0, catType[1].length() - 1);
140  String[] ids = btd.getEXMARaLDAbt().getBody().getTiersOfType(type);
141  int noOfEvents = 0;
142  boolean existence = false;
143  if (ids.length > 0) {
144  for (String id : ids) {
145  if (category.equals(btd.getEXMARaLDAbt().getBody().getTierWithID(id).getCategory())) {
146  noOfEvents += btd.getEXMARaLDAbt().getBody().getTierWithID(id).getNumberOfEvents();
147  existence = true;
148  }
149  }
150  if (existence) {
151  if (noOfEvents > 0) {
152  content = content + "<td class=\"compact\">" + noOfEvents + "</td>";
153  } else {
154  content = content + "<td class=\"compact\">0</td>";
155  }
156  } else {
157  content = content + "<td class=\"compact\"></td>";
158  }
159  } else {
160  content = content + "<td class=\"compact\"></td>";
161  }
162  }
163  content = content + "</tr>";
164  }
165 
166  String footer = " </tr>\n"
167  + " </tbody>\n"
168  + "</table>";
169  communicationsTable = h1 + header + content + footer;
170  } else {
171  stats.addWarning(function, cd, "No linked exbs found in the coma file. ");
172  }
173  String htmlend = " </body>\n</html>";
174  //add timestamp
175  String timestamp = "";
176  timestamp += " <div id='timestamp'>Generated: ";
177  Timestamp time = new Timestamp(System.currentTimeMillis());
178  timestamp += time + "</div>\n";
179  String result = htmltemplate + timestamp + overviewTable + communicationsTable + htmlend;
180  //String result = htmltemplate + overviewTable;
181 
182  URL overviewurl = new URL(cd.getParentURL(), "curation/tier_overview.html");
183  cio.write(result, overviewurl);
184 
185  stats.addCorrect(function, cd, "created tier overview at " + overviewurl);
186 
187  return stats; // return the report with warnings
188  }
189 
195  @Override
196  public Collection<Class<? extends CorpusData>> getIsUsableFor() {
197  try {
198  Class cl = Class.forName("de.uni_hamburg.corpora.ComaData");
199  IsUsableFor.add(cl);
200  } catch (ClassNotFoundException ex) {
201  report.addException(ex, "Usable class not found.");
202  }
203  return IsUsableFor;
204  }
205 
210  @Override
211  public String getDescription() {
212  String description = "This class creates a sort- and filterable html overview in table form "
213  + " of all tiers existing in the exbs linked in the coma file to make error "
214  + "checking and harmonizing easier. ";
215  return description;
216  }
217 
218  @Override
219  public Report function(Corpus c, Boolean fix) throws SAXException, IOException, ParserConfigurationException, URISyntaxException, TransformerException, XPathExpressionException, JexmaraldaException, ClassNotFoundException {
220  Report stats;
221  cd = c.getComaData();
222  stats = function(cd, fix);
223  return stats;
224  }
225 
226 }
CorpusData readFileURL(URL url, Collection< Class<?extends CorpusData >> clcds)
Definition: CorpusIO.java:125
Collection< Class<?extends CorpusData > > getIsUsableFor()
void addCritical(String description)
Definition: Report.java:104
static String InputStream2String(InputStream is)
void addWarning(String statId, String description)
Definition: Report.java:164
void addCorrect(String statId, String description)
Definition: Report.java:217
Collection< URL > getAllBasicTranscriptionURLs()
Definition: ComaData.java:126
void addException(Throwable e, String description)
Definition: Report.java:287
void write(CorpusData cd, URL url)
Definition: CorpusIO.java:66