corpus-services  1.0
ReportStatistics.java
Go to the documentation of this file.
1 package de.uni_hamburg.corpora.validation;
2 
7 import java.io.File;
8 import java.io.FileInputStream;
9 import java.io.FileOutputStream;
10 import java.io.IOException;
11 import java.io.PrintWriter;
12 import java.io.UnsupportedEncodingException;
13 import java.net.URISyntaxException;
14 import java.text.SimpleDateFormat;
15 import java.util.Collection;
16 import java.util.Date;
17 import java.util.regex.Matcher;
18 import java.util.regex.Pattern;
19 import javax.xml.parsers.ParserConfigurationException;
20 import javax.xml.transform.TransformerException;
21 import javax.xml.xpath.XPathExpressionException;
22 import org.apache.commons.io.IOUtils;
23 import org.exmaralda.partitureditor.jexmaralda.JexmaraldaException;
24 import org.jdom.JDOMException;
25 import org.xml.sax.SAXException;
26 
31 public class ReportStatistics extends Checker implements CorpusFunction {
32 
33  private static final String HTML_REPORT = "report-output.html";
34  String REPORT_STATISTICS;
35  CorpusData cd;
36  String corpusname = "";
37 
38  public ReportStatistics() {
39  //no fixing available
40  super(false);
41  }
42 
43  @Override
44  public Report function(CorpusData cd, Boolean fix)
45  throws SAXException, IOException, ParserConfigurationException, URISyntaxException, TransformerException, XPathExpressionException, JexmaraldaException {
46  Report stats = new Report();
47  String reportStatisticsPath = cd.getParentURL().getPath() + "curation/report-statistics.html";
48  String htmlReportPath = cd.getParentURL().getPath() + "curation/" +HTML_REPORT;
49  File htmlReportFile = new File(htmlReportPath);
50  if (htmlReportFile.isFile()) {
51  FileInputStream fis = new FileInputStream(htmlReportPath);
52  String html = IOUtils.toString(fis);
53  //Just operate on the substring we really want
54  //To do: We do not want the total sum added here!
55  Pattern singleStatistics = Pattern.compile("(?<!Total): [0-9\\\\.]{1,3} %: [0-9\\\\.]+ OK, [0-9\\\\.]+ bad, [0-9\\\\.]+ warnings and [0-9\\\\.]+ unknown. = [0-9\\\\.]+ items.");
56  Matcher statistics = singleStatistics.matcher(html);
57  String htmlmatch = "";
58  while (statistics.find()) {
59  htmlmatch += statistics.group();
60  }
61  File reportStatFile = new File(reportStatisticsPath);
62  if (reportStatFile.isFile()) {
63  String reportStatistics = IOUtils.toString(new FileInputStream(reportStatisticsPath));
64  Pattern ok = Pattern.compile("[0-9\\.]+ OK"); // get okay messages
65  Pattern bad = Pattern.compile("[0-9\\.]+ bad"); // get critical errors
66  Pattern warnings = Pattern.compile("[0-9\\.]+ warnings"); // get warnings
67  Pattern unknown = Pattern.compile("[0-9\\.]+ unknown"); // get unknown messages
68  Matcher mOk = ok.matcher(htmlmatch);
69  Matcher mBad = bad.matcher(htmlmatch);
70  Matcher mWarnings = warnings.matcher(htmlmatch);
71  Matcher mUnknown = unknown.matcher(htmlmatch);
72  int nOK = 0;
73  while (mOk.find()) {
74  String sOk = mOk.group();
75  nOK += Integer.parseInt(sOk.substring(0, sOk.indexOf("OK") - 1).replaceAll("\\.", ""));
76  }
77  int nBad = 0;
78  while (mBad.find()) {
79  String sBad = mBad.group();
80  nBad += Integer.parseInt(sBad.substring(0, sBad.indexOf("bad") - 1).replaceAll("\\.", ""));
81  }
82  int nWarnings = 0;
83  while (mWarnings.find()) {
84  String sWarnings = mWarnings.group();
85  nWarnings += Integer.parseInt(sWarnings.substring(0, sWarnings.indexOf("warnings") - 1).replaceAll("\\.", ""));
86  }
87  int nUnknown = 0;
88  while (mUnknown.find()) {
89  String sUnknown = mUnknown.group();
90  nUnknown += Integer.parseInt(sUnknown.substring(0, sUnknown.indexOf("unknown") - 1).replaceAll("\\.", ""));
91  }
92 
93  if (reportStatistics.indexOf("var labelCSV") != -1) {
94  Date date = new Date();
95  SimpleDateFormat formatter = new SimpleDateFormat("dd.MM.yyyy");
96  String strDate = formatter.format(date);
97  int sIndex = reportStatistics.indexOf("var labelCSV");
98  int eIndex = reportStatistics.indexOf(";", sIndex);
99  String exData = reportStatistics.substring(sIndex, eIndex + 1);
100  String newData = exData.replace("\";", "," + strDate + "\";");
101  reportStatistics = reportStatistics.replace(exData, newData);
102  }
103  if (reportStatistics.indexOf("var criticalsCSV") != -1) {
104  int sIndex = reportStatistics.indexOf("var criticalsCSV");
105  int eIndex = reportStatistics.indexOf(";", sIndex);
106  String exData = reportStatistics.substring(sIndex, eIndex + 1);
107  String newData = exData.replace("\";", "," + Integer.toString(nBad) + "\";");
108  reportStatistics = reportStatistics.replace(exData, newData);
109  }
110  if (reportStatistics.indexOf("var warningsCSV") != -1) {
111  int sIndex = reportStatistics.indexOf("var warningsCSV");
112  int eIndex = reportStatistics.indexOf(";", sIndex);
113  String exData = reportStatistics.substring(sIndex, eIndex + 1);
114  String newData = exData.replace("\";", "," + Integer.toString(nWarnings) + "\";");
115  reportStatistics = reportStatistics.replace(exData, newData);
116  }
117 
118  if (reportStatistics.indexOf("var notesCSV") != -1) {
119  int sIndex = reportStatistics.indexOf("var notesCSV");
120  int eIndex = reportStatistics.indexOf(";", sIndex);
121  String exData = reportStatistics.substring(sIndex, eIndex + 1);
122  String newData = exData.replace("\";", "," + Integer.toString(nOK) + "\";");
123  reportStatistics = reportStatistics.replace(exData, newData);
124  }
125  PrintWriter htmlOut = new PrintWriter(new FileOutputStream(reportStatisticsPath));
126  htmlOut.print(reportStatistics);
127  htmlOut.close();
128 
129  stats.addFix(function, cd, "Report Statistics file updated (see " + htmlReportPath + ").");
130  } else {
131  stats.addMissing(function, cd, "Corpus Report file not found "
132  + "at '" + htmlReportPath + "'. Report Statistics (graphic overview) not updated.");
133  }
134  } else {
135  stats.addMissing(function, cd, "Report Statistics file not found at "
136  + "'" + reportStatisticsPath + "'. Report Statistics (graphic overview) not updated.");
137  }
138  return stats;
139  }
140 
141  @Override
142  public Collection<Class<? extends CorpusData>> getIsUsableFor() {
143  try {
144  Class cl = Class.forName("de.uni_hamburg.corpora.ComaData");
145  IsUsableFor.add(cl);
146  } catch (ClassNotFoundException ex) {
147  report.addException(ex, "Usable class not found.");
148  }
149  return IsUsableFor;
150  }
151 
155  @Override
156  public String getDescription() {
157  String description = "This class creates or updates the html statistics report"
158  + " from the report output file outputted by the corpus services.";
159  return description;
160  }
161 
162  @Override
163  public Report function(Corpus c, Boolean fix) throws SAXException, JDOMException, IOException, JexmaraldaException, TransformerException, ParserConfigurationException, UnsupportedEncodingException, XPathExpressionException, URISyntaxException {
164  Report stats = new Report();
165  CorpusData cdata = c.getComaData();
166  stats = function(cdata, fix);
167  return stats;
168  }
169 }
void addMissing(String statId, String description)
Definition: Report.java:199
Collection< Class<?extends CorpusData > > getIsUsableFor()
void addException(Throwable e, String description)
Definition: Report.java:287
void addFix(String statId, CorpusData cd, String description)
Definition: Report.java:155