1 package de.uni_hamburg.corpora.validation;
8 import java.io.FileInputStream;
9 import java.io.FileOutputStream;
10 import java.io.IOException;
11 import java.io.PrintWriter;
12 import java.io.UnsupportedEncodingException;
13 import java.net.URISyntaxException;
14 import java.text.SimpleDateFormat;
15 import java.util.Collection;
16 import java.util.Date;
17 import java.util.regex.Matcher;
18 import java.util.regex.Pattern;
19 import javax.xml.parsers.ParserConfigurationException;
20 import javax.xml.transform.TransformerException;
21 import javax.xml.xpath.XPathExpressionException;
22 import org.apache.commons.io.IOUtils;
23 import org.exmaralda.partitureditor.jexmaralda.JexmaraldaException;
24 import org.jdom.JDOMException;
25 import org.xml.sax.SAXException;
// File name of the HTML report that is read as input; where it is used it is
// looked up inside the "curation/" folder next to the corpus data.
33 private static final String HTML_REPORT =
"report-output.html";
// NOTE(review): REPORT_STATISTICS is declared but neither assigned nor read in
// the visible lines — confirm whether it is still needed.
34 String REPORT_STATISTICS;
// NOTE(review): corpusname starts as the empty string and is not used in the
// visible lines — confirm usage elsewhere in the class.
36 String corpusname =
"";
// NOTE(review): the method signature belonging to this throws clause is not
// visible in this excerpt; the body below works on a variable named cd.
45 throws SAXException, IOException, ParserConfigurationException, URISyntaxException, TransformerException, XPathExpressionException, JexmaraldaException {
// Both files are expected in the "curation/" folder below the path returned by
// cd.getParentURL(): the statistics page to update and the HTML report to read.
47 String reportStatisticsPath = cd.getParentURL().getPath() +
"curation/report-statistics.html";
48 String htmlReportPath = cd.getParentURL().getPath() +
"curation/" +HTML_REPORT;
49 File htmlReportFile =
new File(htmlReportPath);
// Only continue when the HTML report exists as a regular file.
50 if (htmlReportFile.isFile()) {
// Load the whole HTML report into memory.
// NOTE(review): no close() on this stream is visible in the excerpt, and no
// charset is passed to IOUtils.toString, so decoding presumably falls back to
// the platform default — confirm both.
51 FileInputStream fis =
new FileInputStream(htmlReportPath);
52 String html = IOUtils.toString(fis);
// Matches one statistics line of the form
// ": <pct> %: <n> OK, <n> bad, <n> warnings and <n> unknown. = <n> items."
// The negative lookbehind skips the line whose ':' directly follows "Total".
// NOTE(review): the doubled escaping in this literal makes each character class
// accept a literal backslash in addition to digits and dots; the patterns used
// later for the individual counts are escaped once — confirm which is intended.
55 Pattern singleStatistics = Pattern.compile(
"(?<!Total): [0-9\\\\.]{1,3} %: [0-9\\\\.]+ OK, [0-9\\\\.]+ bad, [0-9\\\\.]+ warnings and [0-9\\\\.]+ unknown. = [0-9\\\\.]+ items.");
56 Matcher statistics = singleStatistics.matcher(html);
// Concatenate every matched statistics line into one string; the per-category
// counts are extracted from this string rather than from the full HTML.
57 String htmlmatch =
"";
58 while (statistics.find()) {
59 htmlmatch += statistics.group();
// The statistics page must already exist; it is edited in place, not created.
61 File reportStatFile =
new File(reportStatisticsPath);
62 if (reportStatFile.isFile()) {
// Read the current statistics page into memory.
// NOTE(review): the FileInputStream created here is never assigned to a
// variable, so it cannot be closed by the visible code — confirm.
63 String reportStatistics = IOUtils.toString(
new FileInputStream(reportStatisticsPath));
// One pattern per category: a run of digits/dots followed by the category label.
64 Pattern ok = Pattern.compile(
"[0-9\\.]+ OK");
65 Pattern bad = Pattern.compile(
"[0-9\\.]+ bad");
66 Pattern warnings = Pattern.compile(
"[0-9\\.]+ warnings");
67 Pattern unknown = Pattern.compile(
"[0-9\\.]+ unknown");
// All four matchers scan the concatenated statistics lines (htmlmatch).
68 Matcher mOk = ok.matcher(htmlmatch);
69 Matcher mBad = bad.matcher(htmlmatch);
70 Matcher mWarnings = warnings.matcher(htmlmatch);
71 Matcher mUnknown = unknown.matcher(htmlmatch);
// NOTE(review): the declarations of nOK/nBad/nWarnings/nUnknown and the loop
// headers around the OK and bad sections are not visible in this excerpt.
// Each section cuts the match off one character before the label (dropping the
// separating space), removes '.' characters (presumably thousands separators —
// confirm) and adds the parsed integer to the running total.
74 String sOk = mOk.group();
75 nOK += Integer.parseInt(sOk.substring(0, sOk.indexOf(
"OK") - 1).replaceAll(
"\\.",
""));
79 String sBad = mBad.group();
80 nBad += Integer.parseInt(sBad.substring(0, sBad.indexOf(
"bad") - 1).replaceAll(
"\\.",
""));
// Sum the warning counts of all matched statistics lines.
83 while (mWarnings.find()) {
84 String sWarnings = mWarnings.group();
85 nWarnings += Integer.parseInt(sWarnings.substring(0, sWarnings.indexOf(
"warnings") - 1).replaceAll(
"\\.",
""));
// Sum the unknown counts of all matched statistics lines.
// NOTE(review): nUnknown is accumulated but not used in the visible lines.
88 while (mUnknown.find()) {
89 String sUnknown = mUnknown.group();
90 nUnknown += Integer.parseInt(sUnknown.substring(0, sUnknown.indexOf(
"unknown") - 1).replaceAll(
"\\.",
""));
// The four sections below share one mechanism: locate a "var <name>CSV"
// statement in the page, take the text up to and including the next ';',
// insert ",<value>" in front of the closing quote-semicolon pair, and put the
// modified statement back into the page.
// NOTE(review): if no ';' follows the variable name, eIndex is -1 and the
// substring call throws for sIndex > 0; if the statement does not end with a
// double quote directly before ';', the replace is a silent no-op — confirm
// both cases are acceptable.
// labelCSV receives today's date formatted as dd.MM.yyyy.
93 if (reportStatistics.indexOf(
"var labelCSV") != -1) {
94 Date date =
new Date();
95 SimpleDateFormat formatter =
new SimpleDateFormat(
"dd.MM.yyyy");
96 String strDate = formatter.format(date);
97 int sIndex = reportStatistics.indexOf(
"var labelCSV");
98 int eIndex = reportStatistics.indexOf(
";", sIndex);
99 String exData = reportStatistics.substring(sIndex, eIndex + 1);
100 String newData = exData.replace(
"\";",
"," + strDate +
"\";");
101 reportStatistics = reportStatistics.replace(exData, newData);
// criticalsCSV receives the summed "bad" count.
103 if (reportStatistics.indexOf(
"var criticalsCSV") != -1) {
104 int sIndex = reportStatistics.indexOf(
"var criticalsCSV");
105 int eIndex = reportStatistics.indexOf(
";", sIndex);
106 String exData = reportStatistics.substring(sIndex, eIndex + 1);
107 String newData = exData.replace(
"\";",
"," + Integer.toString(nBad) +
"\";");
108 reportStatistics = reportStatistics.replace(exData, newData);
// warningsCSV receives the summed "warnings" count.
110 if (reportStatistics.indexOf(
"var warningsCSV") != -1) {
111 int sIndex = reportStatistics.indexOf(
"var warningsCSV");
112 int eIndex = reportStatistics.indexOf(
";", sIndex);
113 String exData = reportStatistics.substring(sIndex, eIndex + 1);
114 String newData = exData.replace(
"\";",
"," + Integer.toString(nWarnings) +
"\";");
115 reportStatistics = reportStatistics.replace(exData, newData);
// notesCSV receives the summed "OK" count.
118 if (reportStatistics.indexOf(
"var notesCSV") != -1) {
119 int sIndex = reportStatistics.indexOf(
"var notesCSV");
120 int eIndex = reportStatistics.indexOf(
";", sIndex);
121 String exData = reportStatistics.substring(sIndex, eIndex + 1);
122 String newData = exData.replace(
"\";",
"," + Integer.toString(nOK) +
"\";");
123 reportStatistics = reportStatistics.replace(exData, newData);
// Write the modified page back to the path it was read from.
// NOTE(review): no close()/flush() on htmlOut is visible in this excerpt, and
// the writer is created without an explicit charset — confirm the content is
// actually flushed to disk and which encoding is intended.
125 PrintWriter htmlOut =
new PrintWriter(
new FileOutputStream(reportStatisticsPath));
126 htmlOut.print(reportStatistics);
// Record the successful update in the report.
// NOTE(review): the message points the reader to htmlReportPath although the
// file that was written is reportStatisticsPath — confirm this is intended.
129 stats.
addFix(
function, cd,
"Report Statistics file updated (see " + htmlReportPath +
").");
// NOTE(review): the else branches that guard the two calls below are not
// visible here. Judging by the embedded original line numbers, the first call
// appears to belong to the "statistics file missing" branch and the second to
// the "HTML report missing" branch, yet each message names the other file —
// confirm the two messages are not swapped.
131 stats.
addMissing(
function, cd,
"Corpus Report file not found " 132 +
"at '" + htmlReportPath +
"'. Report Statistics (graphic overview) not updated.");
135 stats.
addMissing(
function, cd,
"Report Statistics file not found at " 136 +
"'" + reportStatisticsPath +
"'. Report Statistics (graphic overview) not updated.");
// Looks up the ComaData class by its fully qualified name at run time.
// NOTE(review): the enclosing method header, what is done with cl, and the body
// of the catch block are not visible in this excerpt; Class is used as a raw
// type here.
144 Class cl = Class.forName(
"de.uni_hamburg.corpora.ComaData");
146 }
catch (ClassNotFoundException ex) {
// Human-readable summary of what this class does.
// NOTE(review): the enclosing method and how the string is returned are not
// visible in this excerpt.
157 String description =
"This class creates or updates the html statistics report" 158 +
" from the report output file outputted by the corpus services.";
// Corpus-level entry point: hands a variable named cdata to the overload
// function(cdata, fix) and stores the result in stats.
// NOTE(review): how cdata is obtained from c, the declaration of stats and the
// return statement are not visible in this excerpt.
163 public Report function(
Corpus c, Boolean fix)
throws SAXException, JDOMException, IOException, JexmaraldaException, TransformerException, ParserConfigurationException, UnsupportedEncodingException, XPathExpressionException, URISyntaxException {
166 stats =
function(cdata, fix);
void addMissing(String statId, String description)
Collection< Class<?extends CorpusData > > getIsUsableFor()
void addException(Throwable e, String description)
void addFix(String statId, CorpusData cd, String description)