8 package de.uni_hamburg.corpora;
13 import java.io.InputStream;
14 import java.io.IOException;
15 import java.io.PrintWriter;
16 import java.util.Locale;
17 import java.util.Collection;
19 import javax.servlet.annotation.MultipartConfig;
20 import javax.servlet.ServletConfig;
21 import javax.servlet.ServletContext;
22 import javax.servlet.ServletException;
23 import javax.servlet.http.Part;
24 import javax.servlet.http.HttpServlet;
25 import javax.servlet.http.HttpServletRequest;
26 import javax.servlet.http.HttpServletResponse;
27 import javax.xml.transform.Source;
28 import javax.xml.transform.stream.StreamSource;
29 import javax.xml.validation.Schema;
30 import javax.xml.validation.SchemaFactory;
31 import javax.xml.validation.Validator;
32 import javax.xml.XMLConstants;
33 import org.xml.sax.SAXException;
34 import org.exmaralda.partitureditor.jexmaralda.JexmaraldaException;
/*
 * Servlet initialisation hook. The visible statements only fetch the servlet
 * configuration and the servlet context into local variables.
 * NOTE(review): the remainder of this method is not visible in this listing;
 * confirm how cfg and ctx are used before relying on this description.
 */
47 public void init() throws ServletException {
49 final ServletConfig cfg = getServletConfig();
50 final ServletContext ctx = getServletContext();
68 protected void doGet(HttpServletRequest request,
69 HttpServletResponse response)
throws ServletException, IOException {
70 response.setStatus(HttpServletResponse.SC_OK);
71 response.setContentType(
"text/html");
72 response.setCharacterEncoding(
"UTF-8");
73 PrintWriter output = response.getWriter();
74 output.print(
"<!DOCTYPE html!>\n<html>\n <head>\n");
75 output.print(
" <title>HZSK corpus validation</title>\n");
76 output.print(
" </head>\n <body>\n");
77 output.print(
" <h1>HZSK validations</h1>\n");
78 output.print(
" <p>A Service as a service that validates quality of " 79 +
"your corpora files.</p>\n");
80 output.print(
" <form action='validate.coma' method='post' enctype='multipart/form-data'>\n");
81 output.print(
" <h2>Coma</h2>\n");
82 output.print(
" <input type='file' name='coma'/>\n");
83 output.print(
" <input type='submit' name='upload'/>\n");
84 output.print(
" </form>\n");
85 output.print(
" <form action='validate.exb' method='post' enctype='multipart/form-data'>\n");
86 output.print(
" <h2>Exb</h2>\n");
87 output.print(
" <input type='file' name='exb'/>\n");
88 output.print(
" <input type='submit' name='upload'/>\n");
89 output.print(
" </form>\n </body>\n</html>");
/*
 * Validates the given coma XML stream against the coma corpus XSD and writes
 * an HTML report fragment to the given writer.
 *
 * coma   - stream with the uploaded coma document
 * output - writer receiving the HTML; this method also writes the closing
 *          </body> and </html> tags
 * Declared to throw SAXException and IOException.
 */
96 private void generateComaReport(InputStream coma, PrintWriter output)
97 throws SAXException, IOException {
// NOTE(review): Locale.setDefault changes the default locale of the whole JVM,
// not only of this request - confirm this global side effect is intended.
99 Locale.setDefault(Locale.ENGLISH);
// The schema is addressed by this remote URL and compiled anew on every call.
100 URL COMA_XSD =
new URL(
"http://www.exmaralda.org/xml/comacorpus.xsd");
101 Source xmlStream =
new StreamSource(coma);
102 SchemaFactory schemaFactory =
103 SchemaFactory.newInstance(XMLConstants.W3C_XML_SCHEMA_NS_URI);
104 Schema schema = schemaFactory.newSchema(COMA_XSD);
105 Validator validator = schema.newValidator();
// NOTE(review): eh is not declared in the visible lines; presumably an error
// handler created just above - confirm.
107 validator.setErrorHandler(eh);
108 validator.validate(xmlStream);
109 output.print(
" <h3>coma validations</h3>\n");
110 output.print(
" <p>These errors are found in XML validation</p>\n");
// Closes the page; the opening <html>/<body> markup is not written by this method.
112 output.print(
" </body>\n</html>");
/*
 * Writes the exb report section to the given writer: a heading followed by
 * the closing </body> and </html> tags.
 *
 * filename - path of the exb file; not used by any visible statement
 * output   - writer receiving the HTML
 * Declared to throw SAXException and JexmaraldaException.
 * NOTE(review): the lines between the heading and the closing tags are not
 * visible in this listing; presumably they run the exb checks on filename -
 * confirm.
 */
115 private void generateExbReport(String filename, PrintWriter output)
116 throws SAXException, JexmaraldaException {
118 output.print(
" <h3>exb validations</h3>\n");
121 output.print(
" </body>\n</html>");
/*
 * Handles an upload: writes the HTML page header, then walks over all
 * multipart parts of the request. Each part is written to a temporary file
 * and dispatched on its form field name: "coma" goes to generateComaReport,
 * "exb" goes to generateExbReport, anything else gets an "unknown file type"
 * message. ServletException, SAXException and JexmaraldaException are caught
 * and reported inside the HTML response.
 */
129 protected void doPost(HttpServletRequest request,
130 HttpServletResponse response)
throws ServletException, IOException {
// NOTE(review): the writer is obtained before setContentType and
// setCharacterEncoding are called. Per the Servlet API, setCharacterEncoding
// has no effect once getWriter() has been called, so the UTF-8 setting below
// is likely ignored - confirm and move this line below the setters.
131 PrintWriter output = response.getWriter();
133 response.setStatus(HttpServletResponse.SC_OK);
134 response.setContentType(
"text/html");
135 response.setCharacterEncoding(
"UTF-8");
// NOTE(review): "<!DOCTYPE html!>" carries a stray '!'; the ServletException
// branch further down emits the well-formed "<!DOCTYPE html>".
136 output.print(
"<!DOCTYPE html!>\n<html>\n <head>\n");
137 output.print(
" <title>HZSK validation</title>\n");
138 output.print(
" </head>\n <body>\n");
139 output.print(
" <h1>HZSK corpus validations</h1>\n");
140 output.print(
" <p>These are the results of validating the uploaded" 142 Collection<Part> parts = request.getParts();
143 for (Part part : parts) {
144 String fileName = getSubmittedFileName(part);
145 if (fileName == null) {
// NOTE(review): fileName is taken from the request's content-disposition
// header and concatenated into the HTML without escaping.
148 output.print(
"<h2><code>" + fileName +
"</code></h2>");
// The part's input stream is obtained first; afterwards the part is also
// written to a fresh temporary file.
// NOTE(review): deleteOnExit only removes the file when the JVM terminates,
// so presumably these files pile up in a long-running container - confirm.
149 InputStream fileInputStream = part.getInputStream();
150 File localFile = File.createTempFile(
"hzsk-validate",
".xml");
151 localFile.deleteOnExit();
152 String localFileName = localFile.getAbsolutePath();
153 part.write(localFileName);
// Dispatch on the form field name: coma is validated from the stream, exb
// from the temporary file path.
154 if (part.getName().equals(
"coma")) {
155 generateComaReport(fileInputStream, output);
157 else if (part.getName().equals(
"exb")) {
158 generateExbReport(localFileName, output);
160 output.print(
"<h3>Unknown file type–No validations</h3>\n" +
161 "<p>There are no validation checks for this file " +
165 }
// Each handler below prints a short failure message followed by the
// exception's stack trace into the response, then closes the page.
catch (ServletException se) {
166 output.print(
"<!DOCTYPE html>\n" +
167 "<html><head><title>Error</title></head>\n" +
168 " <body><h1>FAILED!</h1>\n" +
169 " <p>No uploaded founds filed.</p>\n" +
171 se.printStackTrace(output);
172 output.print(
"</pre>\n</body>\n</html>");
173 }
catch (SAXException saxe) {
174 output.print(
"<h1>FAILED!</h1>\n" +
175 " <p>not an XML file</p>\n" +
177 saxe.printStackTrace(output);
178 output.print(
"</pre>\n</body>\n</html>");
179 }
catch (JexmaraldaException je) {
180 output.print(
"<h1>FAILED!</h1>\n" +
181 " <p>not an exb file</p>\n" +
183 je.printStackTrace(output);
184 output.print(
"</pre>\n</body>\n</html>");
193 private static String getSubmittedFileName(Part part) {
194 for (String cd : part.getHeader(
"content-disposition").split(
";")) {
195 if (cd.trim().startsWith(
"filename")) {
196 String fileName = cd.substring(cd.indexOf(
'=') + 1).trim().replace(
"\"",
"");
197 return fileName.substring(fileName.lastIndexOf(
'/') + 1).substring(fileName.lastIndexOf(
'\\') + 1);
void doPost(HttpServletRequest request, HttpServletResponse response)
void doGet(HttpServletRequest request, HttpServletResponse response)