corpus-services  1.0
HzskValidationServlet.java
Go to the documentation of this file.
1 
8 package de.uni_hamburg.corpora;
9 
11 import java.net.URL;
12 import java.io.File;
13 import java.io.InputStream;
14 import java.io.IOException;
15 import java.io.PrintWriter;
16 import java.util.Locale;
17 import java.util.Collection;
18 
19 import javax.servlet.annotation.MultipartConfig;
20 import javax.servlet.ServletConfig;
21 import javax.servlet.ServletContext;
22 import javax.servlet.ServletException;
23 import javax.servlet.http.Part;
24 import javax.servlet.http.HttpServlet;
25 import javax.servlet.http.HttpServletRequest;
26 import javax.servlet.http.HttpServletResponse;
27 import javax.xml.transform.Source;
28 import javax.xml.transform.stream.StreamSource;
29 import javax.xml.validation.Schema;
30 import javax.xml.validation.SchemaFactory;
31 import javax.xml.validation.Validator;
32 import javax.xml.XMLConstants;
33 import org.xml.sax.SAXException;
34 import org.exmaralda.partitureditor.jexmaralda.JexmaraldaException;
35 
39 @MultipartConfig
40 public class HzskValidationServlet extends HttpServlet {
41 
42 
46  @Override
47  public void init() throws ServletException {
48  super.init();
49  final ServletConfig cfg = getServletConfig();
50  final ServletContext ctx = getServletContext();
51  }
52 
53 
57  @Override
58  public void destroy() {
59  super.destroy();
60  }
61 
62 
67  @Override
68  protected void doGet(HttpServletRequest request,
69  HttpServletResponse response) throws ServletException, IOException {
70  response.setStatus(HttpServletResponse.SC_OK);
71  response.setContentType("text/html");
72  response.setCharacterEncoding("UTF-8");
73  PrintWriter output = response.getWriter();
74  output.print("<!DOCTYPE html!>\n<html>\n <head>\n");
75  output.print(" <title>HZSK corpus validation</title>\n");
76  output.print(" </head>\n <body>\n");
77  output.print(" <h1>HZSK validations</h1>\n");
78  output.print(" <p>A Service as a service that validates quality of "
79  + "your corpora files.</p>\n");
80  output.print(" <form action='validate.coma' method='post' enctype='multipart/form-data'>\n");
81  output.print(" <h2>Coma</h2>\n");
82  output.print(" <input type='file' name='coma'/>\n");
83  output.print(" <input type='submit' name='upload'/>\n");
84  output.print(" </form>\n");
85  output.print(" <form action='validate.exb' method='post' enctype='multipart/form-data'>\n");
86  output.print(" <h2>Exb</h2>\n");
87  output.print(" <input type='file' name='exb'/>\n");
88  output.print(" <input type='submit' name='upload'/>\n");
89  output.print(" </form>\n </body>\n</html>");
90  output.flush();
91  }
92 
/**
 * Validates an uploaded Coma document against the Coma XML schema and
 * writes the report section for it.
 *
 * <p>The schema is fetched from exmaralda.org each time this method runs.
 * After validation a heading, a paragraph and the closing
 * {@code </body></html>} tags are printed; the line that would print the
 * collected errors is commented out.
 *
 * <p>NOTE(review): {@code eh} is not declared anywhere in the visible
 * listing (listing line 106 is missing); presumably it is the error handler
 * created on that line -- confirm against the real source file.
 *
 * @param coma stream holding the XML document to validate
 * @param output writer receiving the HTML fragment
 * @throws SAXException if the schema cannot be built or validation fails
 * @throws IOException if the schema URL is malformed or the document
 *         cannot be read
 */
96  private void generateComaReport(InputStream coma, PrintWriter output)
97  throws SAXException, IOException {
98  // XXX: some validation depends on language of exception message
 // Note: this changes the default locale of the whole JVM, not only of
 // this request.
99  Locale.setDefault(Locale.ENGLISH);
100  URL COMA_XSD = new URL("http://www.exmaralda.org/xml/comacorpus.xsd");
101  Source xmlStream = new StreamSource(coma);
102  SchemaFactory schemaFactory =
103  SchemaFactory.newInstance(XMLConstants.W3C_XML_SCHEMA_NS_URI);
104  Schema schema = schemaFactory.newSchema(COMA_XSD);
105  Validator validator = schema.newValidator();
 // NOTE(review): the declaration of eh is not visible here -- see above.
107  validator.setErrorHandler(eh);
108  validator.validate(xmlStream);
109  output.print(" <h3>coma validations</h3>\n");
110  output.print(" <p>These errors are found in XML validation</p>\n");
 // Error listing is currently disabled.
111  //output.print(ErrorMessage.generateHTML(eh.getErrors()));
112  output.print(" </body>\n</html>");
113  }
114 
/**
 * Writes the report section for an uploaded exb file.
 *
 * <p>In the visible code this is a stub: the checker construction and the
 * error output are commented out, so only a heading and the closing
 * {@code </body></html>} tags are printed and {@code filename} is not read.
 *
 * <p>NOTE(review): listing line 119 is missing from this view; presumably
 * it is the first half of the commented-out print statement that ends on
 * line 120 -- confirm against the real source file.
 *
 * @param filename path of the uploaded exb file (unused by the visible code)
 * @param output writer receiving the HTML fragment
 * @throws SAXException declared; not raised by the visible statements
 * @throws JexmaraldaException declared; not raised by the visible statements
 */
115  private void generateExbReport(String filename, PrintWriter output)
116  throws SAXException, JexmaraldaException {
 // Checker is currently disabled.
117  //ExbErrorChecker checker = new ExbErrorChecker(filename);
118  output.print(" <h3>exb validations</h3>\n");
120  // checker.getStructureErrors()));
121  output.print(" </body>\n</html>");
122  }
123 
128  @Override
129  protected void doPost(HttpServletRequest request,
130  HttpServletResponse response) throws ServletException, IOException {
131  PrintWriter output = response.getWriter();
132  try {
133  response.setStatus(HttpServletResponse.SC_OK);
134  response.setContentType("text/html");
135  response.setCharacterEncoding("UTF-8");
136  output.print("<!DOCTYPE html!>\n<html>\n <head>\n");
137  output.print(" <title>HZSK validation</title>\n");
138  output.print(" </head>\n <body>\n");
139  output.print(" <h1>HZSK corpus validations</h1>\n");
140  output.print(" <p>These are the results of validating the uploaded"
141  + " files</p>\n");
142  Collection<Part> parts = request.getParts();
143  for (Part part : parts) {
144  String fileName = getSubmittedFileName(part);
145  if (fileName == null) {
146  continue;
147  }
148  output.print("<h2><code>" + fileName + "</code></h2>");
149  InputStream fileInputStream = part.getInputStream();
150  File localFile = File.createTempFile("hzsk-validate", ".xml");
151  localFile.deleteOnExit();
152  String localFileName = localFile.getAbsolutePath();
153  part.write(localFileName);
154  if (part.getName().equals("coma")) {
155  generateComaReport(fileInputStream, output);
156  }
157  else if (part.getName().equals("exb")) {
158  generateExbReport(localFileName, output);
159  } else {
160  output.print("<h3>Unknown file type–No validations</h3>\n" +
161  "<p>There are no validation checks for this file " +
162  "type.</p>\n");
163  }
164  }
165  } catch (ServletException se) {
166  output.print("<!DOCTYPE html>\n" +
167  "<html><head><title>Error</title></head>\n" +
168  " <body><h1>FAILED!</h1>\n" +
169  " <p>No uploaded founds filed.</p>\n" +
170  "<pre>");
171  se.printStackTrace(output);
172  output.print("</pre>\n</body>\n</html>");
173  } catch (SAXException saxe) {
174  output.print("<h1>FAILED!</h1>\n" +
175  " <p>not an XML file</p>\n" +
176  "<pre>");
177  saxe.printStackTrace(output);
178  output.print("</pre>\n</body>\n</html>");
179  } catch (JexmaraldaException je) {
180  output.print("<h1>FAILED!</h1>\n" +
181  " <p>not an exb file</p>\n" +
182  "<pre>");
183  je.printStackTrace(output);
184  output.print("</pre>\n</body>\n</html>");
185  }
186  output.flush();
187  }
188 
193  private static String getSubmittedFileName(Part part) {
194  for (String cd : part.getHeader("content-disposition").split(";")) {
195  if (cd.trim().startsWith("filename")) {
196  String fileName = cd.substring(cd.indexOf('=') + 1).trim().replace("\"", "");
197  return fileName.substring(fileName.lastIndexOf('/') + 1).substring(fileName.lastIndexOf('\\') + 1); // MSIE fix.
198  }
199  }
200  return null;
201  }
202 }
void doPost(HttpServletRequest request, HttpServletResponse response)
void doGet(HttpServletRequest request, HttpServletResponse response)