corpus-services  1.0
CmdiXsdChecker.java
Go to the documentation of this file.
1 
10 package de.uni_hamburg.corpora.validation;
11 
12 
13 
16 import java.io.File;
17 import java.io.FileInputStream;
18 import java.io.FileNotFoundException;
19 import java.io.IOException;
20 import java.net.URL;
21 import java.util.ArrayList;
22 import java.util.regex.Matcher;
23 import java.util.regex.Pattern;
24 import javax.xml.transform.Source;
25 import javax.xml.validation.Schema;
26 import javax.xml.validation.SchemaFactory;
27 import javax.xml.validation.Validator;
28 import javax.xml.XMLConstants;
29 import org.apache.commons.cli.Option;
30 import org.xml.sax.SAXException;
31 
36 public class CmdiXsdChecker {
37 
38  ValidatorSettings settings;
39 
40 
47  public Report check(String data) {
48  Report stats = new Report();
49  try {
50  stats = exceptionalCheck(data);
51  } catch(SAXException saxe) {
52  stats.addException(saxe, "Unknown parsing error.");
53  } catch(IOException ioe) {
54  stats.addException(ioe, "Unknown reading error.");
55  }
56  return stats;
57  }
58 
59 
/**
 * Validates the given CMDI text against the schema named in its
 * schemaLocation attribute, letting checked exceptions propagate.
 *
 * The schema URL is pulled out of the raw text with a regular
 * expression, a {@link Schema} is built from that URL, and the text is
 * then run through a {@link Validator} whose error handler collects
 * the findings.
 *
 * @param data the CMDI document content
 * @return a report with a single critical "cmdi-xsd" entry when no
 *         schema location is found; otherwise whatever
 *         {@code eh.getErrors()} returns after validation
 * @throws SAXException declared for schema construction and validation
 * @throws IOException declared for URL construction and validation
 *         (a malformed URL surfaces as an IOException subclass)
 */
60  private Report exceptionalCheck(String data)
61  throws SAXException, IOException {
62  // peek the profile first
// The pattern matches: the attribute name, '=', an opening quote, the
// CLARIN namespace (optional trailing slash), at least one whitespace
// character, and then captures everything up to the next quote as
// group 1, which is used below as the XSD URL.
// NOTE(review): the prefix is "xsi2", not the conventional "xsi" —
// confirm this is what the input files really use.
// NOTE(review): MULTILINE only changes ^ and $, which this pattern
// does not contain, so the flag appears to have no effect.
63  Pattern xsdpattern = Pattern.compile("xsi2:schemaLocation\\s*=\\s*" +
64  "[\"']\\s*http://www.clarin.eu/cmd/?\\s\\s*([^\"']*)",
65  Pattern.MULTILINE);
66  Matcher xsdmatch = xsdpattern.matcher(data);
67  String cmdiProfileXsdURL;
68  if (!xsdmatch.find()) {
// No schema location in the text: report it and stop before any
// schema loading is attempted.
// NOTE(review): the message spells "schemalocation" with a lowercase
// 'l', while the pattern above requires "schemaLocation".
69  Report stats = new Report();
70  stats.addCritical("cmdi-xsd", "No CMDI XML schema found " +
71  "(should contain " +
72  "xsi2:schemalocation=\"http://www.clarin.eu/cmd )");
73  return stats;
74  } else {
75  cmdiProfileXsdURL = xsdmatch.group(1);
76  }
// The schema is loaded from the URL taken out of the document itself.
77  URL cmdiXsdURL = new URL(cmdiProfileXsdURL);
78  Source xmlStream = TypeConverter.String2StreamSource(data);
79  SchemaFactory schemaFactory =
80  SchemaFactory.newInstance(XMLConstants.W3C_XML_SCHEMA_NS_URI);
81  Schema schema = schemaFactory.newSchema(cmdiXsdURL);
82  Validator validator = schema.newValidator();
// NOTE(review): 'eh' is not declared anywhere in the visible lines; the
// embedded numbering jumps from 82 to 84, so its declaration presumably
// sat on the missing line — confirm against the real source file.
84  validator.setErrorHandler(eh);
85  validator.validate(xmlStream);
86  return eh.getErrors();
87  }
88 
89  public Report doMain(String[] args) {
90  settings = new ValidatorSettings("CmdiXsdChecker",
91  "Checks CMDI file against XML Schema",
92  "If input is a directory, performs recursive check " +
93  "from that directory, otherwise checks input file");
94  settings.handleCommandLine(args, new ArrayList<Option>());
95  if (settings.isVerbose()) {
96  System.out.println("Checking CMDI files against schema...");
97  }
98  Report stats = new Report();
99  for (File f : settings.getInputFiles()) {
100  if (settings.isVerbose()) {
101  System.out.println(" * " + f.getName());
102  }
103  try {
104  String s = TypeConverter.InputStream2String(new FileInputStream(f));
105  stats = check(s);
106  } catch (FileNotFoundException fnfe) {
107  fnfe.printStackTrace();
108  }
109  }
110  return stats;
111  }
112 
113  public static void main(String[] args) {
114  CmdiXsdChecker checker = new CmdiXsdChecker();
115  Report stats = checker.doMain(args);
116  System.out.println(stats.getSummaryLines());
117  System.out.println(stats.getErrorReports());
118  }
119 
120 }
CommandLine handleCommandLine(String[] args, List< Option > extraOptions)
static StreamSource String2StreamSource(String s)
void addCritical(String description)
Definition: Report.java:104
static String InputStream2String(InputStream is)
void addException(Throwable e, String description)
Definition: Report.java:287