corpus-services  1.0
ExbFileCoverageChecker.java
Go to the documentation of this file.
1 /*
2  * To change this license header, choose License Headers in Project Properties.
3  * To change this template file, choose Tools | Templates
4  * and open the template in the editor.
5  */
6 package de.uni_hamburg.corpora.validation;
7 
14 import java.io.File;
15 import java.io.IOException;
16 import java.net.URISyntaxException;
17 import java.net.URL;
18 import java.security.NoSuchAlgorithmException;
19 import java.util.ArrayList;
20 import java.util.Collection;
21 import java.util.List;
22 import javax.xml.parsers.DocumentBuilder;
23 import javax.xml.parsers.DocumentBuilderFactory;
24 import javax.xml.parsers.ParserConfigurationException;
25 import javax.xml.transform.TransformerException;
26 import javax.xml.xpath.XPathExpressionException;
27 import org.exmaralda.partitureditor.fsm.FSMException;
28 import org.exmaralda.partitureditor.jexmaralda.JexmaraldaException;
29 import org.jdom.JDOMException;
30 import org.w3c.dom.Document;
31 import org.w3c.dom.Element;
32 import org.w3c.dom.NodeList;
33 import org.xml.sax.SAXException;
34 
39 public class ExbFileCoverageChecker extends Checker implements CorpusFunction {
40 
41  static List<String> whitelist;
42  static List<String> fileendingwhitelist;
43 
45  //no fixing available
46  super(false);
47  // these are acceptable
48  setWhitelist();
49 
50  }
51 
56  @Override
57  public Report function(CorpusData cd, Boolean fix)
58  throws SAXException, IOException, ParserConfigurationException, URISyntaxException, TransformerException, XPathExpressionException {
59  Report stats = new Report();
60  // FIXME:
61  DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
62  DocumentBuilder db = dbf.newDocumentBuilder();
63  Document doc = db.parse(TypeConverter.String2InputStream(cd.toSaveableString())); // get the file as a document
64  NodeList reffiles = doc.getElementsByTagName("referenced-file");
65  ArrayList<String> refsInExb = new ArrayList<String>();
66  for (int i = 0; i < reffiles.getLength(); i++) {
67  Element reffile = (Element) reffiles.item(i);
68  String url = reffile.getAttribute("url");
69  if (!url.isEmpty()) {
70  if (url.startsWith("file:///C:") || url.startsWith("file:/C:")) {
71  stats.addCritical(function, cd, "Referenced-file " + url
72  + " points to absolute local path, fix to relative path first");
73  }
74  refsInExb.add(url);
75  }
76  }
77  URL referencePath = cd.getParentURL();
78 
79  File exbFolder = new File(referencePath.toURI());
80  ArrayList<String> files = new ArrayList<String>();
81  search(exbFolder, files);
82  for (String absolutePath : files) {
83  String relativePath = absolutePath.substring(absolutePath.indexOf(exbFolder.getAbsolutePath()) + exbFolder.getAbsolutePath().length() + File.separator.length());
84  if (refsInExb.contains(absolutePath)) {
85  stats.addCritical(function, cd, "Referenced-file " + absolutePath
86  + " points to absolute local path, fix to relative path first");
87  } else if (refsInExb.contains(relativePath)) {
88  stats.addCorrect(function, cd, "File " + relativePath + " found in the exb as a reference.");
89  } else {
90  stats.addCritical(function, cd, "File " + relativePath + " CANNOT be found in the exb as a reference!");
91  exmaError.addError(function, cd.getURL().getFile(), "", "", false, "File " + relativePath + " CANNOT be found in the exb as a reference!");
92  }
93  }
94  return stats;
95  }
96 
102  @Override
103  public Collection<Class<? extends CorpusData>> getIsUsableFor() {
104  try {
105  Class cl = Class.forName("de.uni_hamburg.corpora.BasicTranscriptionData");
106  IsUsableFor.add(cl);
107  } catch (ClassNotFoundException ex) {
108  report.addException(ex, "Usable class not found.");
109  }
110  return IsUsableFor;
111  }
112 
113  public static void setWhitelist() {
114  whitelist = new ArrayList<String>();
115  whitelist.add(".git");
116  whitelist.add(".gitignore");
117  whitelist.add("README");
118  whitelist.add("Thumbs.db");
119  fileendingwhitelist = new ArrayList<String>();
120  fileendingwhitelist.add("exb");
121  fileendingwhitelist.add("exs");
122  fileendingwhitelist.add("doc");
123  fileendingwhitelist.add("docx");
124  fileendingwhitelist.add("odt");
125  fileendingwhitelist.add("pdf");
126  fileendingwhitelist.add("rtf");
127  fileendingwhitelist.add("tex");
128  fileendingwhitelist.add("txt");
129  fileendingwhitelist.add("xml");
130  fileendingwhitelist.add("html");
131  fileendingwhitelist.add("flextext");
132  }
133 
138  public static void search(File folder, List<String> result) {
139  for (File f : folder.listFiles()) {
140  if (f.isDirectory()) {
141  search(f, result);
142  }
143  if (f.isFile() && !fileendingwhitelist.contains(getFileExtension(f))
144  && !whitelist.contains(f.getAbsolutePath())) {
145  result.add(f.getAbsolutePath());
146  }
147  }
148  }
149 
150  private static String getFileExtension(File f) {
151  String extension = "";
152  String fileName = f.getName();
153  int i = fileName.lastIndexOf('.');
154  int p = Math.max(fileName.lastIndexOf('/'), fileName.lastIndexOf('\\'));
155 
156  if (i > p) {
157  extension = fileName.substring(i + 1);
158  }
159  return extension;
160  }
161 
166  @Override
167  public String getDescription() {
168  String description = "This class checks whether files are both in the "
169  + "exb file and file system.";
170  return description;
171  }
172 
173  @Override
174  public Report function(Corpus c, Boolean fix) throws NoSuchAlgorithmException, ClassNotFoundException, FSMException, URISyntaxException, SAXException, IOException, ParserConfigurationException, JexmaraldaException, TransformerException, XPathExpressionException, JDOMException {
175  Report stats = new Report();
176  for (CorpusData cdata : c.getBasicTranscriptionData()) {
177  stats.merge(function(cdata, fix));
178  }
179  return stats;
180  }
181 }
void merge(Report sr)
Definition: Report.java:73
void addCritical(String description)
Definition: Report.java:104
Collection< Class<?extends CorpusData > > getIsUsableFor()
static void search(File folder, List< String > result)
void addCorrect(String statId, String description)
Definition: Report.java:217
static InputStream String2InputStream(String s)
void addException(Throwable e, String description)
Definition: Report.java:287