6 package de.uni_hamburg.corpora.validation;
15 import java.io.IOException;
16 import java.net.URISyntaxException;
18 import java.security.NoSuchAlgorithmException;
19 import java.util.ArrayList;
20 import java.util.Collection;
21 import java.util.List;
22 import javax.xml.parsers.DocumentBuilder;
23 import javax.xml.parsers.DocumentBuilderFactory;
24 import javax.xml.parsers.ParserConfigurationException;
25 import javax.xml.transform.TransformerException;
26 import javax.xml.xpath.XPathExpressionException;
27 import org.exmaralda.partitureditor.fsm.FSMException;
28 import org.exmaralda.partitureditor.jexmaralda.JexmaraldaException;
29 import org.jdom.JDOMException;
30 import org.w3c.dom.Document;
31 import org.w3c.dom.Element;
32 import org.w3c.dom.NodeList;
33 import org.xml.sax.SAXException;
// Entries that search(...) tests against a file's absolute path to exclude it from the result.
// The initialisation code in this file fills it with ".git", ".gitignore", "README" and "Thumbs.db".
41 static List<String> whitelist;
// File extensions, stored without the leading dot (e.g. "exb", "pdf", "xml"), whose files
// search(...) excludes from the result.
42 static List<String> fileendingwhitelist;
58 throws SAXException, IOException, ParserConfigurationException, URISyntaxException, TransformerException, XPathExpressionException {
// Fragment of the per-file check; the method header and several interior lines are not
// visible here (presumably this is function(CorpusData cd, Boolean fix) - TODO confirm).
// A DOM builder is created; the call that produces `doc` is not visible in this fragment.
61 DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
62 DocumentBuilder db = dbf.newDocumentBuilder();
// Gather every <referenced-file> element of the document.
64 NodeList reffiles = doc.getElementsByTagName(
"referenced-file");
65 ArrayList<String> refsInExb =
new ArrayList<String>();
66 for (
int i = 0; i < reffiles.getLength(); i++) {
67 Element reffile = (Element) reffiles.item(i);
68 String url = reffile.getAttribute(
"url");
// A url starting with a file URL on drive C: is reported as critical.
70 if (url.startsWith(
"file:///C:") || url.startsWith(
"file:/C:")) {
71 stats.
addCritical(
function, cd,
"Referenced-file " + url
72 +
" points to absolute local path, fix to relative path first");
// Folder that contains the checked file, derived from the parent URL of cd.
77 URL referencePath = cd.getParentURL();
79 File exbFolder =
new File(referencePath.toURI());
// NOTE(review): neither `files` nor `refsInExb` is filled in the visible lines; presumably
// the omitted lines do that (e.g. via search(exbFolder, files)) - confirm.
80 ArrayList<String> files =
new ArrayList<String>();
82 for (String absolutePath : files) {
// Cut off everything up to and including the folder path plus one separator.
83 String relativePath = absolutePath.substring(absolutePath.indexOf(exbFolder.getAbsolutePath()) + exbFolder.getAbsolutePath().length() + File.separator.length());
// A reference that equals the absolute path is reported as critical.
84 if (refsInExb.contains(absolutePath)) {
85 stats.
addCritical(
function, cd,
"Referenced-file " + absolutePath
86 +
" points to absolute local path, fix to relative path first");
87 }
// A reference that equals the relative path counts as correct.
else if (refsInExb.contains(relativePath)) {
88 stats.
addCorrect(
function, cd,
"File " + relativePath +
" found in the exb as a reference.");
// Presumably the else branch (its opening line is not visible): the file is reported as
// unreferenced both to stats and to the exmaError list.
90 stats.
addCritical(
function, cd,
"File " + relativePath +
" CANNOT be found in the exb as a reference!");
91 exmaError.addError(
function, cd.getURL().getFile(),
"",
"",
false,
"File " + relativePath +
" CANNOT be found in the exb as a reference!");
// Looks up the BasicTranscriptionData class by its fully qualified name.
// Presumably part of getIsUsableFor(); the method header and the opening try are not visible here.
105 Class cl = Class.forName(
"de.uni_hamburg.corpora.BasicTranscriptionData");
107 }
// Handler for a failed class lookup; its body is not visible in this fragment.
catch (ClassNotFoundException ex) {
// Replaces both static lists with freshly filled ArrayLists.
// Presumably the body of setWhitelist(); the method header is not visible here.
// Entries of `whitelist`, which search(...) compares against a file's absolute path.
114 whitelist =
new ArrayList<String>();
115 whitelist.add(
".git");
116 whitelist.add(
".gitignore");
117 whitelist.add(
"README");
118 whitelist.add(
"Thumbs.db");
// Extensions are stored without the leading dot, which is the form search(...) looks up
// via getFileExtension(f).
119 fileendingwhitelist =
new ArrayList<String>();
120 fileendingwhitelist.add(
"exb");
121 fileendingwhitelist.add(
"exs");
122 fileendingwhitelist.add(
"doc");
123 fileendingwhitelist.add(
"docx");
124 fileendingwhitelist.add(
"odt");
125 fileendingwhitelist.add(
"pdf");
126 fileendingwhitelist.add(
"rtf");
127 fileendingwhitelist.add(
"tex");
128 fileendingwhitelist.add(
"txt");
129 fileendingwhitelist.add(
"xml");
130 fileendingwhitelist.add(
"html");
131 fileendingwhitelist.add(
"flextext");
/**
 * Adds to {@code result} the absolute path of every regular file in {@code folder}
 * that is excluded by neither {@code fileendingwhitelist} nor {@code whitelist}.
 *
 * NOTE(review): {@code whitelist} is filled with bare names (".git", "README", ...) but is
 * compared against {@code f.getAbsolutePath()}, so those entries look like they can never
 * match; {@code f.getName()} was presumably intended - confirm.
 * NOTE(review): {@code File.listFiles()} returns null when {@code folder} is not a directory
 * or cannot be read, which would make the loop throw a NullPointerException.
 *
 * @param folder directory whose entries are inspected
 * @param result list that receives the absolute paths of the accepted files
 */
138 public static void search(File folder, List<String> result) {
139 for (File f : folder.listFiles()) {
// Directory branch; its body is not visible here (presumably a recursive search call).
140 if (f.isDirectory()) {
// Keep regular files whose extension and absolute path are both absent from the lists.
143 if (f.isFile() && !fileendingwhitelist.contains(getFileExtension(f))
144 && !whitelist.contains(f.getAbsolutePath())) {
145 result.add(f.getAbsolutePath());
/**
 * Derives the extension of {@code f} from its name: the text after the last '.'.
 * The extension starts out as the empty string; the condition guarding the assignment
 * and the return statement are not visible in this fragment.
 *
 * @param f file whose name is inspected
 */
150 private static String getFileExtension(File f) {
151 String extension =
"";
152 String fileName = f.getName();
// Index of the last dot in the name.
153 int i = fileName.lastIndexOf(
'.');
// Index of the last '/' or '\' in the name, whichever comes later (-1 if neither occurs).
154 int p = Math.max(fileName.lastIndexOf(
'/'), fileName.lastIndexOf(
'\\'));
// Presumably guarded by a check such as i > p in the omitted lines - TODO confirm.
157 extension = fileName.substring(i + 1);
// Human-readable summary of what this checker does; the enclosing method is not visible here.
168 String description =
"This class checks whether files are both in the " 169 +
"exb file and file system.";
/**
 * Runs the per-file check on every entry of {@code c.getBasicTranscriptionData()} and
 * merges each resulting report into {@code stats}.
 * The end of the loop and the return statement are not visible in this fragment.
 *
 * @param c   corpus whose basic transcription data is checked
 * @param fix passed on unchanged to {@code function(cdata, fix)}
 */
174 public Report function(
Corpus c, Boolean fix)
throws NoSuchAlgorithmException, ClassNotFoundException, FSMException, URISyntaxException, SAXException, IOException, ParserConfigurationException, JexmaraldaException, TransformerException, XPathExpressionException, JDOMException {
176 for (
CorpusData cdata : c.getBasicTranscriptionData()) {
177 stats.
merge(
function(cdata, fix));
static ExmaErrorList exmaError
void addCritical(String description)
Collection< Class<?extends CorpusData > > getIsUsableFor()
static void search(File folder, List< String > result)
void addCorrect(String statId, String description)
static InputStream String2InputStream(String s)
void addException(Throwable e, String description)
static void setWhitelist()