corpus-services  1.0
ComaNSLinksChecker.java
Go to the documentation of this file.
1 
9 package de.uni_hamburg.corpora.validation;
10 
16 import java.io.File;
17 import java.io.IOException;
18 import java.net.URL;
19 import java.util.Collection;
20 import javax.xml.parsers.ParserConfigurationException;
21 import org.exmaralda.partitureditor.jexmaralda.JexmaraldaException;
22 import org.jdom.JDOMException;
23 import org.w3c.dom.Document;
24 import org.w3c.dom.Element;
25 import org.w3c.dom.Node;
26 import org.w3c.dom.NodeList;
27 import org.w3c.dom.Text;
28 import org.xml.sax.SAXException;
30 import java.net.URI;
31 import java.net.URISyntaxException;
32 import java.nio.file.Paths;
33 import javax.xml.transform.TransformerException;
34 import javax.xml.xpath.XPathExpressionException;
35 
39 public class ComaNSLinksChecker extends Checker implements CorpusFunction {
40 
41  String referencePath = "./";
42  String comaLoc = "";
43  String communicationname;
44 
45  public ComaNSLinksChecker() {
46  //no fixing available
47  super(false);
48  }
49 
50  @Override
51  public Report function(Corpus c, Boolean fix) throws SAXException, JDOMException, IOException, JexmaraldaException, ParserConfigurationException, URISyntaxException, TransformerException, XPathExpressionException {
52  Report stats = new Report();
53  cd = c.getComaData();
54  stats = function(cd, fix);
55  return stats;
56  }
57 
58  @Override
59  public Report function(CorpusData cd, Boolean fix)
60  throws SAXException, IOException, ParserConfigurationException, URISyntaxException, TransformerException, XPathExpressionException {
61  Document doc = TypeConverter.JdomDocument2W3cDocument(TypeConverter.String2JdomDocument(cd.toSaveableString()));
62  NodeList nslinks = doc.getElementsByTagName("NSLink");
63  Report stats = new Report();
64  ComaData cdcoma = (ComaData) cd;
65  for (int i = 0; i < nslinks.getLength(); i++) {
66  Element nslink = (Element) nslinks.item(i);
67  Node communication = nslink.getParentNode();
68  if (communication.getNodeName() != null && communication.getNodeName().equals("Transcription")) {
69  communicationname = communication.getParentNode().getAttributes().getNamedItem("Name").getTextContent();
70  } else if (communication.getNodeName() != null && communication.getNodeName().equals("Media")) {
71  communicationname = communication.getParentNode().getParentNode().getAttributes().getNamedItem("Name").getTextContent();
72  } else {
73  //could not find matching communication name
74  communicationname = "Could not figure out Communication name";
75  }
76  NodeList nstexts = nslink.getChildNodes();
77  for (int j = 0; j < nstexts.getLength(); j++) {
78  Node maybeText = nstexts.item(j);
79  if (maybeText.getNodeType() != Node.TEXT_NODE) {
80  System.out.println("This is not a text node: "
81  + maybeText);
82  continue;
83  }
84  Text nstext = (Text) nstexts.item(j);
85  String nspath = nstext.getWholeText().replace("/", File.separator);
86  File justFile = new File(nspath);
87  boolean found = false;
88  if (justFile.exists()) {
89  found = true;
90  }
91  String absPath = referencePath + File.separator + nspath;
92  //System.out.println(absPath + "##############");
93  File absFile = new File(absPath);
94  if (absFile.exists()) {
95  found = true;
96  }
97  if (cd.getURL() != null) {
98  URL urlPath = cd.getURL();
99  //I think here is the Linux Problem
100  URL urlAbsPath = new URL(urlPath, nspath.replace(File.separator, "/"));
101  //System.out.println(urlPath + "##############");
102  File dataFile = new File(urlAbsPath.toURI());
103  if (dataFile.exists()) {
104  found = true;
105  }
106  }
107  if (cdcoma.getBasedirectory() != null) {
108  //File basedirectory = new File(cdcoma.getBasedirectory());
109  URI uri = cdcoma.getBasedirectory().toURI();
110  URI parentURI = uri.getPath().endsWith("/") ? uri.resolve("..") : uri.resolve(".");
111  String basePath
112  = Paths.get(parentURI).toString()
113  + File.separator + nspath;
114  File baseFile = new File(basePath);
115  if (baseFile.exists()) {
116  found = true;
117  }
118  }
119  if (!found) {
120  stats.addCritical(function, cd,
121  "In Communication: " + communicationname + " File in NSLink not found: " + nspath);
122  } else {
123  stats.addCorrect(function, cd,
124  "File in NSLink was found: " + nspath);
125  }
126  }
127  }
128  NodeList relpathnodes = doc.getElementsByTagName("relPath");
129  for (int i = 0; i < relpathnodes.getLength(); i++) {
130  Element relpathnode = (Element) relpathnodes.item(i);
131  NodeList reltexts = relpathnode.getChildNodes();
132  for (int j = 0; j < reltexts.getLength(); j++) {
133  Node maybeText = reltexts.item(j);
134  Node communicationrel = maybeText.getParentNode().getParentNode();
135  if (communicationrel.getNodeName() != null && communicationrel.getNodeName().equals("File") && communicationrel.getParentNode().hasAttributes() && communicationrel.getParentNode().getAttributes().getNamedItem("Name") != null) {
136  communicationname = communicationrel.getParentNode().getAttributes().getNamedItem("Name").getTextContent();
137  } else {
138  //could not find matching communication name
139  communicationname = "Could not figure out Communication name";
140  }
141  if (maybeText.getNodeType() != Node.TEXT_NODE) {
142  System.out.println("This is not a text node: "
143  + maybeText);
144  continue;
145  }
146  Text reltext = (Text) reltexts.item(j);
147  String relpath = reltext.getWholeText().replace("/", File.separator);
148  File justFile = new File(relpath);
149  boolean found = false;
150  if (justFile.exists()) {
151  found = true;
152  }
153  String absPath = referencePath + File.separator + relpath;
154  File absFile = new File(absPath);
155  if (absFile.exists()) {
156  found = true;
157  }
158  if (cd.getURL() != null) {
159  URL urlPath = cd.getURL();
160  URL urlRelPath = new URL(urlPath, relpath.replace("\\", "/"));
161  File dataFile = new File(urlRelPath.toURI());
162  if (dataFile.exists()) {
163  found = true;
164  }
165  }
166  if (cdcoma.getBasedirectory() != null) {
167  URI uri = cdcoma.getBasedirectory().toURI();
168  URI parentURI = uri.getPath().endsWith("/") ? uri.resolve("..") : uri.resolve(".");
169  String basePath
170  = Paths.get(parentURI).toString()
171  + File.separator + relpath;
172  File baseFile = new File(basePath);
173  if (baseFile.exists()) {
174  found = true;
175  }
176  }
177  if (!found) {
178  stats.addCritical(function, cd,
179  "In Communication: " + communicationname + " File in relPath not found: " + relpath);
180  } else {
181  stats.addCorrect(function, cd,
182  "File in relPath was found: " + relpath);
183  }
184  }
185  }
186  return stats;
187  }
188 
189  @Override
190  public Collection<Class<? extends CorpusData>> getIsUsableFor() {
191  try {
192  Class cl = Class.forName("de.uni_hamburg.corpora.ComaData");
193  IsUsableFor.add(cl);
194  } catch (ClassNotFoundException ex) {
195  report.addException(ex, "Usable class not found.");
196  }
197  return IsUsableFor;
198  }
199 
204  @Override
205  public String getDescription() {
206  String description = "This class checks for existence of files linked in the "
207  + "coma file.";
208  return description;
209  }
210 
211 }
Collection< Class<?extends CorpusData > > getIsUsableFor()
void addCritical(String description)
Definition: Report.java:104
static org.w3c.dom.Document JdomDocument2W3cDocument(org.jdom.Document jdomDoc)
void addCorrect(String statId, String description)
Definition: Report.java:217
static org.jdom.Document String2JdomDocument(String stringRespresentingDocument)
void addException(Throwable e, String description)
Definition: Report.java:287