1 package de.uni_hamburg.corpora.validation;
9 import java.io.IOException;
10 import java.net.MalformedURLException;
11 import java.net.URISyntaxException;
13 import java.nio.file.Path;
14 import java.nio.file.Paths;
15 import java.util.Collection;
16 import java.util.List;
17 import org.exmaralda.partitureditor.jexmaralda.JexmaraldaException;
18 import org.jdom.Attribute;
19 import org.jdom.Document;
20 import org.jdom.Element;
21 import org.jdom.JDOMException;
22 import org.jdom.xpath.XPath;
23 import org.xml.sax.SAXException;
26 import java.io.UnsupportedEncodingException;
27 import java.security.NoSuchAlgorithmException;
28 import javax.xml.parsers.ParserConfigurationException;
29 import javax.xml.transform.TransformerException;
30 import javax.xml.xpath.XPathExpressionException;
31 import org.exmaralda.partitureditor.fsm.FSMException;
// Replacement path written into a document in place of an absolute one.
// Starts as null; function(CorpusData, Boolean) only rewrites a reference
// when this is non-null.
40 Path pathRelative = null;
// NOTE(review): presumably the name of the corpus root folder — not
// referenced in the visible lines; confirm against the rest of the class.
41 String nameOfCorpusFolder;
// NOTE(review): presumably the name of the folder holding the exb file —
// not referenced in the visible lines; confirm.
42 String nameOfExbFolder;
/**
 * Checks a single corpus file for absolute path references.
 *
 * The runtime class of {@code cd} selects the branch:
 * BasicTranscriptionData or SegmentedTranscriptionData files are scanned
 * twice (attribute values from {@code al}, then element texts from
 * {@code ale}); ComaData files are scanned once over element texts and, in
 * the visible lines, only reported on; any other class yields a critical
 * report entry.
 *
 * NOTE(review): several statements of this method are not visible here —
 * the assignments of {@code al}, {@code ale}, {@code o}, {@code pabs} and
 * {@code pathRelative}, the {@code else} lines, and any check of
 * {@code fix}. The exact conditions separating the "fix" path from the
 * "critical" path therefore cannot be confirmed from this view.
 *
 * @param cd  the corpus file to inspect
 * @param fix presumably whether absolute paths should be rewritten rather
 *            than just reported — not read in the visible lines; confirm
 * @return the report for this file (return statement not visible here)
 * @throws ClassNotFoundException if one of the data classes looked up by
 *         name cannot be loaded
 * @throws MalformedURLException if a reference starting with "file" is not
 *         a valid URL
 * @throws URISyntaxException if such a URL cannot be converted to a URI
 */
50 public Report function(
CorpusData cd, Boolean fix)
throws SAXException, JexmaraldaException, ClassNotFoundException, JDOMException, URISyntaxException, TransformerException, ParserConfigurationException, IOException, MalformedURLException, XPathExpressionException {
// Look up the three supported data classes by name for the instance checks below.
52 Class cl = Class.forName(
"de.uni_hamburg.corpora.BasicTranscriptionData");
53 Class cl3 = Class.forName(
"de.uni_hamburg.corpora.SegmentedTranscriptionData");
54 Class cl2 = Class.forName(
"de.uni_hamburg.corpora.ComaData");
// Branch 1: basic or segmented transcription files.
55 if (cl.isInstance(cd) || cl3.isInstance(cd)) {
// First pass: entries of al, each treated as a JDOM Attribute.
// NOTE(review): al presumably comes from findAllAbsolutePathsExbAttribute(cd) — confirm.
60 for (
int i = 0; i < al.size(); i++) {
62 Attribute a = (Attribute) o;
64 String refurl = a.getValue();
// Values starting with "file" are parsed as a URL and converted via its URI;
// the second Paths.get call is presumably the else branch for plain paths.
// NOTE(review): a relative name that merely begins with "file" would also
// take the URL route — confirm whether that is intended.
66 if (refurl.startsWith(
"file")) {
67 URL refurlurl =
new URL(refurl);
68 pabs = Paths.get(refurlurl.toURI());
70 pabs = Paths.get(refurl);
72 if (pabs.isAbsolute()) {
// Only rewrite the attribute when a replacement path is available.
74 if (!(pathRelative == null)) {
75 a.setValue(pathRelative.toString());
// Write cd out to its own URL, then record the fix on the report.
80 cio.
write(cd, cd.getURL());
81 report.
addFix(
function, cd,
"removed absolute path");
84 "relative path " + pabs.toString() +
" cannot be figured out");
87 report.
addCritical(
function, cd,
"absolute path info needs to be replaced");
// The ExmaErrorList entry is added only for basic transcriptions (cl).
89 if (cl.isInstance(cd)) {
90 exmaError.addError(
"RemoveAbsolutePaths", cd.getURL().getFile(),
"",
"",
false,
"absolute path info needs to be replaced");
94 report.
addCorrect(
function, cd,
"path is already relative, nothing to do");
// Second pass: same checks, but over the text content of the elements in ale.
// NOTE(review): ale presumably comes from findAllAbsolutePathsExbElement(cd) — confirm.
100 for (
int i = 0; i < ale.size(); i++) {
101 Object o = ale.get(i);
102 Element ae = (Element) o;
104 String refurl = ae.getText();
106 if (refurl.startsWith(
"file")) {
107 URL refurlurl =
new URL(refurl);
108 pabs = Paths.get(refurlurl.toURI());
110 pabs = Paths.get(refurl);
112 if (pabs.isAbsolute()) {
114 if (!(pathRelative == null)) {
115 ae.setText(pathRelative.toString());
120 cio.
write(cd, cd.getURL());
121 report.
addFix(
function, cd,
"removed absolute path");
124 "relative path " + pabs.toString() +
" cannot be figured out");
128 report.
addCritical(
function, cd,
"absolute path info needs to be replaced");
130 if (cl.isInstance(cd)) {
131 exmaError.addError(
"RemoveAbsolutePaths", cd.getURL().getFile(),
"",
"",
false,
"absolute path info needs to be replaced");
135 report.
addCorrect(
function, cd,
"path is already relative, nothing to do");
139 }
// Branch 2: Coma files. The visible lines only classify each path as
// absolute (critical) or relative (correct); no rewrite or write call appears here.
else if (cl2.isInstance(cd)) {
143 for (
int i = 0; i < al.size(); i++) {
144 Object o = al.get(i);
145 Element e = (Element) o;
146 String refurl = e.getText();
148 if (refurl.startsWith(
"file")) {
149 URL refurlurl =
new URL(refurl);
150 pabs = Paths.get(refurlurl.toURI());
152 pabs = Paths.get(refurl);
155 if (pabs.isAbsolute()) {
156 report.
addCritical(
function, cd,
"absolute path info needs to be replaced");
159 report.
addCorrect(
function, cd,
"path is already relative, nothing to do");
// Report entry for input that matched none of the supported data classes
// (presumably the final else branch; its opening line is not visible).
165 report.
addCritical(
function, cd,
"File is neither coma nor exb nor exs file");
// NOTE(review): the enclosing method header and the opening try are not
// visible here; presumably this is the body of getIsUsableFor() — confirm.
// Looks up the supported data classes by name and adds them to IsUsableFor.
// (Only the add calls for cl2 and cl3 are visible; the one for cl is presumably
// on a line missing from this view.)
173 Class cl = Class.forName(
"de.uni_hamburg.corpora.BasicTranscriptionData");
175 Class cl2 = Class.forName(
"de.uni_hamburg.corpora.SegmentedTranscriptionData");
176 IsUsableFor.add(cl2);
177 Class cl3 = Class.forName(
"de.uni_hamburg.corpora.ComaData");
178 IsUsableFor.add(cl3);
179 }
// A class that cannot be loaded is recorded on the report in the visible handler.
catch (ClassNotFoundException ex) {
180 report.
addException(ex,
"usable class not found error");
/**
 * Selects the {@code url} attributes of all
 * {@code head/meta-information/referenced-file} elements in {@code doc}.
 *
 * Despite the method name, no absolute/relative filtering happens in the
 * visible lines: every node matched by the XPath is returned. A warning is
 * added to the report when nothing matches.
 *
 * NOTE(review): the lines that declare {@code xp1} and build {@code doc}
 * are not visible here; presumably {@code doc} is parsed from {@code cd} —
 * confirm.
 *
 * @param cd the corpus file the warning is attributed to
 * @return the raw list of nodes selected by the XPath (possibly empty)
 * @throws JDOMException if the XPath cannot be compiled or evaluated
 */
185 public List
findAllAbsolutePathsExbAttribute(
CorpusData cd)
throws JDOMException, URISyntaxException, MalformedURLException, TransformerException, ParserConfigurationException, SAXException, IOException, XPathExpressionException {
190 xp1 = XPath.newInstance(
"//head/meta-information/referenced-file/@url");
191 List allAbsolutePaths = xp1.selectNodes(doc);
// An empty result is reported as a warning.
192 if (allAbsolutePaths.isEmpty()) {
193 report.
addWarning(
function, cd,
"no paths found");
195 return allAbsolutePaths;
/**
 * Selects all {@code ud-information} elements under
 * {@code ud-meta-information} whose {@code attribute-name} is
 * {@code # EXB-SOURCE}, evaluated against {@code doc}.
 *
 * No absolute/relative filtering happens in the visible lines: every node
 * matched by the XPath is returned. A warning is added to the report when
 * nothing matches.
 *
 * NOTE(review): the lines that declare {@code xp1} and build {@code doc}
 * are not visible here; presumably {@code doc} is parsed from {@code cd} —
 * confirm.
 *
 * @param cd the corpus file the warning is attributed to
 * @return the raw list of nodes selected by the XPath (possibly empty)
 * @throws JDOMException if the XPath cannot be compiled or evaluated
 */
198 public List
findAllAbsolutePathsExbElement(
CorpusData cd)
throws JDOMException, URISyntaxException, MalformedURLException, TransformerException, ParserConfigurationException, SAXException, IOException, XPathExpressionException {
203 xp1 = XPath.newInstance(
"//ud-meta-information/ud-information[@attribute-name='# EXB-SOURCE']");
204 List allAbsolutePaths = xp1.selectNodes(doc);
// An empty result is reported as a warning.
205 if (allAbsolutePaths.isEmpty()) {
206 report.
addWarning(
function, cd,
"no paths found");
208 return allAbsolutePaths;
/**
 * Selects every path-carrying element of a Coma document, evaluated against
 * {@code doc}. The XPath is a union over {@code /Corpus/CorpusData/Communication}
 * of: {@code File/relPath}, {@code File/absPath}, {@code Transcription/NSLink},
 * {@code Transcription/Description/Key[@Name='# EXB-SOURCE']} and
 * {@code Recording/Media/NSLink}.
 *
 * No absolute/relative filtering happens in the visible lines: every node
 * matched by the XPath is returned. A warning is added to the report when
 * nothing matches.
 *
 * NOTE(review): the lines that declare {@code xp1} and build {@code doc}
 * are not visible here; presumably {@code doc} is parsed from {@code cd} —
 * confirm.
 *
 * @param cd the corpus file the warning is attributed to
 * @return the raw list of nodes selected by the XPath (possibly empty)
 * @throws JDOMException if the XPath cannot be compiled or evaluated
 */
211 public List
findAllAbsolutePathsComa(
CorpusData cd)
throws JDOMException, URISyntaxException, MalformedURLException, TransformerException, ParserConfigurationException, SAXException, IOException, XPathExpressionException {
216 xp1 = XPath.newInstance(
"/Corpus/CorpusData/Communication/File/relPath | /Corpus/CorpusData/Communication/File/absPath | /Corpus/CorpusData/Communication/Transcription/NSLink | /Corpus/CorpusData/Communication/Transcription/Description/Key[@Name='# EXB-SOURCE'] | /Corpus/CorpusData/Communication/Recording/Media/NSLink");
217 List allAbsolutePaths = xp1.selectNodes(doc);
// An empty result is reported as a warning.
218 if (allAbsolutePaths.isEmpty()) {
219 report.
addWarning(
function, cd,
"no paths found");
221 return allAbsolutePaths;
// NOTE(review): the enclosing method header is not visible here; presumably
// this is the body of trimFilePathBeforeDirectory(filepath, directory) — confirm.
// Walks the name elements of filepath, excluding the last one, looking for an
// element whose string form equals directory; on a match, takes the sub-path
// that starts right after that element and runs to the end of filepath.
226 for (
int i = 0; i < filepath.getNameCount() - 1; i++) {
227 if (filepath.getName(i).toString().equals(directory)) {
228 Path trimmedPath = filepath.subpath(i + 1, filepath.getNameCount());
// Human-readable summary of what this checker does.
// NOTE(review): the enclosing method header is not visible; presumably this
// string is returned by a description getter — confirm.
241 String description =
"This class finds paths that are absolute" 242 +
" in files and replaces them with paths relative to the corpus folder. ";
/**
 * Runs the single-file check on every file of a corpus and merges the
 * results into {@code stats}.
 *
 * The files are processed in this order: all basic transcriptions, then all
 * segmented transcriptions, then the Coma data. Each call delegates to
 * {@code function(CorpusData, Boolean)} with the same {@code fix} value.
 *
 * NOTE(review): the declaration of {@code stats} and the return statement
 * are not visible here; presumably {@code stats} is the returned Report —
 * confirm.
 *
 * @param c   the corpus whose files are checked
 * @param fix passed through unchanged to each single-file check
 * @return the merged report (return statement not visible here)
 */
247 public Report function(
Corpus c, Boolean fix)
throws SAXException, IOException, ParserConfigurationException, JexmaraldaException, TransformerException, XPathExpressionException, NoSuchAlgorithmException, ClassNotFoundException, FSMException, URISyntaxException, JDOMException {
// Basic transcriptions.
249 for (
CorpusData cdata : c.getBasicTranscriptionData()) {
250 stats.
merge(
function(cdata, fix));
// Segmented transcriptions.
252 for (
CorpusData sdata : c.getSegmentedTranscriptionData()) {
253 stats.
merge(
function(sdata, fix));
// The corpus' Coma data.
255 stats.
merge(
function(c.getComaData(), fix));
List findAllAbsolutePathsExbElement(CorpusData cd)
List findAllAbsolutePathsComa(CorpusData cd)
static Path trimFilePathBeforeDirectory(Path filepath, String directory)
static ExmaErrorList exmaError
void addCritical(String description)
void addWarning(String statId, String description)
Collection< Class<?extends CorpusData > > getIsUsableFor()
void addCorrect(String statId, String description)
static org.jdom.Document String2JdomDocument(String stringRespresentingDocument)
List findAllAbsolutePathsExbAttribute(CorpusData cd)
static String JdomDocument2String(org.jdom.Document jdomDocument)
void addException(Throwable e, String description)
void write(CorpusData cd, URL url)
void addFix(String statId, CorpusData cd, String description)