corpus-services  1.0
RemoveAbsolutePaths.java
Go to the documentation of this file.
1 package de.uni_hamburg.corpora.validation;
2 
9 import java.io.IOException;
10 import java.net.MalformedURLException;
11 import java.net.URISyntaxException;
12 import java.net.URL;
13 import java.nio.file.Path;
14 import java.nio.file.Paths;
15 import java.util.Collection;
16 import java.util.List;
17 import org.exmaralda.partitureditor.jexmaralda.JexmaraldaException;
18 import org.jdom.Attribute;
19 import org.jdom.Document;
20 import org.jdom.Element;
21 import org.jdom.JDOMException;
22 import org.jdom.xpath.XPath;
23 import org.xml.sax.SAXException;
26 import java.io.UnsupportedEncodingException;
27 import java.security.NoSuchAlgorithmException;
28 import javax.xml.parsers.ParserConfigurationException;
29 import javax.xml.transform.TransformerException;
30 import javax.xml.xpath.XPathExpressionException;
31 import org.exmaralda.partitureditor.fsm.FSMException;
32 
37 public class RemoveAbsolutePaths extends Checker implements CorpusFunction {
38 
39  Document doc = null;
40  Path pathRelative = null;
41  String nameOfCorpusFolder;
42  String nameOfExbFolder;
43 
45  //fixing is possible
46  super(true);
47  }
48 
49  @Override
50  public Report function(CorpusData cd, Boolean fix) throws SAXException, JexmaraldaException, ClassNotFoundException, JDOMException, URISyntaxException, TransformerException, ParserConfigurationException, IOException, MalformedURLException, XPathExpressionException {
51 
52  Class cl = Class.forName("de.uni_hamburg.corpora.BasicTranscriptionData");
53  Class cl3 = Class.forName("de.uni_hamburg.corpora.SegmentedTranscriptionData");
54  Class cl2 = Class.forName("de.uni_hamburg.corpora.ComaData");
55  if (cl.isInstance(cd) || cl3.isInstance(cd)) {
57  //if there is no absolute path, nothing needs to be done
58  //check if the paths that are there are absolute
59  if (!al.isEmpty()) {
60  for (int i = 0; i < al.size(); i++) {
61  Object o = al.get(i);
62  Attribute a = (Attribute) o;
63  //System.out.println(a);
64  String refurl = a.getValue();
65  Path pabs;
66  if (refurl.startsWith("file")) {
67  URL refurlurl = new URL(refurl);
68  pabs = Paths.get(refurlurl.toURI());
69  } else {
70  pabs = Paths.get(refurl);
71  }
72  if (pabs.isAbsolute()) {
73  if (fix) {
74  if (!(pathRelative == null)) {
75  a.setValue(pathRelative.toString());
76  //then save file
77  //add a report message
78  CorpusIO cio = new CorpusIO();
79  cd.updateUnformattedString(TypeConverter.JdomDocument2String(doc));
80  cio.write(cd, cd.getURL());
81  report.addFix(function, cd, "removed absolute path");
82  } else {
83  report.addCritical(function, cd,
84  "relative path " + pabs.toString() + " cannot be figured out");
85  }
86  } else {
87  report.addCritical(function, cd, "absolute path info needs to be replaced");
88  }
89  if (cl.isInstance(cd)) {
90  exmaError.addError("RemoveAbsolutePaths", cd.getURL().getFile(), "", "", false, "absolute path info needs to be replaced");
91  }
92  } else {
93  al.remove(o);
94  report.addCorrect(function, cd, "path is already relative, nothing to do");
95  }
96  }
97  }
98  List ale = findAllAbsolutePathsExbElement(cd);
99  if (!ale.isEmpty()) {
100  for (int i = 0; i < ale.size(); i++) {
101  Object o = ale.get(i);
102  Element ae = (Element) o;
103  //System.out.println(a);
104  String refurl = ae.getText();
105  Path pabs;
106  if (refurl.startsWith("file")) {
107  URL refurlurl = new URL(refurl);
108  pabs = Paths.get(refurlurl.toURI());
109  } else {
110  pabs = Paths.get(refurl);
111  }
112  if (pabs.isAbsolute()) {
113  if (fix) {
114  if (!(pathRelative == null)) {
115  ae.setText(pathRelative.toString());
116  //then save file
117  //add a report message
118  CorpusIO cio = new CorpusIO();
119  cd.updateUnformattedString(TypeConverter.JdomDocument2String(doc));
120  cio.write(cd, cd.getURL());
121  report.addFix(function, cd, "removed absolute path");
122  } else {
123  report.addCritical(function, cd,
124  "relative path " + pabs.toString() + " cannot be figured out");
125  }
126  } else {
127 
128  report.addCritical(function, cd, "absolute path info needs to be replaced");
129  }
130  if (cl.isInstance(cd)) {
131  exmaError.addError("RemoveAbsolutePaths", cd.getURL().getFile(), "", "", false, "absolute path info needs to be replaced");
132  }
133  } else {
134  al.remove(o);
135  report.addCorrect(function, cd, "path is already relative, nothing to do");
136  }
137  }
138  }
139  } else if (cl2.isInstance(cd)) {
140  List al = findAllAbsolutePathsComa(cd);
141  //if there is no autosave, nothing needs to be done
142  if (!al.isEmpty()) {
143  for (int i = 0; i < al.size(); i++) {
144  Object o = al.get(i);
145  Element e = (Element) o;
146  String refurl = e.getText();
147  Path pabs;
148  if (refurl.startsWith("file")) {
149  URL refurlurl = new URL(refurl);
150  pabs = Paths.get(refurlurl.toURI());
151  } else {
152  pabs = Paths.get(refurl);
153  }
154  //Path pabs = Paths.get(e.getText());
155  if (pabs.isAbsolute()) {
156  report.addCritical(function, cd, "absolute path info needs to be replaced");
157  } else {
158  al.remove(o);
159  report.addCorrect(function, cd, "path is already relative, nothing to do");
160  }
161 
162  }
163  }
164  } else {
165  report.addCritical(function, cd, "File is neither coma nor exb nor exs file");
166  }
167  return report;
168  }
169 
170  @Override
171  public Collection<Class<? extends CorpusData>> getIsUsableFor() {
172  try {
173  Class cl = Class.forName("de.uni_hamburg.corpora.BasicTranscriptionData");
174  IsUsableFor.add(cl);
175  Class cl2 = Class.forName("de.uni_hamburg.corpora.SegmentedTranscriptionData");
176  IsUsableFor.add(cl2);
177  Class cl3 = Class.forName("de.uni_hamburg.corpora.ComaData");
178  IsUsableFor.add(cl3);
179  } catch (ClassNotFoundException ex) {
180  report.addException(ex, "usable class not found error");
181  }
182  return IsUsableFor;
183  }
184 
185  public List findAllAbsolutePathsExbAttribute(CorpusData cd) throws JDOMException, URISyntaxException, MalformedURLException, TransformerException, ParserConfigurationException, SAXException, IOException, XPathExpressionException {
186  doc = TypeConverter.String2JdomDocument(cd.toSaveableString());
187  XPath xp1;
188  // in exbs: <referenced-file url="ChND_99_Barusi_flkd.wav"/>
189  //working for exs too
190  xp1 = XPath.newInstance("//head/meta-information/referenced-file/@url");
191  List allAbsolutePaths = xp1.selectNodes(doc);
192  if (allAbsolutePaths.isEmpty()) {
193  report.addWarning(function, cd, "no paths found");
194  }
195  return allAbsolutePaths;
196  }
197 
198  public List findAllAbsolutePathsExbElement(CorpusData cd) throws JDOMException, URISyntaxException, MalformedURLException, TransformerException, ParserConfigurationException, SAXException, IOException, XPathExpressionException {
199  doc = TypeConverter.String2JdomDocument(cd.toSaveableString());
200  XPath xp1;
201  // in exbs: <referenced-file url="ChND_99_Barusi_flkd.wav"/>
202  //working for exs too
203  xp1 = XPath.newInstance("//ud-meta-information/ud-information[@attribute-name='# EXB-SOURCE']");
204  List allAbsolutePaths = xp1.selectNodes(doc);
205  if (allAbsolutePaths.isEmpty()) {
206  report.addWarning(function, cd, "no paths found");
207  }
208  return allAbsolutePaths;
209  }
210 
211  public List findAllAbsolutePathsComa(CorpusData cd) throws JDOMException, URISyntaxException, MalformedURLException, TransformerException, ParserConfigurationException, SAXException, IOException, XPathExpressionException {
212  doc = TypeConverter.String2JdomDocument(cd.toSaveableString());
213  XPath xp1;
214  // in Coma: NSLinks and relPaths <NSLink>narrative/KBD_71_Fish_nar/KBD_71_Fish_nar_s.exs</NSLink>
215  // <relPath>narrative/KBD_71_Fish_nar/NG_6_1971_506-507_KBD_71_Fish_nar.pdf</relPath>
216  xp1 = XPath.newInstance("/Corpus/CorpusData/Communication/File/relPath | /Corpus/CorpusData/Communication/File/absPath | /Corpus/CorpusData/Communication/Transcription/NSLink | /Corpus/CorpusData/Communication/Transcription/Description/Key[@Name='# EXB-SOURCE'] | /Corpus/CorpusData/Communication/Recording/Media/NSLink");
217  List allAbsolutePaths = xp1.selectNodes(doc);
218  if (allAbsolutePaths.isEmpty()) {
219  report.addWarning(function, cd, "no paths found");
220  }
221  return allAbsolutePaths;
222  }
223 
224  public static Path trimFilePathBeforeDirectory(Path filepath, String directory) {
225  //find the index where the directoryname occurs
226  for (int i = 0; i < filepath.getNameCount() - 1; i++) {
227  if (filepath.getName(i).toString().equals(directory)) {
228  Path trimmedPath = filepath.subpath(i + 1, filepath.getNameCount());
229  return trimmedPath;
230  }
231  }
232  return null;
233  }
234 
239  @Override
240  public String getDescription() {
241  String description = "This class finds paths that are absolute"
242  + " in files and replaces them with paths relative to the corpus folder. ";
243  return description;
244  }
245 
246  @Override
247  public Report function(Corpus c, Boolean fix) throws SAXException, IOException, ParserConfigurationException, JexmaraldaException, TransformerException, XPathExpressionException, NoSuchAlgorithmException, ClassNotFoundException, FSMException, URISyntaxException, JDOMException {
248  Report stats = new Report();
249  for (CorpusData cdata : c.getBasicTranscriptionData()) {
250  stats.merge(function(cdata, fix));
251  }
252  for (CorpusData sdata : c.getSegmentedTranscriptionData()) {
253  stats.merge(function(sdata, fix));
254  }
255  stats.merge(function(c.getComaData(), fix));
256  return stats;
257  }
258 }
void merge(Report sr)
Definition: Report.java:73
static Path trimFilePathBeforeDirectory(Path filepath, String directory)
void addCritical(String description)
Definition: Report.java:104
void addWarning(String statId, String description)
Definition: Report.java:164
Collection< Class<?extends CorpusData > > getIsUsableFor()
void addCorrect(String statId, String description)
Definition: Report.java:217
static org.jdom.Document String2JdomDocument(String stringRespresentingDocument)
static String JdomDocument2String(org.jdom.Document jdomDocument)
void addException(Throwable e, String description)
Definition: Report.java:287
void write(CorpusData cd, URL url)
Definition: CorpusIO.java:66
void addFix(String statId, CorpusData cd, String description)
Definition: Report.java:155