6 package de.uni_hamburg.corpora;
8 import org.exmaralda.coma.root.Coma;
10 import java.io.IOException;
11 import java.net.MalformedURLException;
12 import java.net.URISyntaxException;
14 import java.util.Collection;
15 import java.nio.file.Files;
16 import java.nio.file.Paths;
17 import java.util.logging.Level;
18 import java.util.logging.Logger;
19 import org.jdom.Document;
20 import org.jdom.JDOMException;
21 import org.jdom.input.SAXBuilder;
22 import org.xml.sax.SAXException;
24 import java.util.ArrayList;
25 import java.util.List;
26 import javax.xml.parsers.ParserConfigurationException;
27 import javax.xml.transform.TransformerException;
28 import javax.xml.xpath.XPathExpressionException;
29 import org.jdom.Element;
30 import org.jdom.xpath.XPath;
31 import org.apache.commons.io.FilenameUtils;
32 import org.exmaralda.partitureditor.jexmaralda.JexmaraldaException;
/** JDOM document of the Coma file; starts out empty and is replaced by the parsed file in the constructor. */
44 Document readcomaasjdom =
new Document();
/** Raw text of the Coma file; the constructor fills it by decoding the file's bytes as UTF-8. */
45 String originalstring;
/** Base name of the Coma file, set in the constructor via FilenameUtils.getBaseName on the URL path. */
47 String filenamewithoutending;
/**
 * XPath selecting the NSLink children of every Transcription whose
 * Description contains a Key named 'segmented' with the text 'true'.
 * NOTE(review): public, static and not final, so any code can reassign it —
 * confirm whether that is intended before tightening it.
 */
51 public static String
SEGMENTED_FILE_XPATH =
"//Transcription[Description/Key[@Name='segmented']/text()='true']/NSLink";
/**
 * XPath selecting the NSLink children of every Transcription whose
 * Description contains a Key named 'segmented' with the text 'false'.
 * NOTE(review): same mutability caveat as SEGMENTED_FILE_XPATH.
 */
52 public static String
BASIC_FILE_XPATH =
"//Transcription[Description/Key[@Name='segmented']/text()='false']/NSLink";
/**
 * Creates a ComaData instance from the Coma file at {@code url}.
 *
 * <p>The file is parsed into a JDOM document ({@code readcomaasjdom}), its raw
 * text is stored in {@code originalstring}, and the base directory, the file
 * name and the file name without extension are derived from the URL.
 *
 * @param url location of the Coma file
 * @throws SAXException declared by the signature; the throwing call is not
 *         visible in this excerpt
 * @throws JexmaraldaException declared by the signature; the throwing call is
 *         not visible in this excerpt
 */
63 public ComaData(URL url)
throws SAXException, JexmaraldaException {
// Parse the Coma XML into a JDOM document.
66 SAXBuilder builder =
new SAXBuilder();
67 readcomaasjdom = builder.build(url);
// Read the same resource a second time, now as raw bytes decoded as UTF-8.
// NOTE(review): Paths.get(URI) only accepts URI schemes backed by an installed
// file-system provider, so this presumably works for file: URLs only — confirm.
68 originalstring =
new String(Files.readAllBytes(Paths.get(url.toURI())),
"UTF-8");
// Base directory: resolve "." against the file URI to get its containing
// directory; if the path already ends in "/", resolve ".." instead.
69 URI uri = url.toURI();
70 URI parentURI = uri.getPath().endsWith(
"/") ? uri.resolve(
"..") : uri.resolve(
".");
71 CORPUS_BASEDIRECTORY = parentURI.toURL();
// File name with and without its extension, both taken from the URL path.
72 filename = FilenameUtils.getName(url.getPath());
73 filenamewithoutending = FilenameUtils.getBaseName(url.getPath());
74 }
catch (JDOMException ex) {
// NOTE(review): the handler bodies of these catch clauses are not visible in
// this excerpt — verify that parse/IO/URI failures are logged or rethrown and
// do not leave the object half-initialized silently.
76 }
catch (IOException ex) {
78 }
catch (URISyntaxException ex) {
/**
 * Returns the string form of this Coma file that is meant to be saved.
 * Delegates directly to {@code toPrettyPrintedXML()} and declares the same
 * checked exceptions.
 *
 * @return the result of {@code toPrettyPrintedXML()}
 */
95 public String
toSaveableString() throws TransformerException, ParserConfigurationException, SAXException, IOException, XPathExpressionException {
96 return toPrettyPrintedXML();
/**
 * Produces the pretty-printed XML form of the Coma data.
 * The statements that compute {@code prettyCorpusData} are not part of this
 * excerpt; only the final return is visible.
 *
 * @return the value held in {@code prettyCorpusData}
 */
99 private String toPrettyPrintedXML()
throws TransformerException, ParserConfigurationException, SAXException, IOException, XPathExpressionException {
103 return prettyCorpusData;
108 return originalstring;
115 if (!referencedCorpusDataURLs.contains(rurul)) {
116 referencedCorpusDataURLs.add(rurul);
// Fragment (method header not visible here; presumably the basic-transcription
// URL getter — confirm). Collects one URL per NSLink matched by BASIC_FILE_XPATH.
128 ArrayList<URL> resulturls =
new ArrayList<>();
// Evaluate the XPath against the parsed Coma document.
130 XPath xpath = XPath.newInstance(BASIC_FILE_XPATH);
131 List transcriptionList = xpath.selectNodes(readcomaasjdom);
132 for (
int pos = 0; pos < transcriptionList.size(); pos++) {
133 Element nslink = (Element) (transcriptionList.get(pos));
// The NSLink text is appended to the string form of CORPUS_BASEDIRECTORY,
// i.e. it is treated as a path relative to the corpus base directory.
135 resulturl =
new URL(CORPUS_BASEDIRECTORY + nslink.getText());
137 resulturls.add(resulturl);
139 }
catch (JDOMException ex) {
// NOTE(review): an XPath failure is only printed to stderr here; what is
// returned afterwards is not visible in this excerpt.
140 ex.printStackTrace();
// Fragment (method header not visible here; presumably the basic-transcription
// file-name getter — confirm). Collects the raw NSLink text of every match of
// BASIC_FILE_XPATH, without resolving it against the base directory.
147 ArrayList<String> result =
new ArrayList<>();
// Evaluate the XPath against the parsed Coma document.
148 XPath xpath = XPath.newInstance(BASIC_FILE_XPATH);
149 List transcriptionList = xpath.selectNodes(readcomaasjdom);
150 for (
int pos = 0; pos < transcriptionList.size(); pos++) {
151 Element nslink = (Element) (transcriptionList.get(pos));
// Stored exactly as written in the Coma file.
155 result.add(nslink.getText());
160 }
catch (JDOMException ex) {
// NOTE(review): an XPath failure is only printed to stderr here; what is
// returned afterwards is not visible in this excerpt.
161 ex.printStackTrace();
// Fragment (method header not visible here; presumably the segmented-transcription
// URL getter — confirm). Collects one URL per NSLink matched by SEGMENTED_FILE_XPATH.
168 ArrayList<URL> resulturls =
new ArrayList<>();
// Evaluate the XPath against the parsed Coma document.
170 XPath xpath = XPath.newInstance(SEGMENTED_FILE_XPATH);
171 List transcriptionList = xpath.selectNodes(readcomaasjdom);
172 for (
int pos = 0; pos < transcriptionList.size(); pos++) {
173 Element nslink = (Element) (transcriptionList.get(pos));
// The NSLink text is appended to the string form of CORPUS_BASEDIRECTORY,
// i.e. it is treated as a path relative to the corpus base directory.
175 resulturl =
new URL(CORPUS_BASEDIRECTORY + nslink.getText());
177 resulturls.add(resulturl);
179 }
catch (JDOMException ex) {
// NOTE(review): an XPath failure is only printed to stderr here; what is
// returned afterwards is not visible in this excerpt.
180 ex.printStackTrace();
/**
 * Collects the URLs of all files matched by {@code ALL_FILE_XPATH} in the
 * parsed Coma document, each built by appending the NSLink text to
 * {@code CORPUS_BASEDIRECTORY}. A URL already present in the result list is
 * not added a second time.
 *
 * @return presumably the collected list (the return statement is not visible
 *         in this excerpt)
 * @throws MalformedURLException if a concatenated string is not a valid URL
 * @throws URISyntaxException declared by the signature; the throwing call is
 *         not visible in this excerpt
 */
185 public Collection<URL>
getAllURLs() throws MalformedURLException, URISyntaxException {
187 ArrayList<URL> resulturls =
new ArrayList<>();
// Evaluate the XPath against the parsed Coma document.
189 XPath xpath = XPath.newInstance(ALL_FILE_XPATH);
190 List transcriptionList = xpath.selectNodes(readcomaasjdom);
191 for (
int pos = 0; pos < transcriptionList.size(); pos++) {
192 Element nslink = (Element) (transcriptionList.get(pos));
194 resulturl =
new URL(CORPUS_BASEDIRECTORY + nslink.getText());
// De-duplicate via a linear contains() scan.
// NOTE(review): contains() relies on URL.equals, which may resolve host
// names; likely harmless for file: URLs but worth confirming.
196 if (!resulturls.contains(resulturl)) {
197 resulturls.add(resulturl);
200 }
catch (JDOMException ex) {
// NOTE(review): an XPath failure is only printed to stderr here.
201 ex.printStackTrace();
207 originalstring = newUnformattedString;
211 CORPUS_BASEDIRECTORY = url;
215 URI uri = url.toURI();
216 URI parentURI = uri.getPath().endsWith(
"/") ? uri.resolve(
"..") : uri.resolve(
".");
217 CORPUS_BASEDIRECTORY = parentURI.toURL();
233 CORPUS_BASEDIRECTORY = url;
248 return filenamewithoutending;
253 filenamewithoutending = s;
258 return readcomaasjdom;
263 readcomaasjdom = jdom;
// Fragment (enclosing method header not visible here): reads the corpus name
// from the first node matched by CORPUSNAME_XPATH in the parsed Coma document.
275 XPath xpath = XPath.newInstance(CORPUSNAME_XPATH);
276 Element name = (Element) xpath.selectSingleNode(readcomaasjdom);
// NOTE(review): selectSingleNode presumably yields null when nothing matches,
// which would make getText() throw a NullPointerException — confirm whether a
// guard exists in the lines not shown.
277 corpusname = name.getText();
286 return XPath.selectNodes(readcomaasjdom,
"//Communication");
290 return (Element) XPath.selectSingleNode(readcomaasjdom,
"/Corpus/Description");
295 return (Element) XPath.selectSingleNode(readcomaasjdom,
"/Corpus/CorpusData");
ArrayList< URL > referencedCorpusDataURLs
Collection< URL > getReferencedCorpusDataURLs()
void setJdom(Document jdom)
void setOriginalString(String s)
ArrayList< String > getAllBasicTranscriptionFilenames()
Collection< URL > getAllSegmentedTranscriptionURLs()
String indent(String xml, String suppressedElements)
static String ALL_FILE_XPATH
void setFilenameWithoutFileEnding(String s)
List< Element > getCommunications()
Collection< URL > getAllBasicTranscriptionURLs()
Element getCorpusDescription()
Collection< URL > getAllURLs()
String getFilenameWithoutFileEnding()
void setFilename(String s)
String toUnformattedString()
void setBaseDirectory(URL url)
void setCorpusName(String s)
static String CORPUSNAME_XPATH
String toSaveableString()
void updateUnformattedString(String newUnformattedString)
static String BASIC_FILE_XPATH
void setParentURL(URL url)
static String SEGMENTED_FILE_XPATH