6 package de.uni_hamburg.corpora;
9 import java.io.IOException;
11 import java.net.URISyntaxException;
13 import java.nio.file.Files;
14 import java.nio.file.Paths;
15 import java.util.List;
16 import java.util.logging.Level;
17 import java.util.logging.Logger;
18 import javax.xml.parsers.ParserConfigurationException;
19 import javax.xml.transform.TransformerException;
20 import javax.xml.xpath.XPathExpressionException;
21 import org.apache.commons.io.FilenameUtils;
22 import org.jdom.Document;
23 import org.jdom.JDOMException;
24 import org.jdom.input.SAXBuilder;
25 import org.jdom.xpath.XPath;
26 import org.xml.sax.SAXException;
// Raw text of the file. In this excerpt it is filled from the bytes read at the source URL
// (decoded as UTF-8), returned unchanged by one accessor and overwritten by another.
36 String originalstring;
// File name with directory and extension removed; in this excerpt it is set from
// FilenameUtils.getBaseName(url.getPath()) and via a setter-style assignment.
39 String filenamewithoutending;
// NOTE(review): this excerpt is discontinuous (the embedded original line numbers jump), so the
// opening of the enclosing try statement and the bodies of the catch clauses are not visible.
// Parse the XML found at url into the jdom document.
// NOTE(review): the builder is used with its default configuration; if url can point at
// untrusted XML, confirm whether DTD/external-entity processing should be disabled.
49 SAXBuilder builder =
new SAXBuilder();
50 jdom = builder.build(url);
// Additionally keep the raw file content as text, decoded as UTF-8.
// NOTE(review): Paths.get(URI) presumably only works for file: URLs here — confirm with callers.
51 originalstring =
new String(Files.readAllBytes(Paths.get(url.toURI())),
"UTF-8");
// Derive the parent location: when the path ends with "/" step one level up (".."),
// otherwise resolve "." against the URI, then convert the result back to a URL.
52 URI uri = url.toURI();
53 URI parentURI = uri.getPath().endsWith(
"/") ? uri.resolve(
"..") : uri.resolve(
".");
54 parenturl = parentURI.toURL();
// Record the file name with and without its extension, both taken from the URL path.
55 filename = FilenameUtils.getName(url.getPath());
56 filenamewithoutending = FilenameUtils.getBaseName(url.getPath());
57 }
// Handler for JDOM parse failures (body not visible in this excerpt).
catch (JDOMException ex) {
59 }
// Handler for I/O failures while parsing or reading the file (body not visible in this excerpt).
catch (IOException ex) {
61 }
// Handler for a URL that cannot be converted to a URI (body not visible in this excerpt).
catch (URISyntaxException ex) {
// Returns the text to be written when saving; delegates entirely to toPrettyPrintedXML()
// and declares the same checked exceptions as that helper.
72 public String
toSaveableString() throws TransformerException, ParserConfigurationException, SAXException, IOException, XPathExpressionException {
73 return toPrettyPrintedXML();
// NOTE(review): original line 78 is not contiguous with line 73; it presumably belongs to a
// separate accessor (toUnformattedString?) that returns the stored raw text unchanged — confirm.
78 return originalstring;
// Private helper that yields the pretty-printed form of the data.
81 private String toPrettyPrintedXML()
throws TransformerException, ParserConfigurationException, SAXException, IOException, XPathExpressionException {
// prettyCorpusData is computed on lines that are not visible in this excerpt.
85 return prettyCorpusData;
// Replaces the stored raw text with newUnformattedString; the jdom document is not touched
// on this line.
90 originalstring = newUnformattedString;
// Accessor: returns the stored file name without extension.
130 return filenamewithoutending;
// Mutator: overwrites the stored file name without extension with s.
135 filenamewithoutending = s;
// Select every ud-information element under
// /segmented-transcription/head/meta-information/ud-meta-information whose attribute-name
// attribute starts with '#'.
139 XPath context = XPath.newInstance(
"/segmented-transcription/head/meta-information/ud-meta-information/ud-information[starts-with(@attribute-name,'#')]");
// Evaluate the expression against the parsed jdom document.
// NOTE(review): the result is a raw List; the element type is presumably org.jdom.Element — confirm.
140 List allContextInstances = context.selectNodes(jdom);
// Store the matches in segmentCounts and hand the same list back to the caller.
141 segmentCounts = allContextInstances;
142 return segmentCounts;
void setJdom(Document doc)
void setFilenameWithoutFileEnding(String s)
SegmentedTranscriptionData(URL url)
SegmentedTranscriptionData()
void setParentURL(URL url)
void setFilename(String s)
String indent(String xml, String suppressedElements)
String toUnformattedString()
String toSaveableString()
void updateUnformattedString(String newUnformattedString)
String getFilenameWithoutFileEnding()