corpus-services  1.0
SegmentedTranscriptionData.java
Go to the documentation of this file.
1 /*
2  * To change this license header, choose License Headers in Project Properties.
3  * To change this template file, choose Tools | Templates
4  * and open the template in the editor.
5  */
6 package de.uni_hamburg.corpora;
7 
9 import java.io.IOException;
10 import java.net.URI;
11 import java.net.URISyntaxException;
12 import java.net.URL;
13 import java.nio.file.Files;
14 import java.nio.file.Paths;
15 import java.util.List;
16 import java.util.logging.Level;
17 import java.util.logging.Logger;
18 import javax.xml.parsers.ParserConfigurationException;
19 import javax.xml.transform.TransformerException;
20 import javax.xml.xpath.XPathExpressionException;
21 import org.apache.commons.io.FilenameUtils;
22 import org.jdom.Document;
23 import org.jdom.JDOMException;
24 import org.jdom.input.SAXBuilder;
25 import org.jdom.xpath.XPath;
26 import org.xml.sax.SAXException;
27 
32 public class SegmentedTranscriptionData implements CorpusData, ContentData, XMLData {
33 
34  Document jdom;
35  URL url;
36  String originalstring;
37  URL parenturl;
38  String filename;
39  String filenamewithoutending;
40  List segmentCounts;
41 
43 
44  }
45 
46  public SegmentedTranscriptionData(URL url) {
47  try {
48  this.url = url;
49  SAXBuilder builder = new SAXBuilder();
50  jdom = builder.build(url);
51  originalstring = new String(Files.readAllBytes(Paths.get(url.toURI())), "UTF-8");
52  URI uri = url.toURI();
53  URI parentURI = uri.getPath().endsWith("/") ? uri.resolve("..") : uri.resolve(".");
54  parenturl = parentURI.toURL();
55  filename = FilenameUtils.getName(url.getPath());
56  filenamewithoutending = FilenameUtils.getBaseName(url.getPath());
57  } catch (JDOMException ex) {
58  Logger.getLogger(SegmentedTranscriptionData.class.getName()).log(Level.SEVERE, null, ex);
59  } catch (IOException ex) {
60  Logger.getLogger(SegmentedTranscriptionData.class.getName()).log(Level.SEVERE, null, ex);
61  } catch (URISyntaxException ex) {
62  Logger.getLogger(SegmentedTranscriptionData.class.getName()).log(Level.SEVERE, null, ex);
63  }
64  }
65 
66  @Override
67  public URL getURL() {
68  return url;
69  }
70 
71  @Override
72  public String toSaveableString() throws TransformerException, ParserConfigurationException, SAXException, IOException, XPathExpressionException {
73  return toPrettyPrintedXML();
74  }
75 
76  @Override
77  public String toUnformattedString() {
78  return originalstring;
79  }
80 
81  private String toPrettyPrintedXML() throws TransformerException, ParserConfigurationException, SAXException, IOException, XPathExpressionException {
82  PrettyPrinter pp = new PrettyPrinter();
83  String prettyCorpusData = pp.indent(toUnformattedString(), "event");
84  //String prettyCorpusData = pp.indent(bt.toXML(bt.getTierFormatTable()), "event");
85  return prettyCorpusData;
86  }
87 
88  @Override
89  public void updateUnformattedString(String newUnformattedString) {
90  originalstring = newUnformattedString;
91  }
92 
93  @Override
94  public Document getJdom() {
95  return jdom;
96  }
97 
98  @Override
99  public void setJdom(Document doc) {
100  jdom = doc;
101  }
102 
103  @Override
104  public URL getParentURL() {
105  return parenturl;
106  }
107 
108  @Override
109  public void setURL(URL nurl) {
110  url = nurl;
111  }
112 
113  @Override
114  public void setParentURL(URL url) {
115  parenturl = url;
116  }
117 
118  @Override
119  public String getFilename() {
120  return filename;
121  }
122 
123  @Override
124  public void setFilename(String s) {
125  filename = s;
126  }
127 
128  @Override
130  return filenamewithoutending;
131  }
132 
133  @Override
134  public void setFilenameWithoutFileEnding(String s) {
135  filenamewithoutending = s;
136  }
137 
138  public List getSegmentCounts() throws JDOMException {
139  XPath context = XPath.newInstance("/segmented-transcription/head/meta-information/ud-meta-information/ud-information[starts-with(@attribute-name,'#')]");
140  List allContextInstances = context.selectNodes(jdom);
141  segmentCounts = allContextInstances;
142  return segmentCounts;
143  }
144 
145 }
String indent(String xml, String suppressedElements)