corpus-services  1.0
Corpus.java
Go to the documentation of this file.
1 /*
2  * To change this license header, choose License Headers in Project Properties.
3  * To change this template file, choose Tools | Templates
4  * and open the template in the editor.
5  */
6 package de.uni_hamburg.corpora;
7 
8 import java.net.URL;
9 import java.util.Collection;
12 import java.io.IOException;
13 import java.net.MalformedURLException;
14 import java.net.URISyntaxException;
15 import java.util.ArrayList;
16 import org.exmaralda.partitureditor.jexmaralda.JexmaraldaException;
17 import org.jdom.JDOMException;
18 import org.jdom.xpath.XPath;
19 import org.xml.sax.SAXException;
20 
25 public class Corpus {
26 
27  //only the metadata file, coma or cmdi in most cases, or a list of files
28  Collection<Metadata> metadata = new ArrayList();
29  //the transcriptions
30  Collection<ContentData> contentdata = new ArrayList();
31  Collection<Recording> recording = new ArrayList();
32  Collection<AdditionalData> additionaldata = new ArrayList();
33  Collection<AnnotationSpecification> annotationspecification = new ArrayList();
34  Collection<ConfigParameters> configparameters = new ArrayList();
35  private Collection<CmdiData> cmdidata = new ArrayList();
36  Collection<BasicTranscriptionData> basictranscriptiondata = new ArrayList();
37  Collection<SegmentedTranscriptionData> segmentedtranscriptiondata = new ArrayList();
38  ComaData comadata;
39  //all the data together
40  Collection<CorpusData> cdc = new ArrayList<CorpusData>();
41  URL basedirectory;
42  String corpusname;
43 
44  public Corpus() {
45  }
46 
47  public Corpus(URL url) {
48  }
49 
50  //only read in the files we need!
51  public Corpus(ComaData coma, Collection<Class<? extends CorpusData>> clcds) throws MalformedURLException, MalformedURLException, MalformedURLException, SAXException, JexmaraldaException, URISyntaxException, IOException, ClassNotFoundException, JDOMException {
52  CorpusIO cio = new CorpusIO();
53  //todo: only read what we need :)
54  //cl.isInstance(cd) - needs to be read already for this :/
55  //TODO
56  //get the needed files from the NSLinks in the coma file as URLs
57  // public Collection<URL> URLtoList(URL url)
58  Collection<URL> urllist = coma.getReferencedCorpusDataURLs();
59  basedirectory = coma.getParentURL();
60  corpusname = coma.getCorpusName();
61  for (URL url : urllist) {
62  CorpusData cddd = cio.readFileURL(url, clcds);
63  if (cddd != null && !cdc.contains(cddd)) {
64  cdc.add(cddd);
65  }
66  }
67  //Coma is coma is
68  comadata = coma;
69  //Now create the needed
70  for (CorpusData cd : cdc) {
71  if (cd instanceof ContentData) {
72  contentdata.add((ContentData) cd);
73  if (cd instanceof BasicTranscriptionData) {
74  basictranscriptiondata.add((BasicTranscriptionData) cd);
75  } else if (cd instanceof SegmentedTranscriptionData) {
76  segmentedtranscriptiondata.add((SegmentedTranscriptionData) cd);
77  }
78  } else if (cd instanceof Recording) {
79  recording.add((Recording) cd);
80  } else if (cd instanceof AdditionalData) {
81  additionaldata.add((AdditionalData) cd);
82  } else if (cd instanceof Metadata) {
83  //can only be CMDI since it's a coma file...
84  metadata.add((Metadata) cd);
85  if (cd instanceof CmdiData) {
86  cmdidata.add((CmdiData) cd);
87  } else if (cd instanceof AnnotationSpecification) {
88  annotationspecification.add((AnnotationSpecification) cd);
89  } else if (cd instanceof ConfigParameters) {
90  configparameters.add((ConfigParameters) cd);
91  }
92  }
93  }
94  //we don't need to check it because we know it
95  cdc.add(coma);
96  }
97 
98  public Corpus(Collection<CorpusData> cdc) throws MalformedURLException, MalformedURLException, MalformedURLException, SAXException, JexmaraldaException {
99  for (CorpusData cd : cdc) {
100  if (cd instanceof ContentData) {
101  contentdata.add((ContentData) cd);
102  } else if (cd instanceof Recording) {
103  recording.add((Recording) cd);
104  } else if (cd instanceof AdditionalData) {
105  additionaldata.add((AdditionalData) cd);
106  } else if (cd instanceof Metadata) {
107  metadata.add((Metadata) cd);
108  } else if (cd instanceof AnnotationSpecification) {
109  annotationspecification.add((AnnotationSpecification) cd);
110  } else if (cd instanceof ConfigParameters) {
111  configparameters.add((ConfigParameters) cd);
112  } else if (cd instanceof CmdiData) {
113  cmdidata.add((CmdiData) cd);
114  }
115  }
116  //and also the other collections maybe
117  }
118 
119  public Collection<CorpusData> getCorpusData() {
120  return cdc;
121  }
122 
123  public Collection<Metadata> getMetadata() {
124  return metadata;
125  }
126 
127  public Collection<ContentData> getContentdata() {
128  return contentdata;
129  }
130 
131  public Collection<Recording> getRecording() {
132  return recording;
133  }
134 
135  public Collection<AdditionalData> getAdditionaldata() {
136  return additionaldata;
137  }
138 
139  public Collection<AnnotationSpecification> getAnnotationspecification() {
140  return annotationspecification;
141  }
142 
143  public Collection<ConfigParameters> getConfigparameters() {
144  return configparameters;
145  }
146 
147  public Collection<CmdiData> getCmdidata() {
148  return cmdidata;
149  }
150 
151  public Collection<BasicTranscriptionData> getBasicTranscriptionData() {
152  return basictranscriptiondata;
153  }
154 
155  public Collection<SegmentedTranscriptionData> getSegmentedTranscriptionData() {
156  return segmentedtranscriptiondata;
157  }
158 
160  return comadata;
161  }
162 
163  public void setMetadata(Collection<Metadata> metadata) {
164  this.metadata = metadata;
165  }
166 
167  public void setContentdata(Collection<ContentData> contentdata) {
168  this.contentdata = contentdata;
169  }
170 
171  public void setRecording(Collection<Recording> recording) {
172  this.recording = recording;
173  }
174 
175  public void setAdditionaldata(Collection<AdditionalData> additionaldata) {
176  this.additionaldata = additionaldata;
177  }
178 
179  public void setAnnotationspecification(Collection<AnnotationSpecification> annotationspecification) {
180  this.annotationspecification = annotationspecification;
181  }
182 
183  public void setConfigparameters(Collection<ConfigParameters> configparameters) {
184  this.configparameters = configparameters;
185  }
186 
187  public void setCdc(Collection<CorpusData> cdc) {
188  this.cdc = cdc;
189  }
190 
191  public void setCmdidata(Collection<CmdiData> cmdidata) {
192  this.cmdidata = cmdidata;
193  }
194 
195  public void setBasicTranscriptionData(Collection<BasicTranscriptionData> basictranscriptions) {
196  this.basictranscriptiondata = basictranscriptions;
197  }
198 
199  public void setSegmentedTranscriptionData(Collection<SegmentedTranscriptionData> segmentedtranscriptions) {
200  this.segmentedtranscriptiondata = segmentedtranscriptions;
201  }
202 
203  public void setComaData(ComaData coma) {
204  this.comadata = coma;
205  }
206 
207  public URL getBaseDirectory() {
208  return basedirectory;
209  }
210 
211  public String getCorpusName() {
212  return corpusname;
213  }
214 
215  public void setCorpusName(String s) {
216  corpusname = s;
217  }
218 
219  //TODO make this more sustainable, it is very INEL specific
220  String getCorpusSentenceNumber() throws JDOMException {
221  XPath xpath = XPath.newInstance("sum(//Transcription/Description/Key[@Name = '# HIAT:u'])");
222  double DoubleValue = (double) xpath.selectSingleNode(comadata.getJdom());
223  int IntValue = (int) DoubleValue;
224  return "" + IntValue;
225  }
226 
227  String getCorpusTranscriptionNumber() throws JDOMException {
228  XPath xpath = XPath.newInstance("count(//Transcription/Description/Key[@Name = 'segmented' and text() = 'false'])");
229  double DoubleValue = (double) xpath.selectSingleNode(comadata.getJdom());
230  int IntValue = (int) DoubleValue;
231  return "" + IntValue;
232  }
233 
234  String getCorpusSpeakerNumber() throws JDOMException {
235  XPath xpath = XPath.newInstance("count(//Speaker)");
236  double DoubleValue = (double) xpath.selectSingleNode(comadata.getJdom());
237  int IntValue = (int) DoubleValue;
238  return "" + IntValue;
239  }
240 
241  String getCorpusCommunicationNumber() throws JDOMException {
242  XPath xpath = XPath.newInstance("count(//Communication)");
243  double DoubleValue = (double) xpath.selectSingleNode(comadata.getJdom());
244  int IntValue = (int) DoubleValue;
245  return "" + IntValue;
246  }
247 
248  String getCorpusWords() throws JDOMException {
249  XPath xpath = XPath.newInstance("sum(//Transcription/Description/Key[@Name = '# HIAT:w'])");
250  double DoubleValue = (double) xpath.selectSingleNode(comadata.getJdom());
251  int IntValue = (int) DoubleValue;
252  return "" + IntValue;
253  }
254 }
void setRecording(Collection< Recording > recording)
Definition: Corpus.java:171
Collection< URL > getReferencedCorpusDataURLs()
Definition: ComaData.java:113
void setConfigparameters(Collection< ConfigParameters > configparameters)
Definition: Corpus.java:183
Collection< ContentData > getContentdata()
Definition: Corpus.java:127
Collection< Metadata > getMetadata()
Definition: Corpus.java:123
void setCdc(Collection< CorpusData > cdc)
Definition: Corpus.java:187
Collection< AnnotationSpecification > getAnnotationspecification()
Definition: Corpus.java:139
void setCorpusName(String s)
Definition: Corpus.java:215
Corpus(Collection< CorpusData > cdc)
Definition: Corpus.java:98
void setAdditionaldata(Collection< AdditionalData > additionaldata)
Definition: Corpus.java:175
Collection< CorpusData > getCorpusData()
Definition: Corpus.java:119
void setContentdata(Collection< ContentData > contentdata)
Definition: Corpus.java:167
CorpusData readFileURL(URL url, Collection< Class<?extends CorpusData >> clcds)
Definition: CorpusIO.java:125
Collection< AdditionalData > getAdditionaldata()
Definition: Corpus.java:135
Corpus(ComaData coma, Collection< Class<?extends CorpusData >> clcds)
Definition: Corpus.java:51
Collection< CmdiData > getCmdidata()
Definition: Corpus.java:147
Collection< ConfigParameters > getConfigparameters()
Definition: Corpus.java:143
Collection< Recording > getRecording()
Definition: Corpus.java:131
void setMetadata(Collection< Metadata > metadata)
Definition: Corpus.java:163
void setBasicTranscriptionData(Collection< BasicTranscriptionData > basictranscriptions)
Definition: Corpus.java:195
Collection< BasicTranscriptionData > getBasicTranscriptionData()
Definition: Corpus.java:151
void setCmdidata(Collection< CmdiData > cmdidata)
Definition: Corpus.java:191
Collection< SegmentedTranscriptionData > getSegmentedTranscriptionData()
Definition: Corpus.java:155
void setSegmentedTranscriptionData(Collection< SegmentedTranscriptionData > segmentedtranscriptions)
Definition: Corpus.java:199
void setAnnotationspecification(Collection< AnnotationSpecification > annotationspecification)
Definition: Corpus.java:179
void setComaData(ComaData coma)
Definition: Corpus.java:203