corpus-services  1.0
AddCSVMetadataToComa.java
Go to the documentation of this file.
1 package de.uni_hamburg.corpora.conversion;
2 
3 import com.opencsv.CSVReader;
8 import org.exmaralda.partitureditor.jexmaralda.JexmaraldaException;
9 import org.xml.sax.SAXException;
10 import java.io.BufferedWriter;
11 import java.io.FileNotFoundException;
12 import java.io.FileOutputStream;
13 import java.io.FileReader;
14 import java.io.IOException;
15 import java.io.OutputStreamWriter;
16 import java.io.Writer;
17 import java.net.URISyntaxException;
18 import java.security.NoSuchAlgorithmException;
19 import java.util.Arrays;
20 import java.util.Collection;
21 import java.util.List;
22 import javax.xml.parsers.ParserConfigurationException;
23 import javax.xml.transform.TransformerException;
24 import javax.xml.xpath.XPathExpressionException;
25 import org.exmaralda.partitureditor.fsm.FSMException;
26 import org.jdom.Document;
27 import org.jdom.Element;
28 import org.jdom.JDOMException;
29 import org.jdom.output.XMLOutputter;
30 import org.jdom.xpath.XPath;
31 
41 public class AddCSVMetadataToComa extends Converter implements CorpusFunction {
42 
43  private String comaFile;
44  private String csvFile;
45  private Document coma;
46  private String SpeakerOrCommunication;
47  private Boolean IsSpeaker;
48 
/**
 * Creates an instance that works on the coma file at the given path.
 *
 * The csv path and the speaker/communication mode are not set by this constructor; supply them
 * through {@link #setCSVFilePath(String)} and {@link #setSpeakerOrCommunication(String)}.
 *
 * @param corpusPath path of the coma file the metadata will be inserted into
 */
public AddCSVMetadataToComa(String corpusPath) {
    // Store the path like the three-argument constructor does; it used to be silently dropped.
    this.comaFile = corpusPath;
}
54 
56  }
57 
/**
 * Creates an instance with the coma path, the csv path and the insertion mode already set.
 *
 * @param corpusPath path of the coma file that receives the metadata
 * @param csvPath path of the csv file that holds the metadata
 * @param SpeakerOrCommunication {@code "speaker"} or {@code "communication"}; any other value
 *        leaves the mode flag unset
 */
public AddCSVMetadataToComa(String corpusPath, String csvPath, String SpeakerOrCommunication) {
    comaFile = corpusPath;
    csvFile = csvPath;
    this.SpeakerOrCommunication = SpeakerOrCommunication;
    // the two accepted values are mutually exclusive, so at most one branch applies
    if (SpeakerOrCommunication.equals("speaker")) {
        IsSpeaker = Boolean.TRUE;
    } else if (SpeakerOrCommunication.equals("communication")) {
        IsSpeaker = Boolean.FALSE;
    }
}
72 
78  public Report check(CorpusData cd) throws SAXException, JexmaraldaException {
79  Report stats = new Report();
80  try {
81  stats = function(cd);
82  } catch (ParserConfigurationException pce) {
83  stats.addException(pce, function, cd, "Unknown parsing error");
84  } catch (IOException ioe) {
85  stats.addException(ioe, function, cd, "Unknown file reading error");
86  } catch (JDOMException ex) {
87  stats.addException(ex, function, cd, "Unknown JDOM error");
88  }
89  return stats;
90  }
91 
96  public Report function(CorpusData cd)
97  throws SAXException, IOException, ParserConfigurationException, JexmaraldaException, JDOMException {
98  Report stats = new Report(); //create a new report
99  this.comaFile = cd.getURL().getPath(); // set the path of the coma file
100  List<String[]> allElements = readData(); // read the data from the csv file
101  // put the elements in the report
102  for (String[] row : allElements) {
103  System.out.println(Arrays.toString(row));
104  stats.addNote(function, cd, Arrays.toString(row));
105  }
106  System.out.println(Arrays.toString(allElements.get(0)));
107  stats.addNote(function, cd, Arrays.toString(allElements.get(0)));
108  System.out.println(allElements.get(0)[0]);
109  stats.addNote(function, cd, allElements.get(0)[0]);
110 
111  coma = org.exmaralda.common.jdomutilities.IOUtilities.readDocumentFromLocalFile(comaFile);
112  //add the key and value to speaker/description or communication/description
113  for (int i = 1; i < allElements.size(); i++) {
114  for (int a = 1; a < allElements.get(i).length; a++) {
115  if (IsSpeaker) {
116  //the place is the xpath where it should be inserted
117  String place = "//Speaker[Sigle/text()=\"" + allElements.get(i)[0] + "\"]/Description";
118  System.out.println(place);
119  stats.addNote(function, cd, place);
120  XPath p = XPath.newInstance(place);
121  //System.out.println(p.selectSingleNode(coma));
122  Object o = p.selectSingleNode(coma);
123  if (o != null) {
124  Element desc = (Element) o;
125  //the new Key element that is inserted
126  Element key = new Element("Key");
127  desc.addContent(key);
128  key.setAttribute("Name", allElements.get(0)[a]);
129  System.out.println(desc.getAttributes());
130  stats.addNote(function, cd, Arrays.toString(desc.getAttributes().toArray()));
131  key.setText(allElements.get(i)[a]);
132  }
133  }
134  if (!IsSpeaker) {
135  //the place is the xpath where it should be inserted
136  String place = "//Communication[@Name=\"" + allElements.get(i)[0] + "\"]/Description";
137  System.out.println(place);
138  stats.addNote(function, cd, place);
139  XPath p = XPath.newInstance(place);
140  System.out.println(p.selectSingleNode(coma));
141  stats.addNote(function, cd, p.selectSingleNode(coma).toString());
142  Object o = p.selectSingleNode(coma);
143  if (o != null) {
144  Element desc = (Element) o;
145  //the new Key element that is inserted
146  Element key = new Element("Key");
147  desc.addContent(key);
148  key.setAttribute("Name", allElements.get(0)[a]);
149  System.out.println(desc.getAttributes());
150  stats.addNote(function, cd, Arrays.toString(desc.getAttributes().toArray()));
151  key.setText(allElements.get(i)[a]);
152  }
153  }
154  }
155  }
156  //save the coma file!
157  Writer fileWriter = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(comaFile), "UTF8"));
158  XMLOutputter serializer = new XMLOutputter();
159  serializer.output(coma, fileWriter);
160  stats.addNote(function, cd, "The data in the csv file has been added into the coma.");
161  return stats;
162  }
163 
167  public void inputData() throws IOException, JDOMException {
169  }
170 
174  public List<String[]> readData() throws FileNotFoundException, IOException {
175  CSVReader reader = new CSVReader(new FileReader(csvFile), ';');
176  List<String[]> allElements = null;
177  allElements = reader.readAll();
178  return allElements;
179  }
180 
184  public void insertDataIntoComa(List<String[]> allElements) throws JDOMException, IOException {
185 
186  for (String[] row : allElements) {
187  System.out.println(Arrays.toString(row));
188 
189  //first row = keys
190  //other rows = values
191  //first column = communication or speaker name
192  }
193  System.out.println(Arrays.toString(allElements.get(0)));
194 
195  System.out.println(allElements.get(0)[0]);
196  coma = org.exmaralda.common.jdomutilities.IOUtilities.readDocumentFromLocalFile(comaFile);
197  //add the key and value to speaker/description or communication/description
198  for (int i = 1; i < allElements.size(); i++) {
199  for (int a = 1; a < allElements.get(i).length; a++) {
200  if (IsSpeaker) {
201  //the place is the xpath where it should be inserted
202  String place = "//Speaker[Sigle/text()=\"" + allElements.get(i)[0] + "\"]/Description";
203  System.out.println(place);
204  XPath p = XPath.newInstance(place);
205  //System.out.println(p.selectSingleNode(coma));
206  Object o = p.selectSingleNode(coma);
207  if (o != null) {
208  Element desc = (Element) o;
209  //the new Key element that is inserted
210  Element key = new Element("Key");
211  desc.addContent(key);
212  key.setAttribute("Name", allElements.get(0)[a]);
213  System.out.println(desc.getAttributes());
214  key.setText(allElements.get(i)[a]);
215  }
216  }
217  if (!IsSpeaker) {
218  //the place is the xpath where it should be inserted
219  String place = "//Communication[@Name=\"" + allElements.get(i)[0] + "\"]/Description";
220  System.out.println(place);
221  XPath p = XPath.newInstance(place);
222  System.out.println(p.selectSingleNode(coma));
223  Object o = p.selectSingleNode(coma);
224  if (o != null) {
225  Element desc = (Element) o;
226  //the new Key element that is inserted
227  Element key = new Element("Key");
228  desc.addContent(key);
229  key.setAttribute("Name", allElements.get(0)[a]);
230  System.out.println(desc.getAttributes());
231  key.setText(allElements.get(i)[a]);
232  }
233  }
234  }
235  }
236  //save the coma file!
237  Writer fileWriter = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(comaFile), "UTF8"));
238  XMLOutputter serializer = new XMLOutputter();
239  serializer.output(coma, fileWriter);
240  }
241 
242  //@Override
243  public String getXpathToTranscriptions() {
244  throw new UnsupportedOperationException("Not supported yet."); //To change body of generated methods, choose Tools | Templates.
245 
246  }
247 
248  //@Override
249  public void process(String filename) throws JexmaraldaException, SAXException {
250  throw new UnsupportedOperationException("Not supported yet."); //To change body of generated methods, choose Tools | Templates.
251  }
252 
253 
254  // sets the CSV file path which is provided as input
255  public void setCSVFilePath(String path) {
256  this.csvFile = path;
257  }
258 
259  // set what sort of data the csv file contain which will eventually be added to the coma
260  public void setSpeakerOrCommunication(String spOrCommInput) {
261  if (spOrCommInput.equals("speaker")) {
262  IsSpeaker = true;
263  }
264  if (spOrCommInput.equals("communication")) {
265  IsSpeaker = false;
266  }
267  }
268 
274  @Override
275  public Collection<Class<? extends CorpusData>> getIsUsableFor() {
276  try {
277  Class cl = Class.forName("de.uni_hamburg.corpora.ComaData");
278  IsUsableFor.add(cl);
279  } catch (ClassNotFoundException ex) {
280  report.addException(ex, "unknown class not found error");
281  }
282  return IsUsableFor;
283  }
284 
285  @Override
286  public String getDescription() {
287  String description = "this class can be used from the command line to insert data in a csv file "
288  + " into an existing coma file there needs to be a header with information of the "
289  + " information in the columns the first line has to consist of the sigle of the "
290  + " speaker or name of the communication the metadata should be assigned to";
291  return description;
292  }
293 
294  @Override
295  public Report function(Corpus c) throws Exception, NoSuchAlgorithmException, ClassNotFoundException, FSMException, URISyntaxException, SAXException, IOException, ParserConfigurationException, JexmaraldaException, TransformerException, XPathExpressionException, JDOMException {
296  throw new UnsupportedOperationException("Not supported yet."); //To change body of generated methods, choose Tools | Templates.
297  }
298 
299 
300 }
Collection< Class<?extends CorpusData > > getIsUsableFor()
void addNote(String statId, String description)
Definition: Report.java:245
AddCSVMetadataToComa(String corpusPath, String csvPath, String SpeakerOrCommunication)
void addException(Throwable e, String description)
Definition: Report.java:287