hzsk-corpus-services  0.2
ExbSegmenter.java
Go to the documentation of this file.
1 
10 package de.uni_hamburg.corpora.validation;
11 
19 import java.io.IOException;
20 import java.io.File;
21 import java.io.UnsupportedEncodingException;
22 import java.net.URL;
23 import java.util.Collection;
24 import java.util.List;
25 import java.util.ArrayList;
26 import java.util.logging.Level;
27 import java.util.logging.Logger;
28 import javax.xml.parsers.ParserConfigurationException;
29 import javax.xml.transform.TransformerException;
30 import javax.xml.xpath.XPathExpressionException;
31 
32 import org.apache.commons.cli.Option;
33 import org.xml.sax.SAXException;
34 
35 import org.exmaralda.partitureditor.jexmaralda.BasicTranscription;
36 import org.exmaralda.partitureditor.jexmaralda.JexmaraldaException;
37 import org.exmaralda.partitureditor.jexmaralda.segment.AbstractSegmentation;
38 import org.exmaralda.partitureditor.fsm.FSMException;
39 import org.exmaralda.partitureditor.jexmaralda.SegmentedTranscription;
40 import org.jdom.Document;
41 import org.jdom.JDOMException;
42 
46 public class ExbSegmenter extends Checker implements CorpusFunction {
47 
48  static String filename;
49  static BasicTranscription bt;
50  static BasicTranscriptionData btd;
51  static File exbfile;
52  AbstractSegmentation segmentation;
53  static ValidatorSettings settings;
54  final String EXB_SEG = "exb-segmentation-checker";
55  String segmentationName = "GENERIC";
56  String path2ExternalFSM = "";
57 
58  public static Report check(File f) {
59  Report stats = new Report();
60  try {
61  stats = exceptionalCheck(f);
62  } catch (SAXException saxe) {
63  saxe.printStackTrace();
64  } catch (JexmaraldaException je) {
65  je.printStackTrace();
66  }
67  return stats;
68  }
69 
70  public static Report
71  exceptionalCheck(File f) throws SAXException, JexmaraldaException {
72  filename = f.getAbsolutePath();
73  bt = new BasicTranscription(filename);
74 
75  //EditErrorsDialog eed = new EditErrorsDialog(table.parent, false);
76  //eed.setOpenSaveButtonsVisible(false);
77  //eed.setTitle("Structure errors");
78  //eed.addErrorCheckerListener(table);
79  //eed.setErrorList(errorsDocument);
80  //eed.setLocationRelativeTo(table);
81  //eed.setVisible(true);
82  return new Report();
83  }
84 
85  public static void main(String[] args) {
86  settings = new ValidatorSettings("ExbSegmentationChecker",
87  "Checks Exmaralda .exb file for segmentation problems",
88  "If input is a directory, performs recursive check "
89  + "from that directory, otherwise checks input file");
90  settings.handleCommandLine(args, new ArrayList<Option>());
91  if (settings.isVerbose()) {
92  System.out.println("Checking EXB files for segmentation "
93  + "problems...");
94  }
95  for (File f : settings.getInputFiles()) {
96  if (settings.isVerbose()) {
97  System.out.println(" * " + f.getName());
98  }
99  Report stats = check(f);
100  if (settings.isVerbose()) {
101  System.out.println(stats.getFullReports());
102  } else {
103  System.out.println(stats.getSummaryLines());
104  }
105  }
106  }
107 
113  @Override
114  public Report check(CorpusData cd) throws SAXException, JexmaraldaException {
115  Report stats = new Report();
116  try {
117  stats = exceptionalCheck(cd);
118  } catch (SAXException saxe) {
119  saxe.printStackTrace();
120  } catch (JexmaraldaException je) {
121  je.printStackTrace();
122  } catch (IOException ex) {
123  stats.addException(ex, "Unknown read error");
124  } catch (ParserConfigurationException ex) {
125  stats.addException(ex, "Unknown read error");
126  }
127  return stats;
128  }
129 
135  throws SAXException, IOException, ParserConfigurationException, JexmaraldaException {
136  Report stats = new Report();
137  btd = new BasicTranscriptionData(cd.getURL());
138  if (segmentationName.equals("HIAT")) {
139  segmentation = new org.exmaralda.partitureditor.jexmaralda.segment.HIATSegmentation();
140  } else if (segmentationName.equals("GAT")) {
141  segmentation = new org.exmaralda.partitureditor.jexmaralda.segment.GATSegmentation();
142  } else if (segmentationName.equals("cGAT_MINIMAL")) {
143  segmentation = new org.exmaralda.partitureditor.jexmaralda.segment.cGATMinimalSegmentation();
144  } else if (segmentationName.equals("CHAT")) {
145  segmentation = new org.exmaralda.partitureditor.jexmaralda.segment.CHATSegmentation();
146  } else if (segmentationName.equals("CHAT_MINIMAL")) {
147  segmentation = new org.exmaralda.partitureditor.jexmaralda.segment.CHATMinimalSegmentation();
148  } else if (segmentationName.equals("DIDA")) {
149  segmentation = new org.exmaralda.partitureditor.jexmaralda.segment.DIDASegmentation();
150  } else if (segmentationName.equals("IPA")) {
151  segmentation = new org.exmaralda.partitureditor.jexmaralda.segment.IPASegmentation();
152  } else {
153  segmentation = new org.exmaralda.partitureditor.jexmaralda.segment.GenericSegmentation();
154  }
155  if (!path2ExternalFSM.equals("")) {
156  segmentation.pathToExternalFSM = path2ExternalFSM;
157  }
158  List v = segmentation.getSegmentationErrors(btd.getEXMARaLDAbt());
159  for (Object o : v) {
160  FSMException fsme = (FSMException) o;
161  String text = fsme.getMessage();
162  stats.addCritical(EXB_SEG, cd, text);
163  exmaError.addError(EXB_SEG, filename, fsme.getTierID(), fsme.getTLI(), false, text);
164  }
165  return stats;
166  }
167 
171  @Override
172  public Report fix(CorpusData cd) throws SAXException, JexmaraldaException {
173  Report stats = new Report();
174  try {
175  stats = exceptionalFix(cd);
176  } catch (SAXException saxe) {
177  saxe.printStackTrace();
178  } catch (JexmaraldaException je) {
179  je.printStackTrace();
180  } catch (IOException ex) {
181  stats.addException(ex, "Unknown read error");
182  } catch (JDOMException ex) {
183  stats.addException(ex, "Unknown JDOM error");
184  } catch (FSMException ex) {
185  stats.addException(ex, "Unknown FSM error");
186  } catch (TransformerException ex) {
187  stats.addException(ex, "Unknown Transformer error");
188  } catch (ParserConfigurationException ex) {
189  stats.addException(ex, "Unknown Parser error");
190  } catch (XPathExpressionException ex) {
191  stats.addException(ex, "Unknown XPath error");
192  }
193  return stats;
194  }
195 
196 
197  public Report exceptionalFix(CorpusData cd) throws SAXException, JDOMException, IOException, JexmaraldaException, FSMException, TransformerException, ParserConfigurationException, UnsupportedEncodingException, XPathExpressionException {
198  Report stats = new Report();
199  btd = new BasicTranscriptionData(cd.getURL());
200  if (segmentationName.equals("HIAT")) {
201  segmentation = new org.exmaralda.partitureditor.jexmaralda.segment.HIATSegmentation();
202  } else if (segmentationName.equals("GAT")) {
203  segmentation = new org.exmaralda.partitureditor.jexmaralda.segment.GATSegmentation();
204  } else if (segmentationName.equals("cGAT_MINIMAL")) {
205  segmentation = new org.exmaralda.partitureditor.jexmaralda.segment.cGATMinimalSegmentation();
206  } else if (segmentationName.equals("CHAT")) {
207  segmentation = new org.exmaralda.partitureditor.jexmaralda.segment.CHATSegmentation();
208  } else if (segmentationName.equals("CHAT_MINIMAL")) {
209  segmentation = new org.exmaralda.partitureditor.jexmaralda.segment.CHATMinimalSegmentation();
210  } else if (segmentationName.equals("DIDA")) {
211  segmentation = new org.exmaralda.partitureditor.jexmaralda.segment.DIDASegmentation();
212  } else if (segmentationName.equals("IPA")) {
213  segmentation = new org.exmaralda.partitureditor.jexmaralda.segment.IPASegmentation();
214  } else {
215  segmentation = new org.exmaralda.partitureditor.jexmaralda.segment.GenericSegmentation();
216  }
217  if (!path2ExternalFSM.equals("")) {
218  segmentation.pathToExternalFSM = path2ExternalFSM;
219  }
220  List v = segmentation.getSegmentationErrors(btd.getEXMARaLDAbt());
221  if (v.isEmpty()){
222  SegmentedTranscription st = segmentation.BasicToSegmented(btd.getEXMARaLDAbt());
223  URL url = new URL(cd.getParentURL() + cd.getFilenameWithoutFileEnding() + "_s.exs");
224  CorpusIO cio = new CorpusIO();
225  Document doc = TypeConverter.String2JdomDocument(st.toXML());
226  cio.write(doc, url);
227  stats.addCorrect(EXB_SEG, cd, "Exs successfully created at " + url);
228  } else {
229  for (Object o : v) {
230  FSMException fsme = (FSMException) o;
231  String text = fsme.getMessage();
232  stats.addCritical(EXB_SEG, cd, text);
233  exmaError.addError(EXB_SEG, filename, fsme.getTierID(), fsme.getTLI(), false, text);
234  }
235  }
236  return stats;
237  }
238 
244  @Override
245  public Collection<Class<? extends CorpusData>> getIsUsableFor() {
246  try {
247  Class cl = Class.forName("de.uni_hamburg.corpora.BasicTranscriptionData");
248  IsUsableFor.add(cl);
249  } catch (ClassNotFoundException ex) {
250  Logger.getLogger(ExbSegmenter.class.getName()).log(Level.SEVERE, null, ex);
251  }
252  return IsUsableFor;
253  }
254 
255  public void setSegmentation(String s) {
256  segmentationName = s;
257  }
258 
259  public void setExternalFSM(String s) {
260  path2ExternalFSM = s;
261  }
262 }
Collection< Class<?extends CorpusData > > getIsUsableFor()
CommandLine handleCommandLine(String[] args, List< Option > extraOptions)
void addCritical(String description)
Definition: Report.java:101
void addCorrect(String statId, String description)
Definition: Report.java:205
static org.jdom.Document String2JdomDocument(String stringRespresentingDocument)
void addException(Throwable e, String description)
Definition: Report.java:275