corpus-services  1.0
ListHTML.java
Go to the documentation of this file.
1 /*
2  * To change this license header, choose License Headers in Project Properties.
3  * To change this template file, choose Tools | Templates
4  * and open the template in the editor.
5  */
6 package de.uni_hamburg.corpora.visualization;
7 
15 import javax.xml.transform.TransformerConfigurationException;
16 import javax.xml.transform.TransformerException;
17 import org.exmaralda.common.corpusbuild.FileIO;
18 import org.exmaralda.common.jdomutilities.IOUtilities;
19 import org.exmaralda.partitureditor.fsm.FSMException;
20 import org.exmaralda.partitureditor.jexmaralda.ListTranscription;
21 import org.exmaralda.partitureditor.jexmaralda.SegmentedTranscription;
22 import org.exmaralda.partitureditor.jexmaralda.segment.CHATSegmentation;
23 import org.exmaralda.partitureditor.jexmaralda.segment.GATSegmentation;
24 import org.exmaralda.partitureditor.jexmaralda.segment.GenericSegmentation;
25 import org.exmaralda.partitureditor.jexmaralda.segment.IPASegmentation;
26 import org.exmaralda.partitureditor.jexmaralda.segment.SegmentedToListInfo;
27 import org.jdom.Document;
28 import org.xml.sax.SAXException;
29 import java.io.IOException;
30 import java.io.PrintWriter;
31 import java.io.UnsupportedEncodingException;
32 import java.net.URL;
33 import java.nio.file.Files;
34 import java.nio.file.Paths;
35 import java.util.Collection;
36 import java.util.logging.Level;
37 import java.util.logging.Logger;
38 import java.util.regex.Matcher;
39 import java.util.regex.Pattern;
40 import org.exmaralda.partitureditor.jexmaralda.segment.HIATSegmentation;
41 
49 public class ListHTML extends Visualizer {
50 
51  private String utteranceList = null;
52 
53  // resources loaded from directory supplied in pom.xml
54  private static final String STYLESHEET_PATH = "/xsl/HIAT2ListHTML.xsl";
55  private static final String GAT_STYLESHEET_PATH = "/xsl/GAT2ListHTML.xsl";
56  private static final String GENERIC_STYLESHEET_PATH = "/xsl/Generic2ListHTML.xsl";
57  private static final String SERVICE_NAME = "ListHTML";
58  //static String INEL_FSM = "/de/uni_hamburg/corpora/utilities/segmentation/INEL_Segmentation_FSM.xml";
59  //static String INEL_FSM = "/org/exmaralda/partitureditor/fsm/xml/HIAT_UtteranceWord.xml";
60 
61  URL targeturl;
62  CorpusData cd;
63  String corpusname = "";
64  String path2ExternalFSM = "";
65  String segmentationAlgorithm = "GENERIC";
66 
/**
 * Creates a visualizer that keeps the field defaults; the input is supplied
 * later through one of the {@code function(...)} methods.
 */
public ListHTML() {
    super();
}
69 
/**
 * Creates a visualizer and immediately renders the given basic transcription.
 * The constructor never throws on a failed rendering: the failure is recorded
 * in this instance's report and {@code getHTML()} then has no result to offer.
 *
 * @param btAsString    the basic transcription (EXB) as an XML string
 * @param segmAlgorithm name of the segmentation algorithm to apply
 */
public ListHTML(String btAsString, String segmAlgorithm) {
    try {
        createFromBasicTranscription(btAsString, segmAlgorithm);
    } catch (Exception ex) {
        // Keep the failure on the instance-level report instead of a local
        // Report that is discarded as soon as the constructor returns.
        // NOTE(review): assumes the inherited 'report' is already initialised
        // when this constructor body runs - confirm in Visualizer.
        report.addException(SERVICE_NAME, ex, "Exception");
    }
}
78 
85  private String createFromBasicTranscription(String btAsString, String segmAlgorithm) throws Exception {
86 
87  basicTranscriptionString = btAsString;
89  segmentationAlgorithm = segmAlgorithm;
90 
91  String result = null;
92  // create an utterance list as XML basis for transformation
93  createUtteranceList();
94  if (getUtteranceList() != null) {
95  // get the XSLT stylesheet
96  String xsl = "";
97  if (segmAlgorithm.equals("HIAT")) {
99  getClass().getResourceAsStream(STYLESHEET_PATH));
100  } else if (segmAlgorithm.equals("GAT")) {
102  getClass().getResourceAsStream(GAT_STYLESHEET_PATH));
103  } else if (segmAlgorithm.equals("GENERIC")) {
105  getClass().getResourceAsStream(GENERIC_STYLESHEET_PATH));
106  } else {
108  getClass().getResourceAsStream(GENERIC_STYLESHEET_PATH));
109  }
110 
111  // create XSLTransformer and set the parameters
112  XSLTransformer xt = new XSLTransformer();
113  xt.setParameter("EMAIL_ADDRESS", EMAIL_ADDRESS);
114  xt.setParameter("WEBSERVICE_NAME", SERVICE_NAME + " (" + segmAlgorithm + ")");
115  xt.setParameter("HZSK_WEBSITE", HZSK_WEBSITE);
116  xt.setParameter("TRANSCRIPTION_NAME", cd.getFilenameWithoutFileEnding());
117  //xt.setParameter("CORPUS_NAME", cd.getFilenameWithoutFileEnding());
118  if (!corpusname.equals("")) {
119  xt.setParameter("CORPUS_NAME", corpusname);
120  }
121  // perform XSLT transformation
122  result = xt.transform(getUtteranceList(), xsl);
123  if (result != null) {
124 
125  // replace JS/CSS placeholders from XSLT output
126  Pattern regex = Pattern.compile("(<hzsk\\-pi:include( xmlns:hzsk\\-pi=\"https://corpora\\.uni\\-hamburg\\.de/hzsk/xmlns/processing\\-instruction\")?>([^<]+)</hzsk\\-pi:include>)", Pattern.DOTALL);
127  Matcher m = regex.matcher(result);
128  StringBuffer sb = new StringBuffer();
129  while (m.find()) {
130  String insertion = TypeConverter.InputStream2String(getClass().getResourceAsStream(m.group(3)));
131  m.appendReplacement(sb, m.group(0).replaceFirst(Pattern.quote(m.group(1)), insertion));
132  }
133  m.appendTail(sb);
134  result = sb.toString();
135  setHTML(result);
136  }
137  }
138  //System.out.println(result);
139  return result;
140  }
141 
149  private void createUtteranceList() throws Exception {
150 
151  String list = null;
152 
153  switch (segmentationAlgorithm) {
154  case "HIAT": {
155  HIATSegmentation hS = new HIATSegmentation();
156  if (!path2ExternalFSM.equals("")) {
157  hS.pathToExternalFSM = path2ExternalFSM;
158  }
159  ListTranscription lt = hS.BasicToUtteranceList(basicTranscription);
160  final Document listXML = FileIO.readDocumentFromString(lt.toXML());
161  list = IOUtilities.documentToString(listXML);
162  break;
163  }
164  case "CHAT": {
165  CHATSegmentation cS = new CHATSegmentation();
166  if (!path2ExternalFSM.equals("")) {
167  cS.pathToExternalFSM = path2ExternalFSM;
168  }
169  ListTranscription lt = cS.BasicToUtteranceList(basicTranscription);
170  final Document listXML = FileIO.readDocumentFromString(lt.toXML());
171  list = IOUtilities.documentToString(listXML);
172  break;
173  }
174  case "GAT": {
175  GATSegmentation gS = new GATSegmentation();
176  if (!path2ExternalFSM.equals("")) {
177  gS.pathToExternalFSM = path2ExternalFSM;
178  }
179  ListTranscription lt = gS.BasicToIntonationUnitList(basicTranscription);
180  final Document listXML = FileIO.readDocumentFromString(lt.toXML());
181  list = IOUtilities.documentToString(listXML);
182  break;
183  }
184  case "IPA": {
185  IPASegmentation ipaS = new IPASegmentation();
186  if (!path2ExternalFSM.equals("")) {
187  ipaS.pathToExternalFSM = path2ExternalFSM;
188  }
189  SegmentedTranscription st = ipaS.BasicToSegmented(basicTranscription);
190  ListTranscription lt = st.toListTranscription(new SegmentedToListInfo(st, SegmentedToListInfo.TURN_SEGMENTATION));
191  final Document listXML = FileIO.readDocumentFromString(lt.toXML());
192  list = IOUtilities.documentToString(listXML);
193  break;
194  }
195  case "Generic": {
196  GenericSegmentation genS = new GenericSegmentation();
197  if (!path2ExternalFSM.equals("")) {
198  genS.pathToExternalFSM = path2ExternalFSM;
199  }
200  SegmentedTranscription st = genS.BasicToSegmented(basicTranscription);
201  ListTranscription lt = st.toListTranscription(new SegmentedToListInfo(st, SegmentedToListInfo.TURN_SEGMENTATION));
202  final Document listXML = FileIO.readDocumentFromString(lt.toXML());
203  list = IOUtilities.documentToString(listXML);
204  break;
205  }
206  default:
207  throw new Exception("createUtteranceList - unsupported parameter segmAlgorithm='" + segmentationAlgorithm + "'");
208  }
209 
210  setUtteranceList(list);
211  }
212 
220  public void setUtteranceList(String u) {
221  utteranceList = u;
222  }
223 
231  public String getUtteranceList() {
232  return utteranceList;
233  }
234 
235  public static void main(String[] args) {
236  ListHTML lhtml = new ListHTML();
237  Report stats = lhtml.doMain(args);
238  System.out.println(stats.getSummaryLines());
239  System.out.println(stats.getErrorReports());
240  }
241 
242  @Override
243  public Report function(CorpusData ccd) {
244  Report stats = new Report();
245  try {
246  cd = ccd;
247  String result = createFromBasicTranscription(cd.toUnformattedString(), segmentationAlgorithm);
248  targeturl = new URL(cd.getParentURL() + cd.getFilenameWithoutFileEnding() + "_list.html");
249  CorpusIO cio = new CorpusIO();
250  if (result == null) {
251  stats.addCritical(SERVICE_NAME, cd, "Visualization of file was not possible!");
252  } else {
253  cio.write(result, targeturl);
254  stats.addCorrect(SERVICE_NAME, cd, "Visualization of file was successfully saved at " + targeturl);
255  }
256  } catch (IOException ex) {
257  stats.addException(SERVICE_NAME, ex, "IO Exception");
258  } catch (Exception ex) {
259  stats.addException(SERVICE_NAME, ex, "Exception");
260  }
261  return stats;
262  }
263 
264  @Override
265  public Report function(Corpus co) throws TransformerException, TransformerConfigurationException, IOException, SAXException {
266  Report stats = new Report();
267  Collection<BasicTranscriptionData> btc = co.getBasicTranscriptionData();
268  for (BasicTranscriptionData bt : btc) {
269  stats.merge(function(bt));
270  }
271  return stats;
272  }
273 
274  @Override
275  public Collection<Class<? extends CorpusData>> getIsUsableFor() {
276  try {
277  Class cl = Class.forName("de.uni_hamburg.corpora.BasicTranscriptionData");
278  IsUsableFor.add(cl);
279  } catch (ClassNotFoundException ex) {
280  report.addException(ex, "Usable class not found.");
281  }
282  return IsUsableFor;
283  }
284 
285  public Report
286  doMain(String[] args) {
287  try {
288  if (args.length < 2) {
289  System.out.println("Usage: " + ListHTML.class
290  .getName()
291  + "EXB SEGMENTATION [HTML]");
292  System.out.println("\nSEGMENTATION is one of: "
293  + " HIAT, CHAT, IPA, Generic");
294  System.exit(1);
295  } else {
296  byte[] encoded = Files.readAllBytes(Paths.get(args[0]));
297  String btString = new String(encoded, "UTF-8");
298  ListHTML list = new ListHTML(btString, args[1]);
299  if (args.length >= 3) {
300  PrintWriter htmlOut = new PrintWriter(args[2]);
301  htmlOut.print(list.getHTML());
302  htmlOut.close();
303  } else {
304  System.out.println(list.getHTML());
305  }
306  }
307  } catch (UnsupportedEncodingException uee) {
308  uee.printStackTrace();
309  } catch (IOException ioe) {
310  ioe.printStackTrace();
311  }
312  return report;
313  }
314 
315  public URL getTargetURL() {
316  return targeturl;
317  }
318 
319  public String getSegmentation() {
320  return segmentationAlgorithm;
321  }
322 
323  public void setSegmentation(String s) {
324  segmentationAlgorithm = s;
325  }
326 
327  public void setCorpusName(String s) {
328  corpusname = s;
329  }
330 
331  public void setExternalFSM(String s) {
332  path2ExternalFSM = s;
333  }
334 
335  @Override
336  public String getDescription() {
337  String description = "This class creates an html visualization "
338  + "in the List format from an exb. ";
339  return description;
340  }
341 
342 }
void setParameter(String parameterName, Object parameterValue)
void merge(Report sr)
Definition: Report.java:73
void addCritical(String description)
Definition: Report.java:104
static BasicTranscription String2BasicTranscription(String btAsString)
static String InputStream2String(InputStream is)
Collection< Class<?extends CorpusData > > getIsUsableFor()
Definition: ListHTML.java:275
void addCorrect(String statId, String description)
Definition: Report.java:217
ListHTML(String btAsString, String segmAlgorithm)
Definition: ListHTML.java:70
void addException(Throwable e, String description)
Definition: Report.java:287