6 package de.uni_hamburg.corpora.visualization;
15 import javax.xml.
transform.TransformerConfigurationException;
16 import javax.xml.transform.TransformerException;
17 import org.exmaralda.common.corpusbuild.FileIO;
18 import org.exmaralda.common.jdomutilities.IOUtilities;
19 import org.exmaralda.partitureditor.fsm.FSMException;
20 import org.exmaralda.partitureditor.jexmaralda.ListTranscription;
21 import org.exmaralda.partitureditor.jexmaralda.SegmentedTranscription;
22 import org.exmaralda.partitureditor.jexmaralda.segment.CHATSegmentation;
23 import org.exmaralda.partitureditor.jexmaralda.segment.GATSegmentation;
24 import org.exmaralda.partitureditor.jexmaralda.segment.GenericSegmentation;
25 import org.exmaralda.partitureditor.jexmaralda.segment.IPASegmentation;
26 import org.exmaralda.partitureditor.jexmaralda.segment.SegmentedToListInfo;
27 import org.jdom.Document;
28 import org.xml.sax.SAXException;
29 import java.io.IOException;
30 import java.io.PrintWriter;
31 import java.io.UnsupportedEncodingException;
33 import java.nio.file.Files;
34 import java.nio.file.Paths;
35 import java.util.Collection;
36 import java.util.logging.Level;
37 import java.util.logging.Logger;
38 import java.util.regex.Matcher;
39 import java.util.regex.Pattern;
40 import org.exmaralda.partitureditor.jexmaralda.segment.HIATSegmentation;
// Utterance list produced for the transcription; starts out as null.
51 private String utteranceList = null;
// Classpath resource used as stylesheet when the algorithm name equals "HIAT".
54 private static final String STYLESHEET_PATH =
"/xsl/HIAT2ListHTML.xsl";
// Classpath resource used as stylesheet when the algorithm name equals "GAT".
55 private static final String GAT_STYLESHEET_PATH =
"/xsl/GAT2ListHTML.xsl";
// Classpath resource used as stylesheet when the algorithm name equals "GENERIC".
56 private static final String GENERIC_STYLESHEET_PATH =
"/xsl/Generic2ListHTML.xsl";
// Name of this service; used as prefix of the "WEBSERVICE_NAME" stylesheet
// parameter and as the identifier passed to the report entries.
57 private static final String SERVICE_NAME =
"ListHTML";
// Corpus name; the empty string is treated as "not set" by the code that reads it.
63 String corpusname =
"";
// Path to an external FSM; only copied to a segmentation object when non-empty.
64 String path2ExternalFSM =
"";
// Name of the segmentation algorithm; defaults to "GENERIC" and is the
// switch key in createUtteranceList().
65 String segmentationAlgorithm =
"GENERIC";
/**
 * Creates a ListHTML object by handing both arguments straight to
 * createFromBasicTranscription(String, String). The String returned by that
 * call is not used here.
 *
 * @param btAsString    forwarded unchanged to createFromBasicTranscription
 * @param segmAlgorithm forwarded unchanged to createFromBasicTranscription
 */
70 public ListHTML(String btAsString, String segmAlgorithm) {
73 createFromBasicTranscription(btAsString, segmAlgorithm);
74 }
// Every Exception raised by the call above is caught here, so the constructor
// itself does not propagate it.
// NOTE(review): the handler body is not visible in this excerpt - confirm the
// exception is at least logged and not silently dropped.
catch (Exception ex) {
/**
 * Builds the list visualization for a basic transcription.
 *
 * Steps visible in this excerpt: store the requested algorithm name in the
 * segmentationAlgorithm field, call createUtteranceList(), choose an XSL
 * stylesheet resource by algorithm name, set the "WEBSERVICE_NAME" stylesheet
 * parameter and, if a result exists, rewrite hzsk-pi:include placeholder
 * elements in it.
 * NOTE(review): several original lines (transformer setup, the actual
 * transformation, the return statement) are not visible here.
 *
 * @param btAsString    the transcription as a string (its use is not visible
 *                      in this excerpt)
 * @param segmAlgorithm algorithm name; "HIAT", "GAT" and "GENERIC" are compared
 *                      explicitly below
 * @return presumably the transformed result string - return statement not
 *         visible; a caller in this file checks the result for null
 * @throws Exception declared broadly; createUtteranceList() throws Exception
 */
85 private String createFromBasicTranscription(String btAsString, String segmAlgorithm)
throws Exception {
// Remember the algorithm so that createUtteranceList() switches on it.
89 segmentationAlgorithm = segmAlgorithm;
93 createUtteranceList();
// Stylesheet selection by algorithm name.
97 if (segmAlgorithm.equals(
"HIAT")) {
99 getClass().getResourceAsStream(STYLESHEET_PATH));
100 }
else if (segmAlgorithm.equals(
"GAT")) {
102 getClass().getResourceAsStream(GAT_STYLESHEET_PATH));
103 }
else if (segmAlgorithm.equals(
"GENERIC")) {
105 getClass().getResourceAsStream(GENERIC_STYLESHEET_PATH));
// Second use of the generic stylesheet - presumably the fallback branch for any
// other algorithm name (branch header not visible; TODO confirm).
108 getClass().getResourceAsStream(GENERIC_STYLESHEET_PATH));
// The stylesheet receives the service name followed by the algorithm in parentheses.
114 xt.
setParameter(
"WEBSERVICE_NAME", SERVICE_NAME +
" (" + segmAlgorithm +
")");
// An empty corpus name means "not set"; the guarded statement is not visible.
118 if (!corpusname.equals(
"")) {
// Placeholder rewriting only runs when there is a result.
123 if (result != null) {
// Matches a complete hzsk-pi:include element with an optional xmlns:hzsk-pi
// attribute: group 1 is the whole element, group 3 its text content.
// DOTALL has no effect here because every dot in the pattern is escaped.
// NOTE(review): the pattern is compiled on every call; it could be a
// static final constant.
126 Pattern regex = Pattern.compile(
"(<hzsk\\-pi:include( xmlns:hzsk\\-pi=\"https://corpora\\.uni\\-hamburg\\.de/hzsk/xmlns/processing\\-instruction\")?>([^<]+)</hzsk\\-pi:include>)", Pattern.DOTALL);
127 Matcher m = regex.matcher(result);
128 StringBuffer sb =
new StringBuffer();
// Replaces the matched element with `insertion` (computed on a line that is not
// visible here).
// NOTE(review): both String.replaceFirst and Matcher.appendReplacement give
// '$' and '\' a special meaning in the replacement text; if `insertion` can
// contain either character, wrap it with Matcher.quoteReplacement - confirm.
131 m.appendReplacement(sb, m.group(0).replaceFirst(Pattern.quote(m.group(1)), insertion));
// NOTE(review): a call to m.appendTail(sb) is expected before this line but is
// not visible in the excerpt - confirm it exists.
134 result = sb.toString();
/**
 * Creates the utterance list XML for the algorithm named by the
 * segmentationAlgorithm field.
 *
 * Each visible branch follows the same steps: create the matching
 * segmentation object, hand it the external FSM path when one is set, and
 * serialize the resulting list transcription to a string via a JDOM Document.
 * NOTE(review): the case labels and the segmentation calls themselves are not
 * visible in this excerpt; the branches are identified below by the
 * segmentation class they instantiate.
 * NOTE(review): the five branches differ only in the segmentation class - a
 * shared helper would remove the repetition.
 *
 * @throws Exception for an unsupported segmentationAlgorithm value (see the
 *                   last statement); other sources are not visible here
 */
149 private void createUtteranceList()
throws Exception {
153 switch (segmentationAlgorithm) {
// Branch using HIATSegmentation.
155 HIATSegmentation hS =
new HIATSegmentation();
// Only override the FSM when an external path has been configured.
156 if (!path2ExternalFSM.equals(
"")) {
157 hS.pathToExternalFSM = path2ExternalFSM;
160 final Document listXML = FileIO.readDocumentFromString(lt.toXML());
161 list = IOUtilities.documentToString(listXML);
// Branch using CHATSegmentation.
165 CHATSegmentation cS =
new CHATSegmentation();
166 if (!path2ExternalFSM.equals(
"")) {
167 cS.pathToExternalFSM = path2ExternalFSM;
170 final Document listXML = FileIO.readDocumentFromString(lt.toXML());
171 list = IOUtilities.documentToString(listXML);
// Branch using GATSegmentation.
175 GATSegmentation gS =
new GATSegmentation();
176 if (!path2ExternalFSM.equals(
"")) {
177 gS.pathToExternalFSM = path2ExternalFSM;
180 final Document listXML = FileIO.readDocumentFromString(lt.toXML());
181 list = IOUtilities.documentToString(listXML);
// Branch using IPASegmentation.
185 IPASegmentation ipaS =
new IPASegmentation();
186 if (!path2ExternalFSM.equals(
"")) {
187 ipaS.pathToExternalFSM = path2ExternalFSM;
// The segmented transcription `st` is converted with TURN_SEGMENTATION.
190 ListTranscription lt = st.toListTranscription(
new SegmentedToListInfo(st, SegmentedToListInfo.TURN_SEGMENTATION));
191 final Document listXML = FileIO.readDocumentFromString(lt.toXML());
192 list = IOUtilities.documentToString(listXML);
// Branch using GenericSegmentation.
196 GenericSegmentation genS =
new GenericSegmentation();
197 if (!path2ExternalFSM.equals(
"")) {
198 genS.pathToExternalFSM = path2ExternalFSM;
201 ListTranscription lt = st.toListTranscription(
new SegmentedToListInfo(st, SegmentedToListInfo.TURN_SEGMENTATION));
202 final Document listXML = FileIO.readDocumentFromString(lt.toXML());
203 list = IOUtilities.documentToString(listXML);
// Any other algorithm name is rejected; the message includes the offending value.
207 throw new Exception(
"createUtteranceList - unsupported parameter segmAlgorithm='" + segmentationAlgorithm +
"'");
232 return utteranceList;
// NOTE(review): the statements below use `cd`, `stats`, `cio` and `targeturl`,
// none of which are declared in the visible lines, and they call an instance
// method although the header on the next line is static - lines are missing
// from this excerpt, so confirm which method these statements belong to.
235 public static void main(String[] args) {
// Visualize the unformatted string form of `cd` with the configured algorithm.
247 String result = createFromBasicTranscription(cd.
toUnformattedString(), segmentationAlgorithm);
// A null result is reported as a critical entry under SERVICE_NAME.
250 if (result == null) {
251 stats.
addCritical(SERVICE_NAME, cd,
"Visualization of file was not possible!");
// Presumably the non-null branch (branch header not visible): write the result
// to `targeturl` and record a success entry that names the target.
253 cio.write(result, targeturl);
254 stats.
addCorrect(SERVICE_NAME, cd,
"Visualization of file was successfully saved at " + targeturl);
256 }
// IOException is handled separately from all other exceptions; neither handler
// body is visible in this excerpt.
catch (IOException ex) {
258 }
catch (Exception ex) {
/**
 * Corpus-level entry point: fetches the basic transcription data of the
 * corpus and merges the report of function(bt) for an element `bt` into
 * `stats`.
 * NOTE(review): the loop header and the return statement are not visible in
 * this excerpt; presumably `bt` iterates over `btc` and `stats` is returned.
 *
 * @param co the corpus whose basic transcription data is processed
 * @return a Report (return statement not visible)
 * @throws TransformerException              as declared
 * @throws TransformerConfigurationException as declared
 * @throws IOException                       as declared
 * @throws SAXException                      as declared
 */
265 public Report function(
Corpus co)
throws TransformerException, TransformerConfigurationException, IOException, SAXException {
267 Collection<BasicTranscriptionData> btc = co.getBasicTranscriptionData();
// Fold the per-transcription report into the overall report.
269 stats.
merge(
function(bt));
277 Class cl = Class.forName(
"de.uni_hamburg.corpora.BasicTranscriptionData");
279 }
catch (ClassNotFoundException ex) {
// Command-line handling: at least two arguments are required, otherwise a usage
// message is printed.
// NOTE(review): the enclosing method header is not visible directly above these
// lines in this excerpt.
288 if (args.length < 2) {
289 System.out.println(
"Usage: " +
ListHTML.class
291 +
"EXB SEGMENTATION [HTML]");
// NOTE(review): the list below omits GAT although this file instantiates
// GATSegmentation and compares the algorithm name with "GAT" elsewhere -
// confirm whether the usage text should mention it.
292 System.out.println(
"\nSEGMENTATION is one of: " 293 +
" HIAT, CHAT, IPA, Generic");
// The file named by the first argument is read completely and decoded as UTF-8.
296 byte[] encoded = Files.readAllBytes(Paths.get(args[0]));
297 String btString =
new String(encoded,
"UTF-8");
// An optional third argument names the output file.
299 if (args.length >= 3) {
// NOTE(review): no close() of this writer is visible in the excerpt - confirm
// it is closed (or opened in try-with-resources) so the output is flushed.
300 PrintWriter htmlOut =
new PrintWriter(args[2]);
// Presumably the branch without an output file (branch header not visible):
// the HTML goes to standard output.
304 System.out.println(list.
getHTML());
307 }
// Both failure kinds only print the stack trace.
catch (UnsupportedEncodingException uee) {
308 uee.printStackTrace();
309 }
catch (IOException ioe) {
310 ioe.printStackTrace();
320 return segmentationAlgorithm;
324 segmentationAlgorithm = s;
332 path2ExternalFSM = s;
337 String description =
"This class creates an html visualization " 338 +
"in the List format from an exb. ";
String getFilenameWithoutFileEnding()
String toUnformattedString()
void setExternalFSM(String s)
void setCorpusName(String s)
String basicTranscriptionString
void addCritical(String description)
String getUtteranceList()
static BasicTranscription String2BasicTranscription(String btAsString)
static String InputStream2String(InputStream is)
Collection< Class<?extends CorpusData > > getIsUsableFor()
Report doMain(String[] args)
void addCorrect(String statId, String description)
BasicTranscription basicTranscription
void setSegmentation(String s)
ListHTML(String btAsString, String segmAlgorithm)
void addException(Throwable e, String description)
void setUtteranceList(String u)
static void main(String[] args)