6 package de.uni_hamburg.corpora.visualization;
12 import java.io.IOException;
13 import java.net.URISyntaxException;
14 import java.security.NoSuchAlgorithmException;
15 import java.util.Collection;
16 import javax.xml.parsers.ParserConfigurationException;
17 import javax.xml.transform.TransformerException;
18 import javax.xml.xpath.XPathExpressionException;
19 import org.exmaralda.partitureditor.fsm.FSMException;
20 import org.exmaralda.partitureditor.jexmaralda.JexmaraldaException;
21 import org.jdom.JDOMException;
22 import org.xml.sax.SAXException;
23 import com.google.gson.Gson;
24 import com.google.gson.GsonBuilder;
25 import com.google.gson.JsonElement;
26 import com.google.gson.JsonObject;
27 import com.google.gson.JsonParser;
28 import com.opencsv.CSVReader;
31 import java.io.FileNotFoundException;
32 import java.io.FileReader;
33 import java.io.InputStreamReader;
35 import java.util.ArrayList;
36 import java.util.Collections;
37 import java.util.List;
38 import org.jdom.Attribute;
39 import org.jdom.Element;
40 import org.jdom.xpath.XPath;
// Resource paths of the bundled vikus-viewer templates. In this listing DATA_PATH and
// TIMELINE_PATH are opened via getClass().getResourceAsStream(...); where the other three
// are consumed is not visible here.
48 private static final String CONFIG_PATH =
"/vikus-viewer/config.json";
49 private static final String DATA_PATH =
"/vikus-viewer/data.csv";
50 private static final String INFO_PATH =
"/vikus-viewer/info.md";
51 private static final String TIMELINE_PATH =
"/vikus-viewer/timeline.csv";
52 private static final String AUDIO_IMAGE_PATH =
"/vikus-viewer/sound.jpg";
// Stop words; lower-cased tokens of a communication title are tested against this list
// while the keyword column is assembled.
53 ArrayList<String> keywordblacklist =
new ArrayList<>();
// Distinct "Date of recording" values collected from the communications; sorted and
// iterated when the timeline is produced.
60 ArrayList<String> allDistinctYears =
new ArrayList<>();
/**
 * Entry point for a single corpus data object.
 * <p>
 * The visible part resolves the output folder {@code resources/vikus-viewer} relative to
 * {@code cd.getParentURL()}, creates it when it does not exist, and reads the corpus-level
 * metadata keys (description, prefix, title, version, licence) from
 * {@code comadescription}.
 * NOTE(review): several lines of this method are missing from the listing, so the value
 * returned and the steps that follow the metadata reads cannot be confirmed here.
 *
 * @param cd corpus data whose parent URL anchors the output folder
 */
63 public Report function(
CorpusData cd)
throws NoSuchAlgorithmException, ClassNotFoundException, FSMException, URISyntaxException, SAXException, IOException, ParserConfigurationException, JexmaraldaException, TransformerException, XPathExpressionException, JDOMException {
// Output location: parent URL + "resources/vikus-viewer" by plain string concatenation
// (assumes the parent URL already ends with a separator -- TODO confirm).
67 vikusviewerurl =
new URL(cd.
getParentURL() +
"resources/vikus-viewer");
68 File vikusviewerfolder =
new File((vikusviewerurl).getFile());
// Create the folder, including missing parents, on first use.
69 if (!vikusviewerfolder.exists()) {
71 vikusviewerfolder.mkdirs();
// Corpus-level metadata, one Key element per value.
// NOTE(review): each selectSingleNode result below is dereferenced without a null check,
// so a corpus description lacking one of these keys ends in a NullPointerException.
75 Element descriptioncoma = (Element) XPath.selectSingleNode(comadescription,
"descendant::Key[@Name='DC:description']");
76 description = descriptioncoma.getText();
77 Element elcorpusPrefix = (Element) XPath.selectSingleNode(comadescription,
"descendant::Key[@Name='hzsk:corpusPrefix']");
78 corpusPrefix = elcorpusPrefix.getText();
79 Element eltitle = (Element) XPath.selectSingleNode(comadescription,
"descendant::Key[@Name='DC:title']");
80 title = eltitle.getText();
81 Element elversion = (Element) XPath.selectSingleNode(comadescription,
"descendant::Key[@Name='hzsk:corpusVersion']");
82 version = elversion.getText();
83 Element ellicence = (Element) XPath.selectSingleNode(comadescription,
"descendant::Key[@Name='DC:rights']");
84 licence = ellicence.getText();
93 keywordblacklist.add(
"and");
94 keywordblacklist.add(
"a");
95 keywordblacklist.add(
"the");
96 keywordblacklist.add(
"i");
97 keywordblacklist.add(
"in");
98 keywordblacklist.add(
"are");
99 keywordblacklist.add(
"is");
100 keywordblacklist.add(
"how");
101 keywordblacklist.add(
"an");
102 keywordblacklist.add(
"on");
103 keywordblacklist.add(
"of");
104 keywordblacklist.add(
"my");
105 keywordblacklist.add(
"with");
106 keywordblacklist.add(
"at");
107 keywordblacklist.add(
"...");
// Parse the bundled data.csv template; all of its rows are kept in `data`.
// NOTE(review): no close() for `reader` is visible in this listing, and the
// InputStreamReader is created without an explicit charset -- confirm both.
116 reader =
new CSVReader(
new InputStreamReader(getClass().getResourceAsStream(DATA_PATH)),
',');
117 List<String[]> data = reader.readAll();
// Repository URL prefixes of the form <base>:<corpusPrefix>-<version>_ ; the
// communication name is appended to them further down.
120 String transrepourl =
"https://corpora.uni-hamburg.de/repository/transcript:" + corpusPrefix +
"-" + version +
"_";
121 String filerepourl =
"https://corpora.uni-hamburg.de/repository/file:" + corpusPrefix +
"-" + version +
"_";
122 String recrepourl =
"https://corpora.uni-hamburg.de/repository/recording:" + corpusPrefix +
"-" + version +
"_";
// One output row with 16 columns for the communication being processed (the header of
// the enclosing loop is not part of this listing).
124 String[] comrow =
new String[16];
// Column 0: the communication's @Name attribute.
126 Attribute
id = (Attribute) XPath.selectSingleNode(communication,
"@Name");
127 comrow[0] =
id.getValue();
// Recording date; every value not seen before is remembered in allDistinctYears.
// NOTE(review): `year` is dereferenced without a null check.
129 Element year = (Element) XPath.selectSingleNode(communication,
"descendant::Description/Key[contains(@Name,'Date of recording')]");
130 System.out.println(year.getText());
131 if (!allDistinctYears.contains(year.getText())) {
132 allDistinctYears.add(year.getText());
// Title key of the communication; it may be absent and is null-checked before use.
134 Element descriptiondesc = (Element) XPath.selectSingleNode(communication,
"descendant::Description/Key[contains(@Name,'Title')]");
// NOTE(review): `genre` and, below, `speaker` are dereferenced without a null check.
136 Element genre = (Element) XPath.selectSingleNode(communication,
"descendant::Description/Key[contains(@Name,'Genre')]");
137 System.out.println(genre.getText());
138 Element settlement = (Element) XPath.selectSingleNode(communication,
"descendant::Location/Description/Key[contains(@Name,'Settlement')]");
// Missing settlement: substitute a fresh element without text so that the getText()
// calls below do not hit null.
139 if(settlement==null){
140 settlement =
new Element(
"Settlement");
142 System.out.println(settlement.getText());
143 Element speaker = (Element) XPath.selectSingleNode(communication,
"descendant::Description/Key[contains(@Name,'Speakers')]");
144 System.out.println(speaker.getText());
// Column 1: a double-quoted, comma-separated keyword list. Title tokens (split on a
// single space) are checked against the blacklist first; the statement that consumes
// the accepted tokens is missing from this listing. The list always ends with year,
// genre, settlement and speaker.
145 String keywords =
"\"";
146 if (descriptiondesc != null) {
147 System.out.println(descriptiondesc.getText());
149 for (String s : descriptiondesc.getText().split(
" ")) {
150 if (!keywordblacklist.contains(s.toLowerCase())) {
155 keywords += year.getText() +
"," + genre.getText() +
"," + settlement.getText() +
"," + speaker.getText() +
"\"";
156 comrow[1] = keywords;
// Dialect, country, region and language lookups. The assignments of these values to
// row columns are not part of this listing.
160 Element dialect = (Element) XPath.selectSingleNode(communication,
"descendant::Description/Key[contains(@Name,'Dialect')]");
161 System.out.println(dialect.getText());
164 Element country = (Element) XPath.selectSingleNode(communication,
"descendant::Location/Description/Key[contains(@Name,'Country')]");
// Placeholder element for a missing country (presumably guarded by a null check on a
// line that is missing here, as done for settlement -- confirm).
166 country =
new Element(
"Country");
168 System.out.println(country.getText());
171 Element region = (Element) XPath.selectSingleNode(communication,
"descendant::Location/Description/Key[contains(@Name,'Region')]");
// Same placeholder approach for a missing region (guard line not visible -- confirm).
173 region =
new Element(
"Region");
175 System.out.println(region.getText());
180 Element language = (Element) XPath.selectSingleNode(communication,
"descendant::Language/LanguageCode");
181 System.out.println(language.getText());
// Column 8: the speaker text wrapped in double quotes.
184 comrow[8] =
"\"" + speaker.getText() +
"\"";
// Columns 9-11: repository URLs of the EXB transcript, the score view and the list
// view, all derived from the communication name.
187 String transcrurl = transrepourl +
id.getValue() +
"/EXB/" +
id.getValue() +
".exb";
191 comrow[9] = transcrurl;
195 String scoreurl = transrepourl +
id.getValue() +
"/SCORE/" +
id.getValue() +
"-score.html";
196 comrow[10] = scoreurl;
200 String listurl = transrepourl +
id.getValue() +
"/LIST/" +
id.getValue() +
"-list.html";
201 comrow[11] = listurl;
203 Element pdf = (Element) XPath.selectSingleNode(communication,
"descendant::File[mimetype='application/pdf']/relPath']");
// Link of the first recording medium of the communication; null when none is linked.
205 Element audio = (Element) XPath.selectSingleNode(communication,
"descendant::Recording/Media/NSLink");
// Candidate repository URLs for the PDF and the MP3. They are built unconditionally and
// only written to the row when the corresponding element was found.
207 String pdfrurl = filerepourl +
id.getValue() +
"/PDF/" +
id.getValue() +
".pdf";
208 String audiourl = recrepourl +
id.getValue() +
"/MP3/" +
id.getValue() +
".mp3";
209 Element transcription = (Element) XPath.selectSingleNode(communication,
"descendant::Transcription/NSLink");
// Image location: the transcription link with its last file extension replaced by
// ".jpg", resolved against the parent URL of the corpus data.
210 URL imageLocation = null;
211 if (transcription != null) {
212 imageLocation =
new URL(cd.
getParentURL() + transcription.getText().replaceFirst(
"[.][^.]+$",
"") +
".jpg");
// Reported when no transcription is linked (the `else` line that separates this call
// from the branch above is missing from the listing).
214 stats.
addCritical(
function, cd,
id.getValue() +
": No transcription linked in communication in the coma file!");
// Columns 12 and 13 depend on which of pdf/audio exist; first case: neither is linked.
216 if (pdf == null && audio == null) {
// Placeholder for a communication without a linked PDF. Spelled "no pdf" so that
// column 12 carries one and the same marker in every branch that lacks a PDF
// (this branch used to write the misspelled "np pdf").
comrow[12] = "no pdf";
// Still in the "neither pdf nor audio" branch: audio placeholder plus a critical report.
218 comrow[13] =
"no audio";
219 stats.
addCritical(
function, cd,
id.getValue() +
": No audio or pdf linked in communication in the coma file!");
220 }
// Both are linked: both URLs are written, and the situation is still reported as critical.
else if (pdf != null && audio != null) {
222 comrow[12] = pdfrurl;
223 comrow[13] = audiourl;
224 stats.
addCritical(
function, cd,
id.getValue() +
": Audio AND pdf linked in communication in the coma file!");
225 }
// Only a PDF is linked.
else if (pdf != null) {
226 comrow[12] = pdfrurl;
227 comrow[13] =
"no audio";
// Only audio is linked (the `else` line introducing this branch is missing from the
// listing).
232 comrow[12] =
"no pdf";
233 comrow[13] = audiourl;
// The body of this check is missing from the listing; what is done with the image
// location cannot be told from here.
235 if (imageLocation != null) {
// Column 14: genre text.
240 System.out.println(genre.getText());
241 comrow[14] = genre.getText();
// Column 15: the title text wrapped in double quotes, when a title exists.
243 if (descriptiondesc != null) {
244 String descdesc =
"\"" + descriptiondesc.getText() +
"\"";
245 comrow[15] = descdesc;
// Serialise all rows: fields joined with "," and each row terminated by "\n".
// The fields are not escaped at this point.
250 for (String[] row : data) {
251 newdata += String.join(
",", row) +
"\n";
256 URL configJSONlocation =
new URL(vikusviewerurl +
"/data.csv");
257 cio.
write(newdata, configJSONlocation);
258 stats.
addCorrect(
function, cd,
"vikus-viewer config successfully created at " + configJSONlocation.toString());
// Parse the config template text held in `config` and navigate to its "project" object.
// NOTE(review): getAsJsonObject("project") is used without a null check, so a template
// lacking that member would fail on the addProperty call.
266 JsonElement jelement =
new JsonParser().parse(config);
267 JsonObject jobject = jelement.getAsJsonObject();
268 jobject = jobject.getAsJsonObject(
"project");
// Project name shown by the viewer: corpus title and version separated by a space.
269 jobject.addProperty(
"name", title +
" " + version);
// Pretty-print the whole document (jelement, not only the "project" object).
270 Gson gson =
new GsonBuilder().setPrettyPrinting().create();
271 String prettyJsonString = gson.toJson(jelement);
// Write the result to config.json in the vikus-viewer folder and report success.
274 URL configJSONlocation =
new URL(vikusviewerurl +
"/config.json");
275 cio.
write(prettyJsonString, configJSONlocation);
276 stats.
addCorrect(
function, cd,
"vikus-viewer config successfully created at " + configJSONlocation.toString());
284 String corpusnameandversion = title +
" " + version;
285 info = info.replaceAll(
"_CORPUSNAME_", corpusnameandversion);
287 info = info.replaceAll(
"_DESCRIPTION_", description);
289 info = info.replaceAll(
"_LICENCE_", licence);
// Write the filled-in template to info.md in the vikus-viewer folder and report success.
291 URL infoMDlocation =
new URL(vikusviewerurl +
"/info.md");
292 cio.
write(info, infoMDlocation);
293 stats.
addCorrect(
function, cd,
"vikus-viewer info.md successfully created at " + infoMDlocation.toString());
// Parse the bundled timeline.csv template.
// NOTE(review): no close() for `reader` is visible in this listing, and the
// InputStreamReader is created without an explicit charset -- confirm both.
303 reader =
new CSVReader(
new InputStreamReader(getClass().getResourceAsStream(TIMELINE_PATH)),
',');
// Natural String ordering of the collected year values.
304 Collections.sort(allDistinctYears);
305 List<String[]> time = reader.readAll();
// One row with six columns per distinct year; the statements that fill the row are
// missing from this listing.
306 for (String year : allDistinctYears) {
307 String[] timerow =
new String[6];
// Serialise all rows: fields joined with "," and each row terminated by "\n".
317 for (String[] row : time) {
318 newtime += String.join(
",", row) +
"\n";
321 URL timelineCSVlocation =
new URL(vikusviewerurl +
"/timeline.csv");
322 cio.
write(newtime, timelineCSVlocation);
323 stats.
addCorrect(
function, cd,
"vikus-viewer config successfully created at " + timelineCSVlocation.toString());
/**
 * Corpus-level entry point: takes the coma data of the corpus and delegates to
 * {@code function(CorpusData)}, storing that call's result in {@code stats}.
 * NOTE(review): the remaining lines of this method (including its return statement)
 * are missing from the listing.
 *
 * @param c corpus whose coma data is processed
 */
328 public Report function(
Corpus c)
throws NoSuchAlgorithmException, ClassNotFoundException, FSMException, URISyntaxException, SAXException, IOException, ParserConfigurationException, JexmaraldaException, TransformerException, XPathExpressionException, JDOMException {
330 cd = c.getComaData();
331 stats =
function(cd);
// Looks up the ComaData class by its fully qualified name. Presumably part of
// getIsUsableFor(), whose header is missing from this listing -- confirm.
// NOTE(review): `Class` is used as a raw type here.
338 Class cl = Class.forName(
"de.uni_hamburg.corpora.ComaData");
340 }
// The handler body for a failed lookup is missing from this listing.
catch (ClassNotFoundException ex) {
// Replace every comma, double quote and single quote in `s` with a space, i.e. the
// characters that act as field separator or quoting in the CSV written by this class.
// Presumably the body of cleanForCSV(String), whose header is missing here -- confirm.
347 s = s.replace(
',',
' ');
348 s = s.replace(
'"',
' ');
349 s = s.replace(
'\'',
' ');
// Human-readable summary of what this class does, assembled from two literals.
// NOTE(review): the text reads "creates an config files"; the wording is left unchanged
// here because it is a runtime string.
355 String description =
"This class creates an config files needed " 356 +
"for the vikus-viewer software. ";
String cleanForCSV(String s)
Report createConfigJSON(CorpusData cd)
Report createDataCSV(CorpusData cd)
void addCritical(String description)
String readInternalResourceAsString(String path2resource)
List< Element > getCommunications()
Collection< Class<?extends CorpusData > > getIsUsableFor()
void addCorrect(String statId, String description)
Element getCorpusDescription()
Report createTimelineCSV(CorpusData cd)
Report createInfoMD(CorpusData cd)
void addException(Throwable e, String description)
void copyInternalBinaryFile(String internalPath, URL url)
void write(CorpusData cd, URL url)