1 package de.uni_hamburg.corpora.validation;
9 import java.io.IOException;
10 import java.net.URISyntaxException;
12 import java.nio.file.Paths;
13 import java.util.Collection;
14 import java.util.HashMap;
15 import java.util.logging.Level;
16 import java.util.logging.Logger;
17 import javax.xml.parsers.DocumentBuilder;
18 import javax.xml.parsers.DocumentBuilderFactory;
19 import javax.xml.parsers.ParserConfigurationException;
20 import javax.xml.transform.TransformerConfigurationException;
21 import javax.xml.transform.TransformerException;
22 import javax.xml.xpath.XPathExpressionException;
23 import org.jdom.JDOMException;
24 import org.w3c.dom.Document;
25 import org.w3c.dom.Element;
26 import org.w3c.dom.NodeList;
27 import org.xml.sax.SAXException;
// Lookup tables built while the Coma document and the KML file are processed.
// Speaker sigle -> "placeOfBirth, region, country" (filled in function(...)).
38 HashMap<String, String> birthPlace;
// Speaker sigle -> domicile name (filled in function(...)).
39 HashMap<String, String> domicile;
// Communication Id attribute -> "settlement, region, country" (filled in function(...)).
40 HashMap<String, String> commLocation;
// "<place name>-<language>" -> coordinate text taken from the KML file
// (filled in getCoordinates(); language is the empty string when a placemark has no lang datum).
41 HashMap<String, String> lngLat;
// Values of the Name attribute on Key elements. The *LL constants are the names
// given to the coordinate keys that function(...) creates. The remaining constants
// are presumably the case labels of the switch statements over Key names; those
// labels are not all visible in this listing - TODO confirm.
43 final String KEYBIRTHPLACE =
"1a Place of birth";
44 final String KEYBIRTHPLACELL =
"1c Place of birth (LngLat)";
45 final String KEYREGION =
"2 Region";
46 final String KEYCOUNTRY =
"3 Country";
47 final String KEYDOMICILE =
"7a Domicile";
48 final String KEYDOMICILELL =
"7c Domicile (LngLat)";
49 final String KEYOTHER =
"8a Other information";
// Key names without a numeric prefix; KEYSETTLEMENTLL is used for Communication
// locations, the other three presumably as well - TODO confirm.
50 final String KEYCOUNTRYBARE =
"Country";
51 final String KEYREGIONBARE =
"Region";
52 final String KEYSETTLEMENT =
"Settlement";
53 final String KEYSETTLEMENTLL =
"Settlement (LngLat)";
// Core routine. When a KML file path is set it walks every Speaker and every
// Communication element of the document and, where the lngLat table has an entry
// for the place name, creates a coordinate Key element (place of birth, domicile,
// settlement). Every insertion is reported through stats.addFix; places without a
// language-specific lngLat entry are printed to System.out. Without a KML path a
// critical message is recorded instead.
// NOTE(review): this listing lacks several original lines (method signature, most
// case labels, closing braces, the return statement). The comments below describe
// only the statements that are visible.
65 throws SAXException, IOException, ParserConfigurationException, URISyntaxException, TransformerConfigurationException, TransformerException, XPathExpressionException {
69 if (kmlFile != null) {
71 DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
72 DocumentBuilder db = dbf.newDocumentBuilder();
// Elements that may receive coordinate keys. The assignment of doc is not visible here.
74 NodeList communications = doc.getElementsByTagName(
"Communication");
75 NodeList speakers = doc.getElementsByTagName(
"Speaker");
// Create the result maps on first use.
77 if (birthPlace == null) {
78 birthPlace =
new HashMap<>();
80 if (domicile == null) {
81 domicile =
new HashMap<>();
83 if (commLocation == null) {
84 commLocation =
new HashMap<>();
// ---- Speakers: place of birth and domicile ----
86 for (
int i = 0; i < speakers.getLength(); i++) {
87 Element speaker = (Element) speakers.item(i);
// NOTE(review): item(0) yields null when the element is absent, so a speaker without
// a Sigle or LanguageCode child would raise a NullPointerException in the next
// statements - confirm the schema guarantees both.
88 Element sigle = (Element) speaker.getElementsByTagName(
"Sigle").item(0);
89 String sigleString = sigle.getTextContent();
90 NodeList locations = speaker.getElementsByTagName(
"Location");
91 String languageCode = speaker.getElementsByTagName(
"LanguageCode").item(0).getTextContent();
92 for (
int j = 0; j < locations.getLength(); j++) {
93 Element location = (Element) locations.item(j);
// Only locations whose Type attribute is "Basic biogr. data" are inspected.
94 if (location.getAttribute(
"Type").equals(
"Basic biogr. data")) {
95 NodeList keys = location.getElementsByTagName(
"Key");
96 String placeOfBirth =
"";
98 String country = null;
99 String domicileStr =
"";
// coorFlag / domCoor are tested below before a coordinate key is created; they are
// presumably set to true in the switch when such a key already exists (not visible).
100 boolean coorFlag =
false;
101 boolean domCoor =
false;
// Reference node handed to insertBefore for the domicile key. It starts as null;
// per the DOM API a null reference child makes insertBefore append at the end.
103 Element domRef = null;
// Read the relevant Key values, dispatching on the Name attribute.
104 for (
int k = 0; k < keys.getLength(); k++) {
105 Element key = (Element) keys.item(k);
106 switch (key.getAttribute(
"Name")) {
108 placeOfBirth = key.getTextContent();
110 case KEYBIRTHPLACELL:
114 region = key.getTextContent();
118 country = key.getTextContent();
121 domicileStr = key.getTextContent();
// Skip the placeholder "..." and empty birthplaces.
131 if (!placeOfBirth.equals(
"...") && !placeOfBirth.equals(
"")) {
// Strip a trailing uncertainty marker.
// NOTE(review): endsWith tests "(?)" but indexOf searches " (?)" with a leading
// space; if that space is absent indexOf returns -1 and substring throws.
132 if (placeOfBirth.endsWith(
"(?)")) {
133 placeOfBirth = placeOfBirth.substring(0, placeOfBirth.indexOf(
" (?)"));
// When the value ends with a backtick, cut it at the first backtick.
135 if (placeOfBirth.endsWith(
"`")) {
136 placeOfBirth = placeOfBirth.substring(0, placeOfBirth.indexOf(
"`"));
// Look up coordinates: first the language-specific entry, then the entry stored
// with an empty language.
// NOTE(review): the lookup key is built from domicileStr although this branch
// handles placeOfBirth (the else-if further down uses placeOfBirth) - this looks
// like a copy-paste slip; confirm and switch the four occurrences to placeOfBirth.
140 String coordinates =
"";
141 if(lngLat.containsKey(domicileStr +
"-" + languageCode)){
142 coordinates = lngLat.get(domicileStr +
"-" + languageCode);
143 }
else if(lngLat.containsKey(domicileStr +
"-")){
144 coordinates = lngLat.get(domicileStr +
"-");
// Create the coordinate key only when coorFlag is false and a match was found.
147 if (coorFlag ==
false && (!coordinates.equals(
""))) {
148 Element coordinatesKey = doc.createElement(
"Key");
149 coordinatesKey.setAttribute(
"Name", KEYBIRTHPLACELL);
150 coordinatesKey.setTextContent(coordinates);
151 Element loc = (Element) location.getElementsByTagName(
"Description").item(0);
// ref is declared outside the visible lines.
152 loc.insertBefore(coordinatesKey, ref);
// NOTE(review): coordinatesKey is an Element, so the message embeds its string
// form rather than the coordinate text; the message also names domicileStr
// instead of placeOfBirth.
153 String message =
"Added Key " + KEYBIRTHPLACELL +
": " + coordinatesKey +
") from KML (" + kmlFile +
") " + domicileStr +
"' " 154 +
"for speaker '" + sigleString +
"'";
155 stats.
addFix(
function, cd, message);
156 }
else if (!lngLat.containsKey(placeOfBirth +
"-" + languageCode)) {
// Only the language-specific key is tested here, not the language-less fallback.
157 String message =
"KML (" + kmlFile +
") does not contain the birthplace '" + placeOfBirth +
"' " 158 +
"from speaker '" + sigleString +
"'";
159 System.out.println(message);
// Same procedure for the domicile: skip placeholders, normalise, look up, insert.
164 if (!domicileStr.equals(
"...") && !domicileStr.equals(
"")) {
// NOTE(review): same "(?)" versus " (?)" mismatch as for the birthplace.
165 if (domicileStr.endsWith(
"(?)")) {
166 domicileStr = domicileStr.substring(0, domicileStr.indexOf(
" (?)"));
168 if (domicileStr.endsWith(
"`")) {
169 domicileStr = domicileStr.substring(0, domicileStr.indexOf(
"`"));
173 String coordinates =
"";
174 if(lngLat.containsKey(domicileStr +
"-" + languageCode)){
175 coordinates = lngLat.get(domicileStr +
"-" + languageCode);
176 }
else if(lngLat.containsKey(domicileStr +
"-")){
177 coordinates = lngLat.get(domicileStr +
"-");
// Create the domicile coordinate key only when domCoor is false and a match was found.
180 if (domCoor ==
false && (!coordinates.equals(
""))) {
181 Element coordinatesKey = doc.createElement(
"Key");
182 coordinatesKey.setAttribute(
"Name", KEYDOMICILELL);
183 coordinatesKey.setTextContent(coordinates);
184 Element loc = (Element) location.getElementsByTagName(
"Description").item(0);
185 loc.insertBefore(coordinatesKey, domRef);
// NOTE(review): as above, the Element itself is concatenated into the message.
186 String message =
"Added Key " + KEYDOMICILELL +
": " + coordinatesKey +
") from KML (" + kmlFile +
") " + domicileStr +
"' " 187 +
"for speaker '" + sigleString +
"'";
188 stats.
addFix(
function, cd, message);
189 }
else if (!lngLat.containsKey(domicileStr +
"-" + languageCode)) {
190 String message =
"KML (" + kmlFile +
") does not contain the domicile '" + domicileStr +
"' " 191 +
"from speaker '" + sigleString +
"'";
192 System.out.println(message);
// Remember what was found for this speaker, keyed by sigle.
196 birthPlace.put(sigleString,
new String(placeOfBirth +
", " + region +
", " + country));
197 domicile.put(sigleString, domicileStr);
// ---- Communications: settlement ----
202 for (
int i = 0; i < communications.getLength(); i++) {
203 Element communication = (Element) communications.item(i);
// NOTE(review): only the first Location is used, and it is dereferenced without a
// null check; the same holds for LanguageCode below.
204 Element location = (Element) communication.getElementsByTagName(
"Location").item(0);
205 String communicationID = communication.getAttribute(
"Id");
206 String communicationName = communication.getAttribute(
"Name");
207 NodeList keys = location.getElementsByTagName(
"Key");
210 String settlement =
"";
211 String languageCode = communication.getElementsByTagName(
"LanguageCode").item(0).getTextContent();
212 boolean coorFlag =
false;
213 for (
int j = 0; j < keys.getLength(); j++) {
214 Element key = (Element) keys.item(j);
215 switch (key.getAttribute(
"Name")) {
217 country = key.getTextContent();
220 region = key.getTextContent();
223 settlement = key.getTextContent();
225 case KEYSETTLEMENTLL:
// Skip the placeholder "..." and empty settlements, then normalise as for speakers.
230 if (!settlement.equals(
"...") && !settlement.equals(
"")) {
// NOTE(review): same "(?)" versus " (?)" mismatch as for the birthplace.
231 if (settlement.endsWith(
"(?)")) {
232 settlement = settlement.substring(0, settlement.indexOf(
" (?)"));
234 if (settlement.endsWith(
"`")) {
235 settlement = settlement.substring(0, settlement.indexOf(
"`"));
// Language-specific entry first, then the entry stored with an empty language.
239 String coordinates =
"";
240 if(lngLat.containsKey(settlement +
"-" + languageCode)){
241 coordinates = lngLat.get(settlement +
"-" + languageCode);
242 }
else if(lngLat.containsKey(settlement +
"-")){
243 coordinates = lngLat.get(settlement +
"-");
246 if (coorFlag ==
false && (!coordinates.equals(
""))) {
247 Element coordinatesKey = doc.createElement(
"Key");
248 coordinatesKey.setAttribute(
"Name", KEYSETTLEMENTLL);
249 coordinatesKey.setTextContent(coordinates);
250 Element loc = (Element) location.getElementsByTagName(
"Description").item(0);
// Unlike the speaker keys, the settlement key is appended as the last child.
251 loc.appendChild(coordinatesKey);
// NOTE(review): as above, the Element itself is concatenated into the message.
252 String message =
"Added Key " + KEYSETTLEMENTLL +
": " + coordinatesKey +
") from KML (" + kmlFile +
") " 253 +
"for communication '" + communicationName +
"'";
254 stats.
addFix(
function, cd, message);
255 }
else if (!lngLat.containsKey(settlement +
"-" + languageCode)) {
256 String message =
"KML (" + kmlFile +
") does not contain the settlement '" + settlement +
"' " 257 +
"from communication '" + communicationName +
"'";
258 System.out.println(message);
// Remember what was found for this communication, keyed by its Id attribute.
262 commLocation.put(communicationID,
new String(settlement +
", " + region +
", " + country));
// Write cd to its own URL through cio; the step that stores the modified document
// in cd is not visible in this listing.
267 cio.
write(cd, cd.getURL());
// Reached when no KML path was set (the else keyword itself is not visible here).
270 stats.
addCritical(
function,
"No KML file path supplied");
272 }
catch (JDOMException ex) {
// The body of the JDOMException handler is not visible in this listing.
274 }
catch (ParserConfigurationException ex) {
275 stats.
addException(ex,
function, cd,
"The KML file could not be parsed.");
276 }
catch (TransformerException ex) {
277 stats.
addException(ex,
function, cd,
"Unknown Transformer error.");
278 }
catch (XPathExpressionException ex) {
279 stats.
addException(ex,
function, cd,
"Unknown XPath error.");
280 }
catch (IOException ex) {
281 stats.
addException(ex,
function, cd,
"The KML file could not be parsed.");
282 }
catch (URISyntaxException ex) {
283 stats.
addException(ex,
function, cd,
"URI syntax Exception.");
// Fills lngLat from the KML file named by kmlFile: one entry per Placemark, keyed
// "<name>-<lang>" (lang is the empty string when the placemark has no Data element
// named "lang"), with the coordinate text minus its last comma-separated component
// as the value. Problems are recorded on stats via addCritical.
// NOTE(review): this listing lacks several original lines (the assignments of
// kmlString and doc, the else branches and the return statement). The returned
// Report is presumably stats - TODO confirm.
294 public Report getCoordinates() throws ParserConfigurationException, SAXException, IOException, JDOMException, URISyntaxException {
298 if (kmlFile != null) {
// Turn the file system path into a URL; how the URL is read is not visible here.
299 URL url = Paths.get(kmlFile).toUri().toURL();
301 if (kmlString != null) {
// Create the lookup table on first use.
303 if (lngLat == null) {
304 lngLat =
new HashMap<>();
307 NodeList placeMarks = doc.getElementsByTagName(
"Placemark");
308 for (
int i = 0; i < placeMarks.getLength(); i++) {
309 Element placeMark = (Element) placeMarks.item(i);
// NOTE(review): item(0) yields null for a Placemark without a name child, which
// would raise a NullPointerException on the next statement.
310 Element name = (Element) placeMark.getElementsByTagName(
"name").item(0);
311 String nameOfPlace = name.getTextContent();
312 String language =
"";
// Scan the Data elements for the one named "lang"; if several match, the last wins.
313 NodeList data = placeMark.getElementsByTagName(
"Data");
314 for (
int j = 0; j < data.getLength(); j++) {
315 Element datum = (Element) data.item(j);
316 if (datum.getAttribute(
"name").equals(
"lang")) {
317 Element value = (Element) datum.getElementsByTagName(
"value").item(0);
318 language = value.getTextContent();
321 String coordinatesWithAltitude = placeMark.getElementsByTagName(
"coordinates").item(0).getTextContent();
// Keep everything before the last comma (the altitude part, judging by the
// variable name).
// NOTE(review): if the text has no comma, lastIndexOf returns -1 and substring throws.
322 String coordinate = coordinatesWithAltitude.trim().substring(0, coordinatesWithAltitude.trim().lastIndexOf(
","));
323 lngLat.put(nameOfPlace +
"-" + language, coordinate);
// The three reports below belong to branches whose else keywords are not visible
// in this listing.
326 stats.
addCritical(
function,
"KML file cannot be read");
329 stats.
addCritical(
function,
"KML file cannot be read");
332 stats.
addCritical(
function,
"No KML file path supplied");
// Looks up the ComaData class by its fully qualified name. How cl is used and how
// the ClassNotFoundException is handled is not visible in this listing.
// NOTE(review): Class is used as a raw type here.
345 Class cl = Class.forName(
"de.uni_hamburg.corpora.ComaData");
347 }
catch (ClassNotFoundException ex) {
// Human-readable summary of what this class does.
// NOTE(review): the text mentions the Google API, while the visible code takes its
// coordinates from a local KML file (see getCoordinates) - confirm the wording is current.
359 String description =
"This class identifies and lists fields which contain" 360 +
" location information; creates a list of different location names;" 361 +
" gets geo-coordinates for the location names via Google API.";
// Corpus-level entry point: takes the Coma data out of the corpus and passes it,
// together with fix, to the function overload that accepts cd. The result is kept
// in stats; the return statement is not visible in this listing.
366 public Report function(
Corpus c, Boolean fix)
throws SAXException, IOException, ParserConfigurationException, URISyntaxException, TransformerException, TransformerConfigurationException, XPathExpressionException {
368 cd = c.getComaData();
369 stats =
function(cd, fix);
static org.w3c.dom.Document String2W3cDocument(String stringRespresentingDocument)
String readExternalResourceAsString(String path2resource)
void addCritical(String description)
Collection< Class<?extends CorpusData > > getIsUsableFor()
void addWarning(String statId, String description)
static String W3cDocument2String(org.w3c.dom.Document doc)
static InputStream String2InputStream(String s)
void addException(Throwable e, String description)
void write(CorpusData cd, URL url)
void addFix(String statId, CorpusData cd, String description)
void setKMLFilePath(String path)