9 package de.uni_hamburg.corpora.validation;
15 import java.io.IOException;
16 import java.util.Collection;
17 import javax.xml.parsers.DocumentBuilder;
18 import javax.xml.parsers.DocumentBuilderFactory;
19 import javax.xml.parsers.ParserConfigurationException;
20 import org.exmaralda.partitureditor.jexmaralda.JexmaraldaException;
21 import org.w3c.dom.Document;
22 import org.w3c.dom.Element;
23 import org.w3c.dom.NodeList;
24 import org.xml.sax.SAXException;
26 import javax.xml.transform.TransformerException;
27 import javax.xml.xpath.XPathExpressionException;
51 throws SAXException, IOException, ParserConfigurationException, JexmaraldaException, TransformerException, XPathExpressionException {
53 DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
54 DocumentBuilder db = dbf.newDocumentBuilder();
56 NodeList keys = doc.getElementsByTagName(
"Key");
57 String corpusPrefix =
"";
58 String corpusVersion =
"";
59 for (
int i = 0; i < keys.getLength(); i++) {
60 Element keyElement = (Element) keys.item(i);
61 if (keyElement.getAttribute(
"Name").equalsIgnoreCase(
"HZSK:corpusprefix")) {
62 corpusPrefix = keyElement.getTextContent();
63 }
else if (keyElement.getAttribute(
"Name").equalsIgnoreCase(
"HZSK:corpusversion")) {
64 corpusVersion = keyElement.getTextContent();
67 if (corpusPrefix.equals(
"")) {
69 "Missing Key[@name='HZSK:corpusprefix']. " 70 +
"PID length cannot be estimated accurately. " 71 +
"Add that key in coma.");
72 corpusPrefix =
"muster";
75 "HZSK corpus prefix OK: " + corpusPrefix);
77 if (corpusVersion.equals(
"")) {
79 "Missing Key[@name='HZSK:corpusversion']. " 80 +
"PID length cannot be estimated accurately. " 81 +
"Add that key in coma.");
82 corpusVersion =
"0.0";
85 "HZSK corpus version OK: " + corpusVersion);
89 NodeList communications = doc.getElementsByTagName(
"Communication");
90 for (
int i = 0; i < communications.getLength(); i++) {
91 Element communication = (Element) communications.item(i);
92 String communicationName = communication.getAttribute(
"Name");
93 String fedoraPID =
new String(
"communication:" + corpusPrefix
95 +
"_" + communicationName);
98 String shortenedCommuniationName;
99 if (communicationName.length() > 39) {
100 shortenedCommuniationName = communicationName.substring(0, 40);
102 shortenedCommuniationName = communicationName;
106 if (fedoraPID.length() >= 64) {
108 "Fedora PID would be too long (max. 64) for communication name (" + fedoraPID.length() +
" chars): " + fedoraPID);
112 "Fedora PID can be generated for communication: " + fedoraPID);
127 Class cl = Class.forName(
"de.uni_hamburg.corpora.ComaData");
129 }
catch (ClassNotFoundException ex) {
141 String description =
"This class loads coma data and check for potential " 142 +
"problems with HZSK repository depositing; it checks the Exmaralda " 143 +
".coma file for ID's that violate Fedora's PID limits. ";
148 public Report function(
Corpus c, Boolean fix)
throws SAXException, JexmaraldaException, IOException, ParserConfigurationException, TransformerException, XPathExpressionException {
150 cd = c.getComaData();
151 stats =
function(cd, fix);
void addCritical(String description)
void addWarning(String statId, String description)
ComaFedoraIdentifierLengthChecker()
void addCorrect(String statId, String description)
Collection< Class<?extends CorpusData > > getIsUsableFor()
static InputStream String2InputStream(String s)
void addException(Throwable e, String description)