corpus-services  1.0
ComaFedoraIdentifierLengthChecker.java
Go to the documentation of this file.
1 
9 package de.uni_hamburg.corpora.validation;
10 
15 import java.io.IOException;
16 import java.util.Collection;
17 import javax.xml.parsers.DocumentBuilder;
18 import javax.xml.parsers.DocumentBuilderFactory;
19 import javax.xml.parsers.ParserConfigurationException;
20 import org.exmaralda.partitureditor.jexmaralda.JexmaraldaException;
21 import org.w3c.dom.Document;
22 import org.w3c.dom.Element;
23 import org.w3c.dom.NodeList;
24 import org.xml.sax.SAXException;
26 import javax.xml.transform.TransformerException;
27 import javax.xml.xpath.XPathExpressionException;
28 
33 public class ComaFedoraIdentifierLengthChecker extends Checker implements CorpusFunction {
34 
35  String comaLoc = ""; // NOTE(review): never read or written in the visible part of this class - confirm whether it is still needed
36 
/**
 * Creates the checker.
 *
 * <p>Passes {@code false} to the superclass constructor; per the original inline comment this
 * marks the checker as offering no automatic fix.
 */
public ComaFedoraIdentifierLengthChecker() {
    // no fix available
    super(false);
}
45 
50  public Report function(CorpusData cd, Boolean fix)
51  throws SAXException, IOException, ParserConfigurationException, JexmaraldaException, TransformerException, XPathExpressionException {
52  Report stats = new Report();
53  DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
54  DocumentBuilder db = dbf.newDocumentBuilder();
55  Document doc = db.parse(TypeConverter.String2InputStream(cd.toSaveableString()));
56  NodeList keys = doc.getElementsByTagName("Key");
57  String corpusPrefix = "";
58  String corpusVersion = "";
59  for (int i = 0; i < keys.getLength(); i++) {
60  Element keyElement = (Element) keys.item(i);
61  if (keyElement.getAttribute("Name").equalsIgnoreCase("HZSK:corpusprefix")) {
62  corpusPrefix = keyElement.getTextContent();
63  } else if (keyElement.getAttribute("Name").equalsIgnoreCase("HZSK:corpusversion")) {
64  corpusVersion = keyElement.getTextContent();
65  }
66  }
67  if (corpusPrefix.equals("")) {
68  stats.addWarning(function, cd,
69  "Missing Key[@name='HZSK:corpusprefix']. "
70  + "PID length cannot be estimated accurately. "
71  + "Add that key in coma.");
72  corpusPrefix = "muster";
73  } else {
74  stats.addCorrect(function, cd,
75  "HZSK corpus prefix OK: " + corpusPrefix);
76  }
77  if (corpusVersion.equals("")) {
78  stats.addWarning(function, cd,
79  "Missing Key[@name='HZSK:corpusversion']. "
80  + "PID length cannot be estimated accurately. "
81  + "Add that key in coma.");
82  corpusVersion = "0.0";
83  } else {
84  stats.addCorrect(function, cd,
85  "HZSK corpus version OK: " + corpusVersion);
86  }
87 
88  //iterate <Communication>
89  NodeList communications = doc.getElementsByTagName("Communication");
90  for (int i = 0; i < communications.getLength(); i++) {
91  Element communication = (Element) communications.item(i);
92  String communicationName = communication.getAttribute("Name");
93  String fedoraPID = new String("communication:" + corpusPrefix
94  + "-" + corpusVersion
95  + "_" + communicationName);
96 
97  //just strip some characters at the end to make a suggestion
98  String shortenedCommuniationName;
99  if (communicationName.length() > 39) {
100  shortenedCommuniationName = communicationName.substring(0, 40);
101  } else {
102  shortenedCommuniationName = communicationName;
103  }
104 
105  //test length of Fedora PID and report
106  if (fedoraPID.length() >= 64) {
107  stats.addCritical(function, cd,
108  "Fedora PID would be too long (max. 64) for communication name (" + fedoraPID.length() + " chars): " + fedoraPID);
109  // + " You could shorten it to: " + shortenedCommuniationName + ", or change the corpus prefix");
110  } else {
111  stats.addCorrect(function, cd,
112  "Fedora PID can be generated for communication: " + fedoraPID);
113  }
114  }
115 
116  return stats;
117  }
118 
124  @Override
125  public Collection<Class<? extends CorpusData>> getIsUsableFor() {
126  try {
127  Class cl = Class.forName("de.uni_hamburg.corpora.ComaData");
128  IsUsableFor.add(cl);
129  } catch (ClassNotFoundException ex) {
130  report.addException(ex, " usable class not found");
131  }
132  return IsUsableFor;
133  }
134 
139  @Override
140  public String getDescription() {
141  String description = "This class loads coma data and check for potential "
142  + "problems with HZSK repository depositing; it checks the Exmaralda "
143  + ".coma file for ID's that violate Fedora's PID limits. ";
144  return description;
145  }
146 
147  @Override
148  public Report function(Corpus c, Boolean fix) throws SAXException, JexmaraldaException, IOException, ParserConfigurationException, TransformerException, XPathExpressionException {
149  Report stats = new Report();
150  cd = c.getComaData();
151  stats = function(cd, fix);
152  return stats;
153  }
154 
155 }
void addCritical(String description)
Definition: Report.java:104
void addWarning(String statId, String description)
Definition: Report.java:164
void addCorrect(String statId, String description)
Definition: Report.java:217
static InputStream String2InputStream(String s)
void addException(Throwable e, String description)
Definition: Report.java:287