6 package de.uni_hamburg.corpora.publication;
8 import com.sun.org.apache.xerces.internal.impl.dv.util.Base64;
15 import java.io.BufferedReader;
16 import java.io.FileNotFoundException;
17 import java.io.IOException;
18 import java.io.InputStream;
19 import java.io.InputStreamReader;
20 import java.io.OutputStreamWriter;
21 import java.net.HttpURLConnection;
23 import java.text.DateFormat;
24 import java.text.SimpleDateFormat;
25 import java.util.Calendar;
26 import java.util.Collection;
27 import java.util.Date;
28 import javax.xml.parsers.ParserConfigurationException;
29 import javax.xml.transform.TransformerException;
30 import org.w3c.dom.Document;
31 import org.w3c.dom.Element;
32 import org.w3c.dom.Node;
33 import org.w3c.dom.NodeList;
34 import org.xml.sax.SAXException;
37 import java.io.UnsupportedEncodingException;
38 import javax.xml.xpath.XPathExpressionException;
// Settings used by the Handle lookup / registration requests in this class.
// User half of the "user:pass" string sent as HTTP Basic authorization; empty by default.
50 String EpicApiUser =
"";
// Password half of the same HTTP Basic authorization string; empty by default.
51 String EpicApiPass =
"";
// Handle prefix; appended to HandleEndpoint for requests and to HandleUrlBase for
// the resulting handle URLs. Reassigned from a caller-supplied prefix elsewhere in the class.
52 String HandlePrefix =
"11022";
// Base URL of the service that is asked for existing handles and that receives
// the POST request creating new ones.
53 String HandleEndpoint =
"http://pid.gwdg.de/handles/";
// Base URL from which the final handle URLs written into the document are built.
54 String HandleUrlBase =
"http://hdl.handle.net/";
// Tag names handed to getElementsByTagName; the first child node of every
// matching element is the value that gets replaced by a handle URL.
57 String[] ElementNames = {
"MdSelfLink",
"ResourceRef",
"IsPartOf",
"PID"};
// Body of the per-file processing routine; its signature line and several
// statements (else branch, definition of newHandle/stats, write-back) are not
// part of this excerpt. Visible behaviour: every URL found in the elements
// named by ElementNames is replaced by a Handle PID URL, looked up first and
// otherwise taken from newHandle.
67 throws SAXException, IOException, ParserConfigurationException {
// Work on a W3C DOM copy of the JDOM document held by cmdi.
72 Document doc = JdomDocument2W3cDocument(cmdi.
getJdom());
76 doc.getDocumentElement().normalize();
78 Element root = doc.getDocumentElement();
// Outer loop: one pass per configured element name.
80 for (
int x = 0; x < ElementNames.length; x++) {
81 NodeList nodes = root.getElementsByTagName(ElementNames[x]);
// Inner loop: every element carrying that name anywhere below the root.
82 for (
int i = 0; i < nodes.getLength(); i++) {
// NOTE(review): getFirstChild() is null for an empty element, which would make
// the getTextContent() call below throw; the lines between these two statements
// are not visible here, so a guard may already exist - confirm.
83 Node node = nodes.item(i).getFirstChild();
87 String oldURL = node.getTextContent();
// URLs that already point at hdl.handle.net (with or without scheme) only get a warning.
// NOTE(review): whether such URLs are then skipped depends on lines not shown - confirm.
90 if(oldURL.matches(
"^\\s*(https?://)?hdl\\.handle\\.net/.*$")){
91 stats.
addWarning(
function, cd,
"URL is already a Handle PID: " + oldURL);
// Default: keep the URL unchanged unless a handle is found or created below.
96 String newURL = oldURL;
97 String partIdentifier =
"";
// A trailing path segment of 2-6 upper-case letters/digits (other than "/CMDI")
// is cut off the URL and kept as "@SEGMENT" to be re-appended to the handle.
98 if(oldURL.matches(
"^.+/[A-Z0-9]{2,6}$") && !oldURL.endsWith(
"/CMDI")){
99 int endIndex = oldURL.lastIndexOf(
"/");
101 partIdentifier =
"@"+oldURL.substring(endIndex + 1);
102 oldURL = oldURL.substring(0, endIndex);
// Ask the service whether the (shortened) URL already has a handle.
107 String existingHandle =
getPID(oldURL);
// Non-empty answer: reuse the existing handle.
110 if(existingHandle != null && !existingHandle.equals(
"")){
111 newURL = HandleUrlBase + HandlePrefix +
"/" + existingHandle + partIdentifier;
// Remove every whitespace run so the URL is a single token.
112 newURL = newURL.replaceAll(
"[\\s\\n]+",
"");
113 stats.
addNote(
function, cd,
"Retrieved existing Handle PID for " + oldURL +
":\n" + newURL);
114 System.out.println(
"Retrieved existing Handle PID for " + oldURL +
":\n" + newURL);
// Same construction with newHandle; its definition is not visible in this
// excerpt - presumably the result of registerPID(oldURL) in the else branch, confirm.
119 newURL = HandleUrlBase + HandlePrefix +
"/" + newHandle + partIdentifier;
120 newURL = newURL.replaceAll(
"[\\s\\n]+",
"");
121 stats.
addNote(
function, cd,
"Registered new Handle PID for " + oldURL +
":\n" + newURL);
122 System.out.println(
"Registered new Handle PID for " + oldURL +
":\n" + newURL);
// Write the (possibly unchanged) URL back into the DOM node.
125 node.setNodeValue(newURL);
// Timestamp for the provenance comment; formatted with the JVM's default time zone.
135 DateFormat df =
new SimpleDateFormat(
"yyyy-MM-dd HH:mm:ss");
136 Date today = Calendar.getInstance().getTime();
// Wherever "?>" is directly followed (ignoring whitespace) by an XML comment,
// insert a "Handle PIDs generated by ..." comment in between. Text without that
// pattern is left as it is. newCmdiXmlInStringOneLine is defined on lines not shown.
137 String newCmdiXmlInString = newCmdiXmlInStringOneLine.replaceAll(
"\\?>\\s*<!\\-\\-",
"?>\n<!--Handle PIDs generated by HandlePIDs.java on "+df.format(today)+
"-->\n<!--");
144 stats.
addFix(
function, cd,
"Handle PIDs were retrieved and file was updated.");
// Each failure type below is recorded as a critical report entry instead of being rethrown.
145 }
catch (UnsupportedEncodingException ex) {
146 stats.
addCritical(
function, cd,
"UnsupportedEncodingException: " + ex);
147 }
catch (XPathExpressionException ex) {
148 stats.
addCritical(
function, cd,
"XPathExpressionException: " + ex);
149 }
catch (TransformerException ex) {
150 stats.
addCritical(
function, cd,
"TransformerException: " + ex);
// Lookup request: asks the handle service which handle, if any, is registered
// for handleURL and returns the raw response text. The signature line is not
// part of this excerpt - presumably this is getPID(String handleURL), confirm.
// Build the HTTP Basic credentials from the configured user and password.
162 String authString = EpicApiUser +
":" + EpicApiPass;
// NOTE(review): Base64 here is the JDK-internal com.sun...xerces class imported at
// the top of the file; java.util.Base64 is the public replacement.
163 String authStringEnc = Base64.encode(authString.getBytes(
"UTF-8"));
// NOTE(review): handleURL is concatenated into the query string without URL-encoding.
165 URL url =
new URL(HandleEndpoint + HandlePrefix +
"?URL=" + handleURL);
166 HttpURLConnection urlConnection = (HttpURLConnection) url.openConnection();
167 urlConnection.setRequestProperty(
"Authorization",
"Basic " + authStringEnc);
// Ask for a plain-text answer.
168 urlConnection.setRequestProperty(
"Accept",
"text/plain");
// NOTE(review): rc is not used in any of the lines visible here.
170 int rc = urlConnection.getResponseCode();
171 InputStream is = urlConnection.getInputStream();
// NOTE(review): no charset given, so the platform default decodes the response;
// no close() of the reader is visible in this excerpt.
172 InputStreamReader isr =
new InputStreamReader(is);
// Copy the response into sb in chunks of up to 1024 chars.
175 char[] charArray =
new char[1024];
176 StringBuffer sb =
new StringBuffer();
177 while ((numCharsRead = isr.read(charArray)) > 0) {
178 sb.append(charArray, 0, numCharsRead);
181 return sb.toString();
// NOTE(review): HttpURLConnection.getInputStream() commonly reports HTTP 404 as
// FileNotFoundException, so this is presumably the "no handle registered yet"
// path - confirm; what is returned afterwards is not visible here.
182 }
catch(FileNotFoundException fnfe) {
183 System.out.println(
"FileNotFound? " + fnfe.toString());
// Registration request: POSTs handleURL to the handle service and extracts the
// new handle from the XML answer. The signature line is not part of this
// excerpt - presumably this is registerPID(String handleURL), confirm.
189 throws ParserConfigurationException, SAXException, IOException{
// HTTP Basic credentials, built the same way as for the lookup request.
195 String authString = EpicApiUser +
":" + EpicApiPass;
196 String authStringEnc = Base64.encode(authString.getBytes(
"UTF-8"));
// Target is the prefix collection itself: <HandleEndpoint><HandlePrefix>/
198 URL
object=
new URL(HandleEndpoint + HandlePrefix +
"/");
199 HttpURLConnection con = (HttpURLConnection)
object.openConnection();
200 con.setRequestProperty(
"Authorization",
"Basic " + authStringEnc);
201 con.setDoOutput(
true);
202 con.setDoInput(
true);
// The request body is JSON, the answer is requested as XML.
203 con.setRequestProperty(
"Content-Type",
"application/json");
204 con.setRequestProperty(
"Accept",
"application/xml");
205 con.setRequestMethod(
"POST");
// NOTE(review): writer uses the platform default charset; flush/close are not
// visible in this excerpt.
207 OutputStreamWriter wr=
new OutputStreamWriter(con.getOutputStream());
// Payload: a one-element array with a URL-typed value.
// NOTE(review): handleURL is inserted without JSON escaping.
208 wr.write(
"[{\"type\":\"URL\",\"parsed_data\":\""+handleURL+
"\"}]");
// Read the whole response as UTF-8, line by line, re-adding "\n" after each line.
212 StringBuilder sb =
new StringBuilder();
213 BufferedReader br =
new BufferedReader(
new InputStreamReader(con.getInputStream(),
"utf-8"));
215 while ((line = br.readLine()) != null) {
216 sb.append(line +
"\n");
220 String responseInString =
""+sb.toString();
// Echo the raw service response to stdout.
221 System.out.println(responseInString);
// responseAsXml is created on a line not shown here - presumably by parsing
// responseInString with String2W3cDocument, confirm.
223 Element root = responseAsXml.getDocumentElement();
// The handle is taken from the text of the first <dd> element in the answer.
// NOTE(review): item(0) is null when the answer has no <dd>, which would throw here.
224 NodeList nList = root.getElementsByTagName(
"dd");
225 String handlePID = nList.item(0).getFirstChild().getTextContent();
// Strip all whitespace from the extracted value.
226 handlePID = handlePID.replaceAll(
"[\\s\\n]+",
"");
// Replaces the default handle prefix with the supplied one; all URLs built
// afterwards from HandlePrefix use the new value.
240 HandlePrefix = prefix;
// Resolves the CmdiData class by its fully qualified name at runtime.
// Presumably the result is added to the collection returned by getIsUsableFor();
// the surrounding lines are not part of this excerpt - confirm.
// NOTE(review): raw Class type; Class<?> would avoid the unchecked warning.
252 Class cl = Class.forName(
"de.uni_hamburg.corpora.CmdiData");
// Handling of a missing class is on lines not shown here.
254 }
catch (ClassNotFoundException ex) {
// Human-readable summary of what this class does, assembled from two literals.
265 String description =
"This class loads CMDI data and retrieves already existing or newly registered " 266 +
"Handle PIDs for URLs from specific XML elements.";
// Corpus-level entry point: runs the per-file function on every CmdiData of
// the corpus and merges each resulting report into stats.
// The declaration of stats and the return statement are not part of this excerpt.
271 public Report function(
Corpus c)
throws SAXException, IOException, ParserConfigurationException {
// One call of the per-file overload for each CMDI file in the corpus.
273 for(
CmdiData cmdid : c.getCmdidata()){
274 stats.
merge(
function(cmdid));
void setUser(String user)
void setHandlePrefix(String prefix)
void addNote(String statId, String description)
void setPass(String pass)
Collection< Class<?extends CorpusData > > getIsUsableFor()
String registerPID(String handleURL)
static org.w3c.dom.Document String2W3cDocument(String stringRespresentingDocument)
void addCritical(String description)
String getPID(String handleURL)
static org.w3c.dom.Document JdomDocument2W3cDocument(org.jdom.Document jdomDoc)
void addWarning(String statId, String description)
static String W3cDocument2String(org.w3c.dom.Document doc)
void addException(Throwable e, String description)
void updateUnformattedString(String newUnformattedString)
void write(CorpusData cd, URL url)
void addFix(String statId, CorpusData cd, String description)