6 package de.uni_hamburg.corpora.publication;
14 import java.io.IOException;
15 import java.util.ArrayList;
16 import java.util.Arrays;
17 import java.util.Collection;
18 import java.util.List;
19 import javax.xml.parsers.DocumentBuilder;
20 import javax.xml.parsers.DocumentBuilderFactory;
21 import javax.xml.parsers.ParserConfigurationException;
22 import javax.xml.transform.TransformerException;
23 import javax.xml.xpath.XPathExpressionException;
24 import org.w3c.dom.Document;
25 import org.w3c.dom.Element;
26 import org.w3c.dom.NodeList;
27 import org.xml.sax.SAXException;
// Path suffixes that file names are matched against (via endsWith) during the
// directory walk. Presumably populated from the Coma document's relPath/NSLink
// entries -- the populating code is not visible in this excerpt; confirm.
37 List<String> fileList;
// Additional file-name suffixes that are matched alongside the fileList entries.
40 final List<String> filenamewhitelist;
// Start with an empty list of linked-file suffixes.
44 fileList =
new ArrayList<String>();
// The whitelist holds exactly two fixed suffixes. The directory walk reads
// them by index (get(0) and get(1)), so their count and order matter.
46 filenamewhitelist =
new ArrayList<String>();
47 filenamewhitelist.add(
"_score.html");
48 filenamewhitelist.add(
"_list.html");
// Record the base directory in the report as a fix entry.
60 stats.
addFix(
function, comadata,
"File:" + baseDirectory);
// Process baseDirectory; walk recurses into sub-directories.
63 walk(baseDirectory, stats);
// Completion marker written to standard output.
64 System.out.println(
"Done");
// Create a DOM parser using the factory's default configuration.
// NOTE(review): no DTD / external-entity restrictions are set on the factory
// in the visible lines -- confirm the parsed input is trusted, or harden
// the factory against XXE.
84 DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
85 DocumentBuilder db = dbf.newDocumentBuilder();
// Names of the elements whose text content is read below.
89 List<String> elementNames = Arrays.asList(
"relPath",
"NSLink");
// For every element name, visit every matching element in the document.
92 for (
int i = 0; i < elementNames.size(); i++) {
93 NodeList elements = doc.getElementsByTagName(elementNames.get(i));
94 for (
int j = 0; j < elements.getLength(); j++) {
95 Element e = (Element) elements.item(j);
96 String c = e.getTextContent();
// Rewrite both '/' and '\' to the platform separator so the value can be
// compared with file system paths.
97 c = c.replace(
'/', File.separatorChar).replace(
'\\', File.separatorChar);
// Each checked exception type below is recorded on the report together with
// cd, using a type-specific message.
103 }
catch (ParserConfigurationException ex) {
104 stats.
addException(ex,
function, cd,
"Unknown ParserConfigurationException.");
105 }
catch (TransformerException ex) {
106 stats.
addException(ex,
function, cd,
"Unknown TransformerException.");
107 }
catch (SAXException ex) {
108 stats.
addException(ex,
function, cd,
"Unknown SAXException.");
109 }
catch (IOException ex) {
110 stats.
addException(ex,
function, cd,
"Unknown IOException.");
111 }
catch (XPathExpressionException ex) {
112 stats.
addException(ex,
function, cd,
"Unknown XPathExpressionException.");
// List the direct children of path.
120 File dir =
new File(path);
// NOTE(review): File.listFiles() returns null when path is not a directory or
// an I/O error occurs; the for-each below would then throw a
// NullPointerException unless a check exists on a line not shown here.
121 File[] foundFiles = dir.listFiles();
123 for (File file : foundFiles) {
// Directories whose name does not start with "." are descended into.
124 if (file.isDirectory() && (!file.getName().startsWith(
"."))) {
126 walk(file.getAbsolutePath(), stats);
127 }
// Remaining entries are handled here unless their name starts with ".".
else if (!file.getName().startsWith(
".")) {
// Rewrite both slash styles to the platform separator so the path can be
// compared with the fileList entries.
131 String name = file.getAbsolutePath().replace(
'/', File.separatorChar).replace(
'\\', File.separatorChar);
// NOTE(review): boxed Boolean; a primitive boolean would avoid the boxing.
134 Boolean keepFile =
false;
// NOTE(review): the two whitelist suffix checks sit inside the loop over
// fileList, so they are only evaluated when fileList has at least one entry.
// With an empty fileList no file can match here -- confirm this is intended.
135 for (
int i = 0; i < fileList.size(); i++) {
136 String linkedFile = fileList.get(i);
137 if (name.endsWith(linkedFile) || name.endsWith(filenamewhitelist.get(0)) || name.endsWith(filenamewhitelist.get(1))) {
// Report entry for a file that is kept.
143 stats.
addNote(
function, comadata,
"Keeping: " + name +
" (found in Coma and file system).");
// Try to delete the file; File.delete() returns true on success.
145 File FileToRemove =
new File(name);
146 if (FileToRemove.delete()) {
147 stats.
addNote(
function, comadata,
"Removed: " + name +
" (not found in Coma).");
// Warning for a failed deletion (presumably the else branch; the branch
// keyword is not visible in this excerpt).
149 stats.
addWarning(
function, comadata,
"Removal unsuccessful: " + name +
" (not found in Coma).");
// Store the value returned by c.getComaData() in comadata.
174 comadata = c.getComaData();
// Look up the ComaData class by its fully qualified name at run time.
// NOTE(review): raw Class type; Class<?> would avoid the raw-type warning.
186 Class cl = Class.forName(
"de.uni_hamburg.corpora.ComaData");
// Handles the case where the class cannot be found (handler body not visible here).
188 }
catch (ClassNotFoundException ex) {
// Human-readable summary of what this class does.
196 String description =
"This class takes a coma file and removes all files from the directory/subdirectories " 197 +
"which are not linked somewhere in the coma file. ";
Collection< Class<?extends CorpusData > > getIsUsableFor()
void addNote(String statId, String description)
String toSaveableString()
void addWarning(String statId, String description)
Report generateFileList(CorpusData cd)
Report removeFiles(CorpusData comadata)
static InputStream String2InputStream(String s)
void addException(Throwable e, String description)
void addFix(String statId, CorpusData cd, String description)
void walk(String path, Report stats)