public class FLExDocumentReader extends FLExReader implements FLExText
This class parses an XML file following the model of FLExText.
In the process, the FLEx XML values are mapped onto a Salt model following these principles:
- `interlinear-text` elements are mapped to `SDocument`s.
- `paragraph`s are mapped to `SSpan`s, which span over
- `phrase`s, which are mapped to `SSpan`s, which span over
- `word`s and `morph`s, both of which are mapped to `SToken`s, which are tied to an `STextualDS` each.
- Annotations (`item`s) for each layer are tied to the respective `SNode` (`SToken` or `SSpan`) as an `SAnnotation` to the respective `SNode`.

Modifier and Type | Class and Description |
---|---|
private static class |
FLExDocumentReader.Element |
Modifier and Type | Field and Description |
---|---|
private Map<org.apache.commons.lang3.tuple.Triple<String,String,String>,String> |
annotationMap |
private List<org.apache.commons.lang3.tuple.Triple<String,String,String>> |
annotationsToDrop |
private FLExImporterProperties |
fLExProperties |
private org.corpus_tools.salt.common.SDocumentGraph |
graph |
private com.google.common.collect.Table<String,String,String> |
interlinearTextItems |
private boolean |
isItemActiveElement |
private FLExDocumentReader.Element |
itemParent |
private List<org.corpus_tools.salt.core.SAnnotation> |
languages |
private static org.slf4j.Logger |
logger |
private org.corpus_tools.salt.common.STextualDS |
morphDS |
private Vector<org.corpus_tools.salt.common.SToken> |
morphemes |
private com.google.common.collect.Table<String,String,String> |
morphItems |
private org.corpus_tools.salt.common.SSpan |
paragraph |
private int |
paragraphCount |
private com.google.common.collect.Table<String,String,String> |
phraseItems |
private Vector<org.corpus_tools.salt.common.SSpan> |
phrases |
private org.corpus_tools.salt.common.STextualDS |
wordDS |
private boolean |
wordHasMorphemes |
private com.google.common.collect.Table<String,String,String> |
wordItems |
private int |
wordLength |
private Vector<org.corpus_tools.salt.common.SToken> |
words |
private int |
wordTimelineStart |
properties
FLEX__ANALYSIS_STATUS_ATTR, FLEX__LANG_ATTR, FLEX__TYPE_ATTR, FLEX_ITEM_TYPE__PUNCT, FLEX_ITEM_TYPE__TXT, FLEX_LANGUAGE__ENCODING_ATTR, FLEX_LANGUAGE__FONT_ATTR, FLEX_LANGUAGE__VERNACULAR_ATTR, ITEM_LAYER_MORPH, ITEM_LAYER_PHRASE, ITEM_LAYER_WORD, PROCESSING__ACTIVE_ELEMENT_VALUE, PROCESSING__KEY_VALUE_SEPARATOR, PROCESSING__UNDERSCORE, TAG_INTERLINEAR_TEXT, TAG_ITEM, TAG_LANGUAGE, TAG_LANGUAGES, TAG_MORPH, TAG_MORPHEMES, TAG_PARAGRAPH, TAG_PHRASE, TAG_SEQNUM, TAG_WORD, TAG_WORDS, TOKEN_LAYER_LEXICAL, TOKEN_LAYER_MORPHOLOGICAL
Constructor and Description |
---|
FLExDocumentReader(org.corpus_tools.salt.common.SDocument document,
org.corpus_tools.pepper.modules.PepperModuleProperties pepperModuleProperties) |
Modifier and Type | Method and Description |
---|---|
private boolean |
actionAnnotations(String layer,
String language,
String name,
Collection<org.apache.commons.lang3.tuple.Triple<String,String,String>> triples) |
void |
characters(char[] ch,
int start,
int length) |
private boolean |
dropAnnotation(String layer,
String name) |
private boolean |
dropAnnotation(String layer,
String language,
String name) |
void |
endElement(String uri,
String localName,
String qName) |
void |
fatalError(SAXParseException e) |
private String |
getNewAnnotationName(String layer,
String name) |
private String |
getNewAnnotationName(String layer,
String language,
String name) |
private boolean |
mapAnnotation(String layer,
String name) |
private boolean |
mapAnnotation(String layer,
String language,
String name) |
void |
startElement(String uri,
String localName,
String qName,
Attributes attributes) |
createAnnotation, createLanguagedAnnotation
attributeDecl, comment, elementDecl, endCDATA, endDTD, endEntity, externalEntityDecl, getExternalSubset, internalEntityDecl, resolveEntity, resolveEntity, startCDATA, startDTD, startEntity
endDocument, endPrefixMapping, error, ignorableWhitespace, notationDecl, processingInstruction, setDocumentLocator, skippedEntity, startDocument, startPrefixMapping, unparsedEntityDecl, warning
private static final org.slf4j.Logger logger
private final org.corpus_tools.salt.common.SDocumentGraph graph
private final org.corpus_tools.salt.common.STextualDS morphDS
private final org.corpus_tools.salt.common.STextualDS wordDS
private boolean isItemActiveElement
private FLExDocumentReader.Element itemParent
private Vector<org.corpus_tools.salt.common.SToken> morphemes
private Vector<org.corpus_tools.salt.common.SToken> words
private Vector<org.corpus_tools.salt.common.SSpan> phrases
private List<org.corpus_tools.salt.core.SAnnotation> languages
private org.corpus_tools.salt.common.SSpan paragraph
private int wordLength
private int paragraphCount
private final com.google.common.collect.Table<String,String,String> interlinearTextItems
private int wordTimelineStart
private boolean wordHasMorphemes
private List<org.apache.commons.lang3.tuple.Triple<String,String,String>> annotationsToDrop
private Map<org.apache.commons.lang3.tuple.Triple<String,String,String>,String> annotationMap
private FLExImporterProperties fLExProperties
public FLExDocumentReader(org.corpus_tools.salt.common.SDocument document, org.corpus_tools.pepper.modules.PepperModuleProperties pepperModuleProperties)
Parameters:
document - The document to be read into
pepperModuleProperties - The properties to be applied to the conversion process

public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException
Specified by: startElement in interface ContentHandler
Overrides: startElement in class DefaultHandler
Throws: SAXException
public void characters(char[] ch, int start, int length) throws SAXException
Specified by: characters in interface ContentHandler
Overrides: characters in class DefaultHandler
Throws: SAXException
public void endElement(String uri, String localName, String qName) throws SAXException
Specified by: endElement in interface ContentHandler
Overrides: endElement in class DefaultHandler
Throws: SAXException
public void fatalError(SAXParseException e)
Specified by: fatalError in interface ErrorHandler
Overrides: fatalError in class DefaultHandler
private boolean actionAnnotations(String layer, String language, String name, Collection<org.apache.commons.lang3.tuple.Triple<String,String,String>> triples)
Copyright © 2011–2018 Humboldt-Universität zu Berlin. All rights reserved.