lexer grammar MasterLexer;

// Token types declared up front so that rule actions and lexer commands can refer
// to them (for example via setType). OPEN_HOMOLOGOUS_FORM and CLOSE_HOMOLOGOUS_FORM
// have no rule of their own and are only assigned inside actions. TEI_FILE_HEADER
// and TEI_FILE_FOOTER are not produced by any rule visible in this grammar
// (presumably they are created outside the lexer - TODO confirm).
tokens { WS, PUNCTUATION, LETTER, OPEN_HOMOLOGOUS_FORM, HOMOLOGOUS_LETTER, CLOSE_HOMOLOGOUS_FORM, X_COLON, CLOSE, SLASH_CLOSE, PIPE, LINE_BREAK, TEI_FILE_HEADER, TEI_FILE_FOOTER }



@members {
 // Mode used as the reference point by the line-break rules: they compare it
 // with LILAC_MODE to decide whether a line break pops the current mode.
 // Nothing in this grammar assigns it, so it is presumably set by the code
 // that creates the lexer (TODO confirm).
 public int originalMode;
 // Switch for XML handling in PLAIN_TEXT_MODE. When true, XML_MARKER pushes
 // XML_MODE and the comment, DTD and processing-instruction rules are allowed
 // to match; when false, XML_MARKER is re-typed as PUNCTUATION.
 public boolean xmlIsEnabled;
 // Text that closes a homologous form: HOMOLOGOUS_LETTER compares each matched
 // character with it. No default is given here.
 public String homologousModeEnd;
 // Text that opens a homologous form: PUNCTUATION and LETTER compare each
 // matched character with it. The empty default can never equal a matched
 // character, so homologous forms stay off until a value is assigned.
 public String homologousModeStart = "";
}

// Placeholder rule whose only job is to keep the default mode non-empty, which
// the original author notes ANTLR requires. It matches a literal dot and logs a
// message; per the original author the default mode is never active.
ERROR_RULE
    : '.' { System.out.println("in error rule"); }
    ;

mode PLAIN_TEXT_MODE;

// A run of carriage returns and/or line feeds.
// When originalMode is LILAC_MODE the line break is emitted as LINE_BREAK and the
// current mode is popped; otherwise the token is re-typed as WS.
// The set used to be written [\r|\n], which also contains a literal pipe (inside
// brackets the bar is an ordinary character, not an alternation), so a pipe in
// plain text was lexed as a line break. The set now holds CR and LF only.
LINE_BREAK
    : [\r\n]+
      {
        if (originalMode == LILAC_MODE) {
            popMode();
        } else {
            setType(WS);
        }
      }
    ;
// Horizontal whitespace (spaces and tabs), kept as a WS token.
WS
    : (' ' | '\t')+
    ;

// XML comment. Only allowed to match while xmlIsEnabled is true; the whole
// comment is then discarded.
COMMENT_MODE
    : '<!--' {xmlIsEnabled}? .*? '-->' -> skip
    ;

// Any other "<! ... >" declaration. Only allowed to match while xmlIsEnabled is
// true; discarded.
DTD_MODE
    : '<!' {xmlIsEnabled}? .*? '>' -> skip
    ;

// "<? ... ?>" section. Only allowed to match while xmlIsEnabled is true;
// discarded.
SPECIAL_MODE
    : '<?' {xmlIsEnabled}? .*? '?>' -> skip
    ;
// A single '<'. With XML handling switched off it is just punctuation; with XML
// handling switched on it keeps the XML_MARKER type and the lexer enters XML_MODE.
XML_MARKER
    : '<'
      {
        if (!xmlIsEnabled) {
            setType(PUNCTUATION);
        } else {
            pushMode(XML_MODE);
        }
      }
    ;

// One punctuation character.
// The ranges were previously packed into a single bracket set that used " | " as
// a visual separator. Inside brackets those spaces and bars are literal set
// members, the set listed U+005B-U+0060 twice, and it ran across several lines.
// It is now an explicit alternation of the same ranges. The only character
// dropped is the stray space, which the earlier WS rule already claims.
// U+0024 and U+003C are deliberately left out, exactly as before.
//
// If the matched character equals homologousModeStart the token is re-typed as
// OPEN_HOMOLOGOUS_FORM and the lexer enters HOMOLOGOUS_FORM_MODE.
PUNCTUATION
    : ( '\u0021'..'\u0023'
      | '\u0025'..'\u002F'
      | '\u003A'..'\u003B'
      | '\u003D'..'\u0040'
      | '\u005B'..'\u0060'
      | '\u007B'..'\u007F'
      | '\u0300'..'\u036F'
      | '\u2000'..'\u2027'
      | '\u2030'..'\u205F'
      )
      {
        if (getText().equals(homologousModeStart)) {
            setType(OPEN_HOMOLOGOUS_FORM);
            pushMode(HOMOLOGOUS_FORM_MODE);
        }
      }
    ;

// Catch-all for PLAIN_TEXT_MODE: any single character that no earlier rule in the
// mode claimed. As with PUNCTUATION, a character equal to homologousModeStart is
// re-typed as OPEN_HOMOLOGOUS_FORM and switches the lexer to HOMOLOGOUS_FORM_MODE.
LETTER
    : .
      {
        if (getText().equals(homologousModeStart)) {
            pushMode(HOMOLOGOUS_FORM_MODE);
            setType(OPEN_HOMOLOGOUS_FORM);
        }
      }
    ;


mode HOMOLOGOUS_FORM_MODE;

// Every character inside a homologous form is a HOMOLOGOUS_LETTER, except the one
// equal to homologousModeEnd: that one becomes CLOSE_HOMOLOGOUS_FORM and pops the
// mode.
HOMOLOGOUS_LETTER
    : .
      {
        if (getText().equals(homologousModeEnd)) {
            setType(CLOSE_HOMOLOGOUS_FORM);
            popMode();
        }
      }
    ;

mode LILAC_MODE;

// Spaces and tabs are discarded in this mode.
LILAC_WS
    : (' ' | '\t')+ -> skip
    ;

// "@@@" comment running to the end of the line (the line feed itself is not
// consumed). The lexer then continues in PLAIN_TEXT_MODE.
LILAC_COMMENT
    : '@@@' ~[\n]* -> pushMode(PLAIN_TEXT_MODE)
    ;

// "#" comment running to the end of the line; reported with the LILAC_COMMENT
// type and handled the same way.
LILAC_HASH_COMMENT
    : '#' ~[\n]* -> type(LILAC_COMMENT), pushMode(PLAIN_TEXT_MODE)
    ;

// A pipe switches the lexer to PLAIN_TEXT_MODE for what follows.
PIPE
    : '|' -> pushMode(PLAIN_TEXT_MODE)
    ;

// Square brackets; by their names they delimit a speaker.
OPEN_SPEAKER
    : '['
    ;

CLOSE_SPEAKER
    : ']'
    ;
// A single digit 1-9. This was written [1|2|...|9]; inside brackets the bars are
// literal members, so the set also contained the pipe character.
NON_ZERO_DIGIT : [1-9];

// The digit zero.
ZERO : [0];

// An ASCII letter or an apostrophe.
POSSIBLE_SPEAKER : [a-zA-Z\'];

// Any single character that is not a square bracket, a digit, an ASCII letter or
// an apostrophe, i.e. anything the four character classes above do not cover.
// This was written ~[OPEN_SPEAKER|CLOSE_SPEAKER|...], but rule names are not
// expanded inside brackets: that set excluded the letters spelling those names
// plus underscore and pipe. Because every uppercase letter and the pipe are
// claimed by earlier rules in this mode, the practical effect was that an
// underscore matched no rule at all. It is now lexed as DELIMITER.
DELIMITER
    : ~( '[' | ']' | '0'..'9' | 'a'..'z' | 'A'..'Z' | '\'' )
    ;
// Angle brackets; by their names they delimit a character name.
// NOTE(review): DELIMITER is declared earlier in this mode and also matches '<'
// and '>' as single characters. ANTLR resolves equal-length matches in favour of
// the rule declared first, so these two rules look unreachable - confirm whether
// they should be moved above DELIMITER.
OPEN_CHARACTER_NAME : '<';
CLOSE_CHARACTER_NAME : '>';


mode XML_MODE;

// "/>" ends a self-closing tag and leaves XML_MODE.
SLASH_CLOSE
    : '/>' -> popMode
    ;

// ">" ends a tag and leaves XML_MODE.
CLOSE
    : '>' -> popMode
    ;

// "?>" ends a "<? ... ?>" construct and leaves XML_MODE.
SPECIAL_CLOSE
    : '?>' -> popMode
    ;

// A lone "?" inside a tag (for example the one following the opening '<').
SPECIAL_OPEN
    : '?'
    ;

SLASH
    : '/'
    ;

EQUALS
    : '='
    ;

X_COLON
    : ':'
    ;

// Attribute value in double or single quotes. The value may not contain '<' or
// the quote character that delimits it.
STRING
    : '"' ~[<"]* '"'
    | '\'' ~[<']* '\''
    ;

// Tag or attribute name: one NameStartChar followed by any number of NameChar.
Name        :   NameStartChar NameChar* ;

// A single space or tab inside a tag is discarded.
S           :   [ \t]               -> skip ;

// A run of carriage returns and/or line feeds inside a tag.
// When originalMode is LILAC_MODE it is emitted as LINE_BREAK and XML_MODE is
// popped; otherwise it is skipped.
// The set used to be written [\r|\n], which also contains a literal pipe, so a
// pipe inside a tag was treated as a line break. It now holds CR and LF only.
// NOTE(review): popMode() here pops a single mode. Whether that is enough to get
// back to LILAC_MODE depends on how XML_MODE was entered - confirm.
LB
    : [\r\n]+
      {
        if (originalMode == LILAC_MODE) {
            setType(LINE_BREAK);
            popMode();
        } else {
            skip();
        }
      }
    ;

// One or more PUNC or DIGIT characters.
AllowedInSpeakerTag      :   (PUNC|DIGIT)+;


// A single ASCII decimal digit.
fragment
DIGIT
    : '0'..'9'
    ;

// An opening parenthesis, a closing parenthesis or an apostrophe.
// This was written ['('|')'|'\''], which is one bracket set, not an alternation
// of quoted literals: besides the three intended characters it also contained
// the pipe. The set now lists exactly the three characters.
fragment
PUNC
    : [()\']
    ;

// A character allowed after the first character of a Name: any NameStartChar, a
// digit, hyphen, full stop, U+00B7, or a character in U+0300-U+036F or
// U+203F-U+2040.
// NOTE(review): the XML 1.0 NameChar production also allows an underscore, which
// is absent here - presumably on purpose, confirm.
fragment
NameChar
    : NameStartChar
    | DIGIT
    | [\-.\u00B7]
    | [\u0300-\u036F]
    | [\u203F-\u2040]
    ;

// A character allowed at the start of a Name: a colon, an ASCII letter, or a
// character from one of the listed Unicode ranges.
fragment
NameStartChar
    : ':'
    | 'a'..'z'
    | 'A'..'Z'
    | [\u2070-\u218F]
    | [\u2C00-\u2FEF]
    | [\u3001-\uD7FF]
    | [\uF900-\uFDCF]
    | [\uFDF0-\uFFFD]
    ;