package ro.sync.lexer.dtd;
import ro.sync.lexer.AbstractLexer;

// Everything above the first "%%" is user code that JFlex copies in front of the
// generated class, so this annotation ends up on the generated DTDLexer class.
@SuppressWarnings("unused")
%%

// Generate a public class named DTDLexer that extends the project's AbstractLexer.
%public 
%class DTDLexer
%extends AbstractLexer
// Scan the full Unicode character set.
%unicode
// Enable character counting (yychar).
%char
// Return type of the generated scanning method.
%type ro.sync.lexer.Symbol
 
// Exception type thrown by the generated scanner on an internal scan error.
%scanerror ro.sync.lexer.LexerException

%{
    // ---------------------------------------------------------------------
    // Token type aliases. Each one simply forwards to a DTDTokens constant
    // so that the rule actions below can use short, local names.
    // ---------------------------------------------------------------------

    // Punctuation that is reported with the generic operator type.
    private static final byte SYM_COMMA = DTDTokens.OPERATOR;
    private static final byte SYM_PIPE = DTDTokens.OPERATOR;
    private static final byte SYM_QUESTION = DTDTokens.OPERATOR;
    private static final byte SYM_ASTERISK = DTDTokens.OPERATOR;
    private static final byte SYM_PLUS = DTDTokens.OPERATOR;

    // Round and square brackets have their own types.
    private static final byte SYM_BRACKET = DTDTokens.BRACKET;
    private static final byte SYM_SQUARE_BRACKET = DTDTokens.SQUARE_BRACKET;

    // The three keyword families.
    private static final byte SYM_KEYWORD1 = DTDTokens.KEYWORD1;
    private static final byte SYM_KEYWORD2 = DTDTokens.KEYWORD2;
    private static final byte SYM_KEYWORD3 = DTDTokens.KEYWORD3;

    // XML header and quoted strings (double and single quoted).
    private static final byte SYM_XML_HEADER = DTDTokens.XML_HEADER;
    private static final byte SYM_STRING_DQ = DTDTokens.STRING_DQ;
    private static final byte SYM_STRING_SQ = DTDTokens.STRING_SQ;

    // Comments and parameter entity references.
    private static final byte SYM_COMMENT = DTDTokens.COMMENT;
    private static final byte SYM_PARAM_ENTITY = DTDTokens.PARAM_ENTITY;

    // Any other part of a declaration.
    private static final byte SYM_DOCTYPE = DTDTokens.DOCTYPE;

    // Content that does not belong where it was found.
    private static final byte SYM_INVALID = DTDTokens.INVALID;

    /**
     * Square bracket nesting depth: incremented on "[" and decremented on "]"
     * by the EMBEDED_DOCTYPE rules, which consult it when they see ">".
     */
    private byte embededDoctypeLevel = 0;

    /**
     * Builds a lexer that has no input yet. The reader is expected to be
     * supplied afterwards through yyreset.
     */
    public DTDLexer() {
        super();
    }

    /**
     * @return the DTD_LEXER constant (declared outside of this specification).
     */
    public String getName() {
      return DTD_LEXER;
    }
%}

// Exclusive states: while one of them is active only its own rules apply.
%xstate DTD_DECLARATION, EMBEDED_DOCTYPE, SAFE_MODE, SQ_STRING, DQ_STRING, COMMENT

// Body of a quoted string: a backslash-escaped quote or any character that is
// not the quote itself.
DQStringContent =  ( \\\" | [^\"] )*
SQStringContent =  ( \\\' | [^\'] )*

// Opening quote plus body, with no closing quote.
DQUnclosedString =  \" {DQStringContent}
SQUnclosedString =  \' {SQStringContent}

// Complete string: opening quote, body, closing quote.
DQString =  \" {DQStringContent} \"
SQString =  \' {SQStringContent} \'

// XML header: "<?xml ", any run of characters other than "?", then "?>".
XmlHeader = "<?xml " [^\?]* "?>"

// Parameter entity reference: "%", the entity name, then the terminating ";".
// The name part must not contain ";" itself. Otherwise the longest-match rule
// lets a single reference run on to a later semicolon (for example
// "%a;<!--c;" would be reported as one parameter entity token).
ParamEntity = "%" [^ \t%;]* ";"

// Declaration keywords; all of them start with "!".
Keyword1 = "!ATTLIST" | "!ELEMENT" | "!ENTITY" | "!IGNORE" | "!INCLUDE" | "!NOTATION"

// Attribute default keywords; all of them start with "#".
Keyword2 = "#IMPLIED" | "#REQUIRED" | "#FIXED"

// Content model, attribute type and external identifier keywords.
Keyword3 = "ANY" | "EMPTY" | "#PCDATA" | "CDATA" | "PUBLIC"
            | "ID" | "IDREF" | "IDREFS"
            | "NMTOKEN" | "NMTOKENS"
            | "ENTITY" | "ENTITIES"

// Any character that is not whitespace (space, tab), markup, a quote, a bracket
// or an operator.
GeneralChar = [^ \t<>!#\'\"\[\]\(\),|?*+]

%%

// Default state: content found outside of any declaration.
<YYINITIAL> {
    // XML Header
    {XmlHeader}                 {   return symbol(SYM_XML_HEADER);          }
    // Comments
    "<!--"                      {
                                    // Save state to return to.
                                    pushState(YYINITIAL);
                                    yybegin(COMMENT);
                                    return symbol(SYM_COMMENT);
                                }
    // Embedded doctype...
    "<!["                       {
                                    // Give the whole match back so that the
                                    // EMBEDED_DOCTYPE rules tokenize it.
                                    yypushback(yylength());
                                    yybegin(EMBEDED_DOCTYPE);
                                }
    // DTD declarations...
    "<!"                        {
                                    // Give the whole match back so that the
                                    // DTD_DECLARATION rules tokenize it.
                                    yypushback(yylength());
                                    yybegin(DTD_DECLARATION);
                                }
    // Any other "<" is invalid here; SAFE_MODE collects what follows it.
    "<"                         {
                                    yybegin(SAFE_MODE);
                                    return symbol(SYM_INVALID);
                                }
    // Parameter entity
    {ParamEntity}               {   return symbol(SYM_PARAM_ENTITY);        }
    // White spaces are emitted separately.
    [ \t]+                      {   return symbol(SYM_DOCTYPE);             }
    // Free text outside of a declaration is invalid. At least one character
    // is required: a rule that can match the empty string would produce a
    // token without consuming any input.
    {GeneralChar}+              {   return symbol(SYM_INVALID);             }
    // Fallback for the characters excluded from GeneralChar that no rule
    // above handles in this state (">", ",", quotes, brackets, ...). Without
    // it such a character cannot be matched at all.
    [^]                         {   return symbol(SYM_INVALID);             }
}

// Inside a "<!...>" declaration; left again on the closing ">".
<DTD_DECLARATION> {
    // Keywords.
    {Keyword1}                  {   return symbol(SYM_KEYWORD1);            }
    {Keyword2}                  {   return symbol(SYM_KEYWORD2);            }
    {Keyword3}                  {   return symbol(SYM_KEYWORD3);            }
    // Parameter entity
    {ParamEntity}               {   return symbol(SYM_PARAM_ENTITY);        }
    // Operators and punctuation marks.
    ","                         {   return symbol(SYM_COMMA);               }
    "|"                         {   return symbol(SYM_PIPE);                }
    "[" | "]"                   {   return symbol(SYM_SQUARE_BRACKET);      }
    "(" | ")"                   {   return symbol(SYM_BRACKET);             }
    "?"                         {   return symbol(SYM_QUESTION);            }
    "+"                         {   return symbol(SYM_PLUS);                }
    "*"                         {   return symbol(SYM_ASTERISK);            }
    // Part of doctype.
    "!"                         {   return symbol(SYM_DOCTYPE);             }
    // Strings that are closed within the available input.
    {DQString}                  {   return symbol(SYM_STRING_DQ);           }
    {SQString}                  {   return symbol(SYM_STRING_SQ);           }
    // Unclosed strings: these only win when no closing quote follows, so
    // scanning continues in the string state, which returns here once the
    // closing quote shows up.
    {DQUnclosedString}          {
                                    pushState(DTD_DECLARATION);
                                    yybegin(DQ_STRING);
                                    return symbol(SYM_STRING_DQ);
                                }
    {SQUnclosedString}          {
                                    pushState(DTD_DECLARATION);
                                    yybegin(SQ_STRING);
                                    return symbol(SYM_STRING_SQ);
                                }
    "<!--"                      {
                                    // Save state to return to.
                                    pushState(DTD_DECLARATION);
                                    yybegin(COMMENT);
                                    return symbol(SYM_COMMENT);
                                }
    // White spaces are emitted separately.
    [ \t]+                      {   return symbol(SYM_DOCTYPE);             }
    // Match anything else different from the markup. At least one character
    // is required: a rule that can match the empty string would produce a
    // token without consuming any input.
    {GeneralChar}+              {   return symbol(SYM_DOCTYPE);             }
    // End the declaration.
    ">"                         {
                                    yybegin(YYINITIAL);
                                    return symbol(SYM_DOCTYPE);
                                }
    "<"                         {   return symbol(SYM_DOCTYPE);             }
    // Fallback for a character no rule above accepts, such as a "#" that
    // does not start one of the keywords.
    [^]                         {   return symbol(SYM_DOCTYPE);             }
}

// Inside a section opened by "<![". Square brackets are counted so that only a
// ">" seen at nesting depth zero (or below) ends the section.
<EMBEDED_DOCTYPE> {
    "["                         {
                                    embededDoctypeLevel++;
                                    return symbol(SYM_SQUARE_BRACKET);
                                }
    "]"                         {
                                    embededDoctypeLevel--;
                                    return symbol(SYM_SQUARE_BRACKET);
                                }
    "<"                         {   return symbol(SYM_DOCTYPE);             }
    ">"                         {
                                    if (embededDoctypeLevel <= 0) {
                                        // Start the next section from a clean
                                        // counter: unbalanced "]" may have
                                        // driven it below zero.
                                        embededDoctypeLevel = 0;
                                        yybegin(YYINITIAL);
                                    }
                                    return symbol(SYM_DOCTYPE);
                                }
    // Parameter entity
    {ParamEntity}               {   return symbol(SYM_PARAM_ENTITY);        }
    // Keywords.
    {Keyword1}                  {   return symbol(SYM_KEYWORD1);            }
    {Keyword2}                  {   return symbol(SYM_KEYWORD2);            }
    {Keyword3}                  {   return symbol(SYM_KEYWORD3);            }
    // Operators and punctuation marks. Square brackets are handled by the
    // counting rules at the top of this state.
    ","                         {   return symbol(SYM_COMMA);               }
    "|"                         {   return symbol(SYM_PIPE);                }
    "(" | ")"                   {   return symbol(SYM_BRACKET);             }
    "?"                         {   return symbol(SYM_QUESTION);            }
    "+"                         {   return symbol(SYM_PLUS);                }
    "*"                         {   return symbol(SYM_ASTERISK);            }
    // Part of doctype.
    "!"                         {   return symbol(SYM_DOCTYPE);             }
    // Strings that are closed within the available input.
    {DQString}                  {   return symbol(SYM_STRING_DQ);           }
    {SQString}                  {   return symbol(SYM_STRING_SQ);           }
    // Unclosed strings: these only win when no closing quote follows, so
    // scanning continues in the string state, which returns here once the
    // closing quote shows up.
    {DQUnclosedString}          {
                                    pushState(EMBEDED_DOCTYPE);
                                    yybegin(DQ_STRING);
                                    return symbol(SYM_STRING_DQ);
                                }
    {SQUnclosedString}          {
                                    pushState(EMBEDED_DOCTYPE);
                                    yybegin(SQ_STRING);
                                    return symbol(SYM_STRING_SQ);
                                }
    "<!--"                      {
                                    // Save state to return to.
                                    pushState(EMBEDED_DOCTYPE);
                                    yybegin(COMMENT);
                                    return symbol(SYM_COMMENT);
                                }
    // White spaces are emitted separately.
    [ \t]+                      {   return symbol(SYM_DOCTYPE);             }
    // Match anything else different from the markup. At least one character
    // is required: a rule that can match the empty string would produce a
    // token without consuming any input.
    {GeneralChar}+              {   return symbol(SYM_DOCTYPE);             }
    // Fallback for a character no rule above accepts, such as a "#" that
    // does not start one of the keywords.
    [^]                         {   return symbol(SYM_DOCTYPE);             }
}

// Entered after a stray "<" in YYINITIAL. Characters are counted in cLen and
// handed to flush() as one SYM_INVALID token when ">", another "<" or the end
// of input is reached.
<SAFE_MODE> {
    ">"                         {
                                    // The ">" is counted as part of the token.
                                    cLen++;
                                    yybegin(YYINITIAL);
                                    return flush(SYM_INVALID);
                                }
    "<"                         {
                                    // Leave the "<" in the input so that
                                    // YYINITIAL decides what it starts.
                                    yypushback(1);
                                    yybegin(YYINITIAL);
                                    return flush(SYM_INVALID);
                                }
    // Any other character, line terminators included ("." would leave them
    // without a matching rule in this exclusive state).
    [^]                         {   cLen++;                                 }
    <<EOF>>                     {   return flush(SYM_INVALID);              }
}

// Continuation of a double quoted string whose closing quote was not found
// when the string was opened. Characters are counted in cLen and handed to
// flush() as one SYM_STRING_DQ token.
<DQ_STRING> {
    "\""                        {
                                    // Closing quote: go back to the state that
                                    // was saved when the string was opened.
                                    yybegin(popState());
                                    cLen++;
                                    return flush(SYM_STRING_DQ);
                                }
    // A backslash-escaped quote does not close the string.
    "\\\""                      {   cLen += 2;                              }
    // Any other character, line terminators included ("." would leave them
    // without a matching rule in this exclusive state).
    [^]                         {   cLen++;                                 }
    <<EOF>>                     {   return flush(SYM_STRING_DQ);            }
}

// Continuation of a single quoted string whose closing quote was not found
// when the string was opened. Characters are counted in cLen and handed to
// flush() as one SYM_STRING_SQ token.
<SQ_STRING> {
    "'"                         {
                                    // Closing quote: go back to the state that
                                    // was saved when the string was opened.
                                    yybegin(popState());
                                    cLen++;
                                    return flush(SYM_STRING_SQ);
                                }
    // A backslash-escaped quote does not close the string.
    "\\\'"                      {   cLen += 2;                              }
    // Any other character, line terminators included ("." would leave them
    // without a matching rule in this exclusive state).
    [^]                         {   cLen++;                                 }
    <<EOF>>                     {   return flush(SYM_STRING_SQ);            }
}

// Body of a comment, entered after "<!--" from one of the other states.
<COMMENT> {
  // Comment end: go back to the state saved when the comment was opened.
  "-->"                         {
                                    yybegin(popState());
                                    return symbol(SYM_COMMENT);
                                }
  // Only reached when no "-->" follows in the available input (otherwise the
  // longer "up to -->" rule below wins). Line terminators are included
  // because "." would leave them without a matching rule in this state.
  [^]                           {   cLen ++;                                }
  // Everything up to and including the next "-->". The terminator is given
  // back so that the rule at the top emits it and restores the state.
  ~"-->"                        {
                                    yypushback(3);
                                    return symbol(SYM_COMMENT);
                                }
  <<EOF>>                       {   return flush(SYM_COMMENT);              }
}