package ro.sync.lexer.python;
import ro.sync.lexer.AbstractLexer;

@SuppressWarnings("unused")
%%

// The generated scanner is a public class named PythonLexer that extends AbstractLexer.
%public 
%class PythonLexer
%extends AbstractLexer
// Scan over the full Unicode character set.
%unicode
// Turn on character counting (yychar) in the generated scanner.
%char
// Return type of the generated scanning method.
%type ro.sync.lexer.Symbol
 
// Exception type raised by the generated scanner on a scan error.
%scanerror ro.sync.lexer.LexerException

%{
    // Token type codes used by the rule actions; each one mirrors the
    // PythonTokens constant of the same name.

    // Brackets and operators.
    private static final byte SYM_BRACKET = PythonTokens.BRACKET;
    private static final byte SYM_SQUARE_BRACKET = PythonTokens.SQUARE_BRACKET;
    private static final byte SYM_CURLY_BRACKET = PythonTokens.CURLY_BRACKET;
    private static final byte SYM_OPERATOR = PythonTokens.OPERATOR;

    // Words: keywords, built-in names and plain identifiers.
    private static final byte SYM_KEYWORD = PythonTokens.KEYWORD;
    private static final byte SYM_TYPE = PythonTokens.TYPE;
    private static final byte SYM_IDENTIFIER = PythonTokens.IDENTIFIER;

    // Literals: double-quoted strings, single-quoted strings, numbers.
    private static final byte SYM_STRING_DQ = PythonTokens.STRING_DQ;
    private static final byte SYM_STRING_SQ = PythonTokens.STRING_SQ;
    private static final byte SYM_NUMBER = PythonTokens.NUMBER;

    // Comments, plain text and invalid input.
    private static final byte SYM_COMMENT = PythonTokens.COMMENT;
    private static final byte SYM_TEXT = PythonTokens.TEXT;
    private static final byte SYM_INVALID = PythonTokens.INVALID;

    /**
     * Builds a lexer that has no reader yet; yyreset is expected to be
     * called afterwards to reset the lexer and assign the reader.
     */
    public PythonLexer() {
        // The superclass no-argument constructor runs implicitly.
    }

    /**
     * @return the PYTHON_LEXER constant naming this lexer.
     */
    public String getName() {
        return PYTHON_LEXER;
    }
%}

// Exclusive states for triple-quoted strings: YYINITIAL switches to ML_STRING_DQ on an
// opening """ and to ML_STRING_SQ on an opening '''; each state returns to YYINITIAL
// when it sees its own triple-quote closer.
%xstate ML_STRING_DQ, ML_STRING_SQ

// Body of a one-line string: any character except the delimiter and the
// backslash, or a backslash followed by the character it escapes. Consuming
// each escape as a pair means an escaped backslash directly before the closing
// quote (as in a literal ending in two backslashes) is no longer misread as
// an escaped quote, which used to make the token swallow the following code.
DQStringContent =  ([^\"\\] | \\.)*
SQStringContent =  ([^\'\\] | \\.)*

// A string that is still open where the input ends. A trailing lone backslash
// (nothing left for it to escape) is kept inside the string token.
DQUnclosedString =  \"{DQStringContent}\\?
SQUnclosedString =  \'{SQStringContent}\\?

// A properly terminated string.
DQString =  \"{DQStringContent}\"
SQString =  \'{SQStringContent}\'

// Words highlighted as keywords, grouped by role: operators and expressions,
// control flow, exception handling, definitions and scope, import and
// statement keywords, and finally the conventional instance name "self".
Keyword = "and" | "as" | "in" | "is" | "not" | "or" | "lambda"
        | "if" | "elif" | "else" | "for" | "while" | "break" | "continue" | "pass"
        | "try" | "except" | "finally" | "raise" | "assert" | "with"
        | "def" | "class" | "return" | "yield" | "global" | "del"
        | "import" | "from" | "exec" | "print"
        | "self"
             
// Built-in constants, names and functions highlighted as types.
// "yield" used to be listed here as well, but it is already part of Keyword,
// whose rule is declared first and therefore always wins; the dead duplicate
// has been dropped.
Type = "Ellipsis" | "False" | "None" | "NotImplemented" | "True" | "__import__"
     | "__name__" | "abs" | "apply" | "bool" | "buffer" | "callable" | "chr" | "classmethod"
     | "cmp" | "coerce" | "compile" | "complex" | "delattr" | "dict" | "dir" | "divmod"
     | "enumerate" | "eval" | "execfile" | "file" | "filter" | "float" | "frozenset" | "getattr"
     | "globals" | "hasattr" | "hash" | "help" | "hex" | "id" | "input" | "int" | "intern"
     | "isinstance" | "issubclass" | "iter" | "len" | "list" | "locals" | "long" | "map" | "max"
     | "min" | "object" | "oct" | "open" | "ord" | "pow" | "property" | "range" | "raw_input"
     | "reduce" | "reload" | "repr" | "reversed" | "round" | "set" | "setattr" | "slice" | "sorted"
     | "staticmethod" | "str" | "sum" | "super" | "tuple" | "type" | "unichr" | "unicode" | "vars"
     | "xrange" | "zip"

// Operators and delimiters, one row per family: arithmetic, bitwise and shift,
// comparison, assignment (plain and augmented), remaining punctuation.
Operator = "+" | "-" | "*" | "**" | "/" | "//" | "%"
         | "<<" | ">>" | "&" | "|" | "^" | "~"
         | "<" | ">" | "<=" | ">=" | "==" | "!=" | "<>"
         | "=" | "+=" | "-=" | "*=" | "**=" | "/=" | "//=" | "%="
         | "&=" | "|=" | "^=" | ">>=" | "<<="
         | "@" | "," | ":" | "." | "`" | ";"
         
// Comment: a hash sign followed by everything {Char} matches (the rest of the line).
Comment = "#" {Char}*
// Identifier: a letter or underscore, then letters, underscores or digits.
IdentifierStart = [a-zA-Z_]
Identifier = {IdentifierStart} ({IdentifierStart} | [0-9])*

// Characters that may form a run of plain text. Every character that starts an
// operator, bracket, string or comment token has to be excluded here; otherwise
// the plain-text rule produces a longer match than the specific rule and wins.
// The operator characters < > / @ ; were previously missing from the exclusion
// list, so input such as a<b or x/y was emitted as one text token instead of
// identifier, operator, identifier.
// The exclamation mark is intentionally NOT excluded: the only other pattern
// that consumes it is the two-character "!=" operator, so excluding it would
// leave a lone exclamation mark without any matching rule.
GeneralChar = [^\\,:{}\-+|\'\"\[\] \t=*\.#\(\)$~\^&`%?\<\>\/@;]
// Any single character except a line terminator.
Char = .

// Numbers
Digit = [0-9]
Integer = {Digit}+
Long = {Integer} [lL]
OctDigit          = [0-7]
HexDigit          = [0-9a-fA-F]
HexInteger = 0 [xX] {HexDigit}+
HexLong = {HexInteger} [lL]
// Prefixed octal (0o17) and binary (0b101) integers, with an optional long suffix.
// Without these macros such literals were not recognised as numbers and fell
// through to the plain-text rule.
OctInteger = 0 [oO] {OctDigit}+ [lL]?
BinInteger = 0 [bB] [01]+ [lL]?

/* floating point literals */
F1 = {Digit}+ \. {Digit}*
F2 = \. {Digit}+
F3 = {Digit}+
Exponent = [eE] [+-]? {Digit}+

Double = ({F1}|{F2}|{F3}) {Exponent}?
// Kept for backward compatibility: a floating point literal with an f/F suffix.
Float  = {Double} [fF]

// Imaginary literals: an integer or floating point literal followed by j or J
// (for example 1j or 2.5e3J). Previously these were not recognised as numbers.
Imaginary = ({Integer} | {Double}) [jJ]

Number = {Integer} | {Long} | {HexInteger} | {HexLong} | {OctInteger} | {BinInteger}
       | {Double} | {Float} | {Imaginary}

%%

<YYINITIAL> {
    // Words. For matches of equal length the earlier rule wins, so keywords
    // take precedence over built-in names, and both over plain identifiers.
    {Keyword}                   {   return symbol(SYM_KEYWORD);             }
    {Type}                      {   return symbol(SYM_TYPE);                }
    {Identifier}                {   return symbol(SYM_IDENTIFIER);          }
    // Operators and punctuation marks.
    {Operator}                  {   return symbol(SYM_OPERATOR);            }
    "{" | "}"                   {   return symbol(SYM_CURLY_BRACKET);       }
    "[" | "]"                   {   return symbol(SYM_SQUARE_BRACKET);      }
    "(" | ")"                   {   return symbol(SYM_BRACKET);             }
    "$" | "?"                   {   return symbol(SYM_INVALID);             }

    // Comment
    {Comment}                   {   return symbol(SYM_COMMENT);             }
    // Double-quoted strings, terminated or still open at the end of the input.
    {DQString}                  {   return symbol(SYM_STRING_DQ);           }
    {DQUnclosedString}          {   return symbol(SYM_STRING_DQ);           }
    // Single-quoted strings, terminated or still open at the end of the input.
    {SQString}                  {   return symbol(SYM_STRING_SQ);           }
    {SQUnclosedString}          {   return symbol(SYM_STRING_SQ);           }
    // Opening triple quotes: switch to the matching multi-line string state and
    // start accumulating the token length; the token is emitted by that state.
    \"{3}                       {
                                    yybegin(ML_STRING_DQ);
                                    cLen = 3;
                                }
    \'{3}                       {
                                    yybegin(ML_STRING_SQ);
                                    cLen = 3;
                                }
    {Number}                    {   return symbol(SYM_NUMBER);              }
    // White spaces are emitted separately.
    [ \t]+                      {   return symbol(SYM_TEXT);                }
    // A backslash outside of a string (line continuation). It is excluded from
    // GeneralChar and no other rule consumes it, so it needs a rule of its own.
    "\\"                        {   return symbol(SYM_TEXT);                }
    // This is Text
    // Match anything else different from the markup. At least one character is
    // required: a pattern that can match the empty string would either yield
    // zero-length tokens or leave the input unmatched.
    {GeneralChar}+              {   return symbol(SYM_TEXT);                }
}

// Inside a """ string. Every rule adds the length of its match to cLen, and the
// accumulated token is emitted through flush(). The length always comes from
// yylength(), as the octal-escape rule already did, so the count stays right
// even if a single match spans more than one char (e.g. a surrogate pair).
<ML_STRING_DQ> {
  // Closing delimiter: count it, go back to normal scanning and emit the string.
  \"{3}                         {
                                    cLen += yylength();
                                    yybegin(YYINITIAL);
                                    // length also includes the trailing quotes
                                    return flush(SYM_STRING_DQ);
                                }
  /* escape sequences */
  \\[0-3]?{OctDigit}?{OctDigit} {   cLen += yylength();                     }
  \\.                           {   cLen += yylength();                     }
  /* any other character */
  .                             {   cLen += yylength();                     }
  // End of input while the string is still open: emit what was collected so far.
  <<EOF>>                       {   return flush(SYM_STRING_DQ);            }
}

// Inside a ''' string. Every rule adds the length of its match to cLen, and the
// accumulated token is emitted through flush(). The length always comes from
// yylength(), as the octal-escape rule already did, so the count stays right
// even if a single match spans more than one char (e.g. a surrogate pair).
<ML_STRING_SQ> {
  // Closing delimiter: count it, go back to normal scanning and emit the string.
  \'{3}                         {
                                    cLen += yylength();
                                    yybegin(YYINITIAL);
                                    // length also includes the trailing quotes
                                    return flush(SYM_STRING_SQ);
                                }
  /* escape sequences */
  \\[0-3]?{OctDigit}?{OctDigit} {   cLen += yylength();                     }
  \\.                           {   cLen += yylength();                     }
  /* any other character */
  .                             {   cLen += yylength();                     }
  // End of input while the string is still open: emit what was collected so far.
  <<EOF>>                       {   return flush(SYM_STRING_SQ);            }
}