package ro.sync.lexer.cpp;
import ro.sync.lexer.AbstractLexer;

@SuppressWarnings("unused")
%%

// Generated scanner: a public class named CPPLexer that extends AbstractLexer.
%public 
%class CPPLexer
%extends AbstractLexer
// Scan full Unicode input; %char switches on character counting (yychar).
%unicode
%char
// The scanning method returns ro.sync.lexer.Symbol.
%type ro.sync.lexer.Symbol
 
// NOTE(review): %ignorecase makes every string and character class in this
// specification match case-insensitively, so e.g. "IF" or "Class" is reported
// as a keyword although C++ is case-sensitive - confirm this is intended.
%ignorecase
// Input that matches no rule raises ro.sync.lexer.LexerException.
%scanerror ro.sync.lexer.LexerException

%{
    // Short local aliases for the token type codes declared in CPPTokens.
    // The lexical rules below pass these to symbol(...) / flush(...).

    // Words and literals.
    private static final byte SYM_TEXT               = CPPTokens.TEXT;
    private static final byte SYM_KEYWORD            = CPPTokens.KEYWORD;
    private static final byte SYM_IDENTIFIER         = CPPTokens.IDENTIFIER;
    private static final byte SYM_OPERATOR           = CPPTokens.OPERATOR;
    private static final byte SYM_STRING             = CPPTokens.STRING;
    private static final byte SYM_CHAR_LITERAL       = CPPTokens.CHAR_LITERAL;
    private static final byte SYM_NUMBER             = CPPTokens.NUMBER;
    
    // Punctuation.
    private static final byte SYM_CURLY_BRACKET      = CPPTokens.CURLY_BRACKET;
    private static final byte SYM_SQUARE_BRACKET     = CPPTokens.SQUARE_BRACKET;
    private static final byte SYM_BRACKET            = CPPTokens.BRACKET;
    private static final byte SYM_COMMA              = CPPTokens.COMMA;
    private static final byte SYM_COLON              = CPPTokens.COLON;
    private static final byte SYM_SEMICOLON          = CPPTokens.SEMICOLON;
    private static final byte SYM_DOT                = CPPTokens.DOT;
    // NOTE(review): this is bound to CPPTokens.DOT, the same value as SYM_DOT,
    // so "->" and "." are indistinguishable to consumers. It looks like a
    // copy/paste slip - confirm whether CPPTokens declares a dedicated
    // pointer-reference constant that should be used here instead.
    private static final byte SYM_POINTER_REFERENCE  = CPPTokens.DOT;
    private static final byte SYM_THREE_DOTS         = CPPTokens.THREE_DOTS;
    
    // Comments and '#' lines.
    private static final byte SYM_COMMENT            = CPPTokens.COMMENT;
    private static final byte SYM_LINE_COMMAND       = CPPTokens.LINE_COMMAND;
    
    
    /**
     * Builds a lexer that has no input attached yet. The reader is expected
     * to be supplied afterwards through yyreset.
     */
    public CPPLexer() {
        // Nothing to initialise here; the no-argument superclass constructor
        // is invoked implicitly.
    }
    
    /**
     * Returns the name of this lexer, i.e. the value of the CPP_LEXER constant.
     * NOTE(review): CPP_LEXER is not declared in this specification -
     * presumably it is inherited from AbstractLexer; confirm.
     *
     * @return the lexer name.
     */
    public String getName() {
      return CPP_LEXER;
    }
%}

// Exclusive lexical states: while the scanner is in COMMENT (inside /* ... */)
// or DQ_STRING (inside a double-quoted string) only the rules explicitly
// listed for that state are active.
%xstate COMMENT, DQ_STRING

// Keywords: the complete C++98 keyword set, in alphabetical order, plus "null".
// "null" is not a C++ keyword; NOTE(review): presumably it is listed so that,
// with %ignorecase, the NULL macro is highlighted - confirm.
// The former entry "typenameopt" was dropped: it is not a C++ word but an
// artifact of the grammar notation "typename opt". "export" was missing.
Keyword = "asm" | "auto" | "bool" | "break" | "case" | "catch" | "char" | "class" | "const"
        | "const_cast" | "continue" | "default" | "delete" | "do" | "double" | "dynamic_cast"
        | "else" | "enum" | "explicit" | "export" | "extern" | "false" | "float" | "for"
        | "friend" | "goto" | "if" | "inline" | "int" | "long" | "mutable" | "namespace" | "new"
        | "null" | "operator" | "private" | "protected" | "public" | "register"
        | "reinterpret_cast" | "return" | "short" | "signed" | "sizeof" | "static"
        | "static_cast" | "struct" | "switch" | "template" | "this" | "throw" | "true" | "try"
        | "typedef" | "typeid" | "typename" | "union" | "unsigned" | "using" | "virtual"
        | "void" | "volatile" | "wchar_t" | "while"

// Operators, one category per line: arithmetic, bitwise/logical, assignment,
// comparison, conditional/scope/member-pointer, digraphs, and the
// alternative word tokens. The order of the alternatives is irrelevant,
// because the scanner always takes the longest match.
// The word tokens also match {Identifier}; which token type they get is
// decided by the order of the rules in the YYINITIAL block.
Operator = "+" | "-" | "*" | "/" | "%" | "++" | "--"
         | "&" | "|" | "^" | "~" | "!" | "&&" | "||" | "<<" | ">>"
         | "=" | "+=" | "-=" | "*=" | "/=" | "%=" | "&=" | "|=" | "^=" | "<<=" | ">>="
         | "==" | "!=" | "<" | ">" | "<=" | ">="
         | "?" | "::" | ".*" | "->*"
         | "<:" | ":>" | "<%" | "%>" | "%:" | "%:%:"
         | "and" | "and_eq" | "bitand" | "bitor" | "compl" | "not" | "not_eq"
         | "or" | "or_eq" | "xor" | "xor_eq"

// Identifiers: a letter, '$' or '_' followed by any number of letters,
// '$', '_' or digits.
IdentifierStart = [a-zA-Z$_]
IdentifierPart  = ({IdentifierStart} | [0-9])
Identifier      = {IdentifierStart} {IdentifierPart}*
 
// Escape sequences accepted inside character literals.
// Simple escapes cover the full C++ set: \a \b \f \n \r \t \v \? \" \' \\
// (previously \a, \v and \? were missing, as was the hexadecimal form \x...,
// so literals such as '\a' or '\x41' were broken into several tokens).
OctalEscape = "\\" [0-7]+
HexEscape = "\\x" {HexDigit}+
UnicodeEscape = "\\u" {HexDigit}+
EscapeSequence = ("\\" [abfnrtv?\"\'\\]) | {OctalEscape} | {HexEscape} | {UnicodeEscape}

// Character literals: an optional "L" prefix, the opening quote and then any
// run of escape sequences or characters other than a quote or a backslash.
// The unclosed form stops before the closing quote; the closed form adds it.
CharLiteralPart     = ({EscapeSequence} | [^\'\\])
UnclosedCharLiteral = "L"? "\'" {CharLiteralPart}*
CharLiteral         = {UnclosedCharLiteral} "\'" 

// Numbers

// One unit of an integer suffix: "l", "ul" or "u" (any letter case).
// Integer literals accept any run of these units, which also covers the
// "ll", "ull" and "lu" spellings.
IntegerType = ([uU]? [lL]) | ([uU] [lL]?)

Digit = [0-9]
Decimal = {Digit}+ {IntegerType}*

HexDigit = [0-9a-fA-F]
// The suffix is optional. It used to be mandatory ({IntegerType}+), so a
// plain literal such as 0x1F was never recognised as a number.
Hexadecimal = "0" [xX] {HexDigit}+ {IntegerType}*

Exponent = [eE] [+-]? {Digit}+
// f/F and l/L are the C++ floating suffixes; d/D is kept for compatibility
// with the previous behaviour of this lexer.
FloatType = [fFdDlL]
// Four shapes: digits with a dot, leading dot, digits with an exponent,
// digits with a floating suffix.
FloatingPoint = ({Digit}+ "." {Digit}* {Exponent}? {FloatType}?)
              | ("." {Digit}+ {Exponent}? {FloatType}?)
              | ({Digit}+ {Exponent} {FloatType}?)
              | ({Digit}+ {Exponent}? {FloatType})

Number = {Decimal} | {Hexadecimal} | {FloatingPoint}

// Line comments: "//" and everything after it that {Char} matches, i.e. up to
// (not including) the line terminator.
LineComment = "//" {Char}*

// Line commands: "#" and the rest of the line (e.g. preprocessor directives).
// The pattern is not anchored, so a "#" anywhere starts a line command.
LineCommand = "#" {Char}*

// Whitespace: blanks and tabs only (line terminators are not included).
WS = [ \t]

// Any character not handled separately: everything except the punctuation,
// operator, quote, blank and tab characters listed in the class. Letters,
// digits and line terminators are NOT excluded, so a run of these can be
// longer than a keyword/identifier/number match and win as plain text.
// NOTE(review): presumably the input is fed one line at a time - confirm.
GeneralChar = [^;,=:\{\}\(\)\[\]*.+\-/%<>&~!\^|?\"\'\t ]

// Any character except a line terminator.
Char = .

%%

// Default state. The scanner takes the longest match; when two rules match
// the same length, the rule listed first wins - so the order below matters.
// symbol(...) is not defined in this file (presumably inherited from
// AbstractLexer); it receives the token type of the matched text.
<YYINITIAL> {
    // White spaces are emitted separately.
    {WS}+                        {   return symbol(SYM_TEXT);           }

    {Keyword}                    {   return symbol(SYM_KEYWORD);        }
    // {Operator} must come before {Identifier}: the alternative tokens
    // ("and", "or", "not_eq", ...) also match {Identifier} with the same
    // length, and with the identifier rule first they could never be
    // reported as operators. Longer words such as "android" still resolve
    // to {Identifier} through the longest-match rule.
    {Operator}                   {   return symbol(SYM_OPERATOR);       }
    {Identifier}                 {   return symbol(SYM_IDENTIFIER);     }
    "{" | "}"                    {   return symbol(SYM_CURLY_BRACKET);  }
    "[" | "]"                    {   return symbol(SYM_SQUARE_BRACKET); }
    "(" | ")"                    {   return symbol(SYM_BRACKET);        }
    ";"                          {   return symbol(SYM_SEMICOLON);      }
    ":"                          {   return symbol(SYM_COLON);          }
    "."                          {   return symbol(SYM_DOT);            }
    "->"                         {   return symbol(SYM_POINTER_REFERENCE); }
    "..."                        {   return symbol(SYM_THREE_DOTS);     }
    ","                          {   return symbol(SYM_COMMA);          }
    // Opening quote of a string: nothing is returned yet. The quote is only
    // counted in cLen and the DQ_STRING rules emit the whole literal.
    "\""                         {
                                     cLen++;
                                     yybegin(DQ_STRING);
                                 }
    // A character literal is reported even when its closing quote is missing.
    {CharLiteral} | {UnclosedCharLiteral}
                                 {   return symbol(SYM_CHAR_LITERAL);   }
    {Number}                     {   return symbol(SYM_NUMBER);         }
    {LineCommand}                {   return symbol(SYM_LINE_COMMAND);   }
    {LineComment}                {   return symbol(SYM_COMMENT);        }
    // Start of a block comment: the opener is its own comment token and the
    // rest is handled by the COMMENT state.
    "/*"                         {
                                     yybegin(COMMENT);
                                     return symbol(SYM_COMMENT);
                                 }

    // This is Text
    {GeneralChar}+               {   return symbol(SYM_TEXT);           }
}

// Inside a double-quoted string. Characters are only counted in cLen; the
// token itself is produced by flush(...) once the string ends.
<DQ_STRING> {
    // An escaped backslash or an escaped quote: two characters that never
    // terminate the string.
    "\\" ("\\" | "\"")           {   cLen += 2;                         }
    // Any other single character is part of the string body.
    [^\"]                        {   cLen++;                            }
    // Closing quote: count it, go back to the default state and emit the
    // accumulated string.
    "\""                         {
                                     cLen++;
                                     yybegin(YYINITIAL);
                                     return flush(SYM_STRING);
                                 }
    // Input ended before the closing quote: emit what was collected.
    <<EOF>>                      {
                                     yybegin(YYINITIAL);
                                     return flush(SYM_STRING);
                                 }
}

// Inside a block comment (entered from YYINITIAL on "/*").
// How the rules interact (longest match first, then the earliest rule):
//  - If a "*/" exists further ahead, ~"*/" matches everything up to and
//    including it and is the longest match; the "*/" is pushed back, the
//    comment body is returned, and the next call matches the "*/" rule.
//  - If the input starts with "*/", both "*/" and ~"*/" match two characters
//    and the "*/" rule wins because it is listed first.
//  - If no "*/" is left in the input, only {Char} can match: characters are
//    counted one by one in cLen until <<EOF>> flushes them.
<COMMENT> {
  // Comment terminator: return to the default state and emit it as comment.
  "*/"                           {
                                     yybegin(YYINITIAL);
                                     return symbol(SYM_COMMENT);                                     
                                 }
  // Single character of an unterminated comment; only counted, not emitted.
  // NOTE(review): {Char} is '.', which does not match a line terminator, so
  // a line terminator in an unterminated comment matches no rule of this
  // exclusive state - presumably input is fed one line at a time; confirm.
  {Char}                         {   cLen ++;   }                                 
  // Comment body up to the terminator; the two characters of "*/" are given
  // back to the input so that the rule above handles them.
  ~"*/"                          {
                                     yypushback(2);
                                     return symbol(SYM_COMMENT);
                                 }
  // End of input inside the comment: emit the counted characters. The state
  // is not reset here, so the scanner stays in COMMENT.
  // NOTE(review): presumably so that the next line continues the comment -
  // confirm against the caller.
  <<EOF>>                        {   return flush(SYM_COMMENT);    }
}

