package ro.sync.lexer.perl;
import ro.sync.lexer.AbstractLexer;

@SuppressWarnings("unused")
%%

// Scanner generation options.
// The generated class is the public class PerlLexer, a subclass of AbstractLexer.
%public 
%class PerlLexer
%extends AbstractLexer
// Scan the full Unicode character set and keep a character offset counter.
%unicode
%char
// The scanning method returns ro.sync.lexer.Symbol instances.
%type ro.sync.lexer.Symbol
 
// Exception type thrown by the generated scanner on an internal scan error.
%scanerror ro.sync.lexer.LexerException

%{
    // Token type codes used by the rule actions, mirrored from PerlTokens.

    // Plain text and invalid input.
    private static final byte SYM_TEXT = PerlTokens.TEXT;
    private static final byte SYM_INVALID = PerlTokens.INVALID;

    // Punctuation, operators, variable names and numbers.
    private static final byte SYM_COMMA = PerlTokens.COMMA;
    private static final byte SYM_SEMICOLON = PerlTokens.SEMICOLON;
    private static final byte SYM_BRACKET = PerlTokens.BRACKET;
    private static final byte SYM_SQUARE_BRACKET = PerlTokens.SQUARE_BRACKET;
    private static final byte SYM_CURLY_BRACKET = PerlTokens.CURLY_BRACKET;
    private static final byte SYM_OPERATOR = PerlTokens.OPERATOR;
    private static final byte SYM_VAR_NAME = PerlTokens.VAR_NAME;
    private static final byte SYM_NUMBER = PerlTokens.NUMBER;

    // Reserved words, built-in functions and regular expressions.
    private static final byte SYM_KEYWORDS = PerlTokens.KEYWORDS;
    private static final byte SYM_FUNCTIONS = PerlTokens.FUNCTIONS;
    private static final byte SYM_REGEXP = PerlTokens.REG_EXP;

    // Double-quoted and single-quoted strings.
    private static final byte SYM_STRING_DQ = PerlTokens.STRING_DQ;
    private static final byte SYM_STRING_SQ = PerlTokens.STRING_SQ;

    // Line comments.
    private static final byte SYM_COMMENT = PerlTokens.COMMENT;

    /**
     * Builds a lexer that has no input attached yet; a later call to
     * yyreset is expected to reset it and assign the reader.
     */
    public PerlLexer() {
        super();
    }

    /**
     * Returns the name identifying this lexer.
     *
     * @return the PERL_LEXER constant (not declared in this file; presumably
     *         inherited from AbstractLexer).
     */
    public String getName() {
        return PERL_LEXER;
    }
%}

// Line comment: "#" followed by every remaining character matched by {Char}.
Comment = "#" {Char}*

// Horizontal whitespace: a space or a tab.
WS = [ \t]

// Bare identifier: a letter or underscore followed by letters, digits or underscores.
UnquotedString = [a-zA-Z_] ([a-zA-Z0-9_])*
// Variable-like name: a sigil ($, @, & or %) or the "::" package separator,
// followed by an identifier. (A stray "]" after "::" was removed here.)
VarName = ([$@&%] | "::") {UnquotedString}

// Numeric literals.
Digit = [0-9]
// Hexadecimal integer: "0x" or "0X" followed by at least one hex digit.
Hexa = "0" [xX] [0-9a-fA-F]+
// Decimal integer with an optional leading minus sign.
Integer = "-"? {Digit}+
// Building blocks of floating point literals.
Fraction = "." {Digit}+
Exponent = [eE] ("+" | "-")? {Digit}+
// Floating point: an integer followed by a fraction (optionally with an
// exponent), or by an exponent alone.
Double = {Integer} ({Fraction} {Exponent}? | {Exponent})

Number = {Hexa} | {Double} | {Integer}

// String bodies: any sequence of backslash-escaped quotes and characters
// other than the delimiting quote.
DQStringContent = (\\\" | [^\"])*
SQStringContent = (\\\' | [^\'])*

// Opening quote plus body, without a closing quote.
DQUnclosedString = \" {DQStringContent}
SQUnclosedString = \' {SQStringContent}

// Opening quote, body and closing quote.
DQString = \" {DQStringContent} \"
SQString = \' {SQStringContent} \'

// Words reported as keywords. The duplicated "goto" alternative was removed
// and the "for" loop keyword, which was missing, was added.
Keywords = "my" | "local" | "new" | "if" | "until" | "while" | "elsif" | "else" | "eval" | "unless"
         | "for" | "foreach" | "continue" | "exit" | "die" | "last" | "goto" | "next" | "redo"
         | "return" | "do" | "sub" | "use" | "require" | "package" | "BEGIN" | "END"

// Names reported as built-in functions ("fcntl" was previously misspelled "fctnl").
// "die" is also listed in Keywords; the {Keywords} rule comes first in
// YYINITIAL, so it is reported as a keyword.
Functions = "abs" | "accept" | "alarm" | "atan2" | "bind" | "binmode" | "bless" | "caller" | "chdir"
         | "chmod" | "chomp" | "chr" | "chroot" | "chown" | "closedir" | "close" | "connect" | "cos"
         | "crypt" | "dbmclose" | "dbmopen" | "defined" | "delete" | "die" | "dump" | "each"
         | "endgrent" | "endhostent" | "endnetent" | "endprotoent" | "endpwent" | "endservent"
         | "eof" | "exec" | "exists" | "exp" | "fcntl" | "fileno" | "flock" | "fork" | "format"
         | "formline" | "getc" | "getgrent" | "getgrgid" | "getgrnam" | "gethostbyaddr"
         | "gethostbyname" | "gethostent" | "getlogin" | "getnetbyaddr" | "getnetbyname"
         | "getnetent" | "getpeername" | "getpgrp" | "getppid" | "getpriority" | "getprotobyname"
         | "getprotobynumber" | "getprotoent" | "getpwent" | "getpwnam" | "getpwuid"
         | "getservbyname" | "getservbyport" | "getservent" | "getsockname" | "getsockopt" | "glob"
         | "gmtime" | "grep" | "hex" | "import" | "index" | "int" | "ioctl" | "join" | "keys"
         | "kill" | "lcfirst" | "lc" | "length" | "link" | "listen" | "log" | "localtime" | "lstat"
         | "map" | "mkdir" | "msgctl" | "msgget" | "msgrcv" | "no" | "oct" | "opendir" | "open"
         | "ord" | "pack" | "pipe" | "pop" | "pos" | "printf" | "print" | "push" | "quotemeta"
         | "rand" | "readdir" | "read" | "readlink" | "recv" | "ref" | "rename" | "reset"
         | "reverse" | "rewinddir" | "rindex" | "rmdir" | "scalar" | "seekdir" | "seek" | "select"
         | "semctl" | "semget" | "semop" | "send" | "setgrent" | "sethostent" | "setnetent"
         | "setpgrp" | "setpriority" | "setprotoent" | "setpwent" | "setsockopt" | "shift"
         | "shmctl" | "shmget" | "shmread" | "shmwrite" | "shutdown" | "sin" | "sleep" | "socket"
         | "socketpair" | "sort" | "splice" | "split" | "sprintf" | "sqrt" | "srand" | "stat"
         | "study" | "substr" | "symlink" | "syscall" | "sysopen" | "sysread" | "syswrite"
         | "telldir" | "tell" | "tie" | "tied" | "time" | "times" | "truncate" | "uc" | "ucfirst"
         | "umask" | "undef" | "unlink" | "unpack" | "unshift" | "untie" | "utime" | "values"
         | "vec" | "wait" | "waitpid" | "wantarray" | "warn" | "write"

// Symbolic and word operators. The single "&" (bitwise AND) is listed
// explicitly: it is excluded from GeneralChar, so without this alternative a
// lone "&" would be accepted by no rule that consumes input.
// NOTE(review): "???" does not look like a Perl operator - confirm whether "?:" was meant.
Operator = "=" | "+" | "++" | "-" | "--" | "*" | "/" | "%" | "&" | "&&" | "~" | "!" | "*=" | "/="
         | "%=" | "+=" | "-=" | "<<=" | ">>=" | "&=" | "^=" | "|=" | "?" | "|" | "||" | "^" | "=="
         | "!=" | "<" | ">" | "<=" | ">=" | "<<" | ">>" | "eq" | "ne" | "lt" | "gt" | "le" | "ge"
         | "cmp" | "**" | "**=" | ".." | ".=" | "." | "<=>" | "x=" | "=~" | "!~" | "???"

// File test operators. Every alternative must be exactly two characters long:
// the YYINITIAL rule keeps only the first two characters of its match.
// ("-b" previously carried two trailing spaces inside the literal.)
FileTestOperator = "-r" | "-w" | "-x" | "-o" | "-R" | "-W" | "-X" | "-O" | "-e" | "-z" | "-s"
         | "-f" | "-d" | "-l" | "-p" | "-S" | "-b" | "-c" | "-u" | "-g" | "-k" | "-t" | "-T"
         | "-B" | "-M" | "-A" | "-C"

// Body of a slash-delimited pattern: backslash-escaped slashes and any
// characters other than "/".
RegexpContent = (\\\/ | [^/])*

// One "/"-delimited section, shared by all pattern forms below.
SlashDelimited = "/" {RegexpContent} "/"

// /pattern/ with optional g, i, o, x flags.
SimpleRegexp = {SlashDelimited} [giox]*
// m/pattern/ with optional g, i flags.
MatchRegexp = "m" {SlashDelimited} [gi]*
// s/pattern/replacement/ with optional g, i flags.
SubstituteRegexp = "s" {SlashDelimited} {RegexpContent} "/" [gi]*
// tr/search/replacement/ with optional g, i flags.
TranslationRegexp = "tr" {SlashDelimited} {RegexpContent} "/" [gi]*

Regexp = {SimpleRegexp} | {MatchRegexp} | {SubstituteRegexp} | {TranslationRegexp}

/* Any character except a line terminator. Anything interesting must be handled above. */
Char = .
/*
 * Any character that does not start an operator, bracket, separator, string
 * or whitespace token. The "-" is escaped so that "+-/" is not read as a
 * character range; the excluded set is the same as before, because the
 * characters of that range ("+", ",", "-", ".", "/") are all listed explicitly.
 */
GeneralChar = [^=+\-/%&~!*<>\^|?()\[\]{},;'\" \t.]

%%

<YYINITIAL> {
    // Repetition operator "x". Trailing whitespace is included in the match
    // (presumably so this rule out-matches the text rule) and then pushed
    // back, so only the "x" itself is emitted.
    "x" {WS}*                   {
                                    if (yylength() > 1) {
                                        // Keep only the "repetition operator".
                                        yypushback(yylength() - 1);
                                    }
                                    return symbol(SYM_OPERATOR);
                                }
    // Rule order matters: for matches of equal length the earlier rule wins.
    {Keywords}                  {   return symbol(SYM_KEYWORDS);            }
    {Functions}                 {   return symbol(SYM_FUNCTIONS);           }
    {Operator}                  {   return symbol(SYM_OPERATOR);            }
    {VarName}                   {   return symbol(SYM_VAR_NAME);            }
    {Regexp}                    {   return symbol(SYM_REGEXP);              }
    {Number}                    {   return symbol(SYM_NUMBER);              }
    // File test operator: keep the first two characters and push any
    // trailing whitespace back.
    {FileTestOperator} {WS}*    {
                                    if (yylength() > 2) {
                                        // Keep only the "file test operator".
                                        yypushback(yylength() - 2);
                                    }
                                    return symbol(SYM_OPERATOR);
                                }
    {Comment}                   {   return symbol(SYM_COMMENT);             }
    // Closed and unclosed strings get the same token type.
    {DQString} |
    {DQUnclosedString}          {   return symbol(SYM_STRING_DQ);           }

    {SQString} |
    {SQUnclosedString}          {   return symbol(SYM_STRING_SQ);           }
    // Emit white spaces separately.
    {WS}+                       {   return symbol(SYM_TEXT);                }
    "(" | ")"                   {   return symbol(SYM_BRACKET);             }
    "{" | "}"                   {   return symbol(SYM_CURLY_BRACKET);       }
    "[" | "]"                   {   return symbol(SYM_SQUARE_BRACKET);      }
    ","                         {   return symbol(SYM_COMMA);               }
    ";"                         {   return symbol(SYM_SEMICOLON);           }
    // Run of ordinary characters. At least one character is required, so this
    // rule can no longer match the empty string and yield a token that does
    // not advance the input.
    {GeneralChar}+              {   return symbol(SYM_TEXT);                }
    // Safety net: a single character accepted by no rule above is emitted as
    // plain text, so the scanner always consumes input.
    .                           {   return symbol(SYM_TEXT);                }
}