package ro.sync.lexer.shell;
import ro.sync.lexer.AbstractLexer;

@SuppressWarnings("unused")
%%

/* Generated class: public, named ShellLexer, subclass of AbstractLexer. */
%public 
%class ShellLexer
%extends AbstractLexer
/* Scan the full Unicode character set and keep a running character count. */
%unicode
%char
/* The scanning method returns ro.sync.lexer.Symbol instances. */
%type ro.sync.lexer.Symbol
 
/* All literals and character classes below match regardless of letter case. */
%ignorecase
/* Exception type thrown by the generated scanner on a scan error (e.g. no rule matches). */
%scanerror ro.sync.lexer.LexerException

%{
    // Token-type codes passed to symbol()/flush() by the lexical rules.
    // Every value is taken from ShellTokens.

    // Words.
    private static final byte SYM_KEYWORD           = ShellTokens.KEYWORD;
    private static final byte SYM_COMMAND           = ShellTokens.COMMAND;
    private static final byte SYM_IDENTIFIER        = ShellTokens.IDENTIFIER;
    private static final byte SYM_LABEL             = ShellTokens.LABEL;
    private static final byte SYM_TEXT              = ShellTokens.TEXT;

    // Punctuation.
    private static final byte SYM_OPERATOR          = ShellTokens.OPERATOR;
    private static final byte SYM_BRACKET           = ShellTokens.BRACKET;
    private static final byte SYM_SQUARE_BRACKET    = ShellTokens.SQUARE_BRACKET;
    private static final byte SYM_CURLY_BRACKET     = ShellTokens.CURLY_BRACKET;

    // Whole-line constructs.
    private static final byte SYM_COMMENT           = ShellTokens.COMMENT;
    private static final byte SYM_SHEBANG           = ShellTokens.SHEBANG;

    // Parameter expansions.
    private static final byte SYM_PARAM_NUMBER      = ShellTokens.PARAM_NUMBER;
    private static final byte SYM_PARAMETER         = ShellTokens.PARAMETER;

    // Quoted strings; double- and single-quoted strings share one token type.
    private static final byte SYM_STRING_DQ         = ShellTokens.STRING;
    private static final byte SYM_STRING_SQ         = ShellTokens.STRING;
    private static final byte SYM_STRING_RQ         = ShellTokens.STRING_RQ;

    /**
     * Builds a lexer with no input attached. The reader is expected to be
     * supplied afterwards through yyreset.
     */
    public ShellLexer() {
        // The superclass no-argument constructor is invoked implicitly.
    }

    /**
     * @return the SHELL_LEXER name constant (not declared in this
     *         specification; presumably inherited from AbstractLexer)
     */
    public String getName() {
        return SHELL_LEXER;
    }
%}

/* Exclusive states: while one is active, only rules tagged with that state apply. */
%xstate COMMENT, SHEBANG

/*
 * Operators: test flags, comparisons, logical and control operators,
 * arithmetic, and a few single-character sigils.
 * A bare "!" (negation, as in `[ ! -f file ]`) is listed explicitly: it is
 * excluded from GeneralChar, so without this alternative no rule would match
 * it and the scanner would raise a scan error.
 */
Operator = "-eq" | "-ne" | "-lt" | "-gt" | "-ge" | "-le" | "-z" | "-n"
           | "==" | "!=" | ">=" | "<=" | "=~" | "=" | ">" | "<" | "!"
           | "&&" | "||" | "&" | "|" | ";"
           | "++" | "--" | "+" | "-" | "*" | "/"
           | "$" | "@" | "."

/* Reserved words: conditionals first, then loop constructs, then "local". */
Keyword = "if" | "then" | "elif" | "else" | "fi" | "case" | "esac"
          | "for" | "in" | "while" | "until" | "do" | "done" | "break" | "continue"
          | "local"

/* Well-known commands and shell built-ins, one line per initial letter. */
Command = "alias" | "apropos" | "apt" | "aspell" | "awk"
          | "bash" | "basename" | "bc" | "bg" | "builtin" | "bzip2"
          | "cal" | "cat" | "cd" | "cfdisk" | "chgrp" | "chmod" | "chown" | "chroot" | "chkconfig" | "cksum" | "clear" | "cmp" | "comm" | "command" | "continue" | "cp" | "cron" | "crontab" | "csplit" | "cut"
          | "date" | "dc" | "dd" | "ddrescue" | "declare" | "df" | "diff" | "diff3" | "dig" | "dir" | "dircolors" | "dirname" | "dirs" | "dmesg" | "du"
          | "echo" | "egrep" | "eject" | "enable" | "env" | "ethtool" | "eval" | "exec" | "exit" | "expect" | "expand" | "export" | "expr"
          | "false" | "fdformat" | "fdisk" | "fg" | "fgrep" | "file" | "find" | "fmt" | "fold" | "format" | "free" | "fsck" | "ftp" | "function"
          | "gawk" | "getopts" | "grep" | "groups" | "gzip"
          | "hash" | "head" | "history" | "hostname"
          | "id" | "ifconfig" | "ifdown" | "ifup" | "import" | "install"
          | "join" | "kill" | "killall"
          | "less" | "let" | "ln" | "locate" | "logname" | "logout" | "look" | "lpc" | "lpr" | "lprint" | "lprintd" | "lprintq" | "lprm" | "ls" | "lsof"
          | "man" | "mkdir" | "mkfifo" | "mkisofs" | "mknod" | "more" | "mount" | "mtools" | "mv" | "mmv"
          | "netstat" | "nice" | "nl" | "nohup" | "nslookup"
          | "open" | "op"
          | "passwd" | "paste" | "pathchk" | "ping" | "popd" | "pr" | "printcap" | "printenv" | "printf" | "ps" | "pushd" | "pwd"
          | "quota" | "quotacheck" | "quotactl"
          | "ram" | "rcp" | "read" | "readonly" | "reboot" | "renice" | "remsync" | "return" | "rev" | "rm" | "rmdir" | "rsync"
          | "screen" | "scp" | "sdiff" | "sed" | "select" | "seq" | "set" | "sftp" | "shift" | "shopt" | "shutdown" | "sleep" | "slocate" | "sort" | "source" | "split" | "ssh" | "strace" | "su" | "sudo" | "sum" | "symlink" | "sync"
          | "tail" | "tar" | "tee" | "test" | "time" | "times" | "touch" | "top" | "traceroute" | "trap" | "tr" | "true" | "tsort" | "tty" | "type"
          | "ulimit" | "umask" | "umount" | "unalias" | "uname" | "unexpand" | "uniq" | "units" | "unset" | "unshar" | "useradd" | "usermod" | "users" | "uuencode" | "uudecode"
          | "v" | "vdir" | "vi" | "vmstat"
          | "watch" | "wc" | "whereis" | "which" | "who" | "whoami" | "Wget" | "write"
          | "xargs" | "yes"

/* A letter followed by letters, digits or underscores. */
Identifier = [a-zA-Z][a-zA-Z0-9_]*
/* ":" immediately followed by an identifier. */
Label = ":" {Identifier}
/*
 * "$" followed by a single digit or by "@". The alternation is parenthesised
 * because concatenation binds tighter than "|": without the group the macro
 * reads as ("$" digit) | "@", so "$@" would never be matched as one token.
 */
ParamNumber = "$" ([0-9] | "@")
/* "$" immediately followed by an identifier. */
Parameter = "$" {Identifier}
/* Any single character except a line terminator. */
Char = .
/* Any character that is not a blank and does not start an operator, comment or bracket token. */
GeneralChar = [^ \t=@\$#!;\-\.\+|<>\(\)\*]

/*
 * Quoted strings. Each family is built from three macros: the content
 * (any non-delimiter character or a backslash-escaped delimiter), the
 * unclosed form (opening delimiter + content) and the closed form
 * (unclosed form + closing delimiter).
 */

/* Double-quoted: "..." */
DQStringContent =  ([^\"]|\\\")*
DQUnclosedString =  \"{DQStringContent}
DQString =  {DQUnclosedString}\"

/* Single-quoted: '...' (opens with a single quote, matching its closing delimiter) */
SQStringContent =  ([^\']|\\\')*
SQUnclosedString =  \'{SQStringContent}
SQString =  {SQUnclosedString}\'

/* Back-quoted: `...` */
RQStringContent =  ([^\`]|\\\`)*
RQUnclosedString =  \`{RQStringContent}
RQString =  {RQUnclosedString}\`

%%

<YYINITIAL> 
{
    // Words. When several rules match the same longest text the first one
    // listed wins, so keywords take priority over commands and commands
    // over plain identifiers.
    {Keyword}                   {   return symbol(SYM_KEYWORD);         }
    {Command}                   {   return symbol(SYM_COMMAND);         }
    {Identifier}                {   return symbol(SYM_IDENTIFIER);      }

    // Operators. "=" is one of the {Operator} alternatives, so no separate
    // "=" rule is needed (it could never be selected after this one).
    {Operator}                  {   return symbol(SYM_OPERATOR);        }
    {Label}                     {   return symbol(SYM_LABEL);           }

    // Comment / shebang start: the marker is pushed back so that it is
    // re-read and counted by the {Char} rule of the state being entered.
    // No token is returned here; scanning continues in the new state.
    "#"                         { 
                                    yypushback(1);
                                    yybegin(COMMENT);   
                                }
    "#!"                        { 
                                    yypushback(2);
                                    yybegin(SHEBANG);   
                                }

    // Brackets
    "(" | ")"                   {   return symbol(SYM_BRACKET);         }
    "[" | "]"                   {   return symbol(SYM_SQUARE_BRACKET);  }
    "{" | "}"                   {   return symbol(SYM_CURLY_BRACKET);   }

    // Parameter references
    {ParamNumber}               {   return symbol(SYM_PARAM_NUMBER);    }
    {Parameter}                 {   return symbol(SYM_PARAMETER);       }

    // Strings 
    {DQString}                  {   return symbol(SYM_STRING_DQ);       }
    {SQString}                  {   return symbol(SYM_STRING_SQ);       }
    {RQString}                  {   return symbol(SYM_STRING_RQ);       }

    // Everything else: runs of blanks, then runs of general characters.
    [ \t]+                      {   return symbol(SYM_TEXT);            }
    {GeneralChar}+              {   return symbol(SYM_TEXT);            }
}

// COMMENT and SHEBANG consume input the same way: each character is only
// counted in cLen, and the token is produced when the end of input is
// reached. NOTE(review): cLen and flush() are not defined in this
// specification; presumably they come from AbstractLexer and flush() emits
// one token covering the counted characters - confirm.
<COMMENT, SHEBANG> {Char}       {   cLen++;                             }

// End of input inside a comment: go back to YYINITIAL and emit the comment.
<COMMENT> <<EOF>>               {
                                    yybegin(YYINITIAL);
                                    return flush(SYM_COMMENT);
                                }

// End of input inside a shebang: go back to YYINITIAL and emit the shebang.
<SHEBANG> <<EOF>>               {
                                    yybegin(YYINITIAL);
                                    return flush(SYM_SHEBANG);
                                }