Wednesday, 18 November 2015

Python PLY Programming: 1. Introduction to PLY

Python PLY Programming: 1. Introduction to PLY: PLY is a pure-Python implementation of the popular compiler construction tools lex and yacc. The main goal of PLY is to stay fairly faith...

6. Basic Example Using PLY in Python

basiclex.py

from ply import *

keywords = (
    'LET','READ','DATA','PRINT','GOTO','IF','THEN','FOR','NEXT','TO','STEP',
    'END','STOP','DEF','GOSUB','DIM','REM','RETURN','RUN','LIST','NEW',
)

tokens = keywords + (
     'EQUALS','PLUS','MINUS','TIMES','DIVIDE','POWER',
     'LPAREN','RPAREN','LT','LE','GT','GE','NE',
     'COMMA','SEMI', 'INTEGER','FLOAT', 'STRING',
     'ID','NEWLINE'
)

t_ignore = ' \t'

def t_REM(t):
    r'REM .*'
    return t

def t_ID(t):
    r'[A-Z][A-Z0-9]*'
    if t.value in keywords:
        t.type = t.value
    return t
    
t_EQUALS  = r'='
t_PLUS    = r'\+'
t_MINUS   = r'-'
t_TIMES   = r'\*'
t_POWER   = r'\^'
t_DIVIDE  = r'/'
t_LPAREN  = r'\('
t_RPAREN  = r'\)'
t_LT      = r'<'
t_LE      = r'<='
t_GT      = r'>'
t_GE      = r'>='
t_NE      = r'<>'
t_COMMA   = r'\,'
t_SEMI    = r';'
t_INTEGER = r'\d+'    
t_FLOAT   = r'((\d*\.\d+)(E[\+-]?\d+)?|([1-9]\d*E[\+-]?\d+))'
t_STRING  = r'\".*?\"'

def t_NEWLINE(t):
    r'\n'
    t.lexer.lineno += 1
    return t

def t_error(t):
    print "Illegal character", t.value[0]
    t.lexer.skip(1)

lex.lex()




basparse.py


from ply import *
import basiclex

tokens = basiclex.tokens

precedence = (
               ('left', 'PLUS','MINUS'),
               ('left', 'TIMES','DIVIDE'),
               ('left', 'POWER'),
               ('right','UMINUS')
)

#### A BASIC program is a series of statements.  We represent the program as a
#### dictionary of tuples indexed by line number.

def p_program(p):
    '''program : program statement
               | statement'''

    if len(p) == 2 and p[1]:
       p[0] = { }
       line,stat = p[1]
       p[0][line] = stat
    elif len(p) ==3:
       p[0] = p[1]
       if not p[0]: p[0] = { }
       if p[2]:
           line,stat = p[2]
           p[0][line] = stat

#### This catch-all rule is used for any catastrophic errors.  In this case,
#### we simply return nothing

def p_program_error(p):
    '''program : error'''
    p[0] = None
    p.parser.error = 1

#### Format of all BASIC statements. 

def p_statement(p):
    '''statement : INTEGER command NEWLINE'''
    if isinstance(p[2],str):
        print p[2],"AT LINE", p[1]
        p[0] = None
        p.parser.error = 1
    else:
        lineno = int(p[1])
        p[0] = (lineno,p[2])

#### Interactive statements.

def p_statement_interactive(p):
    '''statement : RUN NEWLINE
                 | LIST NEWLINE
                 | NEW NEWLINE'''
    p[0] = (0, (p[1],0))

#### Blank line number
def p_statement_blank(p):
    '''statement : INTEGER NEWLINE'''
    p[0] = (0,('BLANK',int(p[1])))

#### Error handling for malformed statements

def p_statement_bad(p):
    '''statement : INTEGER error NEWLINE'''
    print "MALFORMED STATEMENT AT LINE", p[1]
    p[0] = None
    p.parser.error = 1

#### Blank line

def p_statement_newline(p):
    '''statement : NEWLINE'''
    p[0] = None

#### LET statement

def p_command_let(p):
    '''command : LET variable EQUALS expr'''
    p[0] = ('LET',p[2],p[4])

def p_command_let_bad(p):
    '''command : LET variable EQUALS error'''
    p[0] = "BAD EXPRESSION IN LET"

#### READ statement

def p_command_read(p):
    '''command : READ varlist'''
    p[0] = ('READ',p[2])

def p_command_read_bad(p):
    '''command : READ error'''
    p[0] = "MALFORMED VARIABLE LIST IN READ"

#### DATA statement

def p_command_data(p):
    '''command : DATA numlist'''
    p[0] = ('DATA',p[2])

def p_command_data_bad(p):
    '''command : DATA error'''
    p[0] = "MALFORMED NUMBER LIST IN DATA"

#### PRINT statement

def p_command_print(p):
    '''command : PRINT plist optend'''
    p[0] = ('PRINT',p[2],p[3])

def p_command_print_bad(p):
    '''command : PRINT error'''
    p[0] = "MALFORMED PRINT STATEMENT"

#### Optional ending on PRINT. Either a comma (,) or semicolon (;)

def p_optend(p):
    '''optend : COMMA 
              | SEMI
              |'''
    if len(p)  == 2:
         p[0] = p[1]
    else:
         p[0] = None

#### PRINT statement with no arguments

def p_command_print_empty(p):
    '''command : PRINT'''
    p[0] = ('PRINT',[],None)

#### GOTO statement

def p_command_goto(p):
    '''command : GOTO INTEGER'''
    p[0] = ('GOTO',int(p[2]))

def p_command_goto_bad(p):
    '''command : GOTO error'''
    p[0] = "INVALID LINE NUMBER IN GOTO"

#### IF-THEN statement

def p_command_if(p):
    '''command : IF relexpr THEN INTEGER'''
    p[0] = ('IF',p[2],int(p[4]))

def p_command_if_bad(p):
    '''command : IF error THEN INTEGER'''
    p[0] = "BAD RELATIONAL EXPRESSION"

def p_command_if_bad2(p):
    '''command : IF relexpr THEN error'''
    p[0] = "INVALID LINE NUMBER IN THEN"

#### FOR statement

def p_command_for(p):
    '''command : FOR ID EQUALS expr TO expr optstep'''
    p[0] = ('FOR',p[2],p[4],p[6],p[7])

def p_command_for_bad_initial(p):
    '''command : FOR ID EQUALS error TO expr optstep'''
    p[0] = "BAD INITIAL VALUE IN FOR STATEMENT"

def p_command_for_bad_final(p):
    '''command : FOR ID EQUALS expr TO error optstep'''
    p[0] = "BAD FINAL VALUE IN FOR STATEMENT"

def p_command_for_bad_step(p):
    '''command : FOR ID EQUALS expr TO expr STEP error'''
    p[0] = "MALFORMED STEP IN FOR STATEMENT"

#### Optional STEP qualifier on FOR statement

def p_optstep(p):
    '''optstep : STEP expr
               | empty'''
    if len(p) == 3:
       p[0] = p[2]
    else:
       p[0] = None

#### NEXT statement
    
def p_command_next(p):
    '''command : NEXT ID'''

    p[0] = ('NEXT',p[2])

def p_command_next_bad(p):
    '''command : NEXT error'''
    p[0] = "MALFORMED NEXT"

#### END statement

def p_command_end(p):
    '''command : END'''
    p[0] = ('END',)

#### REM statement

def p_command_rem(p):
    '''command : REM'''
    p[0] = ('REM',p[1])

#### STOP statement

def p_command_stop(p):
    '''command : STOP'''
    p[0] = ('STOP',)

#### DEF statement

def p_command_def(p):
    '''command : DEF ID LPAREN ID RPAREN EQUALS expr'''
    p[0] = ('FUNC',p[2],p[4],p[7])

def p_command_def_bad_rhs(p):
    '''command : DEF ID LPAREN ID RPAREN EQUALS error'''
    p[0] = "BAD EXPRESSION IN DEF STATEMENT"

def p_command_def_bad_arg(p):
    '''command : DEF ID LPAREN error RPAREN EQUALS expr'''
    p[0] = "BAD ARGUMENT IN DEF STATEMENT"

#### GOSUB statement

def p_command_gosub(p):
    '''command : GOSUB INTEGER'''
    p[0] = ('GOSUB',int(p[2]))

def p_command_gosub_bad(p):
    '''command : GOSUB error'''
    p[0] = "INVALID LINE NUMBER IN GOSUB"

#### RETURN statement

def p_command_return(p):
    '''command : RETURN'''
    p[0] = ('RETURN',)

#### DIM statement

def p_command_dim(p):
    '''command : DIM dimlist'''
    p[0] = ('DIM',p[2])

def p_command_dim_bad(p):
    '''command : DIM error'''
    p[0] = "MALFORMED VARIABLE LIST IN DIM"

#### List of variables supplied to DIM statement

def p_dimlist(p):
    '''dimlist : dimlist COMMA dimitem
               | dimitem'''
    if len(p) == 4:
        p[0] = p[1]
        p[0].append(p[3])
    else:
        p[0] = [p[1]]

#### DIM items

def p_dimitem_single(p):
    '''dimitem : ID LPAREN INTEGER RPAREN'''
    p[0] = (p[1],eval(p[3]),0)

def p_dimitem_double(p):
    '''dimitem : ID LPAREN INTEGER COMMA INTEGER RPAREN'''
    p[0] = (p[1],eval(p[3]),eval(p[5]))

#### Arithmetic expressions

def p_expr_binary(p):
    '''expr : expr PLUS expr
            | expr MINUS expr
            | expr TIMES expr
            | expr DIVIDE expr
            | expr POWER expr'''

    p[0] = ('BINOP',p[2],p[1],p[3])

def p_expr_number(p):
    '''expr : INTEGER
            | FLOAT'''
    p[0] = ('NUM',eval(p[1]))

def p_expr_variable(p):
    '''expr : variable'''
    p[0] = ('VAR',p[1])

def p_expr_group(p):
    '''expr : LPAREN expr RPAREN'''
    p[0] = ('GROUP',p[2])

def p_expr_unary(p):
    '''expr : MINUS expr %prec UMINUS'''
    p[0] = ('UNARY','-',p[2])

#### Relational expressions

def p_relexpr(p):
    '''relexpr : expr LT expr
               | expr LE expr
               | expr GT expr
               | expr GE expr
               | expr EQUALS expr
               | expr NE expr'''
    p[0] = ('RELOP',p[2],p[1],p[3])

#### Variables

def p_variable(p):
    '''variable : ID
              | ID LPAREN expr RPAREN
              | ID LPAREN expr COMMA expr RPAREN'''
    if len(p) == 2:
       p[0] = (p[1],None,None)
    elif len(p) == 5:
       p[0] = (p[1],p[3],None)
    else:
       p[0] = (p[1],p[3],p[5])

#### Builds a list of variable targets as a Python list

def p_varlist(p):
    '''varlist : varlist COMMA variable
               | variable'''
    if len(p) > 2:
       p[0] = p[1]
       p[0].append(p[3])
    else:
       p[0] = [p[1]]


#### Builds a list of numbers as a Python list

def p_numlist(p):
    '''numlist : numlist COMMA number
               | number'''

    if len(p) > 2:
       p[0] = p[1]
       p[0].append(p[3])
    else:
       p[0] = [p[1]]

#### A number. May be an integer or a float

def p_number(p):
    '''number  : INTEGER
               | FLOAT'''
    p[0] = eval(p[1])

#### A signed number.

def p_number_signed(p):
    '''number  : MINUS INTEGER
               | MINUS FLOAT'''
    p[0] = eval("-"+p[2])

#### List of targets for a print statement
#### Returns a list of tuples (label,expr)

def p_plist(p):
    '''plist   : plist COMMA pitem
               | pitem'''
    if len(p) > 3:
       p[0] = p[1]
       p[0].append(p[3])
    else:
       p[0] = [p[1]]

def p_item_string(p):
    '''pitem : STRING'''
    p[0] = (p[1][1:-1],None)

def p_item_string_expr(p):
    '''pitem : STRING expr'''
    p[0] = (p[1][1:-1],p[2])

def p_item_expr(p):
    '''pitem : expr'''
    p[0] = ("",p[1])

#### Empty
   
def p_empty(p):
    '''empty : '''

#### Catastrophic error handler
def p_error(p):
    if not p:
        print "SYNTAX ERROR AT EOF"

bparser = yacc.yacc()

def parse(data):
    bparser.error = 0
    p = bparser.parse(data)
    if bparser.error: return None
    return p



5. Syntax Error Handling

When a syntax error occurs during parsing, the error is immediately detected (i.e., the parser does not read any more tokens beyond the source of the error). Error recovery in LR parsers is a delicate topic that involves ancient rituals and black-magic. The recovery mechanism provided by yacc.py is comparable to Unix yacc so you may want consult a book like O'Reilly's "Lex and Yacc" for some of the finer details.
When a syntax error occurs, yacc.py performs the following steps:
  1. On the first occurrence of an error, the user-defined p_error() function is called with the offending token as an argument. Afterwards, the parser enters an "error-recovery" mode in which it will not make future calls to p_error() until it has successfully shifted at least 3 tokens onto the parsing stack.
  2. If no recovery action is taken in p_error(), the offending lookahead token is replaced with a special error token.
  3. If the offending lookahead token is already set to error, the top item of the parsing stack is deleted.
  4. If the entire parsing stack is unwound, the parser enters a restart state and attempts to start parsing from its initial state.
  5. If a grammar rule accepts error as a token, it will be shifted onto the parsing stack.
  6. If the top item of the parsing stack is error, lookahead tokens will be discarded until the parser can successfully shift a new symbol or reduce a rule involving error.

Recovery and resynchronization with error rules

The most well-behaved approach for handling syntax errors is to write grammar rules that include the error token. For example, suppose your language had a grammar rule for a print statement like this:
def p_statement_print(p):
     'statement : PRINT expr SEMI'
     ...
To account for the possibility of a bad expression, you might write an additional grammar rule like this:
def p_statement_print_error(p):
     'statement : PRINT error SEMI'
     print "Syntax error in print statement. Bad expression"

In this case, the error token will match any sequence of tokens that might appear up to the first semicolon that is encountered. Once the semicolon is reached, the rule will be invoked and the error token will go away.
This type of recovery is sometimes known as parser resynchronization. The error token acts as a wildcard for any bad input text and the token immediately following error acts as a synchronization token.
It is important to note that the error token usually does not appear as the last token on the right in an error rule. For example:
def p_statement_print_error(p):
    'statement : PRINT error'
    print "Syntax error in print statement. Bad expression"
This is because the first bad token encountered will cause the rule to be reduced--which may make it difficult to recover if more bad tokens immediately follow.


Panic mode recovery


An alternative error recovery scheme is to enter a panic mode recovery in which tokens are discarded to a point where the parser might be able to recover in some sensible manner.
Panic mode recovery is implemented entirely in the p_error() function. For example, this function starts discarding tokens until it reaches a closing '}'. Then, it restarts the parser in its initial state.
def p_error(p):
    print "Whoa. You are seriously hosed."
    # Read ahead looking for a closing '}'
    while 1:
        tok = yacc.token()             # Get the next token
        if not tok or tok.type == 'RBRACE': break
    yacc.restart()
This function simply discards the bad token and tells the parser that the error was ok.
def p_error(p):
    print "Syntax error at token", p.type
    # Just discard the token and tell the parser it's okay.
    yacc.errok()
Within the p_error() function, three functions are available to control the behavior of the parser:
  • yacc.errok(). This resets the parser state so it doesn't think it's in error-recovery mode. This will prevent an error token from being generated and will reset the internal error counters so that the next syntax error will call p_error() again.
  • yacc.token(). This returns the next token on the input stream.
  • yacc.restart(). This discards the entire parsing stack and resets the parser to its initial state.
Note: these functions are only available when invoking p_error() and are not available at any other time.
To supply the next lookahead token to the parser, p_error() can return a token. This might be useful if trying to synchronize on special characters. For example:
def p_error(p):
    # Read ahead looking for a terminating ";"
    while 1:
        tok = yacc.token()             # Get the next token
        if not tok or tok.type == 'SEMI': break
    yacc.errok()

    # Return SEMI to the parser as the next lookahead token
    return tok  

4.3 Dealing With Ambiguous Grammars

The expression grammar given in the earlier example has been written in a special format to eliminate ambiguity. However, in many situations, it is extremely difficult or awkward to write grammars in this format. A much more natural way to express the grammar is in a more compact form like this:
expression : expression PLUS expression
           | expression MINUS expression
           | expression TIMES expression
           | expression DIVIDE expression
           | LPAREN expression RPAREN
           | NUMBER

Unfortunately, this grammar specification is ambiguous. For example, if you are parsing the string "3 * 4 + 5", there is no way to tell how the operators are supposed to be grouped. For example, does the expression mean "(3 * 4) + 5" or is it "3 * (4+5)"?

When an ambiguous grammar is given to yacc.py it will print messages about "shift/reduce conflicts" or a "reduce/reduce conflicts". A shift/reduce conflict is caused when the parser generator can't decide whether or not to reduce a rule or shift a symbol on the parsing stack. For example, consider the string "3 * 4 + 5" and the internal parsing stack:
Step Symbol Stack           Input Tokens            Action
---- ---------------------  ---------------------   -------------------------------
1    $                                3 * 4 + 5$    Shift 3
2    $ 3                                * 4 + 5$    Reduce : expression : NUMBER
3    $ expr                             * 4 + 5$    Shift *
4    $ expr *                             4 + 5$    Shift 4
5    $ expr * 4                             + 5$    Reduce: expression : NUMBER
6    $ expr * expr                          + 5$    SHIFT/REDUCE CONFLICT ????
In this case, when the parser reaches step 6, it has two options. One is to reduce the rule expr : expr * expr on the stack. The other option is to shift the token + on the stack. Both options are perfectly legal from the rules of the context-free-grammar.

By default, all shift/reduce conflicts are resolved in favor of shifting. Therefore, in the above example, the parser will always shift the + instead of reducing. Although this strategy works in many cases (including the ambiguous if-then-else), it is not enough for arithmetic expressions. In fact, in the above example, the decision to shift + is completely wrong---we should have reduced expr * expr since multiplication has higher mathematical precedence than addition.
To resolve ambiguity, especially in expression grammars, yacc.py allows individual tokens to be assigned a precedence level and associativity. This is done by adding a variable precedence to the grammar file like this:
precedence = (
    ('left', 'PLUS', 'MINUS'),
    ('left', 'TIMES', 'DIVIDE'),
)

This declaration specifies that PLUS/MINUS have the same precedence level and are left-associative and that TIMES/DIVIDE have the same precedence and are left-associative. Within the precedence declaration, tokens are ordered from lowest to highest precedence. Thus, this declaration specifies that TIMES/DIVIDE have higher precedence than PLUS/MINUS (since they appear later in the precedence specification).

The precedence specification works by associating a numerical precedence level value and associativity direction to the listed tokens. For example, in the above example you get:
PLUS      : level = 1,  assoc = 'left'
MINUS     : level = 1,  assoc = 'left'
TIMES     : level = 2,  assoc = 'left'
DIVIDE    : level = 2,  assoc = 'left'

These values are then used to attach a numerical precedence value and associativity direction to each grammar rule. This is always determined by looking at the precedence of the right-most terminal symbol. For example:
expression : expression PLUS expression                 # level = 1, left
           | expression MINUS expression                # level = 1, left
           | expression TIMES expression                # level = 2, left
           | expression DIVIDE expression               # level = 2, left
           | LPAREN expression RPAREN                   # level = None (not specified)
           | NUMBER                                     # level = None (not specified)

When shift/reduce conflicts are encountered, the parser generator resolves the conflict by looking at the precedence rules and associativity specifiers.
  1. If the current token has higher precedence, it is shifted.
  2. If the grammar rule on the stack has higher precedence, the rule is reduced.
  3. If the current token and the grammar rule have the same precedence, the rule is reduced for left associativity, whereas the token is shifted for right associativity.
  4. If nothing is known about the precedence, shift/reduce conflicts are resolved in favor of shifting (the default).
For example, if "expression PLUS expression" has been parsed and the next token is "TIMES", the action is going to be a shift because "TIMES" has a higher precedence level than "PLUS". On the other hand, if "expression TIMES expression" has been parsed and the next token is "PLUS", the action is going to be reduce because "PLUS" has a lower precedence than "TIMES."
When shift/reduce conflicts are resolved using the first three techniques (with the help of precedence rules), yacc.py will report no errors or conflicts in the grammar.
One problem with the precedence specifier technique is that it is sometimes necessary to change the precedence of an operator in certain contents. For example, consider a unary-minus operator in "3 + 4 * -5". Normally, unary minus has a very high precedence--being evaluated before the multiply. However, in our precedence specifier, MINUS has a lower precedence than TIMES. To deal with this, precedence rules can be given for fictitious tokens like this:
precedence = (
    ('left', 'PLUS', 'MINUS'),
    ('left', 'TIMES', 'DIVIDE'),
    ('right', 'UMINUS'),            # Unary minus operator
)
Now, in the grammar file, we can write our unary minus rule like this:
def p_expr_uminus(p):
    'expression : MINUS expression %prec UMINUS'
    p[0] = -p[2]
In this case, %prec UMINUS overrides the default rule precedence--setting it to that of UMINUS in the precedence specifier.
At first, the use of UMINUS in this example may appear very confusing. UMINUS is not an input token or a grammer rule. Instead, you should think of it as the name of a special marker in the precedence table. When you use the %prec qualifier, you're simply telling yacc that you want the precedence of the expression to be the same as for this special marker instead of the usual precedence.
It is also possible to specify non-associativity in the precedence table. This would be used when you don't want operations to chain together. For example, suppose you wanted to support comparison operators like < and > but you didn't want to allow combinations like a < b < c. To do this, simply specify a rule like this:
precedence = (
    ('nonassoc', 'LESSTHAN', 'GREATERTHAN'),  # Nonassociative operators
    ('left', 'PLUS', 'MINUS'),
    ('left', 'TIMES', 'DIVIDE'),
    ('right', 'UMINUS'),            # Unary minus operator
)
If you do this, the occurrence of input text such as a < b < c will result in a syntax error. However, simple expressions such as a < b will still be fine.
Reduce/reduce conflicts are caused when there are multiple grammar rules that can be applied to a given set of symbols. This kind of conflict is almost always bad and is always resolved by picking the rule that appears first in the grammar file. Reduce/reduce conflicts are almost always caused when different sets of grammar rules somehow generate the same set of symbols. For example:
assignment :  ID EQUALS NUMBER
           |  ID EQUALS expression
           
expression : expression PLUS expression
           | expression MINUS expression
           | expression TIMES expression
           | expression DIVIDE expression
           | LPAREN expression RPAREN
           | NUMBER
In this case, a reduce/reduce conflict exists between these two rules:
assignment  : ID EQUALS NUMBER
expression  : NUMBER

For example, if you wrote "a = 5", the parser can't figure out if this is supposed to be reduced as assignment : ID EQUALS NUMBER or whether it's supposed to reduce the 5 as an expression and then reduce the rule assignment : ID EQUALS expression.
It should be noted that reduce/reduce conflicts are notoriously difficult to spot simply looking at the input grammar. To locate these, it is usually easier to look at the parser.out debugging file with an appropriately high level of caffeination.

4.2 Python PLY With Yacc and its Values


 Character Literals


If desired, a grammar may contain tokens defined as single character literals. For example:
def p_binary_operators(p):
    '''expression : expression '+' term
                  | expression '-' term
       term       : term '*' factor
                  | term '/' factor'''
    if p[2] == '+':
        p[0] = p[1] + p[3]
    elif p[2] == '-':
        p[0] = p[1] - p[3]
    elif p[2] == '*':
        p[0] = p[1] * p[3]
    elif p[2] == '/':
        p[0] = p[1] / p[3]
A character literal must be enclosed in quotes such as '+'. In addition, if literals are used, they must be declared in the corresponding lex file through the use of a special literals declaration.
# Literals.  Should be placed in module given to lex()
literals = ['+','-','*','/' ]

Character literals are limited to a single character. Thus, it is not legal to specify literals such as '<=' or '=='. For this, use the normal lexing rules (e.g., define a rule such as t_EQ = r'==').

Example:



Precedence

To resolve ambiguity, especially in expression grammars, yacc.py allows individual tokens to be assigned a precedence level and associativity. This is done by adding a variable precedence to the grammar file like this:
precedence = (
    ('left', 'PLUS', 'MINUS'),
    ('left', 'TIMES', 'DIVIDE'),
)
This declaration specifies that PLUS/MINUS have the same precedence level and are left-associative and that TIMES/DIVIDE have the same precedence and are left-associative. Within the precedence declaration, tokens are ordered from lowest to highest precedence. Thus, this declaration specifies that TIMES/DIVIDE have higher precedence than PLUS/MINUS (since they appear later in the precedence specification).
The precedence specification works by associating a numerical precedence level value and associativity direction to the listed tokens. For example, in the above example you get:
PLUS      : level = 1,  assoc = 'left'
MINUS     : level = 1,  assoc = 'left'
TIMES     : level = 2,  assoc = 'left'
DIVIDE    : level = 2,  assoc = 'left'
These values are then used to attach a numerical precedence value and associativity direction to each grammar rule. This is always determined by looking at the precedence of the right-most terminal symbol. For example:
expression : expression PLUS expression                 # level = 1, left
           | expression MINUS expression                # level = 1, left
           | expression TIMES expression                # level = 2, left
           | expression DIVIDE expression               # level = 2, left
           | LPAREN expression RPAREN                   # level = None (not specified)
           | NUMBER                                     # level = None (not specified)

Example: