Prototyping Interpreters using Python Lex-Yacc
by Shannon Behrens

Example 1:
def p_list(t):
    'list : LPAREN nodes RPAREN'
    #^      ^      ^     ^          This comment is for illustration only.
    #t[0]   t[1]   t[2]  t[3]
    # Each grammar symbol above lines up with one index of t: t[0] is the
    # value being produced for 'list', t[1]..t[3] are the right-hand symbols.
    # The list's value is simply the value of its 'nodes' (t[2]).
    t[0] = t[2]



Listing One

"""This file contains the lexer rules and the list of valid tokens."""
import lex
import sys
import re

# Token names declared by this lexer.
tokens = (
    'INT',
    'FLOAT',
    'STRING',
    'SYMBOL',
    'LPAREN',
    'RPAREN',
)

# Simple tokens are described directly by a regular-expression string.
t_LPAREN = r'\('
t_RPAREN = r'\)'

# Match a floating-point literal and convert its text to a Python float.
# This rule has to stay ahead of the int rule (presumably so that the digits
# before the decimal point are not consumed as an INT first).
def t_FLOAT(t):
    r'-?\d+\.\d*(e-?\d+)?'
    number = float(t.value)
    t.value = number
    return t
# Match an optionally negative run of digits and convert it to a Python int.
def t_INT(t):
    r'-?\d+'
    number = int(t.value)
    t.value = number
    return t
# Read in a double-quoted string, as in C, and replace the token's value with
# the decoded text (surrounding quotes removed).  The backslash sequences
# \n and \t become newline and tab; any other backslash-escaped character
# (including \" and \\) stands for that character itself.
def t_STRING(t):
    r'\"([^\\"]|(\\.))*\"'
    # Escapes that map to a different character; everything else is literal.
    escapes = {"n": "\n", "t": "\t"}
    pieces = []
    escaped = False
    # t.value[1:-1] strips the opening and closing quote characters.
    for ch in t.value[1:-1]:
        if escaped:
            pieces.append(escapes.get(ch, ch))
            escaped = False
        elif ch == "\\":
            # Remember the backslash; the next character decides the result.
            escaped = True
        else:
            pieces.append(ch)
    t.value = "".join(pieces)
    return t
# Ignore comments: text from '#' up to (not including) the end of the line
# is matched here and nothing is returned for it.
def t_comment(t):
    r'[#][^\n]*'
    return None
# Track line numbers: advance the line count by the number of newline
# characters matched.  Nothing is returned for the match.
def t_newline(t):
    r'\n+'
    newline_count = len(t.value)
    t.lineno = t.lineno + newline_count
# Read in a symbol.  This rule must be practically last since there are so few
# rules concerning what constitutes a symbol: the first character may be
# anything except a digit or a parenthesis, and the rest may be anything
# except a parenthesis, space, tab, or newline.  The matched text is left
# unchanged as the token's value.
def t_SYMBOL(t):
    r'[^0-9()][^()\ \t\n]*'
    return t
# Characters that should be ignored: space and tab.
t_ignore = ' \t'
# Handle lexing errors by raising a SyntaxError whose message reports the
# token's line number and value.
def t_error(t):
    message = "syntax error on line %d near '%s'" % (t.lineno, t.value)
    raise SyntaxError(message)
# Build the lexer.  This runs at import time.
# NOTE(review): presumably lex.lex() picks up `tokens` and the t_* rules
# defined in this module -- confirm against the PLY version in use.
lex.lex()


Listing Two
"""This file contains the parser rules.

The function yacc.parse, which this module makes available, returns a parse 
tree.  The parse tree is a set of nested lists containing ints, floats, 
strings, Symbols, etc.
"""
import yacc
import sys

from lexer import tokens
from Symbol import Symbol

def p_list(t):
    'list : LPAREN nodes RPAREN'
    # The parentheses carry no value; a list's value is that of its nodes.
    nodes = t[2]
    t[0] = nodes
def p_nodes_node(t):
    'nodes : node nodes'
    # Build a new list: the leading node followed by the remaining nodes.
    first, rest = t[1], t[2]
    t[0] = [first] + rest
def p_nodes_empty(t):
    'nodes : empty'
    # No nodes remain: the value is a fresh empty list.
    t[0] = list()
def p_empty(t):
    'empty :'
    # The empty production: nothing is assigned to t[0].
    return None
def p_node_int(t):
    'node : INT'
    # The node's value is the INT token's value, passed through unchanged.
    int_value = t[1]
    t[0] = int_value
def p_node_float(t):
    'node : FLOAT'
    # The node's value is the FLOAT token's value, passed through unchanged.
    float_value = t[1]
    t[0] = float_value
def p_node_string(t):
    'node : STRING'
    # The node's value is the STRING token's value, passed through unchanged.
    string_value = t[1]
    t[0] = string_value
def p_node_symbol(t):
    'node : SYMBOL'
    # Wrap the SYMBOL token's value in a Symbol object, passed as `name`.
    symbol_name = t[1]
    t[0] = Symbol(name=symbol_name)
def p_node_list(t):
    'node : list'
    # A nested list is itself a node; its value is passed through unchanged.
    nested = t[1]
    t[0] = nested
# Error rule for syntax errors.  Always raises SyntaxError.  When a token is
# supplied, the message names its line number and text (in the same style as
# the lexer's error message); when t is None, the message says that the input
# ended unexpectedly.  Every message still begins with "invalid syntax".
def p_error(t):
    if t is None:
        # Per the PLY documentation, the error rule receives None when the
        # syntax error is caused by reaching the end of the input.
        raise SyntaxError("invalid syntax: unexpected end of input")
    # getattr keeps this usable even if the object lacks one of the fields.
    raise SyntaxError("invalid syntax on line %s near '%s'" %
        (getattr(t, "lineno", "?"), getattr(t, "value", t)))
# Build the parser.  This runs at import time.
# NOTE(review): presumably yacc.yacc() picks up the p_* rules and the imported
# `tokens` from this module -- confirm against the PLY version in use.
yacc.yacc()





1



