本文继续完善Python实现的 Pascal 程序的解释器,新增:
效果如下:

program : PROGRAM variable SEMI block DOT
block : declarations compound_statement
declarations : VAR (variable_declaration SEMI)+
| empty
variable_declaration : ID (COMMA ID)* COLON type_spec
type_spec : INTEGER | REAL
compound_statement : BEGIN statement_list END
statement_list : statement
| statement SEMI statement_list
statement : compound_statement
| assignment_statement
| empty
assignment_statement : variable ASSIGN expr
empty :
expr : term ((PLUS | MINUS) term)*
term : factor ((MUL | INTEGER_DIV | FLOAT_DIV) factor)*
factor : PLUS factor
| MINUS factor
| INTEGER_CONST
| REAL_CONST
| LPAREN expr RPAREN
| variable
variable: ID
import json
import copy
NUMBER, PLUS, MINUS, MUL, DIV, LPAR, RPAR, EOF = 'NUMBER', 'PLUS', 'MINUS', 'MUL', 'DIV', '(', ')', 'EOF'
SEMI = 'SEMI'
ASSIGN = 'ASSIGN'
ID = 'ID'
DOT = 'DOT'
BEGIN = 'BEGIN'
END = 'END'
INTEGER = 'INTEGER'
REAL = 'REAL'
INTEGER_DIV = 'INTEGER DIV'
FLOAT_DIV = 'FLOAT DIV'
PROGRAM = 'PROGRAM'
VAR = 'VAR'
COMMA = 'COMMA'
COLON = 'COLON'
EMPTY = 'EMPTY'
class Token:
def __init__(self, token_type, value):
self.type = token_type
self.value = value
def __repr__(self,):
return f'Token({self.type}, {self.value})'
class Lexer:
reserved_words = {
'BEGIN': Token(BEGIN, 'BEGIN'),
'END': Token(END, 'END'),
'PROGRAM': Token(PROGRAM, 'PROGRAM'),
'DIV': Token(INTEGER_DIV, 'DIV'),
'VAR': Token(VAR, 'VAR'),
'INTEGER': Token(INTEGER, 'INTEGER'),
'REAL': Token(REAL, 'REAL'),
}
blank_chars = [' ', '\n', '\t']
def __init__(self, text):
self.text = text
self.pos = 0
self.current_char = self.text[self.pos]
def copy(self,):
l = Lexer(self.text)
l.pos = self.pos
def peek(self):
if 1 + self.pos >= len(self.text):
return None
return self.text[self.pos + 1]
def error(self, msg=''):
raise Exception(f'Lexer Error! {msg}')
def advance(self):
self.pos += 1
if self.pos >= len(self.text):
self.current_char = None
else:
self.current_char = self.text[self.pos]
def skip_space(self):
if self.current_char in self.blank_chars:
while self.current_char in self.blank_chars:
self.advance()
def skip_comment(self):
if self.current_char == '{':
while self.current_char != '}' and self.current_char is not None:
self.advance()
self.advance()
def _id(self,):
var_id = ''
while self.current_char is not None and self.current_char.isalnum():
var_id += self.current_char
self.advance()
if var_id.upper() in self.reserved_words:
return self.reserved_words[var_id.upper()]
return Token(ID, var_id)
def number(self,):
num = ''
while self.current_char is not None and self.current_char.isdigit():
num += self.current_char
self.advance()
if self.current_char == '.':
num += self.current_char
self.advance()
while self.current_char is not None and self.current_char.isdigit():
num += self.current_char
self.advance()
return Token(REAL, float(num))
return Token(INTEGER, int(num))
def get_next_token(self, ):
while self.current_char in self.blank_chars + ['{']:
self.skip_space()
self.skip_comment()
if self.current_char is None:
return Token(EOF, None)
if self.current_char.isdigit():
return self.number()
if self.current_char.isalpha():
return self._id()
if self.current_char == '+':
self.advance()
return Token(PLUS, '+')
if self.current_char == '-':
self.advance()
return Token(MINUS, '-')
if self.current_char == '*':
self.advance()
return Token(MUL, '*')
if self.current_char == '/':
self.advance()
return Token(FLOAT_DIV, '/')
if self.current_char == '(':
self.advance()
return Token(LPAR, '(')
if self.current_char == ')':
self.advance()
return Token(RPAR, ')')
if self.current_char == ';':
self.advance()
return Token(SEMI, ';')
if self.current_char == ':' and self.peek() == '=':
self.advance()
self.advance()
return Token(ASSIGN, ':=')
if self.current_char == ':' and self.peek() != '=':
self.advance()
return Token(COLON, ':')
if self.current_char == ',':
self.advance()
return Token(COMMA, ':')
if self.current_char == '.':
self.advance()
return Token(DOT, '.')
self.error(f'current_char: {self.current_char}')
class AST:
pass
class VarDec:
def __init__(self, var_type, var_name):
self.type = 'VARIABLE DECLARATION'
self.var_type = var_type
self.var_name = var_name
def __repr__(self):
return f'{{"type":"{self.type}", "variable type":"{self.var_type}", "variable name":"{self.var_name}"}}'
__str__ = __repr__
class Program(AST):
def __init__(self, name, block_node):
self.type = 'PROGRAM'
self.name = name
self.block = block_node
def __repr__(self):
return f'{{"type":"{self.type}", "name":"{self.name}", "block":{self.block}}}'
__str__ = __repr__
class Block(AST):
def __init__(self, dec_node, statement_node):
self.type = 'BLOCK'
self.declaration = dec_node
self.compound_statement = statement_node
def __repr__(self):
return f'{{"type":"{self.type}", "declaration":{self.declaration}, "statements":{self.compound_statement}}}'
__str__ = __repr__
class Declaration(AST):
def __init__(self, var_list):
self.type = 'DECLARATION'
self.var_dec = var_list
def __repr__(self):
return f'{{"type":"{self.type}", "variable declaration":{self.var_dec}}}'
__str__ = __repr__
class CompoundStatement(AST):
def __init__(self, node_type, statement_list):
self.type = node_type
self.children = statement_list
def __repr__(self):
return f'{{"type":"{self.type}", "children":{self.children}}}'
__str__ = __repr__
class BinOp(AST):
def __init__(self, node_type, left_node, right_node):
self.type = node_type
self.left_node = left_node
self.right_node = right_node
def __repr__(self):
return f'{{"type":"{self.type}", "left":{self.left_node}, "right":{self.right_node}}}'
__str__ = __repr__
class UniOp(AST):
def __init__(self, node_type, child_node):
self.type = node_type
self.child_node = child_node
def __repr__(self):
return f'{{"type":"{self.type}", "child":{self.child_node}}}'
__str__ = __repr__
class Variable(AST):
def __init__(self, node_type, name):
self.type = node_type
self.id = name
def __repr__(self):
return f'{{"type":"{self.type}", "id":"{self.id}"}}'
__str__ = __repr__
class Empty(AST):
def __init__(self, ):
self.type = EMPTY
def __repr__(self):
return f'{{"type":"{self.type}"}}'
__str__ = __repr__
class Leaf(AST):
def __init__(self, node_type, value):
self.type = node_type
self.value = value
def __repr__(self):
return f'{{"type":"{self.type}", "value":{self.value}}}'
__str__ = __repr__
class Parser:
"""
program : PROGRAM variable SEMI block DOT
block : declarations compound_statement
declarations : VAR (variable_declaration SEMI)+
| empty
variable_declaration : ID (COMMA ID)* COLON type_spec
type_spec : INTEGER | REAL
compound_statement : BEGIN statement_list END
statement_list : statement
| statement SEMI statement_list
statement : compound_statement
| assignment_statement
| empty
assignment_statement : variable ASSIGN expr
empty :
expr : term ((PLUS | MINUS) term)*
term : factor ((MUL | INTEGER_DIV | FLOAT_DIV) factor)*
factor : PLUS factor
| MINUS factor
| INTEGER_CONST
| REAL_CONST
| LPAREN expr RPAREN
| variable
variable: ID
"""
def __init__(self, lexer):
self.lexer = lexer
self.current_token = self.lexer.get_next_token()
def error(self, msg = ''):
raise Exception(f'Parse Error! {msg}')
def peek(self,):
if self.current_token is None:
return None
return copy.copy(self.lexer).get_next_token()
def eat(self, token_type):
# print(self.current_token)
if type(token_type) == list and self.current_token.type not in token_type:
self.error(f'current token not in list, expect {token_type}, got {self.current_token}.')
elif type(token_type) == str and self.current_token.type != token_type:
self.error(f'current token not match, expect {token_type}, got {self.current_token}.')
else:
self.current_token = self.lexer.get_next_token()
def variable(self,):
f = self.current_token
self.eat(ID)
return Variable(ID, f.value)
def factor(self,):
f = self.current_token
if f.type in [REAL, INTEGER]:
self.eat(f.type)
return Leaf(f.type, f.value)
elif f.type == LPAR:
self.eat(LPAR)
expr = self.expr()
self.eat(RPAR)
return expr
elif f.type in [PLUS, MINUS]:
self.eat(f.type)
return UniOp(f.type, self.factor())
else:
return self.variable()
self.error()
def term(self,):
"""
term: factor(MUL| INTEGER_DIV | FLOAT_DIV)factor)*
"""
node = self.factor()
while(self.current_token.type in [MUL, INTEGER_DIV, FLOAT_DIV]):
op = self.current_token
self.eat(op.type)
f = self.factor()
_node = BinOp(op.type, node, f)
node = _node
return node
def expr(self,):
node = self.term()
while(self.current_token.type in [PLUS, MINUS]):
op = self.current_token
self.eat([PLUS, MINUS])
t = self.term()
_node = BinOp(op.type, node, t)
node = _node
return node
def assign_statement(self,):
"""
assign_statement: VAR ASSIGN expr
"""
var = self.variable()
op = self.current_token
self.eat(ASSIGN)
expr_node = self.expr()
return BinOp(ASSIGN, var, expr_node)
def empty(self):
return Empty()
def statement(self,):
"""
statement: assign_statement | empty
"""
if self.current_token.type == BEGIN:
return self.compound_statement()
elif self.current_token.type == ID and self.peek().type == ASSIGN:
return self.assign_statement()
else:
return self.empty()
def compound_statement(self):
self.eat(BEGIN)
node = self.statement()
node_list = [node]
while self.current_token.type == SEMI:
self.eat(SEMI)
node_list.append(self.statement())
self.eat(END)
return CompoundStatement('CompoundStatement', node_list)
def type_spec(self):
token_type = self.current_token.type
if token_type in [INTEGER, REAL]:
self.eat(token_type)
return token_type
self.error(f'unrecognized type sepc: {token_type}')
def varibale_declaration(self):
"""
variable_declaration : ID (COMMA ID)* COLON type_spec
"""
name_list = []
name = self.variable().id
name_list.append(name)
while self.current_token.type == COMMA:
self.eat(COMMA)
name = self.variable().id
name_list.append(name)
self.eat(COLON)
var_type = self.type_spec()
return [VarDec(var_type, var_name) for var_name in name_list]
def declaration(self):
"""
declarations : VAR (variable_declaration SEMI)+ | empty
"""
if self.current_token.type == VAR:
var_list = []
self.eat(VAR)
while self.current_token.type == ID:
var_list += self.varibale_declaration()
self.eat(SEMI)
return Declaration(var_list)
else:
self.empty()
def block(self):
"""
block : declarations compound_statement
"""
dec_node = self.declaration()
compound_statement = self.compound_statement()
return Block(dec_node, compound_statement)
def program(self):
"""
program : PROGRAM variable SEMI block DOT
"""
self.eat(PROGRAM)
program_name = self.variable().id
self.eat(SEMI)
block = self.block()
self.eat(DOT)
return Program(program_name, block)
def ast(self):
return self.program()
class Interpreter:
def __init__(self, parser):
self.parser = parser
self.ast = parser.ast()
self.GLOBAL_SCOPE = {}
def print_ast(self,):
print(json.dumps( json.loads(str(self.ast)), indent = 4))
def error(self, msg=''):
raise Exception(f'Interpeter Error! {msg}')
def compute(self):
self.visit(self.ast)
return self.GLOBAL_SCOPE
def visit(self, node):
if isinstance(node, Program):
block = node.block
self.visit_Declaration(block.declaration)
self.visit_CompoundStatement(block.compound_statement)
def visit_Declaration(self, node):
pass
def visit_CompoundStatement(self, node):
for statement in node.children:
self.visit_statement(statement)
def visit_BinOp(self, node):
if node.type == PLUS:
return self.visit_node(node.left_node) + self.visit_node(node.right_node)
if node.type == MINUS:
return self.visit_node(node.left_node) - self.visit_node(node.right_node)
if node.type == MUL:
return self.visit_node(node.left_node) * self.visit_node(node.right_node)
if node.type == FLOAT_DIV:
return self.visit_node(node.left_node) / self.visit_node(node.right_node)
if node.type == INTEGER_DIV:
return self.visit_node(node.left_node) // self.visit_node(node.right_node)
def visit_UniOp(self, node):
if node.type == PLUS:
return self.visit_node(node.child_node)
if node.type == MINUS:
return - self.visit_node(node.child_node)
def visit_statement(self, node):
if node.type == ASSIGN:
var = node.left_node
expr_result = self.visit_node(node.right_node)
self.GLOBAL_SCOPE[var.id.lower()] = expr_result
elif isinstance(node, CompoundStatement):
self.visit_CompoundStatement(node)
def visit_Variable(self, node):
var = node.id.lower()
if var in self.GLOBAL_SCOPE:
return self.GLOBAL_SCOPE[var]
else:
self.error(f"variable '{node.id}' not defined!")
def visit_Leaf(self, node):
if node.type in [REAL, INTEGER]:
return node.value
def visit_node(self, node):
if isinstance(node, Leaf):
return self.visit_Leaf(node)
elif isinstance(node, Variable):
return self.visit_Variable(node)
elif isinstance(node, BinOp):
return self.visit_BinOp(node)
elif isinstance(node, UniOp):
return self.visit_UniOp(node)
elif isinstance(node, CompoundStatement):
self.visit_CompoundStatement(node)
test_cases = []
test_cases.append("""
PROGRAM Part10;
VAR
number : INTEGER;
a, b, c, x : INTEGER;
y : REAL;
BEGIN {Part10}
BEGIN
number := 2;
a := number;
b := 10 * a + 10 * number DIV 4;
c := a - - b
END;
x := 11;
y := 20 / 7 + 3.14;
{ writeln('a = ', a); }
{ writeln('b = ', b); }
{ writeln('c = ', c); }
{ writeln('number = ', number); }
{ writeln('x = ', x); }
{ writeln('y = ', y); }
END.
""")
for text in test_cases:
try:
print('Pascal> ', text.replace('\n', '\n.......> '))
if text:
lexer = Lexer(text)
parser = Parser(lexer)
interpreter = Interpreter(parser)
# interpreter.print_ast()
print(interpreter.compute())
print()
except Exception as e:
print(e)
print()
raise(e)