From b1794edb3d76db7c1a86fc14a60bc95b833bc7c7 Mon Sep 17 00:00:00 2001 From: Frederick Yin Date: Sat, 3 Sep 2022 11:22:20 +0800 Subject: hackc: more statements; UnexpectedToken --- projects/hackc/classes.py | 62 ++++++----------- projects/hackc/expressions.py | 96 +++++++++++++++++++++----- projects/hackc/parser.py | 28 +------- projects/hackc/statements.py | 156 +++++++++++++++++++++++++++++++++++++++--- projects/hackc/utils.py | 47 +++++++++++++ 5 files changed, 296 insertions(+), 93 deletions(-) diff --git a/projects/hackc/classes.py b/projects/hackc/classes.py index bd83cc4..f1358f6 100644 --- a/projects/hackc/classes.py +++ b/projects/hackc/classes.py @@ -30,21 +30,16 @@ class Class: if tokens_total < 4: return None if tokens[0] != "class": - raise JackSyntaxError( - f"Expected `class`, got `{tokens[0]}` instead", tokens[0] - ) + raise UnexpectedToken("class", tokens[0]) name = tokens[1] if name.type != "identifier": raise JackSyntaxError(f"You cannot name a class `{name}`", name) if tokens[2] != LEFT_BRACE: - raise JackSyntaxError( - f"Expected `{LEFT_BRACE}`, got `{tokens[2]}` instead", tokens[2] - ) + raise UnexpectedToken(LEFT_BRACE, tokens[2]) t = 3 - variables = [] while t < tokens_total: variable, dt = Variable.from_tokens(tokens[t:], context="class") @@ -115,7 +110,7 @@ class Variable: type = tokens[1] if type not in VAR_TYPES and type.type != "identifier": - raise JackSyntaxError(f"Expected datatype, got `{tokens[1]}` instead", type) + raise UnexpectedToken("datatype", type) t = 2 names = [] # names of variables @@ -129,26 +124,20 @@ class Variable: names.append(token) expecting_identifier = False else: - raise JackSyntaxError(f"Expected `,`, got `{token}` instead", token) + raise UnexpectedToken(",", token) elif token == ",": if not expecting_identifier: expecting_identifier = True else: - raise JackSyntaxError( - f"Expected variable name, got `,` instead", token - ) + raise UnexpectedToken("variable name", token) elif token == ";": if 
expecting_identifier: - raise JackSyntaxError( - f"Expected variable name, got `;` instead", token - ) + raise UnexpectedToken("variable name", token) found_semicolon = True break else: expected = "variable name" if expecting_identifier else "`,` or `;`" - raise JackSyntaxError( - f"Expected {expected}, got `{token}` instead", token - ) + raise UnexpectedToken(expected, token) if not found_semicolon: # TODO: print caret at end of token @@ -186,16 +175,14 @@ class ParamList: params = [] for type, name, delim in zip(tokens[0::3], tokens[1::3], tokens[2::3]): if type not in VAR_TYPES and type.type != "identifier": - raise JackSyntaxError(f"Expected datatype, got `{type}` instead", type) + raise UnexpectedToken("datatype", type) if not name: # TODO: print caret at end of type raise JackSyntaxError("Expected variable name", type) if name.type != "identifier": - raise JackSyntaxError( - f"Expected variable name, got `{name}` instead", name - ) + raise UnexpectedToken("variable name", name) if not delim: - raise JackSyntaxError(f"Expected `,` or `{RIGHT_PAREN}`", name) + raise UnexpectedToken(f"`,` or `{RIGHT_PAREN}`", name) if delim == ",": t += 3 params.append((type, name)) @@ -205,9 +192,7 @@ class ParamList: params.append((type, name)) break else: - raise JackSyntaxError( - f"Expected `,` or `{RIGHT_PAREN}`, got `{delim}` instead", delim - ) + raise UnexpectedToken(f"`,` or `{RIGHT_PAREN}`", delim) return (ParamList(params), t) @@ -251,20 +236,14 @@ class Subroutine: return_type = tokens[1] if return_type not in RETURN_TYPES and return_type.type != "identifier": - raise JackSyntaxError( - f"Expected datatype, got `{return_type}` instead", return_type - ) + raise UnexpectedToken("datatype", return_type) name = tokens[2] if name.type != "identifier": - raise JackSyntaxError( - f"Expected {category} name, got `{name}` instead", name - ) + raise UnexpectedToken(f"{category} name", name) if tokens[3] != LEFT_PAREN: - raise JackSyntaxError( - f"Expected `{LEFT_PAREN}`, got 
`{tokens[3]}` instead", tokens[3] - ) + raise UnexpectedToken(LEFT_PAREN, tokens[3]) t = 4 params, dt = ParamList.from_tokens(tokens[t:]) @@ -276,9 +255,7 @@ class Subroutine: # TODO: catch IndexError body_open = tokens[t] if body_open != LEFT_BRACE: - raise JackSyntaxError( - f"Expected `{LEFT_BRACE}`, got `{body_open}` instead", body_open - ) + raise UnexpectedToken(LEFT_BRACE, body_open) t += 1 variables = [] @@ -299,12 +276,13 @@ class Subroutine: body_close = tokens[t] if body_close != RIGHT_BRACE: - raise JackSyntaxError( - f"Expected `{RIGHT_BRACE}`, got `{body_close}` instead", body_close - ) + raise UnexpectedToken(RIGHT_BRACE, body_close) t += 1 - return (Subroutine(category, return_type, name, params, variables, statements), t) + return ( + Subroutine(category, return_type, name, params, variables, statements), + t, + ) def print_verbose(self): print(f"Define {self.category} {self.type} {self.name}") diff --git a/projects/hackc/expressions.py b/projects/hackc/expressions.py index f4623da..2e08cef 100644 --- a/projects/hackc/expressions.py +++ b/projects/hackc/expressions.py @@ -24,9 +24,7 @@ class Term: """Format: '(' ')'""" expr, dt = Expression.from_tokens(tokens[1:]) if tokens[dt + 1] != RIGHT_PAREN: - raise JackSyntaxError( - f"Expected `{RIGHT_PAREN}`, got `{tokens[dt]}` instead", tokens[dt] - ) + raise UnexpectedToken(RIGHT_PAREN, tokens[dt + 1]) return (expr, dt + 2) if tokens[0].type == "identifier": if tokens[1] in [LEFT_PAREN, "."]: @@ -69,9 +67,7 @@ class SubscriptTerm: raise JackSyntaxError(f"Expected subscript", tokens[2]) t += dt if tokens[t] != RIGHT_BRACKET: - raise JackSyntaxError( - f"Expected `{RIGHT_BRACKET}`, got `{tokens[t]}` instead", tokens[t] - ) + raise UnexpectedToken(RIGHT_BRACKET, tokens[t]) return (SubscriptTerm(var, sub), t + 1) def __str__(self): @@ -95,16 +91,6 @@ class UnaryTerm: return f"({self.op}{self.term})" -class SubroutineCall: - def __init__(self, name: Token, exprs: list): - self.name = name - self.exprs = exprs - - 
@classmethod - def from_tokens(cls, tokens: list) -> tuple: - pass - - class Expression: def __init__(self, lhs: Term, op=None, rhs=None): self.lhs = lhs @@ -119,6 +105,8 @@ class Expression: ( )? """ lhs, dt = Term.from_tokens(tokens) + if lhs is None: + return (None, 0) t = dt op = tokens[t] @@ -128,7 +116,7 @@ class Expression: t += 1 rhs, dt = Term.from_tokens(tokens[t:]) if rhs is None: - raise JSE(f"Expected other term, got `{rhs}` instead", rhs) + raise UnexpectedToken("other term", tokens[t]) t += dt return (Expression(lhs, op, rhs), t) @@ -138,3 +126,77 @@ class Expression: return f"({self.lhs} {self.op} {self.rhs})" else: return str(self.lhs) + + +class ExpressionList: + def __init__(self, exprs: list[Expression]): + self.exprs = exprs + + @classmethod + def from_tokens(cls, tokens: list) -> tuple: + """Construct list of expressions. + + Format: + ( (',' )*)? + """ + t = 0 + exprs = [] + while True: + expr, dt = Expression.from_tokens(tokens[t:]) + if expr is None: + if t == 0: + # only allow lack of expression right after paren + break + else: + # expect expression after comma + raise JackSyntaxError(f"Expected expression", tokens[t]) + t += dt + exprs.append(expr) + if tokens[t] != ",": + break + t += 1 + + return (ExpressionList(exprs), t) + + def __str__(self): + return ", ".join([str(expr) for expr in self.exprs]) + + +class SubroutineCall: + def __init__(self, jack_class: Token, name: Token, exprs: ExpressionList): + self.jack_class = jack_class + self.name = name + self.exprs = exprs + + @classmethod + def from_tokens(cls, tokens: list) -> tuple: + """Construct invocation of subroutine. + + Format: + ( '.')? 
'(' ')' + """ + t = 0 + jack_class = None + if tokens[1] == ".": + jack_class = tokens[0] + t = 2 + + name = tokens[t] + if name.type != "identifier": + raise UnexpectedToken("subroutine name", name) + t += 1 + + if tokens[t] != LEFT_PAREN: + raise UnexpectedToken(LEFT_PAREN, tokens[t]) + t += 1 + + exprs, dt = ExpressionList.from_tokens(tokens[t:]) + t += dt + if tokens[t] != RIGHT_PAREN: + raise UnexpectedToken(RIGHT_PAREN, tokens[t]) + return (SubroutineCall(jack_class, name, exprs), t + 1) + + def __str__(self): + if self.jack_class is None: + return f"{self.name}({self.exprs})" + return f"{self.jack_class}.{self.name}({self.exprs})" diff --git a/projects/hackc/parser.py b/projects/hackc/parser.py index 188bddc..406c139 100644 --- a/projects/hackc/parser.py +++ b/projects/hackc/parser.py @@ -2,32 +2,6 @@ from .tokens import Token from .classes import Class from .utils import * -KEYWORDS = [ - "class", - "constructor", - "function", - "method", - "field", - "static", - "var", - "int", - "char", - "boolean", - "void", - "true", - "false", - "null", - "this", - "let", - "do", - "if", - "else", - "while", - "return", -] - -SYMBOLS = "{}()[].,;+-*/&|<>=~" - class Parser: def __init__(self, fp, extensions=[]): @@ -82,6 +56,8 @@ class Parser: break rem = line[pos:] # remainder of line + if not rem: + continue token = Token.from_line(rem, line_no, pos, extensions=self._extensions) if token is not None: self.tokens.append(token) diff --git a/projects/hackc/statements.py b/projects/hackc/statements.py index 16c123e..0e257ed 100644 --- a/projects/hackc/statements.py +++ b/projects/hackc/statements.py @@ -1,19 +1,42 @@ -from .expressions import Expression +from .expressions import Expression, SubroutineCall from .utils import * + class Statement: def __init__(self): pass @classmethod def from_tokens(cls, tokens: list) -> tuple: - for StatementClass in [LetStatement]: + for StatementClass in [LetStatement, DoStatement, ReturnStatement, IfStatement]: stmt, dt = 
StatementClass.from_tokens(tokens) if stmt is not None: return (stmt, dt) return (None, 0) + +class StatementList: + def __init__(self, statements: list[Statement]): + self.statements = statements + + @classmethod + def from_tokens(cls, tokens: list) -> tuple: + t = 0 + statements = [] + while True: + stmt, dt = Statement.from_tokens(tokens[t:]) + if stmt is None: + break + statements.append(stmt) + t += dt + return (StatementList(statements), t) + + def print_verbose(self): + for stmt in self.statements: + stmt.print_verbose() + + class LetStatement: def __init__(self, name, expr): self.name = name @@ -31,22 +54,139 @@ class LetStatement: name = tokens[1] if name.type != "identifier": - raise JackSyntaxError(f"Expected variable name, got `{name}` instead", name) + raise UnexpectedToken("variable name", name) if tokens[2] != "=": - raise JackSyntaxError(f"Expected `=`, got `{tokens[2]}` instead", tokens[2]) + raise UnexpectedToken("=", tokens[2]) t = 3 expr, dt = Expression.from_tokens(tokens[t:]) if expr is None: - raise JackSyntaxError(f"Expected expression", tokens[3]) + raise UnexpectedToken("expression", tokens[3]) t += dt if tokens[t] != ";": - raise JackSyntaxError(f"Expected `;`, got `{tokens[t]}` instead", tokens[t]) + raise UnexpectedToken(";", tokens[t]) - t += 1 - return (LetStatement(name, expr), t) + return (LetStatement(name, expr), t + 1) def print_verbose(self): print(f"Let {self.name} be {self.expr}") + + +class DoStatement: + def __init__(self, subcall: SubroutineCall): + self.subcall = subcall + + @classmethod + def from_tokens(cls, tokens: list) -> tuple: + """Construct do statement. 
+ + Format: + 'do' ';' + """ + if len(tokens) < 5 or tokens[0] != "do": + return (None, 0) + + t = 1 + subcall, dt = SubroutineCall.from_tokens(tokens[1:]) + if subcall is None: + raise UnexpectedToken("subroutine call", tokens[1]) + t += dt + + if tokens[t] != ";": + raise UnexpectedToken(";", tokens[t]) + + return (DoStatement(subcall), t + 1) + + def print_verbose(self): + print(f"Do {self.subcall}") + + +class ReturnStatement: + def __init__(self, expr: Expression): + self.expr = expr + + @classmethod + def from_tokens(cls, tokens: list) -> tuple: + """Construct return statement. + + Format: + 'return' ? ';' + """ + if len(tokens) < 3 or tokens[0] != "return": + return (None, 0) + + t = 1 + expr, dt = Expression.from_tokens(tokens[1:]) + t += dt + + if tokens[t] != ";": + raise UnexpectedToken(";", tokens[t]) + + return (ReturnStatement(expr), t + 1) + + def print_verbose(self): + print(f"Return {self.expr or 'null'}") + + +class IfStatement: + def __init__(self, condition: Expression, then: StatementList, else_then=None): + self.condition = condition + self.then = then + self.else_then = else_then + + @classmethod + def from_tokens(cls, tokens: list) -> tuple: + """Construct if statement. + + Format: + 'if' '(' ')' '{' '}' ('else' '{'