diff options
author | Frederick Yin <fkfd@fkfd.me> | 2022-09-03 11:22:20 +0800 |
---|---|---|
committer | Frederick Yin <fkfd@fkfd.me> | 2022-09-03 11:22:20 +0800 |
commit | b1794edb3d76db7c1a86fc14a60bc95b833bc7c7 (patch) | |
tree | 582288c1618a74405d3c183410d7525b8d6aa31f | |
parent | bed429a1d17b43678a70bc286aac12a0bd6e387d (diff) |
hackc: more statements; UnexpectedToken
-rw-r--r-- | projects/hackc/classes.py | 62 | ||||
-rw-r--r-- | projects/hackc/expressions.py | 96 | ||||
-rw-r--r-- | projects/hackc/parser.py | 28 | ||||
-rw-r--r-- | projects/hackc/statements.py | 156 | ||||
-rw-r--r-- | projects/hackc/utils.py | 47 |
5 files changed, 296 insertions, 93 deletions
diff --git a/projects/hackc/classes.py b/projects/hackc/classes.py index bd83cc4..f1358f6 100644 --- a/projects/hackc/classes.py +++ b/projects/hackc/classes.py @@ -30,21 +30,16 @@ class Class: if tokens_total < 4: return None if tokens[0] != "class": - raise JackSyntaxError( - f"Expected `class`, got `{tokens[0]}` instead", tokens[0] - ) + raise UnexpectedToken("class", tokens[0]) name = tokens[1] if name.type != "identifier": raise JackSyntaxError(f"You cannot name a class `{name}`", name) if tokens[2] != LEFT_BRACE: - raise JackSyntaxError( - f"Expected `{LEFT_BRACE}`, got `{tokens[2]}` instead", tokens[2] - ) + raise UnexpectedToken(LEFT_BRACE, tokens[2]) t = 3 - variables = [] while t < tokens_total: variable, dt = Variable.from_tokens(tokens[t:], context="class") @@ -115,7 +110,7 @@ class Variable: type = tokens[1] if type not in VAR_TYPES and type.type != "identifier": - raise JackSyntaxError(f"Expected datatype, got `{tokens[1]}` instead", type) + raise UnexpectedToken("datatype", type) t = 2 names = [] # names of variables @@ -129,26 +124,20 @@ class Variable: names.append(token) expecting_identifier = False else: - raise JackSyntaxError(f"Expected `,`, got `{token}` instead", token) + raise UnexpectedToken(",", token) elif token == ",": if not expecting_identifier: expecting_identifier = True else: - raise JackSyntaxError( - f"Expected variable name, got `,` instead", token - ) + raise UnexpectedToken("variable name", token) elif token == ";": if expecting_identifier: - raise JackSyntaxError( - f"Expected variable name, got `;` instead", token - ) + raise UnexpectedToken("variable name", token) found_semicolon = True break else: expected = "variable name" if expecting_identifier else "`,` or `;`" - raise JackSyntaxError( - f"Expected {expected}, got `{token}` instead", token - ) + raise UnexpectedToken(expected, token) if not found_semicolon: # TODO: print caret at end of token @@ -186,16 +175,14 @@ class ParamList: params = [] for type, name, delim in zip(tokens[0::3], tokens[1::3], tokens[2::3]): if type not in VAR_TYPES and type.type != "identifier": - raise JackSyntaxError(f"Expected datatype, got `{type}` instead", type) + raise UnexpectedToken("datatype", type) if not name: # TODO: print caret at end of type raise JackSyntaxError("Expected variable name", type) if name.type != "identifier": - raise JackSyntaxError( - f"Expected variable name, got `{name}` instead", name - ) + raise UnexpectedToken("variable name", name) if not delim: - raise JackSyntaxError(f"Expected `,` or `{RIGHT_PAREN}`", name) + raise UnexpectedToken(f"`,` or `{RIGHT_PAREN}`", name) if delim == ",": t += 3 params.append((type, name)) @@ -205,9 +192,7 @@ class ParamList: params.append((type, name)) break else: - raise JackSyntaxError( - f"Expected `,` or `{RIGHT_PAREN}`, got `{delim}` instead", delim - ) + raise UnexpectedToken(f"`,` or `{RIGHT_PAREN}`", delim) return (ParamList(params), t) @@ -251,20 +236,14 @@ class Subroutine: return_type = tokens[1] if return_type not in RETURN_TYPES and return_type.type != "identifier": - raise JackSyntaxError( - f"Expected datatype, got `{return_type}` instead", return_type - ) + raise UnexpectedToken("datatype", return_type) name = tokens[2] if name.type != "identifier": - raise JackSyntaxError( - f"Expected {category} name, got `{name}` instead", name - ) + raise UnexpectedToken(f"{category} name", name) if tokens[3] != LEFT_PAREN: - raise JackSyntaxError( - f"Expected `{LEFT_PAREN}`, got `{tokens[3]}` instead", tokens[3] - ) + raise UnexpectedToken(LEFT_PAREN, tokens[3]) t = 4 params, dt = ParamList.from_tokens(tokens[t:]) @@ -276,9 +255,7 @@ class Subroutine: # TODO: catch IndexError body_open = tokens[t] if body_open != LEFT_BRACE: - raise JackSyntaxError( - f"Expected `{LEFT_BRACE}`, got `{body_open}` instead", body_open - ) + raise UnexpectedToken(LEFT_BRACE, body_open) t += 1 variables = [] @@ -299,12 +276,13 @@ class Subroutine: body_close = tokens[t] if body_close != RIGHT_BRACE: - raise JackSyntaxError( - f"Expected `{RIGHT_BRACE}`, got `{body_close}` instead", body_close - ) + raise UnexpectedToken(RIGHT_BRACE, body_close) t += 1 - return (Subroutine(category, return_type, name, params, variables, statements), t) + return ( + Subroutine(category, return_type, name, params, variables, statements), + t, + ) def print_verbose(self): print(f"Define {self.category} {self.type} {self.name}") diff --git a/projects/hackc/expressions.py b/projects/hackc/expressions.py index f4623da..2e08cef 100644 --- a/projects/hackc/expressions.py +++ b/projects/hackc/expressions.py @@ -24,9 +24,7 @@ class Term: """Format: '(' <expression> ')'""" expr, dt = Expression.from_tokens(tokens[1:]) if tokens[dt + 1] != RIGHT_PAREN: - raise JackSyntaxError( - f"Expected `{RIGHT_PAREN}`, got `{tokens[dt]}` instead", tokens[dt] - ) + raise UnaryTerm(RIGHT_PAREN, tokens[dt]) return (expr, dt + 2) if tokens[0].type == "identifier": if tokens[1] in [LEFT_PAREN, "."]: @@ -69,9 +67,7 @@ class SubscriptTerm: raise JackSyntaxError(f"Expected subscript", tokens[2]) t += dt if tokens[t] != RIGHT_BRACKET: - raise JackSyntaxError( - f"Expected `{RIGHT_BRACKET}`, got `{tokens[t]}` instead", tokens[t] - ) + raise UnexpectedToken(RIGHT_BRACKET, tokens[t]) return (SubscriptTerm(var, sub), t + 1) def __str__(self): @@ -95,16 +91,6 @@ class UnaryTerm: return f"({self.op}{self.term})" -class SubroutineCall: - def __init__(self, name: Token, exprs: list): - self.name = name - self.exprs = exprs - - @classmethod - def from_tokens(cls, tokens: list) -> tuple: - pass - - class Expression: def __init__(self, lhs: Term, op=None, rhs=None): self.lhs = lhs @@ -119,6 +105,8 @@ class Expression: <term> (<op> <term>)? """ lhs, dt = Term.from_tokens(tokens) + if lhs is None: + return (None, 0) t = dt op = tokens[t] @@ -128,7 +116,7 @@ class Expression: t += 1 rhs, dt = Term.from_tokens(tokens[t:]) if rhs is None: - raise JSE(f"Expected other term, got `{rhs}` instead", rhs) + raise UnexpectedToken("other term", rhs) t += dt return (Expression(lhs, op, rhs), t) @@ -138,3 +126,77 @@ class Expression: return f"({self.lhs} {self.op} {self.rhs})" else: return str(self.lhs) + + +class ExpressionList: + def __init__(self, exprs: list[Expression]): + self.exprs = exprs + + @classmethod + def from_tokens(cls, tokens: list) -> tuple: + """Construct list of expressions. + + Format: + (<expression> (',' <expression>)*)? + """ + t = 0 + exprs = [] + while True: + expr, dt = Expression.from_tokens(tokens[t:]) + if expr is None: + if t == 0: + # only allow lack of expression right after paren + break + else: + # expect expression after comma + raise JackSyntaxError(f"Expected expression", tokens[t]) + t += dt + exprs.append(expr) + if tokens[t] != ",": + break + t += 1 + + return (ExpressionList(exprs), t) + + def __str__(self): + return ", ".join([str(expr) for expr in self.exprs]) + + +class SubroutineCall: + def __init__(self, jack_class: Token, name: Token, exprs: ExpressionList): + self.jack_class = jack_class + self.name = name + self.exprs = exprs + + @classmethod + def from_tokens(cls, tokens: list) -> tuple: + """Construct invocation of subroutine. + + Format: + (<class> '.')? <subroutine> '(' <expression list> ')' + """ + t = 0 + jack_class = None + if tokens[1] == ".": + jack_class = tokens[0] + t = 2 + + name = tokens[t] + if name.type != "identifier": + raise UnexpectedToken("subroutine name", name) + t += 1 + + if tokens[t] != LEFT_PAREN: + raise UnexpectedToken(LEFT_PAREN, tokens[t]) + t += 1 + + exprs, dt = ExpressionList.from_tokens(tokens[t:]) + t += dt + if tokens[t] != RIGHT_PAREN: + raise UnexpectedToken(RIGHT_PAREN, tokens[t]) + return (SubroutineCall(jack_class, name, exprs), t + 1) + + def __str__(self): + if self.jack_class is None: + return f"{self.name}({self.exprs})" + return f"{self.jack_class}.{self.name}({self.exprs})" diff --git a/projects/hackc/parser.py b/projects/hackc/parser.py index 188bddc..406c139 100644 --- a/projects/hackc/parser.py +++ b/projects/hackc/parser.py @@ -2,32 +2,6 @@ from .tokens import Token from .classes import Class from .utils import * -KEYWORDS = [ - "class", - "constructor", - "function", - "method", - "field", - "static", - "var", - "int", - "char", - "boolean", - "void", - "true", - "false", - "null", - "this", - "let", - "do", - "if", - "else", - "while", - "return", -] - -SYMBOLS = "{}()[].,;+-*/&|<>=~" - class Parser: def __init__(self, fp, extensions=[]): @@ -82,6 +56,8 @@ class Parser: break rem = line[pos:] # remainder of line + if not rem: + continue token = Token.from_line(rem, line_no, pos, extensions=self._extensions) if token is not None: self.tokens.append(token) diff --git a/projects/hackc/statements.py b/projects/hackc/statements.py index 16c123e..0e257ed 100644 --- a/projects/hackc/statements.py +++ b/projects/hackc/statements.py @@ -1,19 +1,42 @@ -from .expressions import Expression +from .expressions import Expression, SubroutineCall from .utils import * + class Statement: def __init__(self): pass @classmethod def from_tokens(cls, tokens: list) -> tuple: - for StatementClass in [LetStatement]: + for StatementClass in [LetStatement, DoStatement, ReturnStatement, IfStatement]: stmt, dt = StatementClass.from_tokens(tokens) if stmt is not None: return (stmt, dt) return (None, 0) + +class StatementList: + def __init__(self, statements: list[Statement]): + self.statements = statements + + @classmethod + def from_tokens(cls, tokens: list) -> tuple: + t = 0 + statements = [] + while True: + stmt, dt = Statement.from_tokens(tokens[t:]) + if stmt is None: + break + statements.append(stmt) + t += dt + return (StatementList(statements), t) + + def print_verbose(self): + for stmt in self.statements: + stmt.print_verbose() + + class LetStatement: def __init__(self, name, expr): self.name = name @@ -31,22 +54,139 @@ class LetStatement: name = tokens[1] if name.type != "identifier": - raise JackSyntaxError(f"Expected variable name, got `{name}` instead", name) + raise UnexpectedToken("variable name", name) if tokens[2] != "=": - raise JackSyntaxError(f"Expected `=`, got `{tokens[2]}` instead", tokens[2]) + raise UnexpectedToken("=", tokens[2]) t = 3 expr, dt = Expression.from_tokens(tokens[t:]) if expr is None: - raise JackSyntaxError(f"Expected expression", tokens[3]) + raise UnexpectedToken(f"Expected expression", tokens[3]) t += dt if tokens[t] != ";": - raise JackSyntaxError(f"Expected `;`, got `{tokens[t]}` instead", tokens[t]) + raise UnexpectedToken(";", tokens[t]) - t += 1 - return (LetStatement(name, expr), t) + return (LetStatement(name, expr), t + 1) def print_verbose(self): print(f"Let {self.name} be {self.expr}") + + +class DoStatement: + def __init__(self, subcall: SubroutineCall): + self.subcall = subcall + + @classmethod + def from_tokens(cls, tokens: list) -> tuple: + """Construct do statement. + + Format: + 'do' <subroutine call> ';' + """ + if len(tokens) < 5 or tokens[0] != "do": + return (None, 0) + + t = 1 + subcall, dt = SubroutineCall.from_tokens(tokens[1:]) + if subcall is None: + raise UnexpectedToken("subroutine call", tokens[1]) + t += dt + + if tokens[t] != ";": + raise UnexpectedToken(";", tokens[t]) + + return (DoStatement(subcall), t + 1) + + def print_verbose(self): + print(f"Do {self.subcall}") + + +class ReturnStatement: + def __init__(self, expr: Expression): + self.expr = expr + + @classmethod + def from_tokens(cls, tokens: list) -> tuple: + """Construct return statement. + + Format: + 'return' <expression>? ';' + """ + if len(tokens) < 3 or tokens[0] != "return": + return (None, 0) + + t = 1 + expr, dt = Expression.from_tokens(tokens[1:]) + t += dt + + if tokens[t] != ";": + raise UnexpectedToken(";", tokens[t]) + + return (ReturnStatement(expr), t + 1) + + def print_verbose(self): + print(f"Return {self.expr or 'null'}") + + +class IfStatement: + def __init__(self, condition: Expression, then: StatementList, else_then=None): + self.condition = condition + self.then = then + self.else_then = else_then + + @classmethod + def from_tokens(cls, tokens: list) -> tuple: + """Construct if statement. + + Format: + 'if' '(' <expression> ')' '{' <statement list> '}' ('else' '{' <statement list '}')? + """ + if len(tokens) < 6 or tokens[0] != "if": + return (None, 0) + + if tokens[1] != LEFT_PAREN: + raise UnexpectedToken(LEFT_PAREN, tokens[1]) + + t = 2 + cond, dt = Expression.from_tokens(tokens[2:]) + if cond is None: + raise JackSyntaxError(f"Expected condition", tokens[2]) + t += dt + + if tokens[t] != RIGHT_PAREN: + raise UnexpectedToken(RIGHT_PAREN, tokens[t]) + if tokens[t + 1] != LEFT_BRACE: + raise UnexpectedToken(LEFT_BRACE, tokens[t + 1]) + t += 2 + + then, dt = StatementList.from_tokens(tokens[t:]) + t += dt + + if tokens[t] != RIGHT_BRACE: + raise UnexpectedToken(RIGHT_BRACE, tokens[t]) + t += 1 + + if tokens[t] != "else": + return (IfStatement(cond, then), t) + + if tokens[t + 1] != LEFT_BRACE: + raise UnexpectedToken(LEFT_BRACE, tokens[t + 1]) + + t += 2 + + else_then, dt = StatementList.from_tokens(tokens[t:]) + t += dt + + if tokens[t] != RIGHT_BRACE: + raise UnexpectedToken(RIGHT_BRACE, tokens[t]) + + return (IfStatement(cond, then, else_then), t + 1) + + def print_verbose(self): + print(f"If {self.condition} then:") + self.then.print_verbose() + if self.else_then is not None: + print("Else then:") + self.else_then.print_verbose() diff --git a/projects/hackc/utils.py b/projects/hackc/utils.py index d375953..8756e46 100644 --- a/projects/hackc/utils.py +++ b/projects/hackc/utils.py @@ -1,5 +1,31 @@ from sys import stderr +KEYWORDS = [ + "class", + "constructor", + "function", + "method", + "field", + "static", + "var", + "int", + "char", + "boolean", + "void", + "true", + "false", + "null", + "this", + "let", + "do", + "if", + "else", + "while", + "return", +] + +SYMBOLS = "{}()[].,;+-*/&|<>=~" + EXIT_CODE_FILE_ERROR = 1 EXIT_CODE_INVALID_TOKEN = 2 EXIT_CODE_SYNTAX_ERROR = 4 @@ -13,6 +39,7 @@ RIGHT_BRACKET = "]" LEFT_PAREN = "(" RIGHT_PAREN = ")" + class JackSyntaxError(Exception): def __init__(self, msg, token): self.message = msg @@ -20,5 +47,25 @@ class JackSyntaxError(Exception): super().__init__(msg) +class UnexpectedToken(JackSyntaxError): + def __init__(self, expected, unexpected): + if str(expected) in KEYWORDS or str(expected) in SYMBOLS: + # wrap literal keyword/symbol in backticks + super().__init__( + f"Expected `{expected}`, got `{unexpected}` instead", unexpected + ) + else: + super().__init__( + f"Expected {expected}, got `{unexpected}` instead", unexpected + ) + + +class Unexpected(JackSyntaxError): + def __init__(self, expected, unexpected): + super().__init__( + f"Expected `{expected}`, got `{unexpected}` instead", unexpected + ) + + def print_err(msg): print(msg, file=stderr) |