From b439d663a3f3d4d275f07339c1c0e794808f67d9 Mon Sep 17 00:00:00 2001
From: Frederick Yin
Date: Tue, 30 Aug 2022 14:46:23 +0800
Subject: hackc: parse variable declaration

Also add a handful of overloading operators to Token
---
Changes relative to the original posting:
 - parser.py: parse() keeps the tree on the instance and reports a
   JackSyntaxError through print_err / EXIT_CODE_SYNTAX_ERROR instead of
   leaking a traceback.
 - syntax.py: Class.from_tokens no longer calls print_verbose() on None
   and stores declarations in a list; Variable.from_tokens rejects stray
   tokens and a missing `;` instead of ignoring them.
 - tokens.py: Token.__eq__ returns NotImplemented for foreign types and
   Token stays hashable.
 - utils.py: docstring and PEP 8 spacing for JackSyntaxError.
 - Context-line indentation and the placeholders in the docstring
   grammars were reconstructed from a flattened copy of this patch; the
   index lines still name the originally posted blobs.

 projects/hackc/__main__.py |   2 +-
 projects/hackc/parser.py   |  18 ++++-
 projects/hackc/syntax.py   | 146 +++++++++++++++++++++++++++++++++++++
 projects/hackc/tokens.py   |  22 ++++++
 projects/hackc/utils.py    |  13 ++++
 5 files changed, 199 insertions(+), 2 deletions(-)
 create mode 100644 projects/hackc/syntax.py

(limited to 'projects')

diff --git a/projects/hackc/__main__.py b/projects/hackc/__main__.py
index 1cb5c3f..c50461a 100644
--- a/projects/hackc/__main__.py
+++ b/projects/hackc/__main__.py
@@ -18,7 +18,7 @@ def compile_jack(input_path: Path, extensions: list, verbose: bool):
     for input_fn in jack_files:
         parser = Parser(input_fn, extensions=extensions)
         parser.tokenize()
-        parser.print_tokens()
+        parser.parse()
 
 
 if __name__ == "__main__":
diff --git a/projects/hackc/parser.py b/projects/hackc/parser.py
index f73c3d1..9a927c6 100644
--- a/projects/hackc/parser.py
+++ b/projects/hackc/parser.py
@@ -1,4 +1,5 @@
 from .tokens import Token
+from .syntax import Class
 from .utils import *
 
 KEYWORDS = [
@@ -37,7 +38,7 @@ class Parser:
     def print_tokens(self):
         print("LINE\tCOL\tTYPE\tTOKEN")
         for token in self.tokens:
-            print(f"{token.line_no + 1}\t{token.column + 1}\t{token.type[:3]}\t{token.token}")
+            print(f"{token.line_no + 1}\t{token.column + 1}\t{token.type[:3]}\t{token}")
         print(f"===== {len(self.tokens)} tokens =====")
 
     def tokenize(self):
@@ -91,3 +92,18 @@ class Parser:
                 print_err(line)
                 print_err(" " * pos + f"^ Invalid token")
                 exit(EXIT_CODE_INVALID_TOKEN)
+
+    def parse(self):
+        """Build the syntax tree from the tokens produced by tokenize().
+
+        A JackSyntaxError is reported on stderr and ends the process with
+        EXIT_CODE_SYNTAX_ERROR rather than surfacing as a traceback.
+        """
+        try:
+            self.syntax_tree = Class.from_tokens(self.tokens)
+        except JackSyntaxError as err:
+            print_err(
+                f"Line {err.token.line_no + 1}, column {err.token.column + 1}: "
+                f"{err.message}"
+            )
+            exit(EXIT_CODE_SYNTAX_ERROR)
diff --git a/projects/hackc/syntax.py b/projects/hackc/syntax.py
new file mode 100644
index 0000000..c9be157
--- /dev/null
+++ b/projects/hackc/syntax.py
@@ -0,0 +1,146 @@
+"""Syntax tree nodes built from Jack tokens."""
+
+from .tokens import Token
+from .utils import LEFT_BRACE, JackSyntaxError
+
+SCOPES = ["static", "field", "var"]
+PRIMITIVE_TYPES = ["int", "char", "boolean"]
+
+
+class Class:
+    """A Jack class: its name, variable declarations and subroutines."""
+
+    def __init__(self, name: Token, variables: list, subroutines: list):
+        self.name = name
+        self.variables = variables
+        self.subroutines = subroutines
+
+    @classmethod
+    def from_tokens(cls, tokens: list):
+        """Construct a class from a list of tokens.
+
+        In standard Jack, one file is exactly one class.
+
+        Format:
+            class NAME {
+                VARIABLE DECLARATIONS
+                SUBROUTINES
+            }
+
+        Returns None when there are fewer than four tokens. Raises
+        JackSyntaxError when the class header is malformed.
+        """
+        if len(tokens) < 4:
+            return None
+        if tokens[0] != "class":
+            raise JackSyntaxError(
+                f"Expected `class`, got `{tokens[0]}` instead", tokens[0]
+            )
+        if tokens[1].type != "identifier":
+            raise JackSyntaxError(
+                f"You cannot name a class `{tokens[1]}`", tokens[1]
+            )
+
+        name = tokens[1]
+
+        if tokens[2] != LEFT_BRACE:
+            raise JackSyntaxError(
+                f"Expected `{LEFT_BRACE}`, got `{tokens[2]}` instead", tokens[2]
+            )
+
+        # Only the first declaration is parsed so far. Variable.from_tokens()
+        # returns None when the body does not open with one (for example
+        # `class Foo {}`), so check before using the result.
+        variables = []
+        declaration = Variable.from_tokens(tokens[3:])
+        if declaration is not None:
+            declaration.print_verbose()
+            variables.append(declaration)
+        return cls(name, variables, [])
+
+
+class Variable:
+    """One declaration statement: a scope, a type and one or more names."""
+
+    def __init__(self, scope: Token, type: Token, names: list[Token]):
+        self.scope = scope
+        self.type = type
+        self.names = names
+
+    @classmethod
+    def from_tokens(cls, tokens: list):
+        """Construct variable declaration statement.
+
+        You can declare multiple variables of one scope and type on the same
+        line.
+
+        Format:
+            SCOPE TYPE NAME[, NAME...];
+
+            SCOPE = static | field | var
+            TYPE  = int | char | boolean | CLASS_NAME
+
+        Returns None when the tokens do not start a declaration. Raises
+        JackSyntaxError when the declaration is malformed or is not
+        terminated by `;`.
+        """
+        if len(tokens) < 4 or tokens[0] not in SCOPES:
+            # not variable declaration
+            return None
+
+        scope = tokens[0]
+        datatype = tokens[1]
+
+        if datatype not in PRIMITIVE_TYPES and datatype.type != "identifier":
+            raise JackSyntaxError(
+                f"Expected datatype, got `{datatype}` instead", datatype
+            )
+
+        names = []  # names of variables
+        expecting_identifier = True
+
+        for token in tokens[2:]:
+            if token == ";":
+                if expecting_identifier:
+                    raise JackSyntaxError(
+                        "Expected identifier, got `;` instead", token
+                    )
+                return cls(scope, datatype, names)
+            if expecting_identifier:
+                if token.type != "identifier":
+                    raise JackSyntaxError(
+                        f"Expected identifier, got `{token}` instead", token
+                    )
+                names.append(token)
+                expecting_identifier = False
+            elif token == ",":
+                expecting_identifier = True
+            else:
+                raise JackSyntaxError(
+                    f"Expected `,`, got `{token}` instead", token
+                )
+
+        # Ran out of tokens before the terminating `;`.
+        raise JackSyntaxError(
+            f"Expected `;` after `{tokens[-1]}`", tokens[-1]
+        )
+
+    def print_verbose(self):
+        print(f"Declare {len(self.names)} variables:")
+        for name in self.names:
+            print(self.scope, self.type, name)
+
+
+class Subroutine:
+    def __init__(self):
+        pass
+
+
+class Statement:
+    def __init__(self):
+        super().__init__()
+
+
+class IfStatement(Statement):
+    def __init__(self):
+        super().__init__()
diff --git a/projects/hackc/tokens.py b/projects/hackc/tokens.py
index 50c4173..1ed94ae 100644
--- a/projects/hackc/tokens.py
+++ b/projects/hackc/tokens.py
@@ -26,6 +26,7 @@ KEYWORDS = [
 SYMBOLS = "{}()[].,;+-*/&|<>=~"
 TOKEN_TYPES = ["keyword", "symbol", "integer", "string", "identifier"]
 
+
 class Token:
     def __init__(self, type: str, token: str, line_no: int, column: int):
         """A token in JACK."""
@@ -34,6 +35,27 @@ class Token:
         self.line_no = line_no
         self.column = column
 
+    def __len__(self) -> int:
+        return self.length()
+
+    def __eq__(self, other) -> bool:
+        """Compare by token text against another Token or a plain str."""
+        if isinstance(other, Token):
+            return self.token == other.token
+        if isinstance(other, str):
+            return self.token == other
+        # Let Python try the reflected comparison and fall back to identity
+        # instead of answering None.
+        return NotImplemented
+
+    def __hash__(self) -> int:
+        # Defining __eq__ alone would make Token unhashable; hash the same
+        # text that __eq__ compares.
+        return hash(self.token)
+
+    def __str__(self) -> str:
+        return self.token
+
     @classmethod
     def from_line(cls, line: str, line_no: int, column: int, extensions=[]):
         """Extract first token from line and return it as an instance of Token."""
diff --git a/projects/hackc/utils.py b/projects/hackc/utils.py
index d1ea3ca..4861088 100644
--- a/projects/hackc/utils.py
+++ b/projects/hackc/utils.py
@@ -4,6 +4,19 @@ EXIT_CODE_FILE_ERROR = 1
 EXIT_CODE_INVALID_TOKEN = 2
 EXIT_CODE_SYNTAX_ERROR = 4
 
+# vim autoindent misbehaves if I type these verbatim in strings
+LEFT_BRACE = "{"
+RIGHT_BRACE = "}"
+
+
+class JackSyntaxError(Exception):
+    """Syntax error in Jack source; `token` is where it was detected."""
+
+    def __init__(self, msg, token):
+        self.message = msg
+        self.token = token
+        super().__init__(msg)
+
 
 def print_err(msg):
     print(msg, file=stderr)
-- 
cgit v1.2.3