diff options
Diffstat (limited to 'projects')
-rw-r--r-- | projects/hackc/classes.py | 245 | ||||
-rw-r--r-- | projects/hackc/parser.py | 2 | ||||
-rw-r--r-- | projects/hackc/syntax.py | 146 | ||||
-rw-r--r-- | projects/hackc/utils.py | 2 |
4 files changed, 248 insertions, 147 deletions
diff --git a/projects/hackc/classes.py b/projects/hackc/classes.py new file mode 100644 index 0000000..1875ce7 --- /dev/null +++ b/projects/hackc/classes.py @@ -0,0 +1,245 @@ +from .tokens import Token +from .utils import * + +SCOPES = ["static", "field", "var"] +VAR_TYPES = ["int", "char", "boolean"] +RETURN_TYPES = ["int", "char", "boolean", "void"] +SUBROUTINE_CATS = ["constructor", "method", "function"] + + +class Class: + def __init__(self, name: Token, variables: list, subroutines: list): + self.name = name + self.variables = variables + self.subroutines = subroutines + + @classmethod + def from_tokens(cls, tokens: list): + """Construct a class from a list of tokens. + + In standard Jack, one file is exactly one class. + + Format: + 'class' <name> '{' + <variable>* + <subroutine>* + '}' + """ + tokens_total = len(tokens) + if tokens_total < 4: + return None + if tokens[0] != "class": + raise JackSyntaxError( + f"Expected `class`, got `{tokens[0]}` instead", tokens[0] + ) + + name = tokens[1] + if name.type != "identifier": + raise JackSyntaxError(f"You cannot name a class `{name}`", name) + + if tokens[2] != LEFT_BRACE: + raise JackSyntaxError( + f"Expected `{LEFT_BRACE}`, got `{tokens[2]}` instead", tokens[2] + ) + + tokens_consumed = 3 + + while tokens_consumed < tokens_total: + variables, token_cnt = Variable.from_tokens( + tokens[tokens_consumed:], context="class" + ) + if variables is None: + break + variables.print_verbose() + tokens_consumed += token_cnt + + while tokens_consumed < tokens_total: + subroutine, token_cnt = Subroutine.from_tokens(tokens[tokens_consumed:]) + if subroutine is None: + break + subroutine.print_verbose() + tokens_consumed += token_cnt + + return Class(name, variables, []) + + +class Variable: + def __init__(self, scope: Token, type: Token, names: list[Token]): + self.scope = scope + self.type = type + self.names = names + + @classmethod + def from_tokens(cls, tokens: list, context: str) -> tuple: + """Construct variable declaration statement from a list of tokens. + Return a tuple of an instance of Variable and number of tokens consumed. + When `tokens` does not begin with a variable declaration, return (None, 0). + + context -- "class" (<scope> = static | field) or "subroutine" (<scope> = var) + + Format: + <scope> <type> <name> (, <name>)* ; + + <scope> = static | field | var + <type> = int | char | boolean | <class> + """ + if len(tokens) < 4 or tokens[0] not in SCOPES: + # not variable declaration + return (None, 0) + + scope = tokens[0] + if scope in ["static", "field"] and context != "class": + raise JackSyntaxError( + f"You cannot declare a {scope} variable in a subroutine", scope + ) + if scope == "var" and context != "subroutine": + raise JackSyntaxError( + f"You cannot declare a local variable outside of a subroutine", + scope, + ) + + type = tokens[1] + if type not in VAR_TYPES and type.type != "identifier": + raise JackSyntaxError(f"Expected datatype, got `{tokens[1]}` instead", type) + + tokens_consumed = 2 + names = [] # names of variables + expecting_identifier = True + found_semicolon = False + + for token in tokens[2:]: + tokens_consumed += 1 + if token.type == "identifier": + if expecting_identifier: + names.append(token) + expecting_identifier = False + else: + raise JackSyntaxError(f"Expected `,`, got `{token}` instead", token) + elif token == ",": + if not expecting_identifier: + expecting_identifier = True + else: + raise JackSyntaxError( + f"Expected variable name, got `,` instead", token + ) + elif token == ";": + if expecting_identifier: + raise JackSyntaxError( + f"Expected variable name, got `;` instead", token + ) + found_semicolon = True + break + else: + expected = "variable name" if expecting_identifier else "`,` or `;`" + raise JackSyntaxError( + f"Expected {expected}, got `{token}` instead", token + ) + + if not found_semicolon: + # TODO: print caret at end of token + raise JackSyntaxError(f"Missing semicolon", token) + + return (Variable(scope, type, names), tokens_consumed) + + def print_verbose(self): + print(f"Declare {len(self.names)} variable(s):") + for name in self.names: + print(self.scope, self.type, name) + + +class ParamList: + def __init__(self, params: list[tuple]): + self.params = params + + @classmethod + def from_tokens(cls, tokens: list) -> tuple: + """Construct parameter list of subroutine from tokens. + + Format: + '(' (<type> <name> (, <type> <name>)*)? ')' + + <type> = int | char | boolean | <class> + """ + if len(tokens) < 2 or tokens[0] != LEFT_PAREN: + return (None, 0) + + if tokens[1] == RIGHT_PAREN: + # empty param list, i.e. '(' ')' + return (ParamList([]), 2) + + tokens_consumed = 1 + params = [] + for type, name, delim in zip(tokens[1::3], tokens[2::3], tokens[3::3]): + tokens_consumed += 3 + if type not in VAR_TYPES and type.type != "identifier": + raise JackSyntaxError(f"Expected datatype, got `{type}` instead", type) + if not name: + # TODO: print caret at end of type + raise JackSyntaxError("Expected variable name", type) + if name.type != "identifier": + raise JackSyntaxError( + f"Expected variable name, got `{name}` instead", name + ) + if not delim: + raise JackSyntaxError(f"Expected `,` or `{RIGHT_PAREN}`", name) + if delim == ",": + params.append((type, name)) + continue + elif delim == RIGHT_PAREN: + params.append((type, name)) + break + else: + raise JackSyntaxError( + f"Expected `,` or `{RIGHT_PAREN}`, got `{delim}` instead", delim + ) + + return (ParamList(params), tokens_consumed) + + +class Subroutine: + def __init__( + self, category: Token, type: Token, name: Token, params: ParamList, body + ): + self.category = category + self.type = type + self.name = name + self.params = params + self.body = body + + @classmethod + def from_tokens(cls, tokens: list) -> tuple: + """Construct subroutine from tokens. + + Format: + <category> <return type> <name> <paramlist> <body> + + <category> = constructor | method | function + <return type> = int | char | boolean | void | <class> + """ + if len(tokens) < 7 or tokens[0] not in SUBROUTINE_CATS: + # not a subroutine + return (None, 0) + category = tokens[0] + + return_type = tokens[1] + if return_type not in RETURN_TYPES and return_type.type != "identifier": + raise JackSyntaxError( + f"Expected datatype, got `{return_type}` instead", return_type + ) + + name = tokens[2] + if name.type != "identifier": + raise JackSyntaxError( + f"Expected {category} name, got `{name}` instead", name + ) + + tokens_consumed = 3 + params, token_cnt = ParamList.from_tokens(tokens[tokens_consumed:]) + if params is None: + raise JackSyntaxError("Expected parameter list", tokens[tokens_consumed]) + tokens_consumed += token_cnt + + return (Subroutine(category, return_type, name, params, None), tokens_consumed) + + def print_verbose(self): + print(f"Define {self.category} {self.name}, returns {self.type}") diff --git a/projects/hackc/parser.py b/projects/hackc/parser.py index 2c34d1b..2fc30ee 100644 --- a/projects/hackc/parser.py +++ b/projects/hackc/parser.py @@ -1,5 +1,5 @@ from .tokens import Token -from .syntax import Class +from .classes import Class from .utils import * KEYWORDS = [ diff --git a/projects/hackc/syntax.py b/projects/hackc/syntax.py deleted file mode 100644 index 281bd17..0000000 --- a/projects/hackc/syntax.py +++ /dev/null @@ -1,146 +0,0 @@ -from .tokens import Token -from .utils import * - -SCOPES = ["static", "field", "var"] -PRIMITIVE_TYPES = ["int", "char", "boolean"] - - -class Class: - def __init__(self, name: Token, variables: list, subroutines: list): - self.name = name - self.variables = variables - self.subroutines = subroutines - - @classmethod - def from_tokens(cls, tokens: list): - """Construct a class from a list of tokens. - - In standard Jack, one file is exactly one class. - - Format: - class <name> { - <zero or more class variable declarations> - <zero or more subroutines> - } - """ - if len(tokens) < 4: - return None - if tokens[0] != "class": - raise JackSyntaxError( - f"Expected `class`, got `{tokens[0]}` instead", tokens[0] - ) - - name = tokens[1] - if name.type != "identifier": - raise JackSyntaxError(f"You cannot name a class `{name}`", name) - - if tokens[2] != LEFT_BRACE: - raise JackSyntaxError( - f"Expected `{LEFT_BRACE}`, got `{tokens[2]}` instead", tokens[2] - ) - - tokens_consumed = 3 - - while True: - variables, token_cnt = Variable.from_tokens( - tokens[tokens_consumed:], context="class" - ) - if variables is None: - break - variables.print_verbose() - tokens_consumed += token_cnt - - return Class(name, variables, []) - - -class Variable: - def __init__(self, scope: Token, type: Token, names: list[Token]): - self.scope = scope - self.type = type - self.names = names - - @classmethod - def from_tokens(cls, tokens: list, context: str) -> tuple: - """Construct variable declaration statement from a list of tokens. - Return a tuple of an instance of Variable and number of tokens consumed. - When `tokens` does not begin with a variable declaration, return (None, 0). - - context -- "class" (<scope> = static | field) or "subroutine" (<scope> = var) - - Format: - <scope> <type> <one or more names, joined with a comma>; - - <scope> = static | field | var - <type> = int | char | boolean | <class name> - """ - if len(tokens) < 4 or tokens[0] not in SCOPES: - # not variable declaration - return (None, 0) - - scope = tokens[0] - if scope in ["static", "field"] and context != "class": - raise JackSyntaxError( - f"You cannot declare a {scope} variable in a subroutine", scope - ) - if scope == "var" and context != "subroutine": - raise JackSyntaxError( - f"You cannot declare a local variable outside of a subroutine", - scope, - ) - - type = tokens[1] - if type not in PRIMITIVE_TYPES and type.type != "identifier": - raise JackSyntaxError(f"Expected datatype, got `{tokens[1]}` instead", type) - - tokens_consumed = 2 - names = [] # names of variables - expecting_identifier = True - - for token in tokens[2:]: - tokens_consumed += 1 - if token.type == "identifier": - if expecting_identifier: - names.append(token) - expecting_identifier = False - else: - raise JackSyntaxError(f"Expected `,`, got `{token}` instead", token) - elif token == ",": - if not expecting_identifier: - expecting_identifier = True - else: - raise JackSyntaxError( - f"Expected variable name, got `,` instead", token - ) - elif token == ";": - if expecting_identifier: - raise JackSyntaxError( - f"Expected variable name, got `;` instead", token - ) - break - else: - expected = "variable name" if expecting_identifier else "`,` or `;`" - raise JackSyntaxError( - f"Expected {expected}, got `{token}` instead", token - ) - - return (Variable(scope, type, names), tokens_consumed) - - def print_verbose(self): - print(f"Declare {len(self.names)} variable(s):") - for name in self.names: - print(self.scope, self.type, name) - - -class Subroutine: - def __init__(self): - pass - - -class Statement: - def __init__(self): - super().__init__() - - -class IfStatement(Statement): - def __init__(self): - super().__init__() diff --git a/projects/hackc/utils.py b/projects/hackc/utils.py index 4861088..4b9c9f0 100644 --- a/projects/hackc/utils.py +++ b/projects/hackc/utils.py @@ -7,6 +7,8 @@ EXIT_CODE_SYNTAX_ERROR = 4 # vim autoindent misbehaves if I type these verbatim in strings LEFT_BRACE = "{" RIGHT_BRACE = "}" +LEFT_PAREN = "(" +RIGHT_PAREN = ")" class JackSyntaxError(Exception): def __init__(self, msg, token): |