summary | refs | log | tree | commit | diff
path: root/projects/hackc
diff options
context:
space:
mode:
author Frederick Yin <fkfd@fkfd.me> 2022-08-30 14:46:23 +0800
committer Frederick Yin <fkfd@fkfd.me> 2022-08-30 14:46:23 +0800
commit b439d663a3f3d4d275f07339c1c0e794808f67d9 (patch)
tree 6ab4a6b152336271f4d38c961cee67922020192e /projects/hackc
parent d303447dc7a830489828be2e66ccf8c36af4aed6 (diff)
hackc: parse variable declaration
Also add a handful of overloading operators to Token
Diffstat (limited to 'projects/hackc')
-rw-r--r-- projects/hackc/__main__.py 2
-rw-r--r-- projects/hackc/parser.py 6
-rw-r--r-- projects/hackc/syntax.py 126
-rw-r--r-- projects/hackc/tokens.py 13
-rw-r--r-- projects/hackc/utils.py 10
5 files changed, 155 insertions, 2 deletions
diff --git a/projects/hackc/__main__.py b/projects/hackc/__main__.py
index 1cb5c3f..c50461a 100644
--- a/projects/hackc/__main__.py
+++ b/projects/hackc/__main__.py
@@ -18,7 +18,7 @@ def compile_jack(input_path: Path, extensions: list, verbose: bool):
for input_fn in jack_files:
parser = Parser(input_fn, extensions=extensions)
parser.tokenize()
- parser.print_tokens()
+ parser.parse()
if __name__ == "__main__":
diff --git a/projects/hackc/parser.py b/projects/hackc/parser.py
index f73c3d1..9a927c6 100644
--- a/projects/hackc/parser.py
+++ b/projects/hackc/parser.py
@@ -1,4 +1,5 @@
from .tokens import Token
+from .syntax import Class
from .utils import *
KEYWORDS = [
@@ -37,7 +38,7 @@ class Parser:
def print_tokens(self):
print("LINE\tCOL\tTYPE\tTOKEN")
for token in self.tokens:
- print(f"{token.line_no + 1}\t{token.column + 1}\t{token.type[:3]}\t{token.token}")
+ print(f"{token.line_no + 1}\t{token.column + 1}\t{token.type[:3]}\t{token}")
print(f"===== {len(self.tokens)} tokens =====")
def tokenize(self):
@@ -91,3 +92,6 @@ class Parser:
print_err(line)
print_err(" " * pos + f"^ Invalid token")
exit(EXIT_CODE_INVALID_TOKEN)
+
+ def parse(self):
+ syntax_tree = Class.from_tokens(self.tokens)
diff --git a/projects/hackc/syntax.py b/projects/hackc/syntax.py
new file mode 100644
index 0000000..c9be157
--- /dev/null
+++ b/projects/hackc/syntax.py
@@ -0,0 +1,126 @@
+from .tokens import Token
+from .utils import *
+
+SCOPES = ["static", "field", "var"]
+PRIMITIVE_TYPES = ["int", "char", "boolean"]
+
+
+class Class:
+ def __init__(self, name: Token, variables: list, subroutines: list):
+ self.name = name
+ self.variables = variables
+ self.subroutines = subroutines
+
+ @classmethod
+ def from_tokens(cls, tokens: list):
+ """Construct a class from a list of tokens.
+
+ In standard Jack, one file is exactly one class.
+
+ Format:
+ class <name> {
+ <zero or more class variable declarations>
+ <zero or more subroutines>
+ }
+ """
+ if len(tokens) < 4:
+ return None
+ if tokens[0] != "class":
+ raise JackSyntaxError(
+ f"Expected `class`, got `{tokens[0]}` instead", tokens[0]
+ )
+ if tokens[1].type != "identifier":
+ raise JackSyntaxError(
+ f"You cannot name a class `{tokens[1]}`", tokens[1]
+ )
+
+ name = tokens[1]
+
+ if tokens[2] != LEFT_BRACE:
+ raise JackSyntaxError(
+ f"Expected `{LEFT_BRACE}`, got `{tokens[2]}` instead", tokens[2]
+ )
+
+ variables = Variable.from_tokens(tokens[3:])
+ variables.print_verbose()
+ return Class(name, variables, [])
+
+
+class Variable:
+ def __init__(self, scope: Token, type: Token, names: list[Token]):
+ self.scope = scope
+ self.type = type
+ self.names = names
+
+ @classmethod
+ def from_tokens(cls, tokens: list):
+ """Construct variable declaration statement.
+
+ You can declare multiple variables of one scope and type on the same line.
+
+ Format:
+ <scope> <type> <one or more names, joined with a comma>;
+
+ <scope> = static | field | var
+ <type> = int | char | boolean | <class name>
+ """
+ if len(tokens) < 4 or tokens[0] not in SCOPES:
+ # not variable declaration
+ return None
+
+ scope = tokens[0]
+
+ if tokens[1] not in PRIMITIVE_TYPES and tokens[1].type != "identifier":
+ raise JackSyntaxError(
+ f"Expected datatype, got `{tokens[1]}` instead", tokens[1]
+ )
+
+ type = tokens[1]
+
+ names = [] # names of variables
+ expecting_identifier = True
+
+ for token in tokens[2:]:
+ if token.type == "identifier":
+ if expecting_identifier:
+ names.append(token)
+ expecting_identifier = False
+ else:
+ raise JackSyntaxError(
+ f"Expected `,`, got `{token}` instead", token
+ )
+ elif token == ",":
+ if not expecting_identifier:
+ expecting_identifier = True
+ else:
+ raise JackSyntaxError(
+ f"Expected identifier, got `,` instead", token
+ )
+ elif token == ";":
+ if expecting_identifier:
+ raise JackSyntaxError(
+ f"Expected identifier, got `;` instead", token
+ )
+ break
+
+ return Variable(scope, type, names)
+
+ def print_verbose(self):
+ print(f"Declare {len(self.names)} variables:")
+ for name in self.names:
+ print(self.scope, self.type, name)
+
+
+class Subroutine:
+ def __init__(self):
+ pass
+
+
+class Statement:
+ def __init__(self):
+ super().__init__()
+
+
+class IfStatement(Statement):
+ def __init__(self):
+ super().__init__()
diff --git a/projects/hackc/tokens.py b/projects/hackc/tokens.py
index 50c4173..1ed94ae 100644
--- a/projects/hackc/tokens.py
+++ b/projects/hackc/tokens.py
@@ -26,6 +26,7 @@ KEYWORDS = [
SYMBOLS = "{}()[].,;+-*/&|<>=~"
TOKEN_TYPES = ["keyword", "symbol", "integer", "string", "identifier"]
+
class Token:
def __init__(self, type: str, token: str, line_no: int, column: int):
"""A token in JACK."""
@@ -34,6 +35,18 @@ class Token:
self.line_no = line_no
self.column = column
+ def __len__(self) -> int:
+ return self.length()
+
+ def __eq__(self, other) -> bool:
+ if type(other) == str:
+ return self.token == other
+ if type(other) == Token:
+ return self.token == other.token
+
+ def __str__(self) -> str:
+ return self.token
+
@classmethod
def from_line(cls, line: str, line_no: int, column: int, extensions=[]):
"""Extract first token from line and return it as an instance of Token."""
diff --git a/projects/hackc/utils.py b/projects/hackc/utils.py
index d1ea3ca..4861088 100644
--- a/projects/hackc/utils.py
+++ b/projects/hackc/utils.py
@@ -4,6 +4,16 @@ EXIT_CODE_FILE_ERROR = 1
EXIT_CODE_INVALID_TOKEN = 2
EXIT_CODE_SYNTAX_ERROR = 4
+# vim autoindent misbehaves if I type these verbatim in strings
+LEFT_BRACE = "{"
+RIGHT_BRACE = "}"
+
+class JackSyntaxError(Exception):
+ def __init__(self, msg, token):
+ self.message = msg
+ self.token = token
+ super().__init__(msg)
+
def print_err(msg):
print(msg, file=stderr)