summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Frederick Yin <fkfd@fkfd.me> 2022-08-30 19:55:41 +0800
committer Frederick Yin <fkfd@fkfd.me> 2022-08-30 19:55:41 +0800
commit 5b4f3e494c5d12e0b44c232ada41fe2e273b27c0 (patch)
tree 9e7a6fbf2e57262ad4447dcacf3f34552fd9abcc
parent cb320b921c0574474430fe8d38aa5438a9f6ee98 (diff)
hackc: subroutine (partial)
-rw-r--r-- projects/hackc/classes.py 245
-rw-r--r-- projects/hackc/parser.py 2
-rw-r--r-- projects/hackc/syntax.py 146
-rw-r--r-- projects/hackc/utils.py 2
4 files changed, 248 insertions, 147 deletions
diff --git a/projects/hackc/classes.py b/projects/hackc/classes.py
new file mode 100644
index 0000000..1875ce7
--- /dev/null
+++ b/projects/hackc/classes.py
@@ -0,0 +1,245 @@
+from .tokens import Token
+from .utils import *
+
+# Keywords that open a variable declaration (checked by Variable.from_tokens).
+SCOPES = ["static", "field", "var"]
+# Built-in data types; the parsers in this file also accept an identifier
+# token (a class name) wherever one of these is allowed.
+VAR_TYPES = ["int", "char", "boolean"]
+# Built-in types accepted as a subroutine return type.
+RETURN_TYPES = ["int", "char", "boolean", "void"]
+# Keywords that open a subroutine declaration (checked by Subroutine.from_tokens).
+SUBROUTINE_CATS = ["constructor", "method", "function"]
+
+
+class Class:
+    """A Jack class: its name token, variable declarations and subroutines."""
+
+    def __init__(self, name: Token, variables: list, subroutines: list):
+        self.name = name  # identifier token naming the class
+        self.variables = variables  # Variable declarations, in source order
+        self.subroutines = subroutines  # Subroutine definitions, in source order
+
+    @classmethod
+    def from_tokens(cls, tokens: list):
+        """Construct a class from a list of tokens.
+
+        In standard Jack, one file is exactly one class.
+
+        Format:
+            'class' <name> '{'
+                <variable>*
+                <subroutine>*
+            '}'
+
+        Return None when fewer than four tokens are given.
+        Raise JackSyntaxError when the `class` keyword, the class name or the
+        opening brace is wrong, or when a nested declaration is malformed.
+
+        NOTE: subroutine bodies and the closing brace are not parsed yet, so
+        parsing stops at the first token that starts neither a variable
+        declaration nor a subroutine.
+        """
+        tokens_total = len(tokens)
+        if tokens_total < 4:
+            return None
+        if tokens[0] != "class":
+            raise JackSyntaxError(
+                f"Expected `class`, got `{tokens[0]}` instead", tokens[0]
+            )
+
+        name = tokens[1]
+        if name.type != "identifier":
+            raise JackSyntaxError(f"You cannot name a class `{name}`", name)
+
+        if tokens[2] != LEFT_BRACE:
+            raise JackSyntaxError(
+                f"Expected `{LEFT_BRACE}`, got `{tokens[2]}` instead", tokens[2]
+            )
+
+        tokens_consumed = 3
+
+        # Collect every declaration. Reusing the loop variable as the result
+        # would hand back the None sentinel that terminates the loop.
+        variables = []
+        while tokens_consumed < tokens_total:
+            variable, token_cnt = Variable.from_tokens(
+                tokens[tokens_consumed:], context="class"
+            )
+            if variable is None:
+                break
+            variable.print_verbose()
+            variables.append(variable)
+            tokens_consumed += token_cnt
+
+        subroutines = []
+        while tokens_consumed < tokens_total:
+            subroutine, token_cnt = Subroutine.from_tokens(tokens[tokens_consumed:])
+            if subroutine is None:
+                break
+            subroutine.print_verbose()
+            subroutines.append(subroutine)
+            tokens_consumed += token_cnt
+
+        return cls(name, variables, subroutines)
+
+
+class Variable:
+    """One variable declaration statement, possibly naming several variables."""
+
+    def __init__(self, scope: Token, type: Token, names: list[Token]):
+        self.scope, self.type, self.names = scope, type, names
+
+    @classmethod
+    def from_tokens(cls, tokens: list, context: str) -> tuple:
+        """Parse a variable declaration from the front of `tokens`.
+
+        Return (Variable, number of tokens consumed), or (None, 0) when
+        `tokens` does not begin with a variable declaration.
+
+        context -- "class" permits the static and field scopes,
+                   "subroutine" permits var
+
+        Format:
+            <scope> <type> <name> (, <name>)* ;
+
+            <scope> = static | field | var
+            <type>  = int | char | boolean | <class>
+        """
+        starts_declaration = len(tokens) >= 4 and tokens[0] in SCOPES
+        if not starts_declaration:
+            return (None, 0)
+
+        scope, var_type = tokens[0], tokens[1]
+
+        if scope in ["static", "field"] and context != "class":
+            raise JackSyntaxError(
+                f"You cannot declare a {scope} variable in a subroutine", scope
+            )
+        if scope == "var" and context != "subroutine":
+            raise JackSyntaxError(
+                "You cannot declare a local variable outside of a subroutine",
+                scope,
+            )
+
+        if not (var_type in VAR_TYPES or var_type.type == "identifier"):
+            raise JackSyntaxError(
+                f"Expected datatype, got `{var_type}` instead", var_type
+            )
+
+        names = []  # variable name tokens collected so far
+        want_name = True  # True right after the type or a comma
+
+        # `consumed` counts scope + type + every token looked at so far.
+        for consumed, token in enumerate(tokens[2:], start=3):
+            if token.type == "identifier":
+                if not want_name:
+                    raise JackSyntaxError(
+                        f"Expected `,`, got `{token}` instead", token
+                    )
+                names.append(token)
+                want_name = False
+            elif token == ",":
+                if want_name:
+                    raise JackSyntaxError(
+                        "Expected variable name, got `,` instead", token
+                    )
+                want_name = True
+            elif token == ";":
+                if want_name:
+                    raise JackSyntaxError(
+                        "Expected variable name, got `;` instead", token
+                    )
+                break
+            else:
+                expected = "variable name" if want_name else "`,` or `;`"
+                raise JackSyntaxError(
+                    f"Expected {expected}, got `{token}` instead", token
+                )
+        else:
+            # The token list ran out before a terminating semicolon was seen.
+            # TODO: print caret at end of token
+            raise JackSyntaxError("Missing semicolon", token)
+
+        return (Variable(scope, var_type, names), consumed)
+
+    def print_verbose(self):
+        """Print the number of declared variables, then one line per name."""
+        print(f"Declare {len(self.names)} variable(s):")
+        for declared in self.names:
+            print(self.scope, self.type, declared)
+
+
+class ParamList:
+    """Parameter list of a subroutine, stored as (type, name) token pairs."""
+
+    def __init__(self, params: list[tuple]):
+        self.params = params
+
+    @classmethod
+    def from_tokens(cls, tokens: list) -> tuple:
+        """Construct parameter list of subroutine from tokens.
+
+        Return (ParamList, number of tokens consumed), or (None, 0) when
+        `tokens` does not begin with an opening parenthesis.
+        Raise JackSyntaxError when the list is malformed or the tokens run
+        out before the closing parenthesis.
+
+        Format:
+            '(' (<type> <name> (, <type> <name>)*)? ')'
+
+            <type> = int | char | boolean | <class>
+        """
+        if len(tokens) < 2 or tokens[0] != LEFT_PAREN:
+            return (None, 0)
+
+        if tokens[1] == RIGHT_PAREN:
+            # empty param list, i.e. '(' ')'
+            return (cls([]), 2)
+
+        tokens_total = len(tokens)
+        params = []
+        pos = 1  # index of the next <type>; doubles as the consumed count
+
+        # Walk by index rather than zip() over strided slices: zip stops at
+        # the shortest slice, which silently accepted truncated input such as
+        # `( int )` or `( int x` as an empty parameter list.
+        while True:
+            if pos >= tokens_total:
+                # tokens ended right after a comma
+                raise JackSyntaxError("Expected datatype", tokens[-1])
+            type = tokens[pos]
+            if type not in VAR_TYPES and type.type != "identifier":
+                raise JackSyntaxError(f"Expected datatype, got `{type}` instead", type)
+
+            if pos + 1 >= tokens_total:
+                # TODO: print caret at end of type
+                raise JackSyntaxError("Expected variable name", type)
+            name = tokens[pos + 1]
+            if name.type != "identifier":
+                raise JackSyntaxError(
+                    f"Expected variable name, got `{name}` instead", name
+                )
+
+            if pos + 2 >= tokens_total:
+                raise JackSyntaxError(f"Expected `,` or `{RIGHT_PAREN}`", name)
+            delim = tokens[pos + 2]
+            closes_list = delim == RIGHT_PAREN
+            if not closes_list and not delim == ",":
+                raise JackSyntaxError(
+                    f"Expected `,` or `{RIGHT_PAREN}`, got `{delim}` instead", delim
+                )
+
+            params.append((type, name))
+            pos += 3
+            if closes_list:
+                break
+
+        return (cls(params), pos)
+
+
+class Subroutine:
+    """A subroutine header: category, return type, name and parameter list."""
+
+    def __init__(
+        self, category: Token, type: Token, name: Token, params: ParamList, body
+    ):
+        self.category, self.type, self.name = category, type, name
+        self.params, self.body = params, body
+
+    @classmethod
+    def from_tokens(cls, tokens: list) -> tuple:
+        """Parse a subroutine header from the front of `tokens`.
+
+        Return (Subroutine, number of tokens consumed), or (None, 0) when
+        `tokens` does not begin with a subroutine. Only the header up to and
+        including the parameter list is consumed; the body is stored as None.
+
+        Format:
+            <category> <return type> <name> <paramlist> <body>
+
+            <category>    = constructor | method | function
+            <return type> = int | char | boolean | void | <class>
+        """
+        if not (len(tokens) >= 7 and tokens[0] in SUBROUTINE_CATS):
+            # not a subroutine
+            return (None, 0)
+
+        category, return_type, name = tokens[:3]
+
+        returns_known_type = (
+            return_type in RETURN_TYPES or return_type.type == "identifier"
+        )
+        if not returns_known_type:
+            raise JackSyntaxError(
+                f"Expected datatype, got `{return_type}` instead", return_type
+            )
+
+        if name.type != "identifier":
+            raise JackSyntaxError(
+                f"Expected {category} name, got `{name}` instead", name
+            )
+
+        header_len = 3  # category, return type and name
+        params, param_len = ParamList.from_tokens(tokens[header_len:])
+        if params is None:
+            raise JackSyntaxError("Expected parameter list", tokens[header_len])
+
+        parsed = Subroutine(category, return_type, name, params, None)
+        return (parsed, header_len + param_len)
+
+    def print_verbose(self):
+        """Print the subroutine's category, name and return type on one line."""
+        print(f"Define {self.category} {self.name}, returns {self.type}")
diff --git a/projects/hackc/parser.py b/projects/hackc/parser.py
index 2c34d1b..2fc30ee 100644
--- a/projects/hackc/parser.py
+++ b/projects/hackc/parser.py
@@ -1,5 +1,5 @@
from .tokens import Token
-from .syntax import Class
+from .classes import Class
from .utils import *
KEYWORDS = [
diff --git a/projects/hackc/syntax.py b/projects/hackc/syntax.py
deleted file mode 100644
index 281bd17..0000000
--- a/projects/hackc/syntax.py
+++ /dev/null
@@ -1,146 +0,0 @@
-from .tokens import Token
-from .utils import *
-
-SCOPES = ["static", "field", "var"]
-PRIMITIVE_TYPES = ["int", "char", "boolean"]
-
-
-class Class:
- def __init__(self, name: Token, variables: list, subroutines: list):
- self.name = name
- self.variables = variables
- self.subroutines = subroutines
-
- @classmethod
- def from_tokens(cls, tokens: list):
- """Construct a class from a list of tokens.
-
- In standard Jack, one file is exactly one class.
-
- Format:
- class <name> {
- <zero or more class variable declarations>
- <zero or more subroutines>
- }
- """
- if len(tokens) < 4:
- return None
- if tokens[0] != "class":
- raise JackSyntaxError(
- f"Expected `class`, got `{tokens[0]}` instead", tokens[0]
- )
-
- name = tokens[1]
- if name.type != "identifier":
- raise JackSyntaxError(f"You cannot name a class `{name}`", name)
-
- if tokens[2] != LEFT_BRACE:
- raise JackSyntaxError(
- f"Expected `{LEFT_BRACE}`, got `{tokens[2]}` instead", tokens[2]
- )
-
- tokens_consumed = 3
-
- while True:
- variables, token_cnt = Variable.from_tokens(
- tokens[tokens_consumed:], context="class"
- )
- if variables is None:
- break
- variables.print_verbose()
- tokens_consumed += token_cnt
-
- return Class(name, variables, [])
-
-
-class Variable:
- def __init__(self, scope: Token, type: Token, names: list[Token]):
- self.scope = scope
- self.type = type
- self.names = names
-
- @classmethod
- def from_tokens(cls, tokens: list, context: str) -> tuple:
- """Construct variable declaration statement from a list of tokens.
- Return a tuple of an instance of Variable and number of tokens consumed.
- When `tokens` does not begin with a variable declaration, return (None, 0).
-
- context -- "class" (<scope> = static | field) or "subroutine" (<scope> = var)
-
- Format:
- <scope> <type> <one or more names, joined with a comma>;
-
- <scope> = static | field | var
- <type> = int | char | boolean | <class name>
- """
- if len(tokens) < 4 or tokens[0] not in SCOPES:
- # not variable declaration
- return (None, 0)
-
- scope = tokens[0]
- if scope in ["static", "field"] and context != "class":
- raise JackSyntaxError(
- f"You cannot declare a {scope} variable in a subroutine", scope
- )
- if scope == "var" and context != "subroutine":
- raise JackSyntaxError(
- f"You cannot declare a local variable outside of a subroutine",
- scope,
- )
-
- type = tokens[1]
- if type not in PRIMITIVE_TYPES and type.type != "identifier":
- raise JackSyntaxError(f"Expected datatype, got `{tokens[1]}` instead", type)
-
- tokens_consumed = 2
- names = [] # names of variables
- expecting_identifier = True
-
- for token in tokens[2:]:
- tokens_consumed += 1
- if token.type == "identifier":
- if expecting_identifier:
- names.append(token)
- expecting_identifier = False
- else:
- raise JackSyntaxError(f"Expected `,`, got `{token}` instead", token)
- elif token == ",":
- if not expecting_identifier:
- expecting_identifier = True
- else:
- raise JackSyntaxError(
- f"Expected variable name, got `,` instead", token
- )
- elif token == ";":
- if expecting_identifier:
- raise JackSyntaxError(
- f"Expected variable name, got `;` instead", token
- )
- break
- else:
- expected = "variable name" if expecting_identifier else "`,` or `;`"
- raise JackSyntaxError(
- f"Expected {expected}, got `{token}` instead", token
- )
-
- return (Variable(scope, type, names), tokens_consumed)
-
- def print_verbose(self):
- print(f"Declare {len(self.names)} variable(s):")
- for name in self.names:
- print(self.scope, self.type, name)
-
-
-class Subroutine:
- def __init__(self):
- pass
-
-
-class Statement:
- def __init__(self):
- super().__init__()
-
-
-class IfStatement(Statement):
- def __init__(self):
- super().__init__()
diff --git a/projects/hackc/utils.py b/projects/hackc/utils.py
index 4861088..4b9c9f0 100644
--- a/projects/hackc/utils.py
+++ b/projects/hackc/utils.py
@@ -7,6 +7,8 @@ EXIT_CODE_SYNTAX_ERROR = 4
# vim autoindent misbehaves if I type these verbatim in strings
LEFT_BRACE = "{"
RIGHT_BRACE = "}"
+LEFT_PAREN = "("
+RIGHT_PAREN = ")"
class JackSyntaxError(Exception):
def __init__(self, msg, token):