summary refs log tree commit diff
diff options
context:
space:
mode:
author Frederick Yin <fkfd@fkfd.me> 2022-09-03 11:22:20 +0800
committer Frederick Yin <fkfd@fkfd.me> 2022-09-03 11:22:20 +0800
commit b1794edb3d76db7c1a86fc14a60bc95b833bc7c7 (patch)
tree 582288c1618a74405d3c183410d7525b8d6aa31f
parent bed429a1d17b43678a70bc286aac12a0bd6e387d (diff)
hackc: more statements; UnexpectedToken
-rw-r--r-- projects/hackc/classes.py 62
-rw-r--r-- projects/hackc/expressions.py 96
-rw-r--r-- projects/hackc/parser.py 28
-rw-r--r-- projects/hackc/statements.py 156
-rw-r--r-- projects/hackc/utils.py 47
5 files changed, 296 insertions, 93 deletions
diff --git a/projects/hackc/classes.py b/projects/hackc/classes.py
index bd83cc4..f1358f6 100644
--- a/projects/hackc/classes.py
+++ b/projects/hackc/classes.py
@@ -30,21 +30,16 @@ class Class:
if tokens_total < 4:
return None
if tokens[0] != "class":
- raise JackSyntaxError(
- f"Expected `class`, got `{tokens[0]}` instead", tokens[0]
- )
+ raise UnexpectedToken("class", tokens[0])
name = tokens[1]
if name.type != "identifier":
raise JackSyntaxError(f"You cannot name a class `{name}`", name)
if tokens[2] != LEFT_BRACE:
- raise JackSyntaxError(
- f"Expected `{LEFT_BRACE}`, got `{tokens[2]}` instead", tokens[2]
- )
+ raise UnexpectedToken(LEFT_BRACE, tokens[2])
t = 3
-
variables = []
while t < tokens_total:
variable, dt = Variable.from_tokens(tokens[t:], context="class")
@@ -115,7 +110,7 @@ class Variable:
type = tokens[1]
if type not in VAR_TYPES and type.type != "identifier":
- raise JackSyntaxError(f"Expected datatype, got `{tokens[1]}` instead", type)
+ raise UnexpectedToken("datatype", type)
t = 2
names = [] # names of variables
@@ -129,26 +124,20 @@ class Variable:
names.append(token)
expecting_identifier = False
else:
- raise JackSyntaxError(f"Expected `,`, got `{token}` instead", token)
+ raise UnexpectedToken(",", token)
elif token == ",":
if not expecting_identifier:
expecting_identifier = True
else:
- raise JackSyntaxError(
- f"Expected variable name, got `,` instead", token
- )
+ raise UnexpectedToken("variable name", token)
elif token == ";":
if expecting_identifier:
- raise JackSyntaxError(
- f"Expected variable name, got `;` instead", token
- )
+ raise UnexpectedToken("variable name", token)
found_semicolon = True
break
else:
expected = "variable name" if expecting_identifier else "`,` or `;`"
- raise JackSyntaxError(
- f"Expected {expected}, got `{token}` instead", token
- )
+ raise UnexpectedToken(expected, token)
if not found_semicolon:
# TODO: print caret at end of token
@@ -186,16 +175,14 @@ class ParamList:
params = []
for type, name, delim in zip(tokens[0::3], tokens[1::3], tokens[2::3]):
if type not in VAR_TYPES and type.type != "identifier":
- raise JackSyntaxError(f"Expected datatype, got `{type}` instead", type)
+ raise UnexpectedToken("datatype", type)
if not name:
# TODO: print caret at end of type
raise JackSyntaxError("Expected variable name", type)
if name.type != "identifier":
- raise JackSyntaxError(
- f"Expected variable name, got `{name}` instead", name
- )
+ raise UnexpectedToken("variable name", name)
if not delim:
- raise JackSyntaxError(f"Expected `,` or `{RIGHT_PAREN}`", name)
+ raise UnexpectedToken(f"`,` or `{RIGHT_PAREN}`", name)
if delim == ",":
t += 3
params.append((type, name))
@@ -205,9 +192,7 @@ class ParamList:
params.append((type, name))
break
else:
- raise JackSyntaxError(
- f"Expected `,` or `{RIGHT_PAREN}`, got `{delim}` instead", delim
- )
+ raise UnexpectedToken(f"`,` or `{RIGHT_PAREN}`", delim)
return (ParamList(params), t)
@@ -251,20 +236,14 @@ class Subroutine:
return_type = tokens[1]
if return_type not in RETURN_TYPES and return_type.type != "identifier":
- raise JackSyntaxError(
- f"Expected datatype, got `{return_type}` instead", return_type
- )
+ raise UnexpectedToken("datatype", return_type)
name = tokens[2]
if name.type != "identifier":
- raise JackSyntaxError(
- f"Expected {category} name, got `{name}` instead", name
- )
+ raise UnexpectedToken(f"{category} name", name)
if tokens[3] != LEFT_PAREN:
- raise JackSyntaxError(
- f"Expected `{LEFT_PAREN}`, got `{tokens[3]}` instead", tokens[3]
- )
+ raise UnexpectedToken(LEFT_PAREN, tokens[3])
t = 4
params, dt = ParamList.from_tokens(tokens[t:])
@@ -276,9 +255,7 @@ class Subroutine:
# TODO: catch IndexError
body_open = tokens[t]
if body_open != LEFT_BRACE:
- raise JackSyntaxError(
- f"Expected `{LEFT_BRACE}`, got `{body_open}` instead", body_open
- )
+ raise UnexpectedToken(LEFT_BRACE, body_open)
t += 1
variables = []
@@ -299,12 +276,13 @@ class Subroutine:
body_close = tokens[t]
if body_close != RIGHT_BRACE:
- raise JackSyntaxError(
- f"Expected `{RIGHT_BRACE}`, got `{body_close}` instead", body_close
- )
+ raise UnexpectedToken(RIGHT_BRACE, body_close)
t += 1
- return (Subroutine(category, return_type, name, params, variables, statements), t)
+ return (
+ Subroutine(category, return_type, name, params, variables, statements),
+ t,
+ )
def print_verbose(self):
print(f"Define {self.category} {self.type} {self.name}")
diff --git a/projects/hackc/expressions.py b/projects/hackc/expressions.py
index f4623da..2e08cef 100644
--- a/projects/hackc/expressions.py
+++ b/projects/hackc/expressions.py
@@ -24,9 +24,7 @@ class Term:
"""Format: '(' <expression> ')'"""
expr, dt = Expression.from_tokens(tokens[1:])
if tokens[dt + 1] != RIGHT_PAREN:
- raise JackSyntaxError(
- f"Expected `{RIGHT_PAREN}`, got `{tokens[dt]}` instead", tokens[dt]
- )
+ raise UnaryTerm(RIGHT_PAREN, tokens[dt])
return (expr, dt + 2)
if tokens[0].type == "identifier":
if tokens[1] in [LEFT_PAREN, "."]:
@@ -69,9 +67,7 @@ class SubscriptTerm:
raise JackSyntaxError(f"Expected subscript", tokens[2])
t += dt
if tokens[t] != RIGHT_BRACKET:
- raise JackSyntaxError(
- f"Expected `{RIGHT_BRACKET}`, got `{tokens[t]}` instead", tokens[t]
- )
+ raise UnexpectedToken(RIGHT_BRACKET, tokens[t])
return (SubscriptTerm(var, sub), t + 1)
def __str__(self):
@@ -95,16 +91,6 @@ class UnaryTerm:
return f"({self.op}{self.term})"
-class SubroutineCall:
- def __init__(self, name: Token, exprs: list):
- self.name = name
- self.exprs = exprs
-
- @classmethod
- def from_tokens(cls, tokens: list) -> tuple:
- pass
-
-
class Expression:
def __init__(self, lhs: Term, op=None, rhs=None):
self.lhs = lhs
@@ -119,6 +105,8 @@ class Expression:
<term> (<op> <term>)?
"""
lhs, dt = Term.from_tokens(tokens)
+ if lhs is None:
+ return (None, 0)
t = dt
op = tokens[t]
@@ -128,7 +116,7 @@ class Expression:
t += 1
rhs, dt = Term.from_tokens(tokens[t:])
if rhs is None:
- raise JSE(f"Expected other term, got `{rhs}` instead", rhs)
+ raise UnexpectedToken("other term", rhs)
t += dt
return (Expression(lhs, op, rhs), t)
@@ -138,3 +126,77 @@ class Expression:
return f"({self.lhs} {self.op} {self.rhs})"
else:
return str(self.lhs)
+
+
+class ExpressionList:
+ def __init__(self, exprs: list[Expression]):
+ self.exprs = exprs
+
+ @classmethod
+ def from_tokens(cls, tokens: list) -> tuple:
+ """Construct list of expressions.
+
+ Format:
+ (<expression> (',' <expression>)*)?
+ """
+ t = 0
+ exprs = []
+ while True:
+ expr, dt = Expression.from_tokens(tokens[t:])
+ if expr is None:
+ if t == 0:
+ # only allow lack of expression right after paren
+ break
+ else:
+ # expect expression after comma
+ raise JackSyntaxError(f"Expected expression", tokens[t])
+ t += dt
+ exprs.append(expr)
+ if tokens[t] != ",":
+ break
+ t += 1
+
+ return (ExpressionList(exprs), t)
+
+ def __str__(self):
+ return ", ".join([str(expr) for expr in self.exprs])
+
+
+class SubroutineCall:
+ def __init__(self, jack_class: Token, name: Token, exprs: ExpressionList):
+ self.jack_class = jack_class
+ self.name = name
+ self.exprs = exprs
+
+ @classmethod
+ def from_tokens(cls, tokens: list) -> tuple:
+ """Construct invocation of subroutine.
+
+ Format:
+ (<class> '.')? <subroutine> '(' <expression list> ')'
+ """
+ t = 0
+ jack_class = None
+ if tokens[1] == ".":
+ jack_class = tokens[0]
+ t = 2
+
+ name = tokens[t]
+ if name.type != "identifier":
+ raise UnexpectedToken("subroutine name", name)
+ t += 1
+
+ if tokens[t] != LEFT_PAREN:
+ raise UnexpectedToken(LEFT_PAREN, tokens[t])
+ t += 1
+
+ exprs, dt = ExpressionList.from_tokens(tokens[t:])
+ t += dt
+ if tokens[t] != RIGHT_PAREN:
+ raise UnexpectedToken(RIGHT_PAREN, tokens[t])
+ return (SubroutineCall(jack_class, name, exprs), t + 1)
+
+ def __str__(self):
+ if self.jack_class is None:
+ return f"{self.name}({self.exprs})"
+ return f"{self.jack_class}.{self.name}({self.exprs})"
diff --git a/projects/hackc/parser.py b/projects/hackc/parser.py
index 188bddc..406c139 100644
--- a/projects/hackc/parser.py
+++ b/projects/hackc/parser.py
@@ -2,32 +2,6 @@ from .tokens import Token
from .classes import Class
from .utils import *
-KEYWORDS = [
- "class",
- "constructor",
- "function",
- "method",
- "field",
- "static",
- "var",
- "int",
- "char",
- "boolean",
- "void",
- "true",
- "false",
- "null",
- "this",
- "let",
- "do",
- "if",
- "else",
- "while",
- "return",
-]
-
-SYMBOLS = "{}()[].,;+-*/&|<>=~"
-
class Parser:
def __init__(self, fp, extensions=[]):
@@ -82,6 +56,8 @@ class Parser:
break
rem = line[pos:] # remainder of line
+ if not rem:
+ continue
token = Token.from_line(rem, line_no, pos, extensions=self._extensions)
if token is not None:
self.tokens.append(token)
diff --git a/projects/hackc/statements.py b/projects/hackc/statements.py
index 16c123e..0e257ed 100644
--- a/projects/hackc/statements.py
+++ b/projects/hackc/statements.py
@@ -1,19 +1,42 @@
-from .expressions import Expression
+from .expressions import Expression, SubroutineCall
from .utils import *
+
class Statement:
def __init__(self):
pass
@classmethod
def from_tokens(cls, tokens: list) -> tuple:
- for StatementClass in [LetStatement]:
+ for StatementClass in [LetStatement, DoStatement, ReturnStatement, IfStatement]:
stmt, dt = StatementClass.from_tokens(tokens)
if stmt is not None:
return (stmt, dt)
return (None, 0)
+
+class StatementList:
+ def __init__(self, statements: list[Statement]):
+ self.statements = statements
+
+ @classmethod
+ def from_tokens(cls, tokens: list) -> tuple:
+ t = 0
+ statements = []
+ while True:
+ stmt, dt = Statement.from_tokens(tokens[t:])
+ if stmt is None:
+ break
+ statements.append(stmt)
+ t += dt
+ return (StatementList(statements), t)
+
+ def print_verbose(self):
+ for stmt in self.statements:
+ stmt.print_verbose()
+
+
class LetStatement:
def __init__(self, name, expr):
self.name = name
@@ -31,22 +54,139 @@ class LetStatement:
name = tokens[1]
if name.type != "identifier":
- raise JackSyntaxError(f"Expected variable name, got `{name}` instead", name)
+ raise UnexpectedToken("variable name", name)
if tokens[2] != "=":
- raise JackSyntaxError(f"Expected `=`, got `{tokens[2]}` instead", tokens[2])
+ raise UnexpectedToken("=", tokens[2])
t = 3
expr, dt = Expression.from_tokens(tokens[t:])
if expr is None:
- raise JackSyntaxError(f"Expected expression", tokens[3])
+ raise UnexpectedToken(f"Expected expression", tokens[3])
t += dt
if tokens[t] != ";":
- raise JackSyntaxError(f"Expected `;`, got `{tokens[t]}` instead", tokens[t])
+ raise UnexpectedToken(";", tokens[t])
- t += 1
- return (LetStatement(name, expr), t)
+ return (LetStatement(name, expr), t + 1)
def print_verbose(self):
print(f"Let {self.name} be {self.expr}")
+
+
+class DoStatement:
+ def __init__(self, subcall: SubroutineCall):
+ self.subcall = subcall
+
+ @classmethod
+ def from_tokens(cls, tokens: list) -> tuple:
+ """Construct do statement.
+
+ Format:
+ 'do' <subroutine call> ';'
+ """
+ if len(tokens) < 5 or tokens[0] != "do":
+ return (None, 0)
+
+ t = 1
+ subcall, dt = SubroutineCall.from_tokens(tokens[1:])
+ if subcall is None:
+ raise UnexpectedToken("subroutine call", tokens[1])
+ t += dt
+
+ if tokens[t] != ";":
+ raise UnexpectedToken(";", tokens[t])
+
+ return (DoStatement(subcall), t + 1)
+
+ def print_verbose(self):
+ print(f"Do {self.subcall}")
+
+
+class ReturnStatement:
+ def __init__(self, expr: Expression):
+ self.expr = expr
+
+ @classmethod
+ def from_tokens(cls, tokens: list) -> tuple:
+ """Construct return statement.
+
+ Format:
+ 'return' <expression>? ';'
+ """
+ if len(tokens) < 3 or tokens[0] != "return":
+ return (None, 0)
+
+ t = 1
+ expr, dt = Expression.from_tokens(tokens[1:])
+ t += dt
+
+ if tokens[t] != ";":
+ raise UnexpectedToken(";", tokens[t])
+
+ return (ReturnStatement(expr), t + 1)
+
+ def print_verbose(self):
+ print(f"Return {self.expr or 'null'}")
+
+
+class IfStatement:
+ def __init__(self, condition: Expression, then: StatementList, else_then=None):
+ self.condition = condition
+ self.then = then
+ self.else_then = else_then
+
+ @classmethod
+ def from_tokens(cls, tokens: list) -> tuple:
+ """Construct if statement.
+
+ Format:
+ 'if' '(' <expression> ')' '{' <statement list> '}' ('else' '{' <statement list '}')?
+ """
+ if len(tokens) < 6 or tokens[0] != "if":
+ return (None, 0)
+
+ if tokens[1] != LEFT_PAREN:
+ raise UnexpectedToken(LEFT_PAREN, tokens[1])
+
+ t = 2
+ cond, dt = Expression.from_tokens(tokens[2:])
+ if cond is None:
+ raise JackSyntaxError(f"Expected condition", tokens[2])
+ t += dt
+
+ if tokens[t] != RIGHT_PAREN:
+ raise UnexpectedToken(RIGHT_PAREN, tokens[t])
+ if tokens[t + 1] != LEFT_BRACE:
+ raise UnexpectedToken(LEFT_BRACE, tokens[t + 1])
+ t += 2
+
+ then, dt = StatementList.from_tokens(tokens[t:])
+ t += dt
+
+ if tokens[t] != RIGHT_BRACE:
+ raise UnexpectedToken(RIGHT_BRACE, tokens[t])
+ t += 1
+
+ if tokens[t] != "else":
+ return (IfStatement(cond, then), t)
+
+ if tokens[t + 1] != LEFT_BRACE:
+ raise UnexpectedToken(LEFT_BRACE, tokens[t + 1])
+
+ t += 2
+
+ else_then, dt = StatementList.from_tokens(tokens[t:])
+ t += dt
+
+ if tokens[t] != RIGHT_BRACE:
+ raise UnexpectedToken(RIGHT_BRACE, tokens[t])
+
+ return (IfStatement(cond, then, else_then), t + 1)
+
+ def print_verbose(self):
+ print(f"If {self.condition} then:")
+ self.then.print_verbose()
+ if self.else_then is not None:
+ print("Else then:")
+ self.else_then.print_verbose()
diff --git a/projects/hackc/utils.py b/projects/hackc/utils.py
index d375953..8756e46 100644
--- a/projects/hackc/utils.py
+++ b/projects/hackc/utils.py
@@ -1,5 +1,31 @@
from sys import stderr
+KEYWORDS = [
+ "class",
+ "constructor",
+ "function",
+ "method",
+ "field",
+ "static",
+ "var",
+ "int",
+ "char",
+ "boolean",
+ "void",
+ "true",
+ "false",
+ "null",
+ "this",
+ "let",
+ "do",
+ "if",
+ "else",
+ "while",
+ "return",
+]
+
+SYMBOLS = "{}()[].,;+-*/&|<>=~"
+
EXIT_CODE_FILE_ERROR = 1
EXIT_CODE_INVALID_TOKEN = 2
EXIT_CODE_SYNTAX_ERROR = 4
@@ -13,6 +39,7 @@ RIGHT_BRACKET = "]"
LEFT_PAREN = "("
RIGHT_PAREN = ")"
+
class JackSyntaxError(Exception):
def __init__(self, msg, token):
self.message = msg
@@ -20,5 +47,25 @@ class JackSyntaxError(Exception):
super().__init__(msg)
+class UnexpectedToken(JackSyntaxError):
+ def __init__(self, expected, unexpected):
+ if str(expected) in KEYWORDS or str(expected) in SYMBOLS:
+ # wrap literal keyword/symbol in backticks
+ super().__init__(
+ f"Expected `{expected}`, got `{unexpected}` instead", unexpected
+ )
+ else:
+ super().__init__(
+ f"Expected {expected}, got `{unexpected}` instead", unexpected
+ )
+
+
+class Unexpected(JackSyntaxError):
+ def __init__(self, expected, unexpected):
+ super().__init__(
+ f"Expected `{expected}`, got `{unexpected}` instead", unexpected
+ )
+
+
def print_err(msg):
print(msg, file=stderr)