summaryrefslogtreecommitdiff
path: root/projects/hackc/tokens.py
diff options
context:
space:
mode:
author Frederick Yin <fkfd@fkfd.me> 2022-08-29 20:20:08 +0800
committer Frederick Yin <fkfd@fkfd.me> 2022-08-29 20:20:08 +0800
commit 51e1667e716ea8c6b20f37cdec1f99eef55eccd6 (patch)
tree 3b023734a7337de535923bd0c08cf86cc4a4a647 /projects/hackc/tokens.py
parent ca3e66d0cb0825285af7ea34a73355cf34e00a62 (diff)
hackc: tokenizer
Diffstat (limited to 'projects/hackc/tokens.py')
-rw-r--r-- projects/hackc/tokens.py 66
1 files changed, 66 insertions, 0 deletions
diff --git a/projects/hackc/tokens.py b/projects/hackc/tokens.py
new file mode 100644
index 0000000..7ae37ce
--- /dev/null
+++ b/projects/hackc/tokens.py
@@ -0,0 +1,66 @@
+import re
+
# Reserved words of the language. A bare word found in this list is tagged
# "keyword"; any other word is tagged "identifier".
KEYWORDS = [
    # program structure
    "class", "constructor", "function", "method",
    # variable declarations
    "field", "static", "var",
    # built-in types
    "int", "char", "boolean", "void",
    # constants
    "true", "false", "null", "this",
    # statements
    "let", "do", "if", "else", "while", "return",
]

# Every character in this string is a complete one-character symbol token.
SYMBOLS = "{}()[].,;+-*/&|<>=~"

# The closed set of values that a token's ``type`` may take.
TOKEN_TYPES = [
    "keyword",
    "symbol",
    "integer",
    "string",
    "identifier",
]
+
class Token:
    """A single lexical token of the Jack language.

    A token is plain data: its category (``type``), its exact source text
    (``token``) and the position it was found at (``line_no``, ``column``).
    Positions are stored exactly as given; this class does not interpret them.
    """

    # Patterns are compiled once and always applied with ``match``, i.e.
    # anchored at the very start of the text handed to ``from_line``.
    _INTEGER_RE = re.compile(r"[0-9]+")
    # A string literal ends at the FIRST closing quote. The character class
    # excludes '"' so that two literals on one line are not swallowed as a
    # single token, and excludes newline so a literal never spans lines.
    _STRING_RE = re.compile(r'"[^"\n]*"')
    _WORD_RE = re.compile(r"[_A-Za-z][_A-Za-z0-9]*")

    def __init__(self, type: str, token: str, line_no: int, column: int):
        """Store the token's category, text and source position.

        Args:
            type: Category of the token; ``from_line`` produces "symbol",
                "integer", "string", "keyword" or "identifier".
            token: The token text as it appears in the source. For strings
                this includes the surrounding double quotes.
            line_no: Line number the token was read from.
            column: Column at which the token starts.
        """
        self.type = type
        self.token = token
        self.line_no = line_no
        self.column = column

    def __repr__(self) -> str:
        return (
            f"{self.__class__.__name__}({self.type!r}, {self.token!r}, "
            f"{self.line_no!r}, {self.column!r})"
        )

    @classmethod
    def from_line(cls, line: str, line_no: int, column: int) -> "Token | None":
        """Extract the token that starts at the beginning of ``line``.

        Matching is anchored at ``line[0]``; nothing is skipped, so a line
        that begins with whitespace (or any other non-token character)
        yields ``None``.

        Args:
            line: Remaining source text, starting where the token should be.
            line_no: Line number to record on the token.
            column: Column to record on the token.

        Returns:
            An instance of ``cls`` describing the first token, or ``None``
            if ``line`` is empty or does not start with a recognisable token
            (this includes a string literal with no closing quote).
        """
        if not line:
            return None

        first_char = line[0]
        if first_char in SYMBOLS:
            return cls("symbol", first_char, line_no, column)

        int_match = cls._INTEGER_RE.match(line)
        if int_match is not None:
            return cls("integer", int_match.group(0), line_no, column)

        str_match = cls._STRING_RE.match(line)
        if str_match is not None:
            return cls("string", str_match.group(0), line_no, column)

        # A bare word is a keyword if reserved, otherwise an identifier.
        word_match = cls._WORD_RE.match(line)
        if word_match is not None:
            word = word_match.group(0)
            token_type = "keyword" if word in KEYWORDS else "identifier"
            return cls(token_type, word, line_no, column)

        return None

    def length(self) -> int:
        """Return the number of characters in the token text."""
        return len(self.token)