diff options
Diffstat (limited to 'projects/hackc/tokens.py')
-rw-r--r-- | projects/hackc/tokens.py | 66 |
1 files changed, 66 insertions, 0 deletions
"""Token definitions and single-token extraction for the JACK language."""

import re
from typing import Optional

# Reserved words; an identifier-shaped lexeme found here is a keyword.
KEYWORDS = [
    "class",
    "constructor",
    "function",
    "method",
    "field",
    "static",
    "var",
    "int",
    "char",
    "boolean",
    "void",
    "true",
    "false",
    "null",
    "this",
    "let",
    "do",
    "if",
    "else",
    "while",
    "return",
]
# Every character in this string is a complete one-character symbol token.
SYMBOLS = "{}()[].,;+-*/&|<>=~"
# The values that Token.type can take.
TOKEN_TYPES = ["keyword", "symbol", "integer", "string", "identifier"]

# Patterns are anchored at the start of the text by re.match().
_INTEGER_RE = re.compile(r"[0-9]+")
# A negated character class (rather than ".*") stops the match at the FIRST
# closing quote, so two string literals on one line are not fused together.
_STRING_RE = re.compile(r'"[^"\n]*"')
_WORD_RE = re.compile(r"[_A-Za-z][_A-Za-z0-9]*")


class Token:
    def __init__(self, type: str, token: str, line_no: int, column: int):
        """A token in JACK.

        Args:
            type: One of TOKEN_TYPES.
            token: The token text exactly as matched in the source; string
                tokens keep their surrounding double quotes.
            line_no: Line number the token was found on.
            column: Column the token starts at.
        """
        self.type = type
        self.token = token
        self.line_no = line_no
        self.column = column

    def __repr__(self) -> str:
        return (
            f"{type(self).__name__}({self.type!r}, {self.token!r}, "
            f"{self.line_no!r}, {self.column!r})"
        )

    @classmethod
    def from_line(cls, line: str, line_no: int, column: int) -> Optional["Token"]:
        """Extract first token from line and return it as an instance of Token.

        The token must begin at ``line[0]``; leading whitespace is not
        skipped here.

        Args:
            line: Remaining source text, starting at the candidate token.
            line_no: Line number to record on the token.
            column: Column to record on the token.

        Returns:
            The token at the start of ``line``, or None when ``line`` is
            empty or does not start with a recognizable token (this includes
            a string literal with no closing quote).
        """
        if not line:
            return None

        if line[0] in SYMBOLS:
            return cls("symbol", line[0], line_no, column)

        int_match = _INTEGER_RE.match(line)
        if int_match is not None:
            return cls("integer", int_match.group(0), line_no, column)

        str_match = _STRING_RE.match(line)
        if str_match is not None:
            # The quotes are kept in the token text so that length() still
            # reports how many source characters the token occupies.
            return cls("string", str_match.group(0), line_no, column)

        # keyword or identifier
        word_match = _WORD_RE.match(line)
        if word_match is not None:
            word = word_match.group(0)
            token_type = "keyword" if word in KEYWORDS else "identifier"
            return cls(token_type, word, line_no, column)

        return None

    def length(self) -> int:
        """Return the number of characters in the token text."""
        return len(self.token)