import re

# Reserved words of the JACK language; an identifier-shaped lexeme found in
# this list is classified as a "keyword" instead of an "identifier".
KEYWORDS = [
    "class",
    "constructor",
    "function",
    "method",
    "field",
    "static",
    "var",
    "int",
    "char",
    "boolean",
    "void",
    "true",
    "false",
    "null",
    "this",
    "let",
    "do",
    "if",
    "else",
    "while",
    "return",
]

# Every character in this string is a complete one-character "symbol" token.
SYMBOLS = "{}()[].,;+-*/&|<>=~"

# All values that ``Token.type`` can take when produced by ``Token.from_line``.
TOKEN_TYPES = ["keyword", "symbol", "integer", "string", "identifier"]

# Patterns are anchored at the start of the text by ``re.match``.
_INTEGER_RE = re.compile(r"([0-9]+)")
# Plain string literal: everything up to the next double quote.
_PLAIN_STRING_RE = re.compile(r'(".*?")')
# String literal with backslash escapes: the body is any run of characters
# that are not a quote/backslash/newline, or a backslash followed by any
# character (so ``\"`` does not terminate the literal).
_ESCAPED_STRING_RE = re.compile(r'("(?:[^"\\\n]|\\.)*")')
_WORD_RE = re.compile(r"([_A-Za-z][_A-Za-z0-9]*)")


class Token:
    """A token in JACK.

    Attributes:
        type: Category of the token; ``from_line`` produces one of the
            values listed in ``TOKEN_TYPES``.
        token: The token's text exactly as matched (string tokens keep
            their surrounding double quotes).
        line_no: Line number supplied by the caller.
        column: Column supplied by the caller.

    Tokens compare equal to a ``str`` or another ``Token`` with the same
    text. Because ``__eq__`` is defined without ``__hash__``, instances are
    not hashable.
    """

    def __init__(self, type: str, token: str, line_no: int, column: int):
        """Store the token's category, text and source position."""
        self.type = type
        self.token = token
        self.line_no = line_no
        self.column = column

    def __len__(self) -> int:
        """Return the number of characters in the token text."""
        return self.length()

    def __eq__(self, other) -> bool:
        """Compare by token text against a ``str`` or another ``Token``.

        Position and type are ignored. For any other operand type
        ``NotImplemented`` is returned so Python falls back to its default
        comparison (``==`` yields ``False``, ``!=`` yields ``True``).
        """
        if isinstance(other, str):
            return self.token == other
        if isinstance(other, Token):
            return self.token == other.token
        return NotImplemented

    def __str__(self) -> str:
        """Return the token text."""
        return self.token

    @classmethod
    def from_line(cls, line: str, line_no: int, column: int, extensions=None):
        """Extract first token from line and return it as an instance of Token.

        The token must start at ``line[0]``; leading whitespace is not
        skipped.

        Args:
            line: Text to tokenize.
            line_no: Line number recorded on the returned token.
            column: Column recorded on the returned token.
            extensions: Optional container of enabled extension names. When
                it contains ``"escape"``, string literals may contain
                backslash escapes such as ``\\"``.

        Returns:
            The first token of ``line``, or ``None`` if ``line`` is empty or
            does not start with a recognizable token.
        """
        if not line:
            return None

        first = line[0]
        if first in SYMBOLS:
            return cls("symbol", first, line_no, column)

        int_match = _INTEGER_RE.match(line)
        if int_match is not None:
            return cls("integer", int_match.group(1), line_no, column)

        escapes_enabled = extensions is not None and "escape" in extensions
        string_re = _ESCAPED_STRING_RE if escapes_enabled else _PLAIN_STRING_RE
        str_match = string_re.match(line)
        if str_match is not None:
            return cls("string", str_match.group(1), line_no, column)

        # keyword or identifier
        word_match = _WORD_RE.match(line)
        if word_match is not None:
            word = word_match.group(1)
            kind = "keyword" if word in KEYWORDS else "identifier"
            return cls(kind, word, line_no, column)

        return None

    def length(self) -> int:
        """Return the number of characters in the token text."""
        return len(self.token)