from .tokens import Token
from .classes import Class
from .utils import *


class Parser:
    """Load one source file, split it into tokens and parse it.

    Typical use is ``Parser(path).tokenize()`` followed by ``parse()``.
    Tokenizing and parsing errors are reported through ``print_err`` and
    terminate the process with a dedicated exit code.
    """

    def __init__(self, fp, extensions=None):
        """Read the file at *fp* into memory.

        Args:
            fp: Path of the source file to read.
            extensions: Optional list forwarded unchanged to
                ``Token.from_line``. Defaults to a fresh empty list.
        """
        self._fp = fp
        # A ``None`` sentinel avoids sharing one default list between
        # every Parser instance.
        self._extensions = [] if extensions is None else extensions
        self.tokens = []

        # Load source code; the context manager closes the file even if
        # reading fails.
        with open(fp) as input_file:
            self.source = input_file.read()
        self.lines = self.source.splitlines()

    def print_tokens(self):
        """Print a tab-separated table of the current tokens.

        Line and column numbers are shown 1-based; the type column shows
        the first three characters of ``token.type``.
        """
        print("LINE\tCOL\tTYPE\tTOKEN")
        for token in self.tokens:
            print(f"{token.line_no + 1}\t{token.column + 1}\t{token.type[:3]}\t{token}")
        print(f"===== {len(self.tokens)} tokens =====")

    def tokenize(self):
        """Fill ``self.tokens`` from ``self.lines``.

        Whitespace, ``//`` line comments and ``/* ... */`` block comments
        (which may span several lines) are skipped. Everything else is
        handed to ``Token.from_line``. If that returns ``None`` the
        offending position is reported and the process exits with
        ``EXIT_CODE_INVALID_TOKEN``.
        """
        self.tokens = []
        in_multicomment = False  # True when inside /* */
        for line_no, line in enumerate(self.lines):
            pos = 0  # current position in line
            line_width = len(line)

            if in_multicomment:
                multicomment_close_idx = line.find("*/")
                if multicomment_close_idx == -1:
                    # this line is entirely comment
                    continue
                # skip until comment ends
                pos = multicomment_close_idx + 2
                in_multicomment = False

            # advance in line until exhausted
            while pos < line_width:
                rem = line[pos:]  # remainder of line

                ws_count = len(rem) - len(rem.lstrip())
                if ws_count > 0:
                    pos += ws_count
                    continue

                if rem.startswith("/*"):
                    # Search past the opener so that the "*" of "/*"
                    # cannot double as the start of "*/" (as in "/*/").
                    multicomment_close_idx = rem.find("*/", 2)
                    if multicomment_close_idx == -1:
                        in_multicomment = True
                        break  # this line is all comment beyond this point
                    # Skip the comment, then re-scan from the loop head:
                    # whitespace or another comment may follow it.
                    pos += multicomment_close_idx + 2
                    continue

                if rem.startswith("//"):
                    break  # rest of the line is a comment

                token = Token.from_line(
                    rem, line_no, pos, extensions=self._extensions
                )
                if token is not None:
                    self.tokens.append(token)
                    pos += token.length()
                else:
                    # invalid token
                    print_err(f"{self._fp}:{line_no + 1}")
                    print_err(line)
                    print_err(" " * pos + "^ Invalid token")
                    exit(EXIT_CODE_INVALID_TOKEN)

    def parse(self):
        """Build a syntax tree from ``self.tokens`` and print it verbosely.

        A ``JackSyntaxError`` is reported at the offending token and exits
        with ``EXIT_CODE_SYNTAX_ERROR``. An ``IndexError`` is treated as
        the token stream ending too early: it is reported at the end of
        the last line and exits with ``EXIT_CODE_EOF``.
        """
        try:
            syntax_tree = Class.from_tokens(self.tokens)
            syntax_tree.print_verbose()
        except JackSyntaxError as err:
            print_err(f"{self._fp}:{err.token.line_no + 1}")
            print_err(self.lines[err.token.line_no])
            print_err(" " * err.token.column + "^ " + err.message)
            exit(EXIT_CODE_SYNTAX_ERROR)
        except IndexError:
            # An empty file has no last line; indexing [-1] here would
            # raise a second IndexError from inside the handler.
            last_line = self.lines[-1] if self.lines else ""
            print_err(f"{self._fp}:{max(len(self.lines), 1)}")
            print_err(last_line)
            print_err(" " * len(last_line) + "^ Unexpected EOF")
            exit(EXIT_CODE_EOF)