diff options
Diffstat (limited to 'projects/hackc/parser.py')
-rw-r--r-- | projects/hackc/parser.py | 85 |
1 files changed, 85 insertions, 0 deletions
from .tokens import Token

# Word list and symbol set exposed at module level. Neither is referenced
# inside this module; presumably the token classifier in `.tokens` consumes
# them -- TODO confirm against that module.
KEYWORDS = [
    "class",
    "constructor",
    "function",
    "method",
    "field",
    "static",
    "var",
    "int",
    "char",
    "boolean",
    "void",
    "true",
    "false",
    "null",
    "this",
    "let",
    "do",
    "if",
    "else",
    "while",
    "return",
]

SYMBOLS = "{}()[].,;+-*/&|<>=~"


class Parser:
    """Split a source file into a flat list of tokens.

    The file is scanned line by line.  Whitespace, ``// ...`` line comments
    and ``/* ... */`` block comments (which may span several lines) are
    skipped; everything else is handed to ``Token.from_line``.
    """

    def __init__(self, fp):
        """Remember the path to read; no I/O happens until ``tokenize``.

        Args:
            fp: Path of the source file, passed unchanged to ``open``.
        """
        self._fp = fp
        self.tokens = []

    def print_tokens(self):
        """Print a tab-separated table of the collected tokens to stdout.

        Line and column numbers are shown 1-based (they are stored 0-based)
        and the token type is abbreviated to its first three characters.
        """
        print("LINE\tCOL\tTYPE\tTOKEN")
        for token in self.tokens:
            print(f"{token.line_no + 1}\t{token.column + 1}\t{token.type[:3]}\t{token.token}")
        print(f"===== {len(self.tokens)} tokens =====")

    def tokenize(self):
        """Read the source file and rebuild ``self.tokens`` from scratch.

        Raises:
            OSError: If the file cannot be opened or read.
            ValueError: If ``Token.from_line`` rejects the text at a position
                where nothing was skipped, i.e. the scanner cannot advance.
        """
        # The context manager closes the handle even when read() raises.
        with open(self._fp) as input_file:
            source_lines = input_file.read().splitlines()

        self.tokens = []
        in_multicomment = False  # True while inside an unterminated /* */
        for line_no, line in enumerate(source_lines):
            pos = 0  # current position in line
            line_width = len(line)
            if in_multicomment:
                multicomment_close_idx = line.find("*/")
                if multicomment_close_idx == -1:
                    # this line is entirely comment
                    continue
                # resume right after the terminator
                pos = multicomment_close_idx + 2
                in_multicomment = False

            # advance in line until exhausted
            while pos < line_width:
                start = pos  # used below to detect a scanner that cannot advance
                rem = line[pos:]  # remainder of line
                ws_count = len(rem) - len(rem.lstrip())
                if ws_count > 0:
                    # remainder begins with whitespace
                    pos += ws_count
                    continue
                elif rem.startswith("/*"):
                    # Search past the opener: in "/*/" the '*' belongs to
                    # "/*" and must not also be read as the start of "*/".
                    multicomment_close_idx = rem.find("*/", 2)
                    if multicomment_close_idx == -1:
                        in_multicomment = True
                        break  # this line is all comment beyond this point
                    # skip until comment ends on the same line
                    pos += multicomment_close_idx + 2
                elif rem.startswith("//"):
                    break

                rem = line[pos:]  # remainder after any comment just skipped
                token = Token.from_line(rem, line_no, pos)
                if token is not None:
                    self.tokens.append(token)
                    pos += token.length()
                elif pos == start:
                    # Nothing was skipped and no token was produced: without
                    # this guard the loop would spin forever on this position.
                    raise ValueError(
                        f"{self._fp}:{line_no + 1}:{pos + 1}: cannot tokenize {rem!r}"
                    )