blob: 406c1391f32f9a5a4b8830efe0a04b1b88a0d6e6 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
|
from .tokens import Token
from .classes import Class
from .utils import *
class Parser:
    """Tokenizes and parses a single Jack source file.

    The file at *fp* is read eagerly on construction. Call ``tokenize()``
    to populate ``self.tokens``, then ``parse()`` to build and print the
    syntax tree. On errors, both report to stderr and exit the process.
    """

    def __init__(self, fp, extensions=None):
        # ``extensions`` previously defaulted to a shared mutable list
        # (``[]``); a None sentinel gives each instance its own list while
        # remaining backward compatible for callers that pass nothing.
        self._fp = fp
        self._extensions = [] if extensions is None else extensions
        self.tokens = []
        # Load source code; the context manager guarantees the file handle
        # is closed even if read() raises.
        with open(fp) as input_file:
            self.source = input_file.read()
        self.lines = self.source.splitlines()

    def print_tokens(self):
        """Print a tab-separated table of all tokens (1-based positions)."""
        print("LINE\tCOL\tTYPE\tTOKEN")
        for token in self.tokens:
            print(f"{token.line_no + 1}\t{token.column + 1}\t{token.type[:3]}\t{token}")
        print(f"===== {len(self.tokens)} tokens =====")

    def tokenize(self):
        """Scan ``self.lines`` into ``self.tokens``.

        Skips ``//`` line comments and ``/* */`` block comments (including
        multi-line ones). On an invalid token, prints the offending
        location with a caret to stderr and exits with
        ``EXIT_CODE_INVALID_TOKEN``.
        """
        self.tokens = []
        in_multicomment = False  # True while inside a /* */ spanning lines
        for line_no, line in enumerate(self.lines):
            pos = 0  # current position in line
            line_width = len(line)
            if in_multicomment:
                multicomment_close_idx = line.find("*/")
                if multicomment_close_idx == -1:
                    # this line is entirely comment
                    continue
                # skip until comment ends
                pos = multicomment_close_idx + 2
                in_multicomment = False
            # advance in line until exhausted
            while pos < line_width:
                rem = line[pos:]  # remainder of line
                ws_count = len(rem) - len(rem.lstrip())
                if ws_count > 0:
                    # line begins with whitespace
                    pos += ws_count
                    continue
                elif rem.startswith("/*"):
                    multicomment_close_idx = rem.find("*/")
                    if multicomment_close_idx == -1:
                        in_multicomment = True
                        break  # this line is all comment beyond this point
                    # skip until comment ends on the same line
                    pos += multicomment_close_idx + 2
                    # BUGFIX: re-run the scan loop so whitespace (or another
                    # comment) right after "*/" is skipped instead of being
                    # handed to Token.from_line as an invalid token.
                    continue
                elif rem.startswith("//"):
                    break
                rem = line[pos:]  # remainder of line
                if not rem:
                    continue
                token = Token.from_line(rem, line_no, pos, extensions=self._extensions)
                if token is not None:
                    self.tokens.append(token)
                    pos += token.length()
                else:
                    # invalid token: report file:line, the line, and a caret
                    print_err(f"{self._fp}:{line_no + 1}")
                    print_err(line)
                    print_err(" " * pos + f"^ Invalid token")
                    exit(EXIT_CODE_INVALID_TOKEN)

    def parse(self):
        """Build the syntax tree from ``self.tokens`` and print it.

        Exits with ``EXIT_CODE_SYNTAX_ERROR`` on a syntax error, or with
        ``EXIT_CODE_EOF`` when the token stream ends unexpectedly.
        """
        try:
            syntax_tree = Class.from_tokens(self.tokens)
            syntax_tree.print_verbose()
        except JackSyntaxError as err:
            print_err(f"{self._fp}:{err.token.line_no + 1}")
            print_err(self.lines[err.token.line_no])
            print_err(" " * err.token.column + "^ " + err.message)
            exit(EXIT_CODE_SYNTAX_ERROR)
        except IndexError:
            # ran off the end of the token list — unexpected end of file;
            # point the caret past the last character of the last line
            last_line = self.lines[-1]
            print_err(f"{self._fp}:{len(self.lines)}")
            print_err(last_line)
            print_err(" " * len(last_line) + "^ Unexpected EOF")
            exit(EXIT_CODE_EOF)
|