summaryrefslogtreecommitdiff
path: root/projects/hack-as/hack-as.py
diff options
context:
space:
mode:
Diffstat (limited to 'projects/hack-as/hack-as.py')
-rw-r--r--projects/hack-as/hack-as.py269
1 files changed, 269 insertions, 0 deletions
diff --git a/projects/hack-as/hack-as.py b/projects/hack-as/hack-as.py
new file mode 100644
index 0000000..bb70abc
--- /dev/null
+++ b/projects/hack-as/hack-as.py
@@ -0,0 +1,269 @@
+from sys import stderr
+from argparse import ArgumentParser
+
# Process exit status used for each category of assembler failure.
# Codes are assigned consecutively from 1 in the order listed.
EXIT_CODES = {
    reason: status
    for status, reason in enumerate(
        (
            "file_error",
            "illegal_char",
            "size_exceeded",
            "syntax_error",
            "addr_error",
            "symbol_error",
        ),
        start=1,
    )
}

# Number of entries in the predefined symbol table.
PREDEFINED_SYMBOL_CNT = 23
# Upper bound on the number of instructions accepted from one source file.
MAX_INST_LEN = 1 << 15
# Largest address an A-instruction may carry.
MAX_ADDR = (1 << 15) - 1
+
# Symbols that are always defined, mapped to their fixed addresses.
# Insertion order: the five pointer names, then R0..R15, then the two
# memory-mapped I/O names.
PREDEFINED_SYMBOLS = {
    "SP": 0,
    "LCL": 1,
    "ARG": 2,
    "THIS": 3,
    "THAT": 4,
    **{f"R{n}": n for n in range(16)},
    "SCREEN": 16384,
    "KBD": 24576,
}
+
# Destination mnemonics. The position of a mnemonic in the tuple is its
# 3-bit code, which is stored already shifted into bits 3..5.
DEST = {
    mnemonic: code << 3
    for code, mnemonic in enumerate(("", "M", "D", "MD", "A", "AM", "AD", "AMD"))
}

# Jump mnemonics. The position of a mnemonic in the tuple is its 3-bit
# code, which occupies bits 0..2.
JMP = {
    mnemonic: code
    for code, mnemonic in enumerate(
        ("", "JGT", "JEQ", "JGE", "JLT", "JNE", "JLE", "JMP")
    )
}
+
+
# Computation mnemonics mapped to their 7-bit code, stored already
# shifted into bits 6..12. Commutative operations are listed under both
# operand orders and share one encoding.
COMP = {
    mnemonic: int(bits, 2) << 6
    for mnemonic, bits in (
        ("0", "0101010"),
        ("1", "0111111"),
        ("-1", "0111010"),
        ("D", "0001100"),
        ("A", "0110000"),
        ("M", "1110000"),
        ("!D", "0001101"),
        ("!A", "0110001"),
        ("!M", "1110001"),
        ("-D", "0001111"),
        ("-A", "0110011"),
        ("-M", "1110011"),
        ("D+1", "0011111"),
        ("A+1", "0110111"),
        ("M+1", "1110111"),
        ("D-1", "0001110"),
        ("A-1", "0110010"),
        ("M-1", "1110010"),
        ("D+A", "0000010"),
        ("A+D", "0000010"),
        ("D+M", "1000010"),
        ("M+D", "1000010"),
        ("D-A", "0010011"),
        ("D-M", "1010011"),
        ("A-D", "0000111"),
        ("M-D", "1000111"),
        ("D&A", "0000000"),
        ("A&D", "0000000"),
        ("D&M", "1000000"),
        ("M&D", "1000000"),
        ("D|A", "0010101"),
        ("A|D", "0010101"),
        ("D|M", "1010101"),
        ("M|D", "1010101"),
    )
}
+
+
def find_illegal_symbol_char(symbol):
    """Return the first character of *symbol* that is not allowed, or None.

    A character is allowed when it is ASCII and is either alphanumeric or
    one of ``_``, ``.``, ``$`` and ``:``.
    """
    extra_allowed = "_.$:"

    def is_allowed(ch):
        return ch.isascii() and (ch.isalnum() or ch in extra_allowed)

    return next((ch for ch in symbol if not is_allowed(ch)), None)
+
+
def print_symbols(symbols):
    """Print the *symbols* mapping to stdout as a tab-separated table.

    A banner and a ``label``/``addr`` header row are printed first, then
    one row per entry in the mapping's iteration order.
    """
    print("====== SYMBOLS =====")
    print("label\taddr")
    for name in symbols:
        print(name, symbols[name], sep="\t")
+
+
def print_binary_and_asm(binary, asm_lines):
    """Print each instruction word next to the assembly text it came from.

    *binary* and *asm_lines* are expected to have the same length; if they
    do not, output stops at the end of the shorter one. Each row shows the
    zero-based position, the word as 16 binary digits, and the text.
    """
    print("====== RESULTS =====")
    print("addr\tbinary \tinst")
    for position, (word, text) in enumerate(zip(binary, asm_lines)):
        print(f"{position}\t{word:016b}\t{text}")
+
+
def write_binary(file, binary):
    """Write every word in *binary* to *file* as one line of 16 binary digits.

    *file* must be a writable text stream; each line ends with a newline.
    """
    file.writelines(f"{word:016b}\n" for word in binary)
+
+
def assemble_inst(asm_line):
    """Translate one symbol-free assembly instruction into its integer word.

    *asm_line* must already have whitespace and comments removed, and any
    symbolic ``@name`` must already be replaced by a decimal address.

    * ``@<decimal>`` (A-instruction) returns the address itself.
    * ``dest=comp;jmp`` (C-instruction, ``dest=`` and ``;jmp`` optional)
      returns ``0xE000`` OR-ed with the DEST, COMP and JMP table entries.

    On invalid input an error message is printed to stderr and SystemExit
    is raised with ``EXIT_CODES["addr_error"]`` (bad or out-of-range
    address) or ``EXIT_CODES["syntax_error"]`` (unknown dest, jump or comp).
    """
    if asm_line.startswith("@"):
        addr_str = asm_line[1:]
        # Accept plain ASCII digits only. This rejects "", "-5" and "foo"
        # up front instead of crashing in int() or returning a negative word.
        if not (addr_str.isascii() and addr_str.isdecimal()):
            print(f"Invalid address: {addr_str}", file=stderr)
            raise SystemExit(EXIT_CODES["addr_error"])

        addr = int(addr_str)
        if addr > MAX_ADDR:
            print(f"Address out of range: {addr}", file=stderr)
            raise SystemExit(EXIT_CODES["addr_error"])

        return addr

    # C-instruction: dest=comp;jmp
    dest, equals, rhs = asm_line.partition("=")
    if not equals:
        # No "=" at all: the whole line is comp[;jmp] with no destination.
        # Testing the separator (not whether rhs is empty) keeps "D=" from
        # being silently reinterpreted as the bare computation "D".
        rhs, dest = dest, ""

    comp, _, jmp = rhs.partition(";")

    if dest not in DEST:
        print(f"Invalid destination: {dest}", file=stderr)
        raise SystemExit(EXIT_CODES["syntax_error"])

    if jmp not in JMP:
        print(f"Invalid jump instruction: {jmp}", file=stderr)
        raise SystemExit(EXIT_CODES["syntax_error"])

    if comp not in COMP:
        print(f"Invalid computation: {comp}", file=stderr)
        raise SystemExit(EXIT_CODES["syntax_error"])

    return 0xE000 | DEST[dest] | JMP[jmp] | COMP[comp]
+
+
def _fail(message, code_key):
    """Print *message* to stderr and exit with ``EXIT_CODES[code_key]``."""
    print(message, file=stderr)
    raise SystemExit(EXIT_CODES[code_key])


def _check_symbol(symbol):
    """Exit with an error unless *symbol* is a well-formed user symbol."""
    if not symbol:
        _fail("Symbol cannot be empty", "symbol_error")

    illegal_char = find_illegal_symbol_char(symbol)
    if illegal_char is not None:
        _fail(f"Illegal character: {illegal_char}", "illegal_char")

    # A leading digit would make the symbol look like a numeric address.
    if symbol[0].isdigit():
        _fail(f"Symbol {symbol} cannot begin with a digit", "symbol_error")


def _read_source(input_fn):
    """Read *input_fn*; return ``(instruction_lines, label_addresses)``."""
    try:
        input_file = open(input_fn, "r")
    except OSError:
        _fail(f"Cannot open input file: {input_fn}", "file_error")

    asm_lines = []
    labels = {}
    with input_file:
        for line in input_file:
            # Drop the comment, then every whitespace character.
            asm_line = "".join(line.partition("//")[0].split())
            if not asm_line:
                continue

            if asm_line.startswith("(") and asm_line.endswith(")"):
                label = asm_line[1:-1]
                _check_symbol(label)
                if label in labels or label in PREDEFINED_SYMBOLS:
                    _fail(f"Symbol {label} already exists", "symbol_error")

                # A label names the address of the next instruction.
                labels[label] = len(asm_lines)
                continue

            asm_lines.append(asm_line)
            if len(asm_lines) > MAX_INST_LEN:
                _fail(
                    f"Max number of instruction ({MAX_INST_LEN}) exceeded",
                    "size_exceeded",
                )

    return asm_lines, labels


def _resolve_symbols(asm_lines, user_symbols):
    """Replace each symbolic ``@name`` in *asm_lines* with its address, in place.

    Names found in neither table are treated as new variables: they are
    added to *user_symbols* with consecutive addresses starting at 16.
    """
    next_var_addr = 16
    for line_no, asm_line in enumerate(asm_lines):
        if not asm_line.startswith("@"):
            continue

        addr_str = asm_line[1:]  # whatever comes after the @
        if not addr_str:
            _fail("Address cannot be empty", "addr_error")

        if addr_str.isascii() and addr_str.isdecimal():
            # Already a decimal constant; nothing to resolve.
            continue

        if addr_str in PREDEFINED_SYMBOLS:
            resolved = PREDEFINED_SYMBOLS[addr_str]
        elif addr_str in user_symbols:
            resolved = user_symbols[addr_str]
        else:
            # First use of a variable: validate the name before allocating.
            _check_symbol(addr_str)
            resolved = user_symbols[addr_str] = next_var_addr
            next_var_addr += 1

        asm_lines[line_no] = f"@{resolved}"


def assembler(input_fn, verbose):
    """Assemble the source file *input_fn* and write the result next to it.

    The output name is *input_fn* with a trailing ``.asm`` replaced by
    ``.hack``, or with ``.hack`` appended when there is no such suffix.
    Each output line is one instruction as 16 binary digits. When *verbose*
    is true, the user symbol table and a listing are printed to stdout
    before the file is written.

    On any error a message is printed to stderr and SystemExit is raised
    with one of the EXIT_CODES values: ``file_error`` when the input or
    output file cannot be opened, ``illegal_char`` / ``symbol_error`` for a
    bad or duplicate symbol, ``size_exceeded`` for too many instructions,
    ``addr_error`` for an empty ``@`` operand, plus whatever
    ``assemble_inst`` reports for an individual instruction.
    """
    asm_lines, user_symbols = _read_source(input_fn)
    _resolve_symbols(asm_lines, user_symbols)

    binary = [assemble_inst(asm_line) for asm_line in asm_lines]

    if verbose:
        print_symbols(user_symbols)
        print_binary_and_asm(binary, asm_lines)

    output_fn = (
        input_fn[:-4] + ".hack" if input_fn.endswith(".asm") else input_fn + ".hack"
    )
    try:
        output_file = open(output_fn, "w")
    except OSError:
        # Stop here: continuing would only fail on the missing file object.
        _fail(f"Cannot open output file: {output_fn}", "file_error")

    with output_file:
        write_binary(output_file, binary)

    print(f"Binary written to {output_fn}")
+
+
if __name__ == "__main__":
    # Command-line entry point: one positional source file, optional -v.
    cli = ArgumentParser()
    cli.add_argument("-v", "--verbose", action="store_true", help="verbose mode")
    cli.add_argument("input_fn", help="input file in assembly")
    options = cli.parse_args()
    assembler(options.input_fn, options.verbose)