# Provenance: projects/hack-as/hack-as.py as added by commit
# 6b9d8f151cc04a0590a7fed80ae5b8eb3928952a ("Move hack-as up one directory",
# Frederick Yin, 2022-08-21).
"""Two-pass assembler that turns a Hack ``.asm`` file into a ``.hack`` file.

The first pass strips comments/whitespace and records ``(LABEL)`` positions;
the second pass replaces every ``@symbol`` with a numeric address, allocating
new variables from address 16 upwards.  Each resulting instruction is then
encoded as a 16-bit word and written as one line of ``0``/``1`` characters.

Every error is reported on stderr and terminates the process with the
matching code from ``EXIT_CODES``.
"""

import sys
from sys import stderr
from argparse import ArgumentParser

EXIT_CODES = {
    "file_error": 1,
    "illegal_char": 2,
    "size_exceeded": 3,
    "syntax_error": 4,
    "addr_error": 5,
    "symbol_error": 6,
}

PREDEFINED_SYMBOL_CNT = 23
MAX_INST_LEN = 32768
MAX_ADDR = 32767

PREDEFINED_SYMBOLS = {
    "SP": 0,
    "LCL": 1,
    "ARG": 2,
    "THIS": 3,
    "THAT": 4,
    "R0": 0,
    "R1": 1,
    "R2": 2,
    "R3": 3,
    "R4": 4,
    "R5": 5,
    "R6": 6,
    "R7": 7,
    "R8": 8,
    "R9": 9,
    "R10": 10,
    "R11": 11,
    "R12": 12,
    "R13": 13,
    "R14": 14,
    "R15": 15,
    "SCREEN": 16384,
    "KBD": 24576,
}

# Destination field, already shifted into bits 3..5 of the instruction word.
DEST = {
    "": 0b000 << 3,
    "M": 0b001 << 3,
    "D": 0b010 << 3,
    "MD": 0b011 << 3,
    "A": 0b100 << 3,
    "AM": 0b101 << 3,
    "AD": 0b110 << 3,
    "AMD": 0b111 << 3,
}

# Jump field, occupying bits 0..2 of the instruction word.
JMP = {
    "": 0b000,
    "JGT": 0b001,
    "JEQ": 0b010,
    "JGE": 0b011,
    "JLT": 0b100,
    "JNE": 0b101,
    "JLE": 0b110,
    "JMP": 0b111,
}


# Computation field (7 bits), already shifted into bits 6..12.
COMP = {
    "0": 0b0101010 << 6,
    "1": 0b0111111 << 6,
    "-1": 0b0111010 << 6,
    "D": 0b0001100 << 6,
    "A": 0b0110000 << 6,
    "M": 0b1110000 << 6,
    "!D": 0b0001101 << 6,
    "!A": 0b0110001 << 6,
    "!M": 0b1110001 << 6,
    "-D": 0b0001111 << 6,
    "-A": 0b0110011 << 6,
    "-M": 0b1110011 << 6,
    "D+1": 0b0011111 << 6,
    "A+1": 0b0110111 << 6,
    "M+1": 0b1110111 << 6,
    "D-1": 0b0001110 << 6,
    "A-1": 0b0110010 << 6,
    "M-1": 0b1110010 << 6,
    "D+A": 0b0000010 << 6,
    "A+D": 0b0000010 << 6,
    "D+M": 0b1000010 << 6,
    "M+D": 0b1000010 << 6,
    "D-A": 0b0010011 << 6,
    "D-M": 0b1010011 << 6,
    "A-D": 0b0000111 << 6,
    "M-D": 0b1000111 << 6,
    "D&A": 0b0000000 << 6,
    "A&D": 0b0000000 << 6,
    "D&M": 0b1000000 << 6,
    "M&D": 0b1000000 << 6,
    "D|A": 0b0010101 << 6,
    "A|D": 0b0010101 << 6,
    "D|M": 0b1010101 << 6,
    "M|D": 0b1010101 << 6,
}


def _fail(message, code_name):
    """Print *message* to stderr and exit with ``EXIT_CODES[code_name]``."""
    print(message, file=stderr)
    # sys.exit rather than the site-provided ``exit`` builtin, which is
    # missing when the interpreter runs without the site module.
    sys.exit(EXIT_CODES[code_name])


def find_illegal_symbol_char(symbol):
    """Return the first character of *symbol* not allowed in a symbol name.

    Allowed characters are ASCII letters and digits plus ``_ . $ :``.
    Returns ``None`` when every character is allowed.
    """
    for c in symbol:
        if (not c.isascii()) or (not (c.isalnum() or c in "_.$:")):
            return c

    return None


def print_symbols(symbols):
    """Print the label/variable table as tab-separated ``label addr`` rows."""
    print("====== SYMBOLS =====")
    print("label\taddr")
    for label, addr in symbols.items():
        print(f"{label}\t{addr}")


def print_binary_and_asm(binary, asm_lines):
    """Print each instruction's address, 16-bit encoding and assembly text.

    ``binary`` and ``asm_lines`` are assumed to be of the same length.
    """
    print("====== RESULTS =====")
    print("addr\tbinary \tinst")
    for line_no, (word, asm) in enumerate(zip(binary, asm_lines)):
        print(f"{line_no}\t{word:016b}\t{asm}")


def write_binary(file, binary):
    """Write every word in *binary* to *file* as a 16-character bit string."""
    file.writelines(f"{inst:016b}\n" for inst in binary)


def assemble_inst(asm_line):
    """Encode one whitespace-free, symbol-free instruction as an integer.

    ``@<decimal>`` yields the address itself (A-instruction); anything else
    is parsed as ``dest=comp;jmp`` where ``dest=`` and ``;jmp`` are optional
    (C-instruction).

    Exits with ``addr_error`` for a non-decimal or out-of-range address and
    with ``syntax_error`` for an unknown dest, comp or jump mnemonic.
    """
    if asm_line.startswith("@"):
        # A instruction
        addr_str = asm_line[1:]
        # Reject anything int() would otherwise choke on or silently accept
        # (symbols, signs, underscores, non-ASCII digits).
        if not (addr_str.isascii() and addr_str.isdecimal()):
            _fail(f"Invalid address: {addr_str}", "addr_error")

        addr = int(addr_str)
        if addr > MAX_ADDR:
            _fail(f"Address out of range: {addr}", "addr_error")

        return addr

    # dest=comp;jmp
    dest, _, rhs = asm_line.partition("=")
    if not rhs:
        # no "=" present: the whole line is comp[;jmp]
        rhs = dest
        dest = ""

    comp, _, jmp = rhs.partition(";")
    if dest not in DEST:
        _fail(f"Invalid destination: {dest}", "syntax_error")

    if jmp not in JMP:
        _fail(f"Invalid jump instruction: {jmp}", "syntax_error")

    if comp not in COMP:
        _fail(f"Invalid computation: {comp}", "syntax_error")

    # 0xE000 sets the three high bits that mark a C-instruction.
    return 0xE000 | DEST[dest] | JMP[jmp] | COMP[comp]


def _read_source(input_file):
    """First pass: return ``(instruction_lines, label_table)`` from *input_file*."""
    asm_lines = []
    labels = {}
    for line in input_file:
        line = line.rstrip("\n")
        # throw away comment and whitespace
        asm_line = line.partition("//")[0].replace(" ", "").replace("\t", "")
        if not asm_line:
            # skip blank or comment line
            continue

        if asm_line.startswith("(") and asm_line.endswith(")"):
            label = asm_line[1:-1]
            illegal_char = find_illegal_symbol_char(label)
            if illegal_char is not None:
                _fail(f"Illegal character: {illegal_char}", "illegal_char")

            if label in labels or label in PREDEFINED_SYMBOLS:
                _fail(f"Symbol {label} already exists", "symbol_error")

            # a label names the address of the next instruction
            labels[label] = len(asm_lines)
        else:
            asm_lines.append(asm_line)
            if len(asm_lines) > MAX_INST_LEN:
                _fail(
                    f"Max number of instruction ({MAX_INST_LEN}) exceeded",
                    "size_exceeded",
                )

    return asm_lines, labels


def _resolve_symbols(asm_lines, user_symbols):
    """Second pass: rewrite every ``@symbol`` in *asm_lines* to ``@<address>``.

    Both arguments are modified in place; unknown symbols are added to
    *user_symbols* as variables.
    """
    addr = 16  # variable addresses start at 16
    for line_no, asm_line in enumerate(asm_lines):
        if not asm_line.startswith("@"):
            continue

        addr_str = asm_line[1:]  # whatever comes after the @
        if not addr_str:
            _fail("Address cannot be empty", "addr_error")

        if addr_str.isascii() and addr_str.isdecimal():
            # address is decimal constant
            continue

        # Apply the same character rules as for labels so that typos such as
        # "@-5" are reported instead of silently becoming a variable.
        illegal_char = find_illegal_symbol_char(addr_str)
        if illegal_char is not None:
            _fail(f"Illegal character: {illegal_char}", "illegal_char")

        if addr_str in PREDEFINED_SYMBOLS:
            asm_lines[line_no] = f"@{PREDEFINED_SYMBOLS[addr_str]}"
        elif addr_str in user_symbols:
            asm_lines[line_no] = f"@{user_symbols[addr_str]}"
        else:
            user_symbols[addr_str] = addr
            asm_lines[line_no] = f"@{addr}"
            addr += 1


def assembler(input_fn, verbose):
    """Assemble the file *input_fn* and write the result next to it.

    The output name is *input_fn* with a trailing ``.asm`` replaced by
    ``.hack`` (or ``.hack`` appended when there is no ``.asm`` suffix).
    When *verbose* is true the symbol table and an address/binary/assembly
    listing are printed before the file is written.

    Exits with ``file_error`` if either file cannot be opened; see
    ``EXIT_CODES`` for the codes used for source errors.
    """
    try:
        input_file = open(input_fn, "r")
    except OSError:
        _fail(f"Cannot open input file: {input_fn}", "file_error")

    # ``with`` closes the handle even when a source error ends the process.
    with input_file:
        asm_lines, user_symbols = _read_source(input_file)

    _resolve_symbols(asm_lines, user_symbols)

    binary = [assemble_inst(asm_line) for asm_line in asm_lines]

    if verbose:
        print_symbols(user_symbols)
        print_binary_and_asm(binary, asm_lines)

    output_fn = (
        input_fn[:-4] + ".hack" if input_fn.endswith(".asm") else input_fn + ".hack"
    )
    try:
        output_file = open(output_fn, "w")
    except OSError:
        _fail(f"Cannot open output file: {output_fn}", "file_error")

    with output_file:
        write_binary(output_file, binary)

    print(f"Binary written to {output_fn}")


if __name__ == "__main__":
    parser = ArgumentParser()
    parser.add_argument("-v", "--verbose", action="store_true", help="verbose mode")
    parser.add_argument("input_fn", help="input file in assembly")
    args = parser.parse_args()
    assembler(args.input_fn, args.verbose)