"""Two-pass assembler that translates a symbolic .asm file into a .hack file.

Each output line is one 16-bit instruction written as text ('0'/'1').
Errors are reported on stderr and terminate the process with one of the
codes in EXIT_CODES.
"""

import sys
from argparse import ArgumentParser
from sys import stderr

# Process exit status for each class of failure.
EXIT_CODES = {
    "file_error": 1,
    "illegal_char": 2,
    "size_exceeded": 3,
    "syntax_error": 4,
    "addr_error": 5,
    "symbol_error": 6,
}

PREDEFINED_SYMBOL_CNT = 23
MAX_INST_LEN = 32768
MAX_ADDR = 32767

# Addresses handed out to variables start right after R0..R15.
_FIRST_VARIABLE_ADDR = 16

PREDEFINED_SYMBOLS = {
    "SP": 0,
    "LCL": 1,
    "ARG": 2,
    "THIS": 3,
    "THAT": 4,
    "R0": 0,
    "R1": 1,
    "R2": 2,
    "R3": 3,
    "R4": 4,
    "R5": 5,
    "R6": 6,
    "R7": 7,
    "R8": 8,
    "R9": 9,
    "R10": 10,
    "R11": 11,
    "R12": 12,
    "R13": 13,
    "R14": 14,
    "R15": 15,
    "SCREEN": 16384,
    "KBD": 24576,
}

# Destination mnemonics, already shifted into bits 3..5 of the instruction.
DEST = {
    "": 0b000 << 3,
    "M": 0b001 << 3,
    "D": 0b010 << 3,
    "MD": 0b011 << 3,
    "A": 0b100 << 3,
    "AM": 0b101 << 3,
    "AD": 0b110 << 3,
    "AMD": 0b111 << 3,
}

# Jump mnemonics, occupying bits 0..2 of the instruction.
JMP = {
    "": 0b000,
    "JGT": 0b001,
    "JEQ": 0b010,
    "JGE": 0b011,
    "JLT": 0b100,
    "JNE": 0b101,
    "JLE": 0b110,
    "JMP": 0b111,
}

# Computation mnemonics, already shifted into bits 6..12 of the instruction.
COMP = {
    "0": 0b0101010 << 6,
    "1": 0b0111111 << 6,
    "-1": 0b0111010 << 6,
    "D": 0b0001100 << 6,
    "A": 0b0110000 << 6,
    "M": 0b1110000 << 6,
    "!D": 0b0001101 << 6,
    "!A": 0b0110001 << 6,
    "!M": 0b1110001 << 6,
    "-D": 0b0001111 << 6,
    "-A": 0b0110011 << 6,
    "-M": 0b1110011 << 6,
    "D+1": 0b0011111 << 6,
    "A+1": 0b0110111 << 6,
    "M+1": 0b1110111 << 6,
    "D-1": 0b0001110 << 6,
    "A-1": 0b0110010 << 6,
    "M-1": 0b1110010 << 6,
    "D+A": 0b0000010 << 6,
    "A+D": 0b0000010 << 6,
    "D+M": 0b1000010 << 6,
    "M+D": 0b1000010 << 6,
    "D-A": 0b0010011 << 6,
    "D-M": 0b1010011 << 6,
    "A-D": 0b0000111 << 6,
    "M-D": 0b1000111 << 6,
    "D&A": 0b0000000 << 6,
    "A&D": 0b0000000 << 6,
    "D&M": 0b1000000 << 6,
    "M&D": 0b1000000 << 6,
    "D|A": 0b0010101 << 6,
    "A|D": 0b0010101 << 6,
    "D|M": 0b1010101 << 6,
    "M|D": 0b1010101 << 6,
}


def _fail(message, code_key):
    """Print *message* to stderr and exit with ``EXIT_CODES[code_key]``."""
    print(message, file=stderr)
    # sys.exit rather than the builtin ``exit``: the latter is injected by the
    # ``site`` module and is absent when Python runs with -S.
    sys.exit(EXIT_CODES[code_key])


def find_illegal_symbol_char(symbol):
    """Return the first character of *symbol* not allowed in a symbol.

    Allowed characters are ASCII letters and digits plus ``_ . $ :``.
    Returns None when every character is allowed (including for "").
    """
    for c in symbol:
        if not (c.isascii() and (c.isalnum() or c in "_.$:")):
            return c
    return None


def _check_symbol(symbol):
    """Exit with an error unless *symbol* is a well-formed user symbol."""
    if not symbol:
        _fail("Symbol cannot be empty", "symbol_error")
    illegal_char = find_illegal_symbol_char(symbol)
    if illegal_char is not None:
        _fail(f"Illegal character: {illegal_char}", "illegal_char")
    # A leading digit would make the symbol ambiguous with a numeric address.
    if symbol[0].isdigit():
        _fail(f"Symbol {symbol} cannot begin with a digit", "symbol_error")


def print_symbols(symbols):
    """Print the label/address table *symbols* (a dict) to stdout."""
    print("====== SYMBOLS =====")
    print("label\taddr")
    for label, addr in symbols.items():
        print(f"{label}\t{addr}")


def print_binary_and_asm(binary, asm_lines):
    """Print each encoded instruction next to its assembly text.

    binary and asm_lines are assumed to be of the same length.
    """
    print("====== RESULTS =====")
    print("addr\tbinary \tinst")
    for line_no, (word, asm) in enumerate(zip(binary, asm_lines)):
        print(f"{line_no}\t{word:016b}\t{asm}")


def write_binary(file, binary):
    """Write every instruction in *binary* to *file* as a 16-digit bit string."""
    file.writelines(f"{inst:016b}\n" for inst in binary)


def assemble_inst(asm_line):
    """Encode one whitespace-free, symbol-free instruction as an integer.

    ``@<decimal>`` yields the address itself; anything else is parsed as
    ``dest=comp;jmp`` where ``dest=`` and ``;jmp`` are optional.

    Exits with "addr_error" for a non-decimal or out-of-range address and
    with "syntax_error" for an unknown or missing dest/comp/jmp field.
    """
    if asm_line.startswith("@"):
        # A instruction
        addr_str = asm_line[1:]
        if not (addr_str.isascii() and addr_str.isdecimal()):
            _fail(f"Invalid address: {addr_str}", "addr_error")
        addr = int(addr_str)
        if addr > MAX_ADDR:
            _fail(f"Address out of range: {addr}", "addr_error")
        return addr

    # C instruction: dest=comp;jmp
    dest, eq_sep, rhs = asm_line.partition("=")
    if not eq_sep:
        # No '=' at all: the whole line is comp[;jmp].
        rhs = dest
        dest = ""
    comp, jmp_sep, jmp = rhs.partition(";")

    # A separator with nothing on its far side ("=D", "D;") is malformed even
    # though the empty mnemonic is itself a valid table key.
    if dest not in DEST or (eq_sep and not dest):
        _fail(f"Invalid destination: {dest}", "syntax_error")
    if jmp not in JMP or (jmp_sep and not jmp):
        _fail(f"Invalid jump instruction: {jmp}", "syntax_error")
    if comp not in COMP:
        _fail(f"Invalid computation: {comp}", "syntax_error")

    return 0xE000 | DEST[dest] | JMP[jmp] | COMP[comp]


def _parse_source(lines):
    """First pass: strip comments/whitespace and collect label addresses.

    Returns ``(asm_lines, labels)`` where asm_lines holds the instruction
    texts in order and labels maps each ``(NAME)`` to the index of the
    instruction that follows it.
    """
    asm_lines = []
    labels = {}
    for raw_line in lines:
        # throw away comment and whitespace
        asm_line = "".join(raw_line.partition("//")[0].split())
        if not asm_line:
            continue

        if asm_line.startswith("(") and asm_line.endswith(")"):
            label = asm_line[1:-1]
            _check_symbol(label)
            if label in labels or label in PREDEFINED_SYMBOLS:
                _fail(f"Symbol {label} already exists", "symbol_error")
            labels[label] = len(asm_lines)
            continue

        asm_lines.append(asm_line)
        if len(asm_lines) > MAX_INST_LEN:
            _fail(
                f"Max number of instruction ({MAX_INST_LEN}) exceeded",
                "size_exceeded",
            )
    return asm_lines, labels


def _resolve_symbols(asm_lines, user_symbols):
    """Second pass: rewrite every ``@symbol`` in place as ``@<address>``.

    Unknown symbols are treated as variables: they are added to
    user_symbols and given consecutive addresses in order of first use.
    """
    next_addr = _FIRST_VARIABLE_ADDR
    for line_no, asm_line in enumerate(asm_lines):
        if not asm_line.startswith("@"):
            continue
        addr_str = asm_line[1:]  # whatever comes after the @
        if not addr_str:
            _fail("Address cannot be empty", "addr_error")
        if addr_str.isascii() and addr_str.isdecimal():
            # address is decimal constant
            continue

        if addr_str in PREDEFINED_SYMBOLS:
            resolved = PREDEFINED_SYMBOLS[addr_str]
        elif addr_str in user_symbols:
            resolved = user_symbols[addr_str]
        else:
            _check_symbol(addr_str)
            resolved = user_symbols[addr_str] = next_addr
            next_addr += 1
        asm_lines[line_no] = f"@{resolved}"


def assembler(input_fn, verbose):
    """Assemble the file *input_fn* and write the result next to it.

    The output name is input_fn with a trailing ".asm" replaced by ".hack"
    (or ".hack" appended when there is no such suffix). When *verbose* is
    true the symbol table and the encoded program are printed first.

    Any failure is reported on stderr and ends the process through
    SystemExit with the matching EXIT_CODES value.
    """
    try:
        input_file = open(input_fn, "r")
    except OSError:
        _fail(f"Cannot open input file: {input_fn}", "file_error")
    with input_file:
        asm_lines, user_symbols = _parse_source(input_file)

    _resolve_symbols(asm_lines, user_symbols)
    binary = [assemble_inst(asm_line) for asm_line in asm_lines]

    if verbose:
        print_symbols(user_symbols)
        print_binary_and_asm(binary, asm_lines)

    stem = input_fn[:-4] if input_fn.endswith(".asm") else input_fn
    output_fn = stem + ".hack"
    try:
        output_file = open(output_fn, "w")
    except OSError:
        _fail(f"Cannot open output file: {output_fn}", "file_error")
    with output_file:
        write_binary(output_file, binary)
    print(f"Binary written to {output_fn}")


if __name__ == "__main__":
    parser = ArgumentParser()
    parser.add_argument("-v", "--verbose", action="store_true", help="verbose mode")
    parser.add_argument("input_fn", help="input file in assembly")
    args = parser.parse_args()
    assembler(args.input_fn, args.verbose)