From 6b9d8f151cc04a0590a7fed80ae5b8eb3928952a Mon Sep 17 00:00:00 2001 From: Frederick Yin Date: Sun, 21 Aug 2022 22:08:53 +0800 Subject: Move hack-as up one directory --- projects/06/hack-as/hack-as.c | 384 ------------------------------ projects/06/hack-as/hack-as.min.c | 268 --------------------- projects/06/hack-as/hack-as.py | 269 --------------------- projects/06/hack-as/test/compare.hack | 25 -- projects/06/hack-as/test/conventional.asm | 32 --- projects/06/hack-as/test/valid.asm | 29 --- projects/hack-as/hack-as.c | 384 ++++++++++++++++++++++++++++++ projects/hack-as/hack-as.min.c | 268 +++++++++++++++++++++ projects/hack-as/hack-as.py | 269 +++++++++++++++++++++ projects/hack-as/test/compare.hack | 25 ++ projects/hack-as/test/conventional.asm | 32 +++ projects/hack-as/test/valid.asm | 29 +++ 12 files changed, 1007 insertions(+), 1007 deletions(-) delete mode 100644 projects/06/hack-as/hack-as.c delete mode 100644 projects/06/hack-as/hack-as.min.c delete mode 100644 projects/06/hack-as/hack-as.py delete mode 100644 projects/06/hack-as/test/compare.hack delete mode 100644 projects/06/hack-as/test/conventional.asm delete mode 100644 projects/06/hack-as/test/valid.asm create mode 100644 projects/hack-as/hack-as.c create mode 100644 projects/hack-as/hack-as.min.c create mode 100644 projects/hack-as/hack-as.py create mode 100644 projects/hack-as/test/compare.hack create mode 100644 projects/hack-as/test/conventional.asm create mode 100644 projects/hack-as/test/valid.asm (limited to 'projects') diff --git a/projects/06/hack-as/hack-as.c b/projects/06/hack-as/hack-as.c deleted file mode 100644 index b152fb8..0000000 --- a/projects/06/hack-as/hack-as.c +++ /dev/null @@ -1,384 +0,0 @@ -#include -#include -#include -#include -#include - -#define MAX_ASM_LINE_LEN 64 -#define INST_CHUNK_LEN 64 -#define MAX_INST_LEN 32768 -#define MAX_ADDR 32767 - -#define EXIT_CODE_FILE_ERROR 1 -#define EXIT_CODE_ILLEGAL_CHAR 2 -#define EXIT_CODE_SIZE_EXCEEDED 3 -#define EXIT_CODE_SYNTAX_ERROR 4 -#define EXIT_CODE_ADDR_ERROR 5 - -struct symbol { - char *label; - int addr; -}; - -char find_illegal_symbol_char(char *symbol) { - // symbol should not begin with number - // nand2tetris implementation allows it, but the standard says otherwise - if (*symbol >= '0' && *symbol <= '9') return *symbol; - for (char *c = symbol; *c != '\0'; c++) { - if (!((*c >= 'A' && *c <= 'Z') || (*c >= 'a' && *c <= 'z') - || (*c >= '0' && *c <= '9') - || *c == '_' || *c == '.' || *c == '$' || *c == ':') - ) { - return *c; - } - } - return -1; -} - -void print_symbols(struct symbol *symbols, int cnt) { - printf("====== SYMBOLS =====\nlabel\taddr\n"); - for (int i = 0; i < cnt; i++) { - printf("%s\t%d\n", symbols[i].label, symbols[i].addr); - } -} - -void print_binary_and_asm(uint16_t *binary, char **asm_lines, int cnt) { - printf("\n====== RESULTS =====\naddr\tbinary \tinst\n"); - for (int i = 0; i < cnt; i++) { - char binary_str[17]; - binary_str[16] = '\0'; - for (int b = 0; b < 16; b++) { - binary_str[b] = '0' + (*(binary + i) >> (15 - b)) % 2; - } - printf("%d\t%s\t%s\n", i, binary_str, asm_lines[i]); - } -} - -void write_binary(FILE *file, uint16_t *binary, int cnt) { - for (int i = 0; i < cnt; i++) { - char binary_str[17]; - binary_str[16] = '\0'; - for (int b = 0; b < 16; b++) { - binary_str[b] = '0' + (*(binary + i) >> (15 - b)) % 2; - } - fprintf(file, "%s\n", binary_str); - } -} - -uint16_t assemble_inst(char *asm_line) { - // assemble one line of assembly, terminated with \0 - // labels and variables must be replaced with corresponding addresses beforehand - uint16_t inst = 0; - if (*asm_line == '@') { - // A instruction - char *addr_str = asm_line + 1; - int addr = atoi(addr_str); - if (addr < 0 || addr > MAX_ADDR) { - fprintf(stderr, "Address out of range: %d\n", addr); - exit(EXIT_CODE_ADDR_ERROR); - } - inst = (uint16_t) addr; - } else { - // C instruction - inst = 0xe000; // set 3 MSBs to 1 - char *eq = asm_line; - for (char *c = asm_line; *c != '\0'; c++) { - // find first equal sign (eq == asm_line if not found) - if (*c == '=') { - eq = c; - break; - } - } - - // slice out destination and copy to dest - int dest_len = eq - asm_line; - char *dest = malloc(dest_len + 1); - strncpy(dest, asm_line, dest_len); - dest[dest_len] = '\0'; - if (dest_len == 0) {} // ignore - else if (strcmp(dest, "M") == 0) inst |= 0b001 << 3; - else if (strcmp(dest, "D") == 0) inst |= 0b010 << 3; - else if (strcmp(dest, "MD") == 0) inst |= 0b011 << 3; - else if (strcmp(dest, "A") == 0) inst |= 0b100 << 3; - else if (strcmp(dest, "AM") == 0) inst |= 0b101 << 3; - else if (strcmp(dest, "AD") == 0) inst |= 0b110 << 3; - else if (strcmp(dest, "AMD") == 0) inst |= 0b111 << 3; - else { - fprintf(stderr, "Invalid destination: %s\n", dest); - exit(EXIT_CODE_SYNTAX_ERROR); - } - free(dest); - - char *semi = eq; - for (; *semi != '\0'; semi++) { - // find jump instruction after semicolon (;) (*semi == '\0' if not found) - if (*semi == ';') break; - } - // ignore if there's no semicolon, or there's nothing after it - if (*semi == '\0' || *(semi + 1) == '\0') {} - else if (strcmp(semi + 1, "JGT") == 0) inst |= 0b001; - else if (strcmp(semi + 1, "JEQ") == 0) inst |= 0b010; - else if (strcmp(semi + 1, "JGE") == 0) inst |= 0b011; - else if (strcmp(semi + 1, "JLT") == 0) inst |= 0b100; - else if (strcmp(semi + 1, "JNE") == 0) inst |= 0b101; - else if (strcmp(semi + 1, "JLE") == 0) inst |= 0b110; - else if (strcmp(semi + 1, "JMP") == 0) inst |= 0b111; - else { - fprintf(stderr, "Invalid jump instruction: %s\n", semi + 1); - exit(EXIT_CODE_SYNTAX_ERROR); - } - - // slice out computation and copy to comp - int comp_len = (*eq == '=') ? (semi - eq - 1) : (semi - eq); - char *comp = malloc(comp_len + 1); - strncpy(comp, (*eq == '=') ? (eq + 1) : eq, comp_len); - comp[comp_len] = '\0'; - if (strcmp(comp, "0") == 0) inst |= 0b0101010 << 6; - else if (strcmp(comp, "1") == 0) inst |= 0b0111111 << 6; - else if (strcmp(comp, "-1") == 0) inst |= 0b0111010 << 6; - else if (strcmp(comp, "D") == 0) inst |= 0b0001100 << 6; - else if (strcmp(comp, "A") == 0) inst |= 0b0110000 << 6; - else if (strcmp(comp, "M") == 0) inst |= 0b1110000 << 6; - else if (strcmp(comp, "!D") == 0) inst |= 0b0001101 << 6; - else if (strcmp(comp, "!A") == 0) inst |= 0b0110001 << 6; - else if (strcmp(comp, "!M") == 0) inst |= 0b1110001 << 6; - else if (strcmp(comp, "-D") == 0) inst |= 0b0001111 << 6; - else if (strcmp(comp, "-A") == 0) inst |= 0b0110011 << 6; - else if (strcmp(comp, "-M") == 0) inst |= 0b1110011 << 6; - else if (strcmp(comp, "D+1") == 0) inst |= 0b0011111 << 6; - else if (strcmp(comp, "A+1") == 0) inst |= 0b0110111 << 6; - else if (strcmp(comp, "M+1") == 0) inst |= 0b1110111 << 6; - else if (strcmp(comp, "D-1") == 0) inst |= 0b0001110 << 6; - else if (strcmp(comp, "A-1") == 0) inst |= 0b0110010 << 6; - else if (strcmp(comp, "M-1") == 0) inst |= 0b1110010 << 6; - else if (strcmp(comp, "D+A") == 0 - || strcmp(comp, "A+D") == 0) inst |= 0b0000010 << 6; - else if (strcmp(comp, "D+M") == 0 - || strcmp(comp, "M+D") == 0) inst |= 0b1000010 << 6; - else if (strcmp(comp, "D-A") == 0) inst |= 0b0010011 << 6; - else if (strcmp(comp, "D-M") == 0) inst |= 0b1010011 << 6; - else if (strcmp(comp, "A-D") == 0) inst |= 0b0000111 << 6; - else if (strcmp(comp, "M-D") == 0) inst |= 0b1000111 << 6; - else if (strcmp(comp, "D&A") == 0 - || strcmp(comp, "A&D") == 0) inst |= 0b0000000 << 6; - else if (strcmp(comp, "D&M") == 0 - || strcmp(comp, "M&D") == 0) inst |= 0b1000000 << 6; - else if (strcmp(comp, "D|A") == 0 - || strcmp(comp, "A|D") == 0) inst |= 0b0010101 << 6; - else if (strcmp(comp, "D|M") == 0 - || strcmp(comp, "M|D") == 0) inst |= 0b1010101 << 6; - else { - fprintf(stderr, "Invalid computation: %s\n", comp); - exit(EXIT_CODE_SYNTAX_ERROR); - } - free(comp); - } - return inst; -} - -size_t assembler(char *input_fn, bool verbose) { - // open input file - FILE *input_file = fopen(input_fn, "r"); - if (input_file == NULL) { - fprintf(stderr, "Cannot open input file: %s\n", input_fn); - exit(EXIT_CODE_FILE_ERROR); - } - // find size of input file - fseek(input_file, 0, SEEK_END); - size_t file_size = ftell(input_file); - fseek(input_file, 0, SEEK_SET); - // read input file - char *file_content = malloc(file_size); - fread(file_content, file_size, 1, input_file); - fclose(input_file); - - // strip away comments, labels, blank lines and whitespace from file_content - // resulting in lines of what looks like instructions in assembly but is not necessarily correct - // labels in parentheses are assigned corresponding addresses in ROM, then collected in `symbols` - // the strings are scattered in the heap but asm_lines collects pointers to them - char **asm_lines = calloc(INST_CHUNK_LEN, sizeof(char*)); - int asm_line_cnt = 0; // no. of lines (metaphorically) written into asm_lines - char *asm_line = malloc(MAX_ASM_LINE_LEN + 1); // one line of (probably) assembly - int asm_char_cnt = 0; // no. of chars written into asm_line - struct symbol symbols[MAX_INST_LEN] = { - {"SP", 0}, {"LCL", 1}, {"ARG", 2}, {"THIS", 3}, {"THAT", 4}, - {"R0", 0}, {"R1", 1}, {"R2", 2}, {"R3", 3}, - {"R4", 4}, {"R5", 5}, {"R6", 6}, {"R7", 7}, - {"R8", 8}, {"R9", 9}, {"R10", 10}, {"R11", 11}, - {"R12", 12}, {"R13", 13}, {"R14", 14}, {"R15", 15}, - {"SCREEN", 16384}, {"KBD", 24576}, - }; - const int predef_symbol_cnt = 23; // no. of predefined symbols - int user_symbol_cnt = 0; - for (size_t i = 0; i < file_size; i++) { - switch (file_content[i]) { - case '\n': - // end of line; try to figure out what's in asm_line - if (asm_char_cnt == 0) continue; // skip blank line or comment line - *(asm_line + asm_char_cnt) = '\0'; - if (*asm_line == '(' && *(asm_line + asm_char_cnt - 1) == ')') { - // this line may be a label; extract label from between the parentheses - char *label = malloc(asm_char_cnt - 1); - strncpy(label, asm_line + 1, asm_char_cnt - 2); - free(asm_line); - *(label + asm_char_cnt - 2) = '\0'; - char illegal_char = find_illegal_symbol_char(label); - if (illegal_char != -1) { - fprintf(stderr, "Illegal character: %c\n", illegal_char); - exit(EXIT_CODE_ILLEGAL_CHAR); - } - // TODO: error on repeated label - symbols[predef_symbol_cnt + user_symbol_cnt] = (struct symbol) {label, asm_line_cnt}; - user_symbol_cnt++; - } else { - // this line may be an instruction - // if we used up a chunk, realloc asm_lines - if (asm_line_cnt > 0 && asm_line_cnt % INST_CHUNK_LEN == 0) { - asm_lines = realloc(asm_lines, (asm_line_cnt + INST_CHUNK_LEN) * sizeof(char*)); - } - asm_lines[asm_line_cnt] = asm_line; - asm_line_cnt++; - } - // allocate memory for next line - asm_char_cnt = 0; - asm_line = malloc(MAX_ASM_LINE_LEN + 1); - break; - case '/': - if (i + 1 < file_size && file_content[i + 1] == '/') { - // we encountered a comment - // skip to last char of line - while (i + 1 < file_size && file_content[i + 1] != '\n') i++; - } else { - fprintf(stderr, "Illegal character: /\n"); - exit(EXIT_CODE_ILLEGAL_CHAR); - } - break; - case ' ': - case '\t': - case '\r': - break; // ignore whitespace and CR - default: - *(asm_line + asm_char_cnt) = file_content[i]; - asm_char_cnt++; - if (asm_char_cnt > MAX_ASM_LINE_LEN) { - fprintf(stderr, "Max assembly line length (%d) exceeded\n", MAX_ASM_LINE_LEN); - exit(EXIT_CODE_SIZE_EXCEEDED); - } - } - } - free(asm_line); - free(file_content); - - // find and assign address to variables on the fly - int addr = 16; // variable addresses start at 16 - for (int i = 0; i < asm_line_cnt; i++) { - if (asm_lines[i] == NULL) break; // no more instructions - if (*(asm_lines[i]) != '@') continue; // not an A-instruction - char *addr_str = malloc(strlen(asm_lines[i])); - strcpy(addr_str, asm_lines[i] + 1); // whatever comes after the @ - if (strlen(addr_str) == 0) { - fprintf(stderr, "Address cannot be empty\n"); - exit(EXIT_CODE_SYNTAX_ERROR); - } - bool is_symbol = false; - for (char *c = addr_str; *c != '\0'; c++) { - // search for non-numeric chars in addr_str - if (*c < '0' || *c > '9') { - is_symbol = true; - } - } - if (!is_symbol) { - free(addr_str); - continue; // address is decimal constant - } - char illegal_char = find_illegal_symbol_char(addr_str); - if (illegal_char != -1) { - fprintf(stderr, "Illegal character: %c\n", illegal_char); - exit(EXIT_CODE_ILLEGAL_CHAR); - } - // search for symbol in list - bool found = false; - for (int s = 0; s < predef_symbol_cnt + user_symbol_cnt; s++) { - if (strcmp(addr_str, symbols[s].label) == 0) { - // overwrite asm line with decimal constant - sprintf(asm_lines[i], "@%d", symbols[s].addr); - found = true; - free(addr_str); - break; - } - } - if (!found) { - // add symbol to list - symbols[predef_symbol_cnt + user_symbol_cnt] = (struct symbol) {addr_str, addr}; - sprintf(asm_lines[i], "@%d", addr); - user_symbol_cnt++; - addr++; - } - } - - // start assembling - uint16_t *binary = calloc(32768, 2); - size_t inst_cnt = 0; // current no. of instructions in binary - for (char **line = asm_lines; *line != NULL; line++) { - *(binary + inst_cnt) = assemble_inst(*line); - inst_cnt++; - if (inst_cnt > MAX_INST_LEN) { - fprintf(stderr, "Max number of instruction (%d) exceeded\n", MAX_INST_LEN); - exit(EXIT_CODE_SIZE_EXCEEDED); - } - } - - if (verbose) { - print_symbols(symbols + predef_symbol_cnt, user_symbol_cnt); - print_binary_and_asm(binary, asm_lines, inst_cnt); - printf("\n"); - } - - for (char **line = asm_lines; *line != NULL; line++) free(*line); - free(asm_lines); - for (int s = predef_symbol_cnt; s < predef_symbol_cnt + user_symbol_cnt; s++) { - free(symbols[s].label); - } - - // write binary - // output_fn = input_fn[:-4] + ".hack" if input_fn.endswith(".asm") else input_fn + ".hack" - int input_fn_len = strlen(input_fn); - char *output_fn = malloc(input_fn_len + 6); - strcpy(output_fn, input_fn); - if (input_fn_len >= 4 && strcmp(input_fn + input_fn_len - 4, ".asm") == 0) { - sprintf(output_fn + input_fn_len - 4, ".hack"); - } else { - sprintf(output_fn + input_fn_len, ".hack"); - } - - FILE *output_file = fopen(output_fn, "w"); - if (output_file == NULL) { - fprintf(stderr, "Cannot open output file: %s\n", output_fn); - exit(EXIT_CODE_FILE_ERROR); - } - write_binary(output_file, binary, inst_cnt); - fclose(output_file); - free(binary); - printf("Binary written to %s\n", output_fn); - free(output_fn); - - return inst_cnt; -} - -int main(int argc, char *argv[]) { - char *input_fn = NULL; - bool verbose = false; - for (int i = 1; i < argc; i++) { - if (strcmp(argv[i], "-h") == 0) { - printf("Usage: %s [-v]\n-v -- verbose mode\n", argv[0]); - exit(0); - } else if (strcmp(argv[i], "-v") == 0) { - verbose = true; - } else { - input_fn = argv[i]; - } - } - - assembler(input_fn, verbose); - return 0; -} diff --git a/projects/06/hack-as/hack-as.min.c b/projects/06/hack-as/hack-as.min.c deleted file mode 100644 index 5d7252f..0000000 --- a/projects/06/hack-as/hack-as.min.c +++ /dev/null @@ -1,268 +0,0 @@ -#include -#include -#include -#include -#include - -#define MAX_ASM_LINE_LEN 64 -#define INST_CHUNK_LEN 64 -#define MAX_INST_LEN 32768 -#define MAX_ADDR 32767 - -struct symbol { - char *label; - int addr; -}; - -void write_binary(FILE *file, uint16_t *binary, int cnt) { - for (int i = 0; i < cnt; i++) { - char binary_str[17]; - binary_str[16] = '\0'; - for (int b = 0; b < 16; b++) { - binary_str[b] = '0' + (*(binary + i) >> (15 - b)) % 2; - } - fprintf(file, "%s\n", binary_str); - } -} - -uint16_t assemble_inst(char *asm_line) { - // assemble one line of assembly, terminated with \0 - // labels and variables must be replaced with corresponding addresses beforehand - if (*asm_line == '@') { - // A instruction - return atoi(asm_line + 1); - } else { - // C instruction - uint16_t inst = 0xe000; // set 3 MSBs to 1 - char *eq = asm_line; - for (char *c = asm_line; *c != '\0'; c++) { - // find first equal sign (eq == asm_line if not found) - if (*c == '=') { - eq = c; - break; - } - } - - // slice out destination and copy to dest - int dest_len = eq - asm_line; - char *dest = malloc(dest_len + 1); - strncpy(dest, asm_line, dest_len); - dest[dest_len] = '\0'; - if (dest_len == 0) {} // ignore - else if (strcmp(dest, "M") == 0) inst |= 0b001 << 3; - else if (strcmp(dest, "D") == 0) inst |= 0b010 << 3; - else if (strcmp(dest, "MD") == 0) inst |= 0b011 << 3; - else if (strcmp(dest, "A") == 0) inst |= 0b100 << 3; - else if (strcmp(dest, "AM") == 0) inst |= 0b101 << 3; - else if (strcmp(dest, "AD") == 0) inst |= 0b110 << 3; - else if (strcmp(dest, "AMD") == 0) inst |= 0b111 << 3; - free(dest); - - char *semi = eq; - for (; *semi != '\0'; semi++) { - // find jump instruction after semicolon (;) (*semi == '\0' if not found) - if (*semi == ';') break; - } - // ignore if there's no semicolon, or there's nothing after it - if (*semi == '\0' || *(semi + 1) == '\0') {} - else if (strcmp(semi + 1, "JGT") == 0) inst |= 0b001; - else if (strcmp(semi + 1, "JEQ") == 0) inst |= 0b010; - else if (strcmp(semi + 1, "JGE") == 0) inst |= 0b011; - else if (strcmp(semi + 1, "JLT") == 0) inst |= 0b100; - else if (strcmp(semi + 1, "JNE") == 0) inst |= 0b101; - else if (strcmp(semi + 1, "JLE") == 0) inst |= 0b110; - else if (strcmp(semi + 1, "JMP") == 0) inst |= 0b111; - - // slice out computation and copy to comp - int comp_len = (*eq == '=') ? (semi - eq - 1) : (semi - eq); - char *comp = malloc(comp_len + 1); - strncpy(comp, (*eq == '=') ? (eq + 1) : eq, comp_len); - comp[comp_len] = '\0'; - if (strcmp(comp, "0") == 0) inst |= 0b0101010 << 6; - else if (strcmp(comp, "1") == 0) inst |= 0b0111111 << 6; - else if (strcmp(comp, "-1") == 0) inst |= 0b0111010 << 6; - else if (strcmp(comp, "D") == 0) inst |= 0b0001100 << 6; - else if (strcmp(comp, "A") == 0) inst |= 0b0110000 << 6; - else if (strcmp(comp, "M") == 0) inst |= 0b1110000 << 6; - else if (strcmp(comp, "!D") == 0) inst |= 0b0001101 << 6; - else if (strcmp(comp, "!A") == 0) inst |= 0b0110001 << 6; - else if (strcmp(comp, "!M") == 0) inst |= 0b1110001 << 6; - else if (strcmp(comp, "-D") == 0) inst |= 0b0001111 << 6; - else if (strcmp(comp, "-A") == 0) inst |= 0b0110011 << 6; - else if (strcmp(comp, "-M") == 0) inst |= 0b1110011 << 6; - else if (strcmp(comp, "D+1") == 0) inst |= 0b0011111 << 6; - else if (strcmp(comp, "A+1") == 0) inst |= 0b0110111 << 6; - else if (strcmp(comp, "M+1") == 0) inst |= 0b1110111 << 6; - else if (strcmp(comp, "D-1") == 0) inst |= 0b0001110 << 6; - else if (strcmp(comp, "A-1") == 0) inst |= 0b0110010 << 6; - else if (strcmp(comp, "M-1") == 0) inst |= 0b1110010 << 6; - else if (strcmp(comp, "D+A") == 0 - || strcmp(comp, "A+D") == 0) inst |= 0b0000010 << 6; - else if (strcmp(comp, "D+M") == 0 - || strcmp(comp, "M+D") == 0) inst |= 0b1000010 << 6; - else if (strcmp(comp, "D-A") == 0) inst |= 0b0010011 << 6; - else if (strcmp(comp, "D-M") == 0) inst |= 0b1010011 << 6; - else if (strcmp(comp, "A-D") == 0) inst |= 0b0000111 << 6; - else if (strcmp(comp, "M-D") == 0) inst |= 0b1000111 << 6; - else if (strcmp(comp, "D&A") == 0 - || strcmp(comp, "A&D") == 0) inst |= 0b0000000 << 6; - else if (strcmp(comp, "D&M") == 0 - || strcmp(comp, "M&D") == 0) inst |= 0b1000000 << 6; - else if (strcmp(comp, "D|A") == 0 - || strcmp(comp, "A|D") == 0) inst |= 0b0010101 << 6; - else if (strcmp(comp, "D|M") == 0 - || strcmp(comp, "M|D") == 0) inst |= 0b1010101 << 6; - free(comp); - return inst; - } -} - -size_t assembler(char *input_fn) { - // open input file - FILE *input_file = fopen(input_fn, "r"); - // find size of input file - fseek(input_file, 0, SEEK_END); - size_t file_size = ftell(input_file); - fseek(input_file, 0, SEEK_SET); - // read input file - char *file_content = malloc(file_size); - fread(file_content, file_size, 1, input_file); - fclose(input_file); - - // strip away comments, labels, blank lines and whitespace from file_content - // resulting in lines of what looks like instructions in assembly but is not necessarily correct - // labels in parentheses are assigned corresponding addresses in ROM, then collected in `symbols` - // the strings are scattered in the heap but asm_lines collects pointers to them - char **asm_lines = calloc(INST_CHUNK_LEN, sizeof(char*)); - int asm_line_cnt = 0; // no. of lines (metaphorically) written into asm_lines - char *asm_line = malloc(MAX_ASM_LINE_LEN + 1); // one line of (probably) assembly - int asm_char_cnt = 0; // no. of chars written into asm_line - struct symbol symbols[MAX_INST_LEN] = { - {"SP", 0}, {"LCL", 1}, {"ARG", 2}, {"THIS", 3}, {"THAT", 4}, - {"R0", 0}, {"R1", 1}, {"R2", 2}, {"R3", 3}, - {"R4", 4}, {"R5", 5}, {"R6", 6}, {"R7", 7}, - {"R8", 8}, {"R9", 9}, {"R10", 10}, {"R11", 11}, - {"R12", 12}, {"R13", 13}, {"R14", 14}, {"R15", 15}, - {"SCREEN", 16384}, {"KBD", 24576}, - }; - const int predef_symbol_cnt = 23; // no. of predefined symbols - int user_symbol_cnt = 0; - for (size_t i = 0; i < file_size; i++) { - switch (file_content[i]) { - case '\n': - // end of line; try to figure out what's in asm_line - if (asm_char_cnt == 0) continue; // skip blank line or comment line - *(asm_line + asm_char_cnt) = '\0'; - if (*asm_line == '(' && *(asm_line + asm_char_cnt - 1) == ')') { - // this line may be a label; extract label from between the parentheses - char *label = malloc(asm_char_cnt - 1); - strncpy(label, asm_line + 1, asm_char_cnt - 2); - free(asm_line); - *(label + asm_char_cnt - 2) = '\0'; - symbols[predef_symbol_cnt + user_symbol_cnt] = (struct symbol) {label, asm_line_cnt}; - user_symbol_cnt++; - } else { - // this line may be an instruction - // if we used up a chunk, realloc asm_lines - if (asm_line_cnt > 0 && asm_line_cnt % INST_CHUNK_LEN == 0) { - asm_lines = realloc(asm_lines, (asm_line_cnt + INST_CHUNK_LEN) * sizeof(char*)); - } - asm_lines[asm_line_cnt] = asm_line; - asm_line_cnt++; - } - // allocate memory for next line - asm_char_cnt = 0; - asm_line = malloc(MAX_ASM_LINE_LEN + 1); - break; - case '/': - // we encountered a comment - // skip to last char of line - while (i + 1 < file_size && file_content[i + 1] != '\n') i++; - break; - case ' ': - case '\t': - case '\r': - break; // ignore whitespace and CR - default: - *(asm_line + asm_char_cnt) = file_content[i]; - asm_char_cnt++; - } - } - free(asm_line); - free(file_content); - - // find and assign address to variables on the fly - int addr = 16; // variable addresses start at 16 - for (int i = 0; i < asm_line_cnt; i++) { - if (asm_lines[i] == NULL) break; // no more instructions - if (*(asm_lines[i]) != '@') continue; // not an A-instruction - char *addr_str = malloc(strlen(asm_lines[i])); - strcpy(addr_str, asm_lines[i] + 1); // whatever comes after the @ - bool is_symbol = false; - for (char *c = addr_str; *c != '\0'; c++) { - // search for non-numeric chars in addr_str - if (*c < '0' || *c > '9') is_symbol = true; - } - if (!is_symbol) { - free(addr_str); - continue; // address is decimal constant - } - // search for symbol in list - bool found = false; - for (int s = 0; s < predef_symbol_cnt + user_symbol_cnt; s++) { - if (strcmp(addr_str, symbols[s].label) == 0) { - // overwrite asm line with decimal constant - sprintf(asm_lines[i], "@%d", symbols[s].addr); - found = true; - free(addr_str); - break; - } - } - if (!found) { - // add symbol to list - symbols[predef_symbol_cnt + user_symbol_cnt] = (struct symbol) {addr_str, addr}; - sprintf(asm_lines[i], "@%d", addr); - user_symbol_cnt++; - addr++; - } - } - - // start assembling - uint16_t *binary = calloc(32768, 2); - size_t inst_cnt = 0; // current no. of instructions in binary - for (char **line = asm_lines; *line != NULL; line++) { - *(binary + inst_cnt) = assemble_inst(*line); - inst_cnt++; - } - - for (char **line = asm_lines; *line != NULL; line++) free(*line); - free(asm_lines); - for (int s = predef_symbol_cnt; s < predef_symbol_cnt + user_symbol_cnt; s++) { - free(symbols[s].label); - } - - // write binary - // output_fn = input_fn[:-4] + ".hack" if input_fn.endswith(".asm") else input_fn + ".hack" - int input_fn_len = strlen(input_fn); - char *output_fn = malloc(input_fn_len + 6); - strcpy(output_fn, input_fn); - if (input_fn_len >= 4 && strcmp(input_fn + input_fn_len - 4, ".asm") == 0) { - sprintf(output_fn + input_fn_len - 4, ".hack"); - } else { - sprintf(output_fn + input_fn_len, ".hack"); - } - - FILE *output_file = fopen(output_fn, "w"); - write_binary(output_file, binary, inst_cnt); - fclose(output_file); - free(binary); - printf("Binary written to %s\n", output_fn); - free(output_fn); - - return inst_cnt; -} - -int main(int argc, char *argv[]) { - assembler(argv[1]); - return 0; -} diff --git a/projects/06/hack-as/hack-as.py b/projects/06/hack-as/hack-as.py deleted file mode 100644 index bb70abc..0000000 --- a/projects/06/hack-as/hack-as.py +++ /dev/null @@ -1,269 +0,0 @@ -from sys import stderr -from argparse import ArgumentParser - -EXIT_CODES = { - "file_error": 1, - "illegal_char": 2, - "size_exceeded": 3, - "syntax_error": 4, - "addr_error": 5, - "symbol_error": 6, -} - -PREDEFINED_SYMBOL_CNT = 23 -MAX_INST_LEN = 32768 -MAX_ADDR = 32767 - -PREDEFINED_SYMBOLS = { - "SP": 0, - "LCL": 1, - "ARG": 2, - "THIS": 3, - "THAT": 4, - "R0": 0, - "R1": 1, - "R2": 2, - "R3": 3, - "R4": 4, - "R5": 5, - "R6": 6, - "R7": 7, - "R8": 8, - "R9": 9, - "R10": 10, - "R11": 11, - "R12": 12, - "R13": 13, - "R14": 14, - "R15": 15, - "SCREEN": 16384, - "KBD": 24576, -} - -DEST = { - "": 0b000 << 3, - "M": 0b001 << 3, - "D": 0b010 << 3, - "MD": 0b011 << 3, - "A": 0b100 << 3, - "AM": 0b101 << 3, - "AD": 0b110 << 3, - "AMD": 0b111 << 3, -} - -JMP = { - "": 0b000, - "JGT": 0b001, - "JEQ": 0b010, - "JGE": 0b011, - "JLT": 0b100, - "JNE": 0b101, - "JLE": 0b110, - "JMP": 0b111, -} - - -COMP = { - "0": 0b0101010 << 6, - "1": 0b0111111 << 6, - "-1": 0b0111010 << 6, - "D": 0b0001100 << 6, - "A": 0b0110000 << 6, - "M": 0b1110000 << 6, - "!D": 0b0001101 << 6, - "!A": 0b0110001 << 6, - "!M": 0b1110001 << 6, - "-D": 0b0001111 << 6, - "-A": 0b0110011 << 6, - "-M": 0b1110011 << 6, - "D+1": 0b0011111 << 6, - "A+1": 0b0110111 << 6, - "M+1": 0b1110111 << 6, - "D-1": 0b0001110 << 6, - "A-1": 0b0110010 << 6, - "M-1": 0b1110010 << 6, - "D+A": 0b0000010 << 6, - "A+D": 0b0000010 << 6, - "D+M": 0b1000010 << 6, - "M+D": 0b1000010 << 6, - "D-A": 0b0010011 << 6, - "D-M": 0b1010011 << 6, - "A-D": 0b0000111 << 6, - "M-D": 0b1000111 << 6, - "D&A": 0b0000000 << 6, - "A&D": 0b0000000 << 6, - "D&M": 0b1000000 << 6, - "M&D": 0b1000000 << 6, - "D|A": 0b0010101 << 6, - "A|D": 0b0010101 << 6, - "D|M": 0b1010101 << 6, - "M|D": 0b1010101 << 6, -} - - -def find_illegal_symbol_char(symbol): - for c in symbol: - if (not c.isascii()) or (not (c.isalnum() or c in "_.$:")): - return c - - return None - - -def print_symbols(symbols): - print("====== SYMBOLS =====") - print("label\taddr") - for label, addr in symbols.items(): - print(f"{label}\t{addr}") - - -def print_binary_and_asm(binary, asm_lines): - # binary and asm_lines are assumed to be of the same length - print("====== RESULTS =====") - print("addr\tbinary \tinst") - for line_no, bin, asm in zip(range(len(binary)), binary, asm_lines): - print(f"{line_no}\t{bin:016b}\t{asm}") - - -def write_binary(file, binary): - for inst in binary: - file.write(f"{inst:016b}\n") - - -def assemble_inst(asm_line): - if asm_line.startswith("@"): - # A instruction - addr_str = asm_line[1:] - addr = int(addr_str) - if addr > MAX_ADDR: - print(f"Address out of range: {addr}", file=stderr) - exit(EXIT_CODES["addr_error"]) - - return addr - else: - # dest=comp;jmp - inst = 0xE000 - dest, _, rhs = asm_line.partition("=") - if not rhs: - rhs = dest - dest = "" - - comp, _, jmp = rhs.partition(";") - if dest not in DEST: - print(f"Invalid destination: {dest}", file=stderr) - exit(EXIT_CODES["syntax_error"]) - - inst |= DEST[dest] - - if jmp not in JMP: - print(f"Invalid jump instruction: {jmp}", file=stderr) - exit(EXIT_CODES["syntax_error"]) - - inst |= JMP[jmp] - - if comp not in COMP: - print(f"Invalid computation: {comp}", file=stderr) - exit(EXIT_CODES["syntax_error"]) - - inst |= COMP[comp] - - return inst - - -def assembler(input_fn, verbose): - try: - input_file = open(input_fn, "r") - except FileNotFoundError: - print(f"Cannot open input file: {input_fn}", file=stderr) - exit(EXIT_CODES["file_error"]) - - asm_lines = [] - line = input_file.readline() - asm_line_cnt = 0 - user_symbols = {} - while line: - line = line.rstrip("\n") - # throw away comment and whitespace - asm_line = line.partition("//")[0].replace(" ", "").replace("\t", "") - if not asm_line: - # skip blank or comment line - line = input_file.readline() - continue - - if asm_line.startswith("(") and asm_line.endswith(")"): - # asm_line may be a label - label = asm_line[1:-1] - illegal_char = find_illegal_symbol_char(label) - if illegal_char is not None: - print(f"Illegal character: {illegal_char}", file=stderr) - exit(EXIT_CODES["illegal_char"]) - - if label in user_symbols or label in PREDEFINED_SYMBOLS: - print(f"Symbol {label} already exists", file=stderr) - exit(EXIT_CODES["symbol_error"]) - - user_symbols[label] = asm_line_cnt - else: - # asm_line may be an instruction - asm_lines.append(asm_line) - asm_line_cnt += 1 - if asm_line_cnt > MAX_INST_LEN: - print( - f"Max number of instruction ({MAX_INST_LEN}) exceeded", file=stderr - ) - exit(EXIT_CODE_SIZE_EXCEEDED) - - line = input_file.readline() - - input_file.close() - - # find and assign address to variables on the fly - addr = 16 # variable addresses start at 16 - for line_no, asm_line in enumerate(asm_lines): - if not asm_line.startswith("@"): - continue - - addr_str = asm_line[1:] # whatever comes after the @ - if not addr_str: - print("Address cannot be empty", file=stderr) - exit(EXIT_CODES["addr_error"]) - - if addr_str.isascii() and addr_str.isdecimal(): - # address is decimal constant - continue - - if addr_str in PREDEFINED_SYMBOLS: - asm_lines[line_no] = f"@{PREDEFINED_SYMBOLS[addr_str]}" - elif addr_str in user_symbols: - asm_lines[line_no] = f"@{user_symbols[addr_str]}" - else: - user_symbols[addr_str] = addr - asm_lines[line_no] = f"@{addr}" - addr += 1 - - binary = [] - for asm_line in asm_lines: - binary.append(assemble_inst(asm_line)) - - if verbose: - print_symbols(user_symbols) - print_binary_and_asm(binary, asm_lines) - - output_fn = ( - input_fn[:-4] + ".hack" if input_fn.endswith(".asm") else input_fn + ".hack" - ) - try: - output_file = open(output_fn, "w") - except: - print(f"Cannot open output file: {output_fn}", file=stderr) - - write_binary(output_file, binary) - output_file.close() - print(f"Binary written to {output_fn}") - - -if __name__ == "__main__": - parser = ArgumentParser() - parser.add_argument("-v", "--verbose", action="store_true", help="verbose mode") - parser.add_argument("input_fn", help="input file in assembly") - args = parser.parse_args() - assembler(args.input_fn, args.verbose) diff --git a/projects/06/hack-as/test/compare.hack b/projects/06/hack-as/test/compare.hack deleted file mode 100644 index f33a9d1..0000000 --- a/projects/06/hack-as/test/compare.hack +++ /dev/null @@ -1,25 +0,0 @@ -0000000000000000 -1111110000010000 -0000000000010111 -1110001100000110 -0000000000010000 -1110001100001000 -0100000000000000 -1110110000010000 -0000000000010001 -1110001100001000 -0000000000010001 -1111110000100000 -1110111010001000 -0000000000010001 -1111110000010000 -0000000000100000 -1110000010010000 -0000000000010001 -1110001100001000 -0000000000010000 -1111110010011000 -0000000000001010 -1110001100000001 -0000000000010111 -1110101010000111 diff --git a/projects/06/hack-as/test/conventional.asm b/projects/06/hack-as/test/conventional.asm deleted file mode 100644 index 5510424..0000000 --- a/projects/06/hack-as/test/conventional.asm +++ /dev/null @@ -1,32 +0,0 @@ -// conventional asm a sane person would write -// draw a rectangle on top left of screen -// width 16px, height specified in RAM[0] -@0 -D=M -@INFINITE_LOOP -D;JLE // reject if height is negative -@counter -M=D -@SCREEN -D=A -@address -M=D -(LOOP) - @address - A=M - M=-1 - @address - D=M - @32 - D=D+A - @address - M=D - @counter - MD=M-1 - @LOOP - D;JGT - -(INFINITE_LOOP) - @INFINITE_LOOP - 0;JMP - diff --git a/projects/06/hack-as/test/valid.asm b/projects/06/hack-as/test/valid.asm deleted file mode 100644 index d705f1e..0000000 --- a/projects/06/hack-as/test/valid.asm +++ /dev/null @@ -1,29 +0,0 @@ -// valid asm, but with unnecessary whitespace, = and ; -@ 0 // unnecessary space -D=M -@INFINITE_LOOP -=D;JLE // unnecessary = -@counter -M=D; // unnecessary ; -@SCREEN -D = A // unnecessary spaces -@address -M = D; // unnecessary spaces and ; -( LOOP ) // unnecessary spaces - @address - A=M - M=-1 - @address - D=M - @32 - D=D+A - @address - M=D - @counter - MD=M-1 - @LOOP - D;JGT - -(INFINITE_LOOP) - @INFINITE_LOOP - 0;JMP diff --git a/projects/hack-as/hack-as.c b/projects/hack-as/hack-as.c new file mode 100644 index 0000000..b152fb8 --- /dev/null +++ b/projects/hack-as/hack-as.c @@ -0,0 +1,384 @@ +#include +#include +#include +#include +#include + +#define MAX_ASM_LINE_LEN 64 +#define INST_CHUNK_LEN 64 +#define MAX_INST_LEN 32768 +#define MAX_ADDR 32767 + +#define EXIT_CODE_FILE_ERROR 1 +#define EXIT_CODE_ILLEGAL_CHAR 2 +#define EXIT_CODE_SIZE_EXCEEDED 3 +#define EXIT_CODE_SYNTAX_ERROR 4 +#define EXIT_CODE_ADDR_ERROR 5 + +struct symbol { + char *label; + int addr; +}; + +char find_illegal_symbol_char(char *symbol) { + // symbol should not begin with number + // nand2tetris implementation allows it, but the standard says otherwise + if (*symbol >= '0' && *symbol <= '9') return *symbol; + for (char *c = symbol; *c != '\0'; c++) { + if (!((*c >= 'A' && *c <= 'Z') || (*c >= 'a' && *c <= 'z') + || (*c >= '0' && *c <= '9') + || *c == '_' || *c == '.' || *c == '$' || *c == ':') + ) { + return *c; + } + } + return -1; +} + +void print_symbols(struct symbol *symbols, int cnt) { + printf("====== SYMBOLS =====\nlabel\taddr\n"); + for (int i = 0; i < cnt; i++) { + printf("%s\t%d\n", symbols[i].label, symbols[i].addr); + } +} + +void print_binary_and_asm(uint16_t *binary, char **asm_lines, int cnt) { + printf("\n====== RESULTS =====\naddr\tbinary \tinst\n"); + for (int i = 0; i < cnt; i++) { + char binary_str[17]; + binary_str[16] = '\0'; + for (int b = 0; b < 16; b++) { + binary_str[b] = '0' + (*(binary + i) >> (15 - b)) % 2; + } + printf("%d\t%s\t%s\n", i, binary_str, asm_lines[i]); + } +} + +void write_binary(FILE *file, uint16_t *binary, int cnt) { + for (int i = 0; i < cnt; i++) { + char binary_str[17]; + binary_str[16] = '\0'; + for (int b = 0; b < 16; b++) { + binary_str[b] = '0' + (*(binary + i) >> (15 - b)) % 2; + } + fprintf(file, "%s\n", binary_str); + } +} + +uint16_t assemble_inst(char *asm_line) { + // assemble one line of assembly, terminated with \0 + // labels and variables must be replaced with corresponding addresses beforehand + uint16_t inst = 0; + if (*asm_line == '@') { + // A instruction + char *addr_str = asm_line + 1; + int addr = atoi(addr_str); + if (addr < 0 || addr > MAX_ADDR) { + fprintf(stderr, "Address out of range: %d\n", addr); + exit(EXIT_CODE_ADDR_ERROR); + } + inst = (uint16_t) addr; + } else { + // C instruction + inst = 0xe000; // set 3 MSBs to 1 + char *eq = asm_line; + for (char *c = asm_line; *c != '\0'; c++) { + // find first equal sign (eq == asm_line if not found) + if (*c == '=') { + eq = c; + break; + } + } + + // slice out destination and copy to dest + int dest_len = eq - asm_line; + char *dest = malloc(dest_len + 1); + strncpy(dest, asm_line, dest_len); + dest[dest_len] = '\0'; + if (dest_len == 0) {} // ignore + else if (strcmp(dest, "M") == 0) inst |= 0b001 << 3; + else if (strcmp(dest, "D") == 0) inst |= 0b010 << 3; + else if (strcmp(dest, "MD") == 0) inst |= 0b011 << 3; + else if (strcmp(dest, "A") == 0) inst |= 0b100 << 3; + else if (strcmp(dest, "AM") == 0) inst |= 0b101 << 3; + else if (strcmp(dest, "AD") == 0) inst |= 0b110 << 3; + else if (strcmp(dest, "AMD") == 0) inst |= 0b111 << 3; + else { + fprintf(stderr, "Invalid destination: %s\n", dest); + exit(EXIT_CODE_SYNTAX_ERROR); + } + free(dest); + + char *semi = eq; + for (; *semi != '\0'; semi++) { + // find jump instruction after semicolon (;) (*semi == '\0' if not found) + if (*semi == ';') break; + } + // ignore if there's no semicolon, or there's nothing after it + if (*semi == '\0' || *(semi + 1) == '\0') {} + else if (strcmp(semi + 1, "JGT") == 0) inst |= 0b001; + else if (strcmp(semi + 1, "JEQ") == 0) inst |= 0b010; + else if (strcmp(semi + 1, "JGE") == 0) inst |= 0b011; + else if (strcmp(semi + 1, "JLT") == 0) inst |= 0b100; + else if (strcmp(semi + 1, "JNE") == 0) inst |= 0b101; + else if (strcmp(semi + 1, "JLE") == 0) inst |= 0b110; + else if (strcmp(semi + 1, "JMP") == 0) inst |= 0b111; + else { + fprintf(stderr, "Invalid jump instruction: %s\n", semi + 1); + exit(EXIT_CODE_SYNTAX_ERROR); + } + + // slice out computation and copy to comp + int comp_len = (*eq == '=') ? (semi - eq - 1) : (semi - eq); + char *comp = malloc(comp_len + 1); + strncpy(comp, (*eq == '=') ? (eq + 1) : eq, comp_len); + comp[comp_len] = '\0'; + if (strcmp(comp, "0") == 0) inst |= 0b0101010 << 6; + else if (strcmp(comp, "1") == 0) inst |= 0b0111111 << 6; + else if (strcmp(comp, "-1") == 0) inst |= 0b0111010 << 6; + else if (strcmp(comp, "D") == 0) inst |= 0b0001100 << 6; + else if (strcmp(comp, "A") == 0) inst |= 0b0110000 << 6; + else if (strcmp(comp, "M") == 0) inst |= 0b1110000 << 6; + else if (strcmp(comp, "!D") == 0) inst |= 0b0001101 << 6; + else if (strcmp(comp, "!A") == 0) inst |= 0b0110001 << 6; + else if (strcmp(comp, "!M") == 0) inst |= 0b1110001 << 6; + else if (strcmp(comp, "-D") == 0) inst |= 0b0001111 << 6; + else if (strcmp(comp, "-A") == 0) inst |= 0b0110011 << 6; + else if (strcmp(comp, "-M") == 0) inst |= 0b1110011 << 6; + else if (strcmp(comp, "D+1") == 0) inst |= 0b0011111 << 6; + else if (strcmp(comp, "A+1") == 0) inst |= 0b0110111 << 6; + else if (strcmp(comp, "M+1") == 0) inst |= 0b1110111 << 6; + else if (strcmp(comp, "D-1") == 0) inst |= 0b0001110 << 6; + else if (strcmp(comp, "A-1") == 0) inst |= 0b0110010 << 6; + else if (strcmp(comp, "M-1") == 0) inst |= 0b1110010 << 6; + else if (strcmp(comp, "D+A") == 0 + || strcmp(comp, "A+D") == 0) inst |= 0b0000010 << 6; + else if (strcmp(comp, "D+M") == 0 + || strcmp(comp, "M+D") == 0) inst |= 0b1000010 << 6; + else if (strcmp(comp, "D-A") == 0) inst |= 0b0010011 << 6; + else if (strcmp(comp, "D-M") == 0) inst |= 0b1010011 << 6; + else if (strcmp(comp, "A-D") == 0) inst |= 0b0000111 << 6; + else if (strcmp(comp, "M-D") == 0) inst |= 0b1000111 << 6; + else if (strcmp(comp, "D&A") == 0 + || strcmp(comp, "A&D") == 0) inst |= 0b0000000 << 6; + else if (strcmp(comp, "D&M") == 0 + || strcmp(comp, "M&D") == 0) inst |= 0b1000000 << 6; + else if (strcmp(comp, "D|A") == 0 + || strcmp(comp, "A|D") == 0) inst |= 0b0010101 << 6; + else if (strcmp(comp, "D|M") == 0 + || strcmp(comp, "M|D") == 0) inst |= 0b1010101 << 6; + else { + fprintf(stderr, "Invalid computation: %s\n", comp); + exit(EXIT_CODE_SYNTAX_ERROR); + } + free(comp); + } + return inst; +} + +size_t assembler(char *input_fn, bool verbose) { + // open input file + FILE *input_file = fopen(input_fn, "r"); + if (input_file == NULL) { + fprintf(stderr, "Cannot open input file: %s\n", input_fn); + exit(EXIT_CODE_FILE_ERROR); + } + // find size of input file + fseek(input_file, 0, SEEK_END); + size_t file_size = ftell(input_file); + fseek(input_file, 0, SEEK_SET); + // read input file + char *file_content = malloc(file_size); + fread(file_content, file_size, 1, input_file); + fclose(input_file); + + // strip away comments, labels, blank lines and whitespace from file_content + // resulting in lines of what looks like instructions in assembly but is not necessarily correct + // labels in parentheses are assigned corresponding addresses in ROM, then collected in `symbols` + // the strings are scattered in the heap but asm_lines collects pointers to them + char **asm_lines = calloc(INST_CHUNK_LEN, sizeof(char*)); + int asm_line_cnt = 0; // no. of lines (metaphorically) written into asm_lines + char *asm_line = malloc(MAX_ASM_LINE_LEN + 1); // one line of (probably) assembly + int asm_char_cnt = 0; // no. of chars written into asm_line + struct symbol symbols[MAX_INST_LEN] = { + {"SP", 0}, {"LCL", 1}, {"ARG", 2}, {"THIS", 3}, {"THAT", 4}, + {"R0", 0}, {"R1", 1}, {"R2", 2}, {"R3", 3}, + {"R4", 4}, {"R5", 5}, {"R6", 6}, {"R7", 7}, + {"R8", 8}, {"R9", 9}, {"R10", 10}, {"R11", 11}, + {"R12", 12}, {"R13", 13}, {"R14", 14}, {"R15", 15}, + {"SCREEN", 16384}, {"KBD", 24576}, + }; + const int predef_symbol_cnt = 23; // no. of predefined symbols + int user_symbol_cnt = 0; + for (size_t i = 0; i < file_size; i++) { + switch (file_content[i]) { + case '\n': + // end of line; try to figure out what's in asm_line + if (asm_char_cnt == 0) continue; // skip blank line or comment line + *(asm_line + asm_char_cnt) = '\0'; + if (*asm_line == '(' && *(asm_line + asm_char_cnt - 1) == ')') { + // this line may be a label; extract label from between the parentheses + char *label = malloc(asm_char_cnt - 1); + strncpy(label, asm_line + 1, asm_char_cnt - 2); + free(asm_line); + *(label + asm_char_cnt - 2) = '\0'; + char illegal_char = find_illegal_symbol_char(label); + if (illegal_char != -1) { + fprintf(stderr, "Illegal character: %c\n", illegal_char); + exit(EXIT_CODE_ILLEGAL_CHAR); + } + // TODO: error on repeated label + symbols[predef_symbol_cnt + user_symbol_cnt] = (struct symbol) {label, asm_line_cnt}; + user_symbol_cnt++; + } else { + // this line may be an instruction + // if we used up a chunk, realloc asm_lines + if (asm_line_cnt > 0 && asm_line_cnt % INST_CHUNK_LEN == 0) { + asm_lines = realloc(asm_lines, (asm_line_cnt + INST_CHUNK_LEN) * sizeof(char*)); + } + asm_lines[asm_line_cnt] = asm_line; + asm_line_cnt++; + } + // allocate memory for next line + asm_char_cnt = 0; + asm_line = malloc(MAX_ASM_LINE_LEN + 1); + break; + case '/': + if (i + 1 < file_size && file_content[i + 1] == '/') { + // we encountered a comment + // skip to last char of line + while (i + 1 < file_size && file_content[i + 1] != '\n') i++; + } else { + fprintf(stderr, "Illegal character: /\n"); + exit(EXIT_CODE_ILLEGAL_CHAR); + } + break; + case ' ': + case '\t': + case '\r': + break; // ignore whitespace and CR + default: + *(asm_line + asm_char_cnt) = file_content[i]; + asm_char_cnt++; + if (asm_char_cnt > MAX_ASM_LINE_LEN) { + fprintf(stderr, "Max assembly line length (%d) exceeded\n", MAX_ASM_LINE_LEN); + exit(EXIT_CODE_SIZE_EXCEEDED); + } + } + } + free(asm_line); + free(file_content); + + // find and assign address to variables on the fly + int addr = 16; // variable addresses start at 16 + for (int i = 0; i < asm_line_cnt; i++) { + if (asm_lines[i] == NULL) break; // no more instructions + if (*(asm_lines[i]) != '@') continue; // not an A-instruction + char *addr_str = malloc(strlen(asm_lines[i])); + strcpy(addr_str, asm_lines[i] + 1); // whatever comes after the @ + if (strlen(addr_str) == 0) { + fprintf(stderr, "Address cannot be empty\n"); + exit(EXIT_CODE_SYNTAX_ERROR); + } + bool is_symbol = false; + for (char *c = addr_str; *c != '\0'; c++) { + // search for non-numeric chars in addr_str + if (*c < '0' || *c > '9') { + is_symbol = true; + } + } + if (!is_symbol) { + free(addr_str); + continue; // address is decimal constant + } + char illegal_char = find_illegal_symbol_char(addr_str); + if (illegal_char != -1) { + fprintf(stderr, "Illegal character: %c\n", illegal_char); + exit(EXIT_CODE_ILLEGAL_CHAR); + } + // search for symbol in list + bool found = false; + for (int s = 0; s < predef_symbol_cnt + user_symbol_cnt; s++) { + if (strcmp(addr_str, symbols[s].label) == 0) { + // overwrite asm line with decimal constant + sprintf(asm_lines[i], "@%d", symbols[s].addr); + found = true; + free(addr_str); + break; + } + } + if (!found) { + // add symbol to list + symbols[predef_symbol_cnt + user_symbol_cnt] = (struct symbol) {addr_str, addr}; + sprintf(asm_lines[i], "@%d", addr); + user_symbol_cnt++; + addr++; + } + } + + // start assembling + uint16_t *binary = calloc(32768, 2); + size_t inst_cnt = 0; // current no. of instructions in binary + for (char **line = asm_lines; *line != NULL; line++) { + *(binary + inst_cnt) = assemble_inst(*line); + inst_cnt++; + if (inst_cnt > MAX_INST_LEN) { + fprintf(stderr, "Max number of instruction (%d) exceeded\n", MAX_INST_LEN); + exit(EXIT_CODE_SIZE_EXCEEDED); + } + } + + if (verbose) { + print_symbols(symbols + predef_symbol_cnt, user_symbol_cnt); + print_binary_and_asm(binary, asm_lines, inst_cnt); + printf("\n"); + } + + for (char **line = asm_lines; *line != NULL; line++) free(*line); + free(asm_lines); + for (int s = predef_symbol_cnt; s < predef_symbol_cnt + user_symbol_cnt; s++) { + free(symbols[s].label); + } + + // write binary + // output_fn = input_fn[:-4] + ".hack" if input_fn.endswith(".asm") else input_fn + ".hack" + int input_fn_len = strlen(input_fn); + char *output_fn = malloc(input_fn_len + 6); + strcpy(output_fn, input_fn); + if (input_fn_len >= 4 && strcmp(input_fn + input_fn_len - 4, ".asm") == 0) { + sprintf(output_fn + input_fn_len - 4, ".hack"); + } else { + sprintf(output_fn + input_fn_len, ".hack"); + } + + FILE *output_file = fopen(output_fn, "w"); + if (output_file == NULL) { + fprintf(stderr, "Cannot open output file: %s\n", output_fn); + exit(EXIT_CODE_FILE_ERROR); + } + write_binary(output_file, binary, inst_cnt); + fclose(output_file); + free(binary); + printf("Binary written to %s\n", output_fn); + free(output_fn); + + return inst_cnt; +} + +int main(int argc, char *argv[]) { + char *input_fn = NULL; + bool verbose = false; + for (int i = 1; i < argc; i++) { + if (strcmp(argv[i], "-h") == 0) { + printf("Usage: %s [-v]\n-v -- verbose mode\n", argv[0]); + exit(0); + } else if (strcmp(argv[i], "-v") == 0) { + verbose = true; + } else { + input_fn = argv[i]; + } + } + + assembler(input_fn, verbose); + return 0; +} diff --git a/projects/hack-as/hack-as.min.c b/projects/hack-as/hack-as.min.c new file mode 100644 index 0000000..5d7252f --- /dev/null +++ b/projects/hack-as/hack-as.min.c @@ -0,0 +1,268 @@ +#include +#include +#include +#include +#include + +#define MAX_ASM_LINE_LEN 64 +#define INST_CHUNK_LEN 64 +#define MAX_INST_LEN 32768 +#define MAX_ADDR 32767 + +struct symbol { + char *label; + int addr; +}; + +void write_binary(FILE *file, uint16_t *binary, int cnt) { + for (int i = 0; i < cnt; i++) { + char binary_str[17]; + binary_str[16] = '\0'; + for (int b = 0; b < 16; b++) { + binary_str[b] = '0' + (*(binary + i) >> (15 - b)) % 2; + } + fprintf(file, "%s\n", binary_str); + } +} + +uint16_t assemble_inst(char *asm_line) { + // assemble one line of assembly, terminated with \0 + // labels and variables must be replaced with corresponding addresses beforehand + if (*asm_line == '@') { + // A instruction + return atoi(asm_line + 1); + } else { + // C instruction + uint16_t inst = 0xe000; // set 3 MSBs to 1 + char *eq = asm_line; + for (char *c = asm_line; *c != '\0'; c++) { + // find first equal sign (eq == asm_line if not found) + if (*c == '=') { + eq = c; + break; + } + } + + // slice out destination and copy to dest + int dest_len = eq - asm_line; + char *dest = malloc(dest_len + 1); + strncpy(dest, asm_line, dest_len); + dest[dest_len] = '\0'; + if (dest_len == 0) {} // ignore + else if (strcmp(dest, "M") == 0) inst |= 0b001 << 3; + else if (strcmp(dest, "D") == 0) inst |= 0b010 << 3; + else if (strcmp(dest, "MD") == 0) inst |= 0b011 << 3; + else if (strcmp(dest, "A") == 0) inst |= 0b100 << 3; + else if (strcmp(dest, "AM") == 0) inst |= 0b101 << 3; + else if (strcmp(dest, "AD") == 0) inst |= 0b110 << 3; + else if (strcmp(dest, "AMD") == 0) inst |= 0b111 << 3; + free(dest); + + char *semi = eq; + for (; *semi != '\0'; semi++) { + // find jump instruction after semicolon (;) (*semi == '\0' if not found) + if (*semi == ';') break; + } + // ignore if there's no semicolon, or there's nothing after it + if (*semi == '\0' || *(semi + 1) == '\0') {} + else if (strcmp(semi + 1, "JGT") == 0) inst |= 0b001; + else if (strcmp(semi + 1, "JEQ") == 0) inst |= 0b010; + else if (strcmp(semi + 1, "JGE") == 0) inst |= 0b011; + else if (strcmp(semi + 1, "JLT") == 0) inst |= 0b100; + else if (strcmp(semi + 1, "JNE") == 0) inst |= 0b101; + else if (strcmp(semi + 1, "JLE") == 0) inst |= 0b110; + else if (strcmp(semi + 1, "JMP") == 0) inst |= 0b111; + + // slice out computation and copy to comp + int comp_len = (*eq == '=') ? (semi - eq - 1) : (semi - eq); + char *comp = malloc(comp_len + 1); + strncpy(comp, (*eq == '=') ? (eq + 1) : eq, comp_len); + comp[comp_len] = '\0'; + if (strcmp(comp, "0") == 0) inst |= 0b0101010 << 6; + else if (strcmp(comp, "1") == 0) inst |= 0b0111111 << 6; + else if (strcmp(comp, "-1") == 0) inst |= 0b0111010 << 6; + else if (strcmp(comp, "D") == 0) inst |= 0b0001100 << 6; + else if (strcmp(comp, "A") == 0) inst |= 0b0110000 << 6; + else if (strcmp(comp, "M") == 0) inst |= 0b1110000 << 6; + else if (strcmp(comp, "!D") == 0) inst |= 0b0001101 << 6; + else if (strcmp(comp, "!A") == 0) inst |= 0b0110001 << 6; + else if (strcmp(comp, "!M") == 0) inst |= 0b1110001 << 6; + else if (strcmp(comp, "-D") == 0) inst |= 0b0001111 << 6; + else if (strcmp(comp, "-A") == 0) inst |= 0b0110011 << 6; + else if (strcmp(comp, "-M") == 0) inst |= 0b1110011 << 6; + else if (strcmp(comp, "D+1") == 0) inst |= 0b0011111 << 6; + else if (strcmp(comp, "A+1") == 0) inst |= 0b0110111 << 6; + else if (strcmp(comp, "M+1") == 0) inst |= 0b1110111 << 6; + else if (strcmp(comp, "D-1") == 0) inst |= 0b0001110 << 6; + else if (strcmp(comp, "A-1") == 0) inst |= 0b0110010 << 6; + else if (strcmp(comp, "M-1") == 0) inst |= 0b1110010 << 6; + else if (strcmp(comp, "D+A") == 0 + || strcmp(comp, "A+D") == 0) inst |= 0b0000010 << 6; + else if (strcmp(comp, "D+M") == 0 + || strcmp(comp, "M+D") == 0) inst |= 0b1000010 << 6; + else if (strcmp(comp, "D-A") == 0) inst |= 0b0010011 << 6; + else if (strcmp(comp, "D-M") == 0) inst |= 0b1010011 << 6; + else if (strcmp(comp, "A-D") == 0) inst |= 0b0000111 << 6; + else if (strcmp(comp, "M-D") == 0) inst |= 0b1000111 << 6; + else if (strcmp(comp, "D&A") == 0 + || strcmp(comp, "A&D") == 0) inst |= 0b0000000 << 6; + else if (strcmp(comp, "D&M") == 0 + || strcmp(comp, "M&D") == 0) inst |= 0b1000000 << 6; + else if (strcmp(comp, "D|A") == 0 + || strcmp(comp, "A|D") == 0) inst |= 0b0010101 << 6; + else if (strcmp(comp, "D|M") == 0 + || strcmp(comp, "M|D") == 0) inst |= 0b1010101 << 6; + free(comp); + return inst; + } +} + +size_t assembler(char *input_fn) { + // open input file + FILE *input_file = fopen(input_fn, "r"); + // find size of input file + fseek(input_file, 0, SEEK_END); + size_t file_size = ftell(input_file); + fseek(input_file, 0, SEEK_SET); + // read input file + char *file_content = malloc(file_size); + fread(file_content, file_size, 1, input_file); + fclose(input_file); + + // strip away comments, labels, blank lines and whitespace from file_content + // resulting in lines of what looks like instructions in assembly but is not necessarily correct + // labels in parentheses are assigned corresponding addresses in ROM, then collected in `symbols` + // the strings are scattered in the heap but asm_lines collects pointers to them + char **asm_lines = calloc(INST_CHUNK_LEN, sizeof(char*)); + int asm_line_cnt = 0; // no. of lines (metaphorically) written into asm_lines + char *asm_line = malloc(MAX_ASM_LINE_LEN + 1); // one line of (probably) assembly + int asm_char_cnt = 0; // no. of chars written into asm_line + struct symbol symbols[MAX_INST_LEN] = { + {"SP", 0}, {"LCL", 1}, {"ARG", 2}, {"THIS", 3}, {"THAT", 4}, + {"R0", 0}, {"R1", 1}, {"R2", 2}, {"R3", 3}, + {"R4", 4}, {"R5", 5}, {"R6", 6}, {"R7", 7}, + {"R8", 8}, {"R9", 9}, {"R10", 10}, {"R11", 11}, + {"R12", 12}, {"R13", 13}, {"R14", 14}, {"R15", 15}, + {"SCREEN", 16384}, {"KBD", 24576}, + }; + const int predef_symbol_cnt = 23; // no. of predefined symbols + int user_symbol_cnt = 0; + for (size_t i = 0; i < file_size; i++) { + switch (file_content[i]) { + case '\n': + // end of line; try to figure out what's in asm_line + if (asm_char_cnt == 0) continue; // skip blank line or comment line + *(asm_line + asm_char_cnt) = '\0'; + if (*asm_line == '(' && *(asm_line + asm_char_cnt - 1) == ')') { + // this line may be a label; extract label from between the parentheses + char *label = malloc(asm_char_cnt - 1); + strncpy(label, asm_line + 1, asm_char_cnt - 2); + free(asm_line); + *(label + asm_char_cnt - 2) = '\0'; + symbols[predef_symbol_cnt + user_symbol_cnt] = (struct symbol) {label, asm_line_cnt}; + user_symbol_cnt++; + } else { + // this line may be an instruction + // if we used up a chunk, realloc asm_lines + if (asm_line_cnt > 0 && asm_line_cnt % INST_CHUNK_LEN == 0) { + asm_lines = realloc(asm_lines, (asm_line_cnt + INST_CHUNK_LEN) * sizeof(char*)); + } + asm_lines[asm_line_cnt] = asm_line; + asm_line_cnt++; + } + // allocate memory for next line + asm_char_cnt = 0; + asm_line = malloc(MAX_ASM_LINE_LEN + 1); + break; + case '/': + // we encountered a comment + // skip to last char of line + while (i + 1 < file_size && file_content[i + 1] != '\n') i++; + break; + case ' ': + case '\t': + case '\r': + break; // ignore whitespace and CR + default: + *(asm_line + asm_char_cnt) = file_content[i]; + asm_char_cnt++; + } + } + free(asm_line); + free(file_content); + + // find and assign address to variables on the fly + int addr = 16; // variable addresses start at 16 + for (int i = 0; i < asm_line_cnt; i++) { + if (asm_lines[i] == NULL) break; // no more instructions + if (*(asm_lines[i]) != '@') continue; // not an A-instruction + char *addr_str = malloc(strlen(asm_lines[i])); + strcpy(addr_str, asm_lines[i] + 1); // whatever comes after the @ + bool is_symbol = false; + for (char *c = addr_str; *c != '\0'; c++) { + // search for non-numeric chars in addr_str + if (*c < '0' || *c > '9') is_symbol = true; + } + if (!is_symbol) { + free(addr_str); + continue; // address is decimal constant + } + // search for symbol in list + bool found = false; + for (int s = 0; s < predef_symbol_cnt + user_symbol_cnt; s++) { + if (strcmp(addr_str, symbols[s].label) == 0) { + // overwrite asm line with decimal constant + sprintf(asm_lines[i], "@%d", symbols[s].addr); + found = true; + free(addr_str); + break; + } + } + if (!found) { + // add symbol to list + symbols[predef_symbol_cnt + user_symbol_cnt] = (struct symbol) {addr_str, addr}; + sprintf(asm_lines[i], "@%d", addr); + user_symbol_cnt++; + addr++; + } + } + + // start assembling + uint16_t *binary = calloc(32768, 2); + size_t inst_cnt = 0; // current no. of instructions in binary + for (char **line = asm_lines; *line != NULL; line++) { + *(binary + inst_cnt) = assemble_inst(*line); + inst_cnt++; + } + + for (char **line = asm_lines; *line != NULL; line++) free(*line); + free(asm_lines); + for (int s = predef_symbol_cnt; s < predef_symbol_cnt + user_symbol_cnt; s++) { + free(symbols[s].label); + } + + // write binary + // output_fn = input_fn[:-4] + ".hack" if input_fn.endswith(".asm") else input_fn + ".hack" + int input_fn_len = strlen(input_fn); + char *output_fn = malloc(input_fn_len + 6); + strcpy(output_fn, input_fn); + if (input_fn_len >= 4 && strcmp(input_fn + input_fn_len - 4, ".asm") == 0) { + sprintf(output_fn + input_fn_len - 4, ".hack"); + } else { + sprintf(output_fn + input_fn_len, ".hack"); + } + + FILE *output_file = fopen(output_fn, "w"); + write_binary(output_file, binary, inst_cnt); + fclose(output_file); + free(binary); + printf("Binary written to %s\n", output_fn); + free(output_fn); + + return inst_cnt; +} + +int main(int argc, char *argv[]) { + assembler(argv[1]); + return 0; +} diff --git a/projects/hack-as/hack-as.py b/projects/hack-as/hack-as.py new file mode 100644 index 0000000..bb70abc --- /dev/null +++ b/projects/hack-as/hack-as.py @@ -0,0 +1,269 @@ +from sys import stderr +from argparse import ArgumentParser + +EXIT_CODES = { + "file_error": 1, + "illegal_char": 2, + "size_exceeded": 3, + "syntax_error": 4, + "addr_error": 5, + "symbol_error": 6, +} + +PREDEFINED_SYMBOL_CNT = 23 +MAX_INST_LEN = 32768 +MAX_ADDR = 32767 + +PREDEFINED_SYMBOLS = { + "SP": 0, + "LCL": 1, + "ARG": 2, + "THIS": 3, + "THAT": 4, + "R0": 0, + "R1": 1, + "R2": 2, + "R3": 3, + "R4": 4, + "R5": 5, + "R6": 6, + "R7": 7, + "R8": 8, + "R9": 9, + "R10": 10, + "R11": 11, + "R12": 12, + "R13": 13, + "R14": 14, + "R15": 15, + "SCREEN": 16384, + "KBD": 24576, +} + +DEST = { + "": 0b000 << 3, + "M": 0b001 << 3, + "D": 0b010 << 3, + "MD": 0b011 << 3, + "A": 0b100 << 3, + "AM": 0b101 << 3, + "AD": 0b110 << 3, + "AMD": 0b111 << 3, +} + +JMP = { + "": 0b000, + "JGT": 0b001, + "JEQ": 0b010, + "JGE": 0b011, + "JLT": 0b100, + "JNE": 0b101, + "JLE": 0b110, + "JMP": 0b111, +} + + +COMP = { + "0": 0b0101010 << 6, + "1": 0b0111111 << 6, + "-1": 0b0111010 << 6, + "D": 0b0001100 << 6, + "A": 0b0110000 << 6, + "M": 0b1110000 << 6, + "!D": 0b0001101 << 6, + "!A": 0b0110001 << 6, + "!M": 0b1110001 << 6, + "-D": 0b0001111 << 6, + "-A": 0b0110011 << 6, + "-M": 0b1110011 << 6, + "D+1": 0b0011111 << 6, + "A+1": 0b0110111 << 6, + "M+1": 0b1110111 << 6, + "D-1": 0b0001110 << 6, + "A-1": 0b0110010 << 6, + "M-1": 0b1110010 << 6, + "D+A": 0b0000010 << 6, + "A+D": 0b0000010 << 6, + "D+M": 0b1000010 << 6, + "M+D": 0b1000010 << 6, + "D-A": 0b0010011 << 6, + "D-M": 0b1010011 << 6, + "A-D": 0b0000111 << 6, + "M-D": 0b1000111 << 6, + "D&A": 0b0000000 << 6, + "A&D": 0b0000000 << 6, + "D&M": 0b1000000 << 6, + "M&D": 0b1000000 << 6, + "D|A": 0b0010101 << 6, + "A|D": 0b0010101 << 6, + "D|M": 0b1010101 << 6, + "M|D": 0b1010101 << 6, +} + + +def find_illegal_symbol_char(symbol): + for c in symbol: + if (not c.isascii()) or (not (c.isalnum() or c in "_.$:")): + return c + + return None + + +def print_symbols(symbols): + print("====== SYMBOLS =====") + print("label\taddr") + for label, addr in symbols.items(): + print(f"{label}\t{addr}") + + +def print_binary_and_asm(binary, asm_lines): + # binary and asm_lines are assumed to be of the same length + print("====== RESULTS =====") + print("addr\tbinary \tinst") + for line_no, bin, asm in zip(range(len(binary)), binary, asm_lines): + print(f"{line_no}\t{bin:016b}\t{asm}") + + +def write_binary(file, binary): + for inst in binary: + file.write(f"{inst:016b}\n") + + +def assemble_inst(asm_line): + if asm_line.startswith("@"): + # A instruction + addr_str = asm_line[1:] + addr = int(addr_str) + if addr > MAX_ADDR: + print(f"Address out of range: {addr}", file=stderr) + exit(EXIT_CODES["addr_error"]) + + return addr + else: + # dest=comp;jmp + inst = 0xE000 + dest, _, rhs = asm_line.partition("=") + if not rhs: + rhs = dest + dest = "" + + comp, _, jmp = rhs.partition(";") + if dest not in DEST: + print(f"Invalid destination: {dest}", file=stderr) + exit(EXIT_CODES["syntax_error"]) + + inst |= DEST[dest] + + if jmp not in JMP: + print(f"Invalid jump instruction: {jmp}", file=stderr) + exit(EXIT_CODES["syntax_error"]) + + inst |= JMP[jmp] + + if comp not in COMP: + print(f"Invalid computation: {comp}", file=stderr) + exit(EXIT_CODES["syntax_error"]) + + inst |= COMP[comp] + + return inst + + +def assembler(input_fn, verbose): + try: + input_file = open(input_fn, "r") + except FileNotFoundError: + print(f"Cannot open input file: {input_fn}", file=stderr) + exit(EXIT_CODES["file_error"]) + + asm_lines = [] + line = input_file.readline() + asm_line_cnt = 0 + user_symbols = {} + while line: + line = line.rstrip("\n") + # throw away comment and whitespace + asm_line = line.partition("//")[0].replace(" ", "").replace("\t", "") + if not asm_line: + # skip blank or comment line + line = input_file.readline() + continue + + if asm_line.startswith("(") and asm_line.endswith(")"): + # asm_line may be a label + label = asm_line[1:-1] + illegal_char = find_illegal_symbol_char(label) + if illegal_char is not None: + print(f"Illegal character: {illegal_char}", file=stderr) + exit(EXIT_CODES["illegal_char"]) + + if label in user_symbols or label in PREDEFINED_SYMBOLS: + print(f"Symbol {label} already exists", file=stderr) + exit(EXIT_CODES["symbol_error"]) + + user_symbols[label] = asm_line_cnt + else: + # asm_line may be an instruction + asm_lines.append(asm_line) + asm_line_cnt += 1 + if asm_line_cnt > MAX_INST_LEN: + print( + f"Max number of instruction ({MAX_INST_LEN}) exceeded", file=stderr + ) + exit(EXIT_CODE_SIZE_EXCEEDED) + + line = input_file.readline() + + input_file.close() + + # find and assign address to variables on the fly + addr = 16 # variable addresses start at 16 + for line_no, asm_line in enumerate(asm_lines): + if not asm_line.startswith("@"): + continue + + addr_str = asm_line[1:] # whatever comes after the @ + if not addr_str: + print("Address cannot be empty", file=stderr) + exit(EXIT_CODES["addr_error"]) + + if addr_str.isascii() and addr_str.isdecimal(): + # address is decimal constant + continue + + if addr_str in PREDEFINED_SYMBOLS: + asm_lines[line_no] = f"@{PREDEFINED_SYMBOLS[addr_str]}" + elif addr_str in user_symbols: + asm_lines[line_no] = f"@{user_symbols[addr_str]}" + else: + user_symbols[addr_str] = addr + asm_lines[line_no] = f"@{addr}" + addr += 1 + + binary = [] + for asm_line in asm_lines: + binary.append(assemble_inst(asm_line)) + + if verbose: + print_symbols(user_symbols) + print_binary_and_asm(binary, asm_lines) + + output_fn = ( + input_fn[:-4] + ".hack" if input_fn.endswith(".asm") else input_fn + ".hack" + ) + try: + output_file = open(output_fn, "w") + except: + print(f"Cannot open output file: {output_fn}", file=stderr) + + write_binary(output_file, binary) + output_file.close() + print(f"Binary written to {output_fn}") + + +if __name__ == "__main__": + parser = ArgumentParser() + parser.add_argument("-v", "--verbose", action="store_true", help="verbose mode") + parser.add_argument("input_fn", help="input file in assembly") + args = parser.parse_args() + assembler(args.input_fn, args.verbose) diff --git a/projects/hack-as/test/compare.hack b/projects/hack-as/test/compare.hack new file mode 100644 index 0000000..f33a9d1 --- /dev/null +++ b/projects/hack-as/test/compare.hack @@ -0,0 +1,25 @@ +0000000000000000 +1111110000010000 +0000000000010111 +1110001100000110 +0000000000010000 +1110001100001000 +0100000000000000 +1110110000010000 +0000000000010001 +1110001100001000 +0000000000010001 +1111110000100000 +1110111010001000 +0000000000010001 +1111110000010000 +0000000000100000 +1110000010010000 +0000000000010001 +1110001100001000 +0000000000010000 +1111110010011000 +0000000000001010 +1110001100000001 +0000000000010111 +1110101010000111 diff --git a/projects/hack-as/test/conventional.asm b/projects/hack-as/test/conventional.asm new file mode 100644 index 0000000..5510424 --- /dev/null +++ b/projects/hack-as/test/conventional.asm @@ -0,0 +1,32 @@ +// conventional asm a sane person would write +// draw a rectangle on top left of screen +// width 16px, height specified in RAM[0] +@0 +D=M +@INFINITE_LOOP +D;JLE // reject if height is negative +@counter +M=D +@SCREEN +D=A +@address +M=D +(LOOP) + @address + A=M + M=-1 + @address + D=M + @32 + D=D+A + @address + M=D + @counter + MD=M-1 + @LOOP + D;JGT + +(INFINITE_LOOP) + @INFINITE_LOOP + 0;JMP + diff --git a/projects/hack-as/test/valid.asm b/projects/hack-as/test/valid.asm new file mode 100644 index 0000000..d705f1e --- /dev/null +++ b/projects/hack-as/test/valid.asm @@ -0,0 +1,29 @@ +// valid asm, but with unnecessary whitespace, = and ; +@ 0 // unnecessary space +D=M +@INFINITE_LOOP +=D;JLE // unnecessary = +@counter +M=D; // unnecessary ; +@SCREEN +D = A // unnecessary spaces +@address +M = D; // unnecessary spaces and ; +( LOOP ) // unnecessary spaces + @address + A=M + M=-1 + @address + D=M + @32 + D=D+A + @address + M=D + @counter + MD=M-1 + @LOOP + D;JGT + +(INFINITE_LOOP) + @INFINITE_LOOP + 0;JMP -- cgit v1.2.3