// hack-as.min.c -- minimal assembler translating Hack assembly (.asm) into
// textual 16-bit binary (.hack), one instruction per output line.
//
// Provenance: recovered from the removed side of a cgit v1.2.3 patch view:
//   commit  6b9d8f151cc04a0590a7fed80ae5b8eb3928952a
//   author  Frederick Yin, Sun, 21 Aug 2022 22:08:53 +0800
//   subject "Move hack-as up one directory"
//   path    projects/06/hack-as/hack-as.min.c (deleted by that commit)
// The header names of the five #include lines were lost in extraction and are
// reconstructed here from the identifiers the code uses.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define MAX_ASM_LINE_LEN 64   // max significant (non-blank, non-comment) chars per line
#define INST_CHUNK_LEN 64     // growth step of the dynamic arrays
#define MAX_INST_LEN 32768    // max number of instructions in one program
#define MAX_ADDR 32767        // largest value an A-instruction can hold (15 bits)

#define FIRST_VARIABLE_ADDR 16
#define ARRAY_LEN(a) (sizeof(a) / sizeof((a)[0]))

struct symbol {
    char *label;
    int addr;
};

// One mnemonic of a C-instruction field and its (unshifted) bit pattern.
struct mnemonic {
    const char *name;
    uint16_t bits;
};

// Growable list of heap-allocated instruction strings (owned by the list).
struct line_list {
    char **items;
    size_t count;
};

// Growable list of user-defined symbols; every label is heap-allocated and
// owned by the list.
struct symbol_list {
    struct symbol *items;
    size_t count;
};

// Symbols that exist before any label or variable is declared.
static const struct symbol PREDEFINED_SYMBOLS[] = {
    {"SP", 0}, {"LCL", 1}, {"ARG", 2}, {"THIS", 3}, {"THAT", 4},
    {"R0", 0}, {"R1", 1}, {"R2", 2}, {"R3", 3},
    {"R4", 4}, {"R5", 5}, {"R6", 6}, {"R7", 7},
    {"R8", 8}, {"R9", 9}, {"R10", 10}, {"R11", 11},
    {"R12", 12}, {"R13", 13}, {"R14", 14}, {"R15", 15},
    {"SCREEN", 16384}, {"KBD", 24576},
};

// Destination field, placed at bits 5..3.
static const struct mnemonic DEST_TABLE[] = {
    {"M",   0x1}, {"D",  0x2}, {"MD", 0x3}, {"A", 0x4},
    {"AM",  0x5}, {"AD", 0x6}, {"AMD", 0x7},
};

// Jump field, placed at bits 2..0.
static const struct mnemonic JUMP_TABLE[] = {
    {"JGT", 0x1}, {"JEQ", 0x2}, {"JGE", 0x3}, {"JLT", 0x4},
    {"JNE", 0x5}, {"JLE", 0x6}, {"JMP", 0x7},
};

// Computation field (7 bits), placed at bits 12..6.
static const struct mnemonic COMP_TABLE[] = {
    {"0",   0x2A}, // 0101010
    {"1",   0x3F}, // 0111111
    {"-1",  0x3A}, // 0111010
    {"D",   0x0C}, // 0001100
    {"A",   0x30}, // 0110000
    {"M",   0x70}, // 1110000
    {"!D",  0x0D}, // 0001101
    {"!A",  0x31}, // 0110001
    {"!M",  0x71}, // 1110001
    {"-D",  0x0F}, // 0001111
    {"-A",  0x33}, // 0110011
    {"-M",  0x73}, // 1110011
    {"D+1", 0x1F}, // 0011111
    {"A+1", 0x37}, // 0110111
    {"M+1", 0x77}, // 1110111
    {"D-1", 0x0E}, // 0001110
    {"A-1", 0x32}, // 0110010
    {"M-1", 0x72}, // 1110010
    {"D+A", 0x02}, // 0000010
    {"A+D", 0x02},
    {"D+M", 0x42}, // 1000010
    {"M+D", 0x42},
    {"D-A", 0x13}, // 0010011
    {"D-M", 0x53}, // 1010011
    {"A-D", 0x07}, // 0000111
    {"M-D", 0x47}, // 1000111
    {"D&A", 0x00}, // 0000000
    {"A&D", 0x00},
    {"D&M", 0x40}, // 1000000
    {"M&D", 0x40},
    {"D|A", 0x15}, // 0010101
    {"A|D", 0x15},
    {"D|M", 0x55}, // 1010101
    {"M|D", 0x55},
};

// Write cnt 16-bit words to file, each as a line of sixteen '0'/'1' characters,
// most significant bit first.
void write_binary(FILE *file, uint16_t *binary, int cnt) {
    for (int i = 0; i < cnt; i++) {
        char bits[17];
        for (int b = 0; b < 16; b++) {
            bits[b] = (char)('0' + ((binary[i] >> (15 - b)) & 1));
        }
        bits[16] = '\0';
        fprintf(file, "%s\n", bits);
    }
}

// Return the bit pattern of the mnemonic text[0..text_len) in table, or 0 when
// the text matches no entry (unknown and empty fields assemble to zero bits).
static uint16_t lookup_bits(const struct mnemonic *table, size_t table_len,
                            const char *text, size_t text_len) {
    for (size_t i = 0; i < table_len; i++) {
        if (strlen(table[i].name) == text_len &&
            memcmp(table[i].name, text, text_len) == 0) {
            return table[i].bits;
        }
    }
    return 0;
}

// Assemble one NUL-terminated line of assembly into a 16-bit word.
// Labels and variables must already be replaced by decimal addresses.
//
// A-instruction ("@value"): returns the value in the low 15 bits. A value that
// is not a plain decimal number in 0..MAX_ADDR triggers a warning on stderr and
// is truncated to 15 bits so the result is still an A-instruction.
//
// C-instruction ("dest=comp;jump", dest and jump optional): returns 0xe000
// combined with the field bits. Unrecognised mnemonics contribute zero bits.
uint16_t assemble_inst(char *asm_line) {
    if (*asm_line == '@') {
        char *end = NULL;
        long value = strtol(asm_line + 1, &end, 10);
        if (end == asm_line + 1 || *end != '\0' || value < 0 || value > MAX_ADDR) {
            fprintf(stderr, "warning: invalid A-instruction \"%s\"\n", asm_line);
        }
        return (uint16_t)((unsigned long)value & MAX_ADDR);
    }

    unsigned inst = 0xe000; // the three most significant bits are always set

    // Everything before the first '=' is the destination.
    const char *comp = asm_line;
    const char *eq = strchr(asm_line, '=');
    if (eq != NULL) {
        unsigned dest = lookup_bits(DEST_TABLE, ARRAY_LEN(DEST_TABLE),
                                    asm_line, (size_t)(eq - asm_line));
        inst |= dest << 3;
        comp = eq + 1;
    }

    // Everything after the first ';' that follows is the jump condition.
    const char *semi = strchr(comp, ';');
    size_t comp_len = (semi != NULL) ? (size_t)(semi - comp) : strlen(comp);
    if (semi != NULL) {
        inst |= lookup_bits(JUMP_TABLE, ARRAY_LEN(JUMP_TABLE),
                            semi + 1, strlen(semi + 1));
    }

    unsigned comp_bits = lookup_bits(COMP_TABLE, ARRAY_LEN(COMP_TABLE), comp, comp_len);
    inst |= comp_bits << 6;
    return (uint16_t)inst;
}

// Return a heap-allocated, NUL-terminated copy of start[0..len), or NULL when
// out of memory. The caller owns the result.
static char *dup_range(const char *start, size_t len) {
    char *copy = malloc(len + 1);
    if (copy != NULL) {
        memcpy(copy, start, len);
        copy[len] = '\0';
    }
    return copy;
}

// Append line to list, growing it by INST_CHUNK_LEN entries when full.
// Returns 0 on success (the list then owns line) or -1 when out of memory.
static int line_list_push(struct line_list *list, char *line) {
    if (list->count % INST_CHUNK_LEN == 0) {
        char **grown = realloc(list->items,
                               (list->count + INST_CHUNK_LEN) * sizeof *grown);
        if (grown == NULL) {
            return -1;
        }
        list->items = grown;
    }
    list->items[list->count++] = line;
    return 0;
}

// Append (label, addr) to list, growing it by INST_CHUNK_LEN entries when full.
// Returns 0 on success (the list then owns label) or -1 when out of memory.
static int symbol_list_push(struct symbol_list *list, char *label, int addr) {
    if (list->count % INST_CHUNK_LEN == 0) {
        struct symbol *grown = realloc(list->items,
                                       (list->count + INST_CHUNK_LEN) * sizeof *grown);
        if (grown == NULL) {
            return -1;
        }
        list->items = grown;
    }
    list->items[list->count].label = label;
    list->items[list->count].addr = addr;
    list->count++;
    return 0;
}

// Look label up among the predefined symbols first, then among the user
// symbols in insertion order. Returns the first match or NULL.
static const struct symbol *find_symbol(const struct symbol_list *user, const char *label) {
    for (size_t i = 0; i < ARRAY_LEN(PREDEFINED_SYMBOLS); i++) {
        if (strcmp(label, PREDEFINED_SYMBOLS[i].label) == 0) {
            return &PREDEFINED_SYMBOLS[i];
        }
    }
    for (size_t i = 0; i < user->count; i++) {
        if (strcmp(label, user->items[i].label) == 0) {
            return &user->items[i];
        }
    }
    return NULL;
}

// True when text consists only of the characters '0'..'9' (also for "").
static bool is_decimal(const char *text) {
    for (const char *c = text; *c != '\0'; c++) {
        if (*c < '0' || *c > '9') {
            return false;
        }
    }
    return true;
}

// Read the whole file at path into a heap buffer. On success returns the
// buffer (owned by the caller) and stores the number of bytes read in
// *size_out; on failure prints a message to stderr and returns NULL.
static char *read_file(const char *path, size_t *size_out) {
    // Binary mode keeps the ftell() size and the fread() count consistent;
    // carriage returns are ignored by the parser anyway.
    FILE *input_file = fopen(path, "rb");
    if (input_file == NULL) {
        perror(path);
        return NULL;
    }

    char *content = NULL;
    long file_size = -1;
    if (fseek(input_file, 0, SEEK_END) == 0) {
        file_size = ftell(input_file);
    }
    if (file_size >= 0 && fseek(input_file, 0, SEEK_SET) == 0) {
        content = malloc((size_t)file_size + 1);
    }
    if (content != NULL) {
        *size_out = fread(content, 1, (size_t)file_size, input_file);
        if (ferror(input_file)) {
            free(content);
            content = NULL;
        }
    }
    if (content == NULL) {
        fprintf(stderr, "error: could not read %s\n", path);
    }
    fclose(input_file);
    return content;
}

// Record one stripped, non-empty source line of length len: "(NAME)" defines
// label NAME at the address of the next instruction, anything else is kept as
// an instruction. Returns 0 on success, -1 (after printing a message) on error.
static int store_line(const char *line, size_t len,
                      struct line_list *lines, struct symbol_list *symbols) {
    if (line[0] == '(' && line[len - 1] == ')') {
        char *label = dup_range(line + 1, len - 2);
        if (label == NULL || symbol_list_push(symbols, label, (int)lines->count) != 0) {
            free(label);
            fputs("error: out of memory\n", stderr);
            return -1;
        }
        return 0;
    }

    if (lines->count == MAX_INST_LEN) {
        fprintf(stderr, "error: program exceeds %d instructions\n", MAX_INST_LEN);
        return -1;
    }
    char *copy = dup_range(line, len);
    if (copy == NULL || line_list_push(lines, copy) != 0) {
        free(copy);
        fputs("error: out of memory\n", stderr);
        return -1;
    }
    return 0;
}

// First pass: strip whitespace, carriage returns and comments from text, then
// collect instructions into lines and "(LABEL)" definitions into symbols.
// Any '/' starts a comment that runs to the end of the line.
// Returns 0 on success, -1 (after printing a message) on error.
static int collect_lines(const char *text, size_t size,
                         struct line_list *lines, struct symbol_list *symbols) {
    char line[MAX_ASM_LINE_LEN + 1];
    size_t len = 0;
    size_t line_no = 1;

    // Index `size` acts as a virtual newline so that a final line without a
    // trailing '\n' is still assembled.
    for (size_t i = 0; i <= size; i++) {
        char c = (i < size) ? text[i] : '\n';
        switch (c) {
        case '\n':
            if (len > 0) {
                line[len] = '\0';
                if (store_line(line, len, lines, symbols) != 0) {
                    return -1;
                }
                len = 0;
            }
            line_no++;
            break;
        case '/':
            // Skip to the last character before the newline.
            while (i + 1 < size && text[i + 1] != '\n') {
                i++;
            }
            break;
        case ' ':
        case '\t':
        case '\r':
            break;
        default:
            if (len == MAX_ASM_LINE_LEN) {
                fprintf(stderr, "error: line %zu is longer than %d significant characters\n",
                        line_no, MAX_ASM_LINE_LEN);
                return -1;
            }
            line[len++] = c;
            break;
        }
    }
    return 0;
}

// Second pass: rewrite every "@symbol" line as "@<decimal address>". Symbols
// not yet known become variables with consecutive addresses starting at
// FIRST_VARIABLE_ADDR. Returns 0 on success, -1 when out of memory.
static int resolve_symbols(struct line_list *lines, struct symbol_list *symbols) {
    int next_var = FIRST_VARIABLE_ADDR;

    for (size_t i = 0; i < lines->count; i++) {
        const char *name = lines->items[i] + 1; // text after the leading '@'
        if (lines->items[i][0] != '@' || is_decimal(name)) {
            continue; // C-instruction, or address is already a decimal constant
        }

        int addr;
        const struct symbol *known = find_symbol(symbols, name);
        if (known != NULL) {
            addr = known->addr;
        } else {
            char *label = dup_range(name, strlen(name));
            if (label == NULL || symbol_list_push(symbols, label, next_var) != 0) {
                free(label);
                fputs("error: out of memory\n", stderr);
                return -1;
            }
            addr = next_var++;
        }

        char text[16];
        int n = snprintf(text, sizeof text, "@%d", addr);
        char *resolved = (n > 0 && (size_t)n < sizeof text) ? dup_range(text, (size_t)n) : NULL;
        if (resolved == NULL) {
            fputs("error: out of memory\n", stderr);
            return -1;
        }
        free(lines->items[i]);
        lines->items[i] = resolved;
    }
    return 0;
}

// Build the output file name: a trailing ".asm" is replaced by ".hack",
// otherwise ".hack" is appended. Returns a heap string owned by the caller,
// or NULL when out of memory.
static char *make_output_name(const char *input_fn) {
    static const char out_ext[] = ".hack";
    size_t stem_len = strlen(input_fn);
    if (stem_len >= 4 && strcmp(input_fn + stem_len - 4, ".asm") == 0) {
        stem_len -= 4;
    }
    char *output_fn = malloc(stem_len + sizeof out_ext);
    if (output_fn != NULL) {
        memcpy(output_fn, input_fn, stem_len);
        memcpy(output_fn + stem_len, out_ext, sizeof out_ext); // includes the NUL
    }
    return output_fn;
}

// Write inst_cnt words to output_fn. Returns 0 on success, -1 (after printing
// a message) when the file cannot be created or written.
static int write_output(const char *output_fn, uint16_t *binary, size_t inst_cnt) {
    FILE *output_file = fopen(output_fn, "w");
    if (output_file == NULL) {
        perror(output_fn);
        return -1;
    }
    write_binary(output_file, binary, (int)inst_cnt);
    bool failed = ferror(output_file) != 0;
    if (fclose(output_file) != 0) {
        failed = true;
    }
    if (failed) {
        fprintf(stderr, "error: could not write %s\n", output_fn);
        return -1;
    }
    return 0;
}

// Assemble input_fn and write the result next to it. On success stores the
// number of instructions in *inst_cnt and returns 0; on failure prints a
// message to stderr and returns -1.
static int assemble_file(const char *input_fn, size_t *inst_cnt) {
    int rc = -1;
    struct line_list lines = {NULL, 0};
    struct symbol_list symbols = {NULL, 0};
    uint16_t *binary = NULL;
    char *output_fn = NULL;
    size_t file_size = 0;

    char *file_content = read_file(input_fn, &file_size);
    if (file_content == NULL) {
        goto cleanup;
    }
    if (collect_lines(file_content, file_size, &lines, &symbols) != 0) {
        goto cleanup;
    }
    if (resolve_symbols(&lines, &symbols) != 0) {
        goto cleanup;
    }

    // Allocate at least one word so an empty program does not yield malloc(0).
    binary = malloc((lines.count > 0 ? lines.count : 1) * sizeof *binary);
    output_fn = make_output_name(input_fn);
    if (binary == NULL || output_fn == NULL) {
        fputs("error: out of memory\n", stderr);
        goto cleanup;
    }
    for (size_t i = 0; i < lines.count; i++) {
        binary[i] = assemble_inst(lines.items[i]);
    }

    if (write_output(output_fn, binary, lines.count) != 0) {
        goto cleanup;
    }
    printf("Binary written to %s\n", output_fn);
    *inst_cnt = lines.count;
    rc = 0;

cleanup:
    for (size_t i = 0; i < lines.count; i++) {
        free(lines.items[i]);
    }
    free(lines.items);
    for (size_t i = 0; i < symbols.count; i++) {
        free(symbols.items[i].label);
    }
    free(symbols.items);
    free(binary);
    free(output_fn);
    free(file_content);
    return rc;
}

// Assemble the file named input_fn into "<stem>.hack" and return the number of
// instructions written. Returns 0 when assembly fails (a message is printed to
// stderr) as well as for an input without instructions.
size_t assembler(char *input_fn) {
    size_t inst_cnt = 0;
    if (assemble_file(input_fn, &inst_cnt) != 0) {
        return 0;
    }
    return inst_cnt;
}

// Usage: hack-as FILE.asm
// Exits with EXIT_FAILURE when no input file is given or assembly fails.
int main(int argc, char *argv[]) {
    if (argc < 2) {
        fprintf(stderr, "usage: %s FILE.asm\n", argc > 0 ? argv[0] : "hack-as");
        return EXIT_FAILURE;
    }
    size_t inst_cnt = 0;
    return assemble_file(argv[1], &inst_cnt) == 0 ? EXIT_SUCCESS : EXIT_FAILURE;
}