summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Frederick Yin <fkfd@fkfd.me>, 2022-08-18 19:46:59 +0800
committer: Frederick Yin <fkfd@fkfd.me>, 2022-08-18 19:46:59 +0800
commit: 444007cd01665a11b584c410be8de1cdd25c51b9 (patch)
tree: c0792bab4d8b4bb79a1834f92b222f8f560bad29
parent: 055d84eb2943fed2021f7b35da2706c4d6db00ef (diff)
Add hack-as-min.c
-rw-r--r-- projects/06/hack-as/hack-as.min.c | 268
1 file changed, 268 insertions, 0 deletions
diff --git a/projects/06/hack-as/hack-as.min.c b/projects/06/hack-as/hack-as.min.c
new file mode 100644
index 0000000..5d7252f
--- /dev/null
+++ b/projects/06/hack-as/hack-as.min.c
@@ -0,0 +1,268 @@
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include <string.h>
+
// Size limits shared by the assembler, kept together as integer constants.
enum {
    // Line buffers are allocated with this many characters plus a terminator.
    MAX_ASM_LINE_LEN = 64,
    // The array of instruction-line pointers grows by this many slots at a time.
    INST_CHUNK_LEN = 64,
    // Number of entries reserved for the symbol table.
    MAX_INST_LEN = 32768,
    // Not referenced by the code visible in this file.
    MAX_ADDR = 32767
};
+
// One symbol-table entry: a name paired with the address that replaces it.
// Field order matters: entries are built with positional initializers.
struct symbol {
    char *label; // NUL-terminated symbol name
    int addr;    // numeric address substituted for the name
};
+
// Write `cnt` 16-bit words from `binary` to `file`, one per line, each as
// sixteen '0'/'1' characters with the most significant bit first.
//
// Does nothing when `file` or `binary` is NULL, so a caller that failed to
// open its output stream does not crash here.
void write_binary(FILE *file, const uint16_t *binary, int cnt) {
    if (file == NULL || binary == NULL) {
        return;
    }
    for (int i = 0; i < cnt; i++) {
        const uint16_t word = binary[i];
        char bits[18];
        for (int b = 0; b < 16; b++) {
            // bit 15 goes to column 0, bit 0 to column 15
            bits[b] = (char)('0' + ((word >> (15 - b)) & 1));
        }
        bits[16] = '\n';
        bits[17] = '\0';
        fputs(bits, file);
    }
}
+
// A mnemonic and the (unshifted) bit pattern it contributes to a C-instruction.
struct hack_mnemonic {
    const char *text;
    uint16_t bits;
};

// Return the bits of the entry in `table` whose text is exactly the `len`
// characters starting at `span`, or 0 when no entry matches.
static uint16_t lookup_mnemonic(const struct hack_mnemonic *table, size_t count,
                                const char *span, size_t len) {
    for (size_t i = 0; i < count; i++) {
        if (strlen(table[i].text) == len && memcmp(table[i].text, span, len) == 0) {
            return table[i].bits;
        }
    }
    return 0;
}

// Assemble one line of assembly, terminated with \0, into a 16-bit word.
// Labels and variables must be replaced with numeric addresses beforehand.
//
// A line starting with '@' is an A-instruction: the decimal number after the
// '@' is returned, masked to 15 bits so the top bit is always 0.
// Any other line is treated as a C-instruction of the form [dest=]comp[;jump].
// The three top bits are set and the bits of each recognised part are OR-ed
// in; an unrecognised part contributes no bits.
uint16_t assemble_inst(const char *asm_line) {
    static const struct hack_mnemonic dest_table[] = {
        {"M", 0x1}, {"D", 0x2}, {"MD", 0x3}, {"A", 0x4},
        {"AM", 0x5}, {"AD", 0x6}, {"AMD", 0x7},
    };
    static const struct hack_mnemonic jump_table[] = {
        {"JGT", 0x1}, {"JEQ", 0x2}, {"JGE", 0x3}, {"JLT", 0x4},
        {"JNE", 0x5}, {"JLE", 0x6}, {"JMP", 0x7},
    };
    static const struct hack_mnemonic comp_table[] = {
        {"0", 0x2A}, {"1", 0x3F}, {"-1", 0x3A},
        {"D", 0x0C}, {"A", 0x30}, {"M", 0x70},
        {"!D", 0x0D}, {"!A", 0x31}, {"!M", 0x71},
        {"-D", 0x0F}, {"-A", 0x33}, {"-M", 0x73},
        {"D+1", 0x1F}, {"A+1", 0x37}, {"M+1", 0x77},
        {"D-1", 0x0E}, {"A-1", 0x32}, {"M-1", 0x72},
        {"D+A", 0x02}, {"A+D", 0x02}, {"D+M", 0x42}, {"M+D", 0x42},
        {"D-A", 0x13}, {"D-M", 0x53}, {"A-D", 0x07}, {"M-D", 0x47},
        {"D&A", 0x00}, {"A&D", 0x00}, {"D&M", 0x40}, {"M&D", 0x40},
        {"D|A", 0x15}, {"A|D", 0x15}, {"D|M", 0x55}, {"M|D", 0x55},
    };

    if (*asm_line == '@') {
        // A instruction; strtoul has defined behaviour on overflow, and the
        // mask keeps an oversized constant from setting the top bit
        unsigned long value = strtoul(asm_line + 1, NULL, 10);
        return (uint16_t)(value & 0x7FFFu);
    }

    // C instruction
    unsigned inst = 0xE000u; // set 3 MSBs to 1

    // destination: everything before the first '=' (empty if there is none)
    const char *eq = strchr(asm_line, '=');
    const char *comp_start = asm_line;
    if (eq != NULL) {
        inst |= (unsigned)lookup_mnemonic(dest_table,
                                          sizeof dest_table / sizeof dest_table[0],
                                          asm_line, (size_t)(eq - asm_line)) << 3;
        comp_start = eq + 1;
    }

    // jump: everything after the first ';' that follows the destination
    const char *semi = strchr(comp_start, ';');
    if (semi != NULL) {
        inst |= lookup_mnemonic(jump_table,
                                sizeof jump_table / sizeof jump_table[0],
                                semi + 1, strlen(semi + 1));
    } else {
        semi = comp_start + strlen(comp_start); // computation runs to end of line
    }

    // computation: whatever lies between the '=' (or line start) and the ';'
    inst |= (unsigned)lookup_mnemonic(comp_table,
                                      sizeof comp_table / sizeof comp_table[0],
                                      comp_start, (size_t)(semi - comp_start)) << 6;

    return (uint16_t)inst;
}
+
+size_t assembler(char *input_fn) {
+ // open input file
+ FILE *input_file = fopen(input_fn, "r");
+ // find size of input file
+ fseek(input_file, 0, SEEK_END);
+ size_t file_size = ftell(input_file);
+ fseek(input_file, 0, SEEK_SET);
+ // read input file
+ char *file_content = malloc(file_size);
+ fread(file_content, file_size, 1, input_file);
+ fclose(input_file);
+
+ // strip away comments, labels, blank lines and whitespace from file_content
+ // resulting in lines of what looks like instructions in assembly but is not necessarily correct
+ // labels in parentheses are assigned corresponding addresses in ROM, then collected in `symbols`
+ // the strings are scattered in the heap but asm_lines collects pointers to them
+ char **asm_lines = calloc(INST_CHUNK_LEN, sizeof(char*));
+ int asm_line_cnt = 0; // no. of lines (metaphorically) written into asm_lines
+ char *asm_line = malloc(MAX_ASM_LINE_LEN + 1); // one line of (probably) assembly
+ int asm_char_cnt = 0; // no. of chars written into asm_line
+ struct symbol symbols[MAX_INST_LEN] = {
+ {"SP", 0}, {"LCL", 1}, {"ARG", 2}, {"THIS", 3}, {"THAT", 4},
+ {"R0", 0}, {"R1", 1}, {"R2", 2}, {"R3", 3},
+ {"R4", 4}, {"R5", 5}, {"R6", 6}, {"R7", 7},
+ {"R8", 8}, {"R9", 9}, {"R10", 10}, {"R11", 11},
+ {"R12", 12}, {"R13", 13}, {"R14", 14}, {"R15", 15},
+ {"SCREEN", 16384}, {"KBD", 24576},
+ };
+ const int predef_symbol_cnt = 23; // no. of predefined symbols
+ int user_symbol_cnt = 0;
+ for (size_t i = 0; i < file_size; i++) {
+ switch (file_content[i]) {
+ case '\n':
+ // end of line; try to figure out what's in asm_line
+ if (asm_char_cnt == 0) continue; // skip blank line or comment line
+ *(asm_line + asm_char_cnt) = '\0';
+ if (*asm_line == '(' && *(asm_line + asm_char_cnt - 1) == ')') {
+ // this line may be a label; extract label from between the parentheses
+ char *label = malloc(asm_char_cnt - 1);
+ strncpy(label, asm_line + 1, asm_char_cnt - 2);
+ free(asm_line);
+ *(label + asm_char_cnt - 2) = '\0';
+ symbols[predef_symbol_cnt + user_symbol_cnt] = (struct symbol) {label, asm_line_cnt};
+ user_symbol_cnt++;
+ } else {
+ // this line may be an instruction
+ // if we used up a chunk, realloc asm_lines
+ if (asm_line_cnt > 0 && asm_line_cnt % INST_CHUNK_LEN == 0) {
+ asm_lines = realloc(asm_lines, (asm_line_cnt + INST_CHUNK_LEN) * sizeof(char*));
+ }
+ asm_lines[asm_line_cnt] = asm_line;
+ asm_line_cnt++;
+ }
+ // allocate memory for next line
+ asm_char_cnt = 0;
+ asm_line = malloc(MAX_ASM_LINE_LEN + 1);
+ break;
+ case '/':
+ // we encountered a comment
+ // skip to last char of line
+ while (i + 1 < file_size && file_content[i + 1] != '\n') i++;
+ break;
+ case ' ':
+ case '\t':
+ case '\r':
+ break; // ignore whitespace and CR
+ default:
+ *(asm_line + asm_char_cnt) = file_content[i];
+ asm_char_cnt++;
+ }
+ }
+ free(asm_line);
+ free(file_content);
+
+ // find and assign address to variables on the fly
+ int addr = 16; // variable addresses start at 16
+ for (int i = 0; i < asm_line_cnt; i++) {
+ if (asm_lines[i] == NULL) break; // no more instructions
+ if (*(asm_lines[i]) != '@') continue; // not an A-instruction
+ char *addr_str = malloc(strlen(asm_lines[i]));
+ strcpy(addr_str, asm_lines[i] + 1); // whatever comes after the @
+ bool is_symbol = false;
+ for (char *c = addr_str; *c != '\0'; c++) {
+ // search for non-numeric chars in addr_str
+ if (*c < '0' || *c > '9') is_symbol = true;
+ }
+ if (!is_symbol) {
+ free(addr_str);
+ continue; // address is decimal constant
+ }
+ // search for symbol in list
+ bool found = false;
+ for (int s = 0; s < predef_symbol_cnt + user_symbol_cnt; s++) {
+ if (strcmp(addr_str, symbols[s].label) == 0) {
+ // overwrite asm line with decimal constant
+ sprintf(asm_lines[i], "@%d", symbols[s].addr);
+ found = true;
+ free(addr_str);
+ break;
+ }
+ }
+ if (!found) {
+ // add symbol to list
+ symbols[predef_symbol_cnt + user_symbol_cnt] = (struct symbol) {addr_str, addr};
+ sprintf(asm_lines[i], "@%d", addr);
+ user_symbol_cnt++;
+ addr++;
+ }
+ }
+
+ // start assembling
+ uint16_t *binary = calloc(32768, 2);
+ size_t inst_cnt = 0; // current no. of instructions in binary
+ for (char **line = asm_lines; *line != NULL; line++) {
+ *(binary + inst_cnt) = assemble_inst(*line);
+ inst_cnt++;
+ }
+
+ for (char **line = asm_lines; *line != NULL; line++) free(*line);
+ free(asm_lines);
+ for (int s = predef_symbol_cnt; s < predef_symbol_cnt + user_symbol_cnt; s++) {
+ free(symbols[s].label);
+ }
+
+ // write binary
+ // output_fn = input_fn[:-4] + ".hack" if input_fn.endswith(".asm") else input_fn + ".hack"
+ int input_fn_len = strlen(input_fn);
+ char *output_fn = malloc(input_fn_len + 6);
+ strcpy(output_fn, input_fn);
+ if (input_fn_len >= 4 && strcmp(input_fn + input_fn_len - 4, ".asm") == 0) {
+ sprintf(output_fn + input_fn_len - 4, ".hack");
+ } else {
+ sprintf(output_fn + input_fn_len, ".hack");
+ }
+
+ FILE *output_file = fopen(output_fn, "w");
+ write_binary(output_file, binary, inst_cnt);
+ fclose(output_file);
+ free(binary);
+ printf("Binary written to %s\n", output_fn);
+ free(output_fn);
+
+ return inst_cnt;
+}
+
// Entry point: assemble the file named by the first command-line argument.
// Prints a usage line and exits with failure when no argument is given,
// instead of handing a NULL file name to assembler().
int main(int argc, char *argv[]) {
    if (argc < 2) {
        fprintf(stderr, "Usage: %s <file.asm>\n", argc > 0 ? argv[0] : "hack-as");
        return EXIT_FAILURE;
    }
    assembler(argv[1]);
    return 0;
}