summaryrefslogtreecommitdiff
path: root/projects/06/hack-as
diff options
context:
space:
mode:
author: Frederick Yin <fkfd@fkfd.me> 2022-08-21 22:08:53 +0800
committer: Frederick Yin <fkfd@fkfd.me> 2022-08-21 22:08:53 +0800
commit: 6b9d8f151cc04a0590a7fed80ae5b8eb3928952a (patch)
tree: d1aa6304c866ee761fbb7109ef0c2494bec45636 /projects/06/hack-as
parent: dcb924797d68f8de0755471b679d9256516360a9 (diff)
Move hack-as up one directory
Diffstat (limited to 'projects/06/hack-as')
-rw-r--r-- projects/06/hack-as/hack-as.c 384
-rw-r--r-- projects/06/hack-as/hack-as.min.c 268
-rw-r--r-- projects/06/hack-as/hack-as.py 269
-rw-r--r-- projects/06/hack-as/test/compare.hack 25
-rw-r--r-- projects/06/hack-as/test/conventional.asm 32
-rw-r--r-- projects/06/hack-as/test/valid.asm 29
6 files changed, 0 insertions, 1007 deletions
diff --git a/projects/06/hack-as/hack-as.c b/projects/06/hack-as/hack-as.c
deleted file mode 100644
index b152fb8..0000000
--- a/projects/06/hack-as/hack-as.c
+++ /dev/null
@@ -1,384 +0,0 @@
-#include <stdlib.h>
-#include <stdio.h>
-#include <stdint.h>
-#include <stdbool.h>
-#include <string.h>
-
/* Size limits used by the assembler. */
#define MAX_ASM_LINE_LEN 64 /* longest accepted source line once whitespace and comments are stripped */
#define INST_CHUNK_LEN 64   /* growth step, in entries, of the array of instruction lines */
#define MAX_INST_LEN 32768  /* largest number of instructions accepted in one program */
#define MAX_ADDR 32767      /* largest constant an A-instruction may carry */

/* Process exit codes, one per class of failure. */
enum {
    EXIT_CODE_FILE_ERROR = 1,    /* input or output file could not be opened */
    EXIT_CODE_ILLEGAL_CHAR = 2,  /* a character that may not appear in the source */
    EXIT_CODE_SIZE_EXCEEDED = 3, /* line too long or too many instructions */
    EXIT_CODE_SYNTAX_ERROR = 4,  /* malformed instruction */
    EXIT_CODE_ADDR_ERROR = 5     /* A-instruction address out of range */
};
-
/* One entry of the symbol table: a name bound to a numeric address. */
struct symbol {
    char *label; /* NUL-terminated symbol name */
    int addr;    /* address substituted wherever the name is used */
};
-
/*
 * Return the first character that makes `symbol` an invalid symbol name,
 * or (char) -1 when every character is acceptable.
 *
 * A symbol may contain ASCII letters, digits, '_', '.', '$' and ':', and may
 * not begin with a digit.
 *
 * Callers must compare the result against (char) -1, not the int -1: on
 * platforms where plain char is unsigned the sentinel has the value 255.
 */
char find_illegal_symbol_char(const char *symbol) {
    // symbol should not begin with number
    // nand2tetris implementation allows it, but the standard says otherwise
    if (*symbol >= '0' && *symbol <= '9') return *symbol;

    for (const char *c = symbol; *c != '\0'; c++) {
        int is_letter = (*c >= 'A' && *c <= 'Z') || (*c >= 'a' && *c <= 'z');
        int is_digit = (*c >= '0' && *c <= '9');
        int is_punct = (*c == '_' || *c == '.' || *c == '$' || *c == ':');
        if (is_letter || is_digit || is_punct) continue;

        // The byte 0xFF has the same value as the "nothing illegal" sentinel;
        // report it as '?' so that it cannot be mistaken for success.
        return (*c == (char) -1) ? '?' : *c;
    }
    return -1;
}
-
-void print_symbols(struct symbol *symbols, int cnt) {
- printf("====== SYMBOLS =====\nlabel\taddr\n");
- for (int i = 0; i < cnt; i++) {
- printf("%s\t%d\n", symbols[i].label, symbols[i].addr);
- }
-}
-
/*
 * Print, for each of the first `cnt` instructions, its index, its 16-bit
 * encoding as a string of '0'/'1' (most significant bit first) and the
 * matching line from `asm_lines`.
 */
void print_binary_and_asm(uint16_t *binary, char **asm_lines, int cnt) {
    fputs("\n====== RESULTS =====\naddr\tbinary \tinst\n", stdout);
    for (int row = 0; row < cnt; row++) {
        char bits[17];
        unsigned word = binary[row];
        // fill from the least significant bit backwards
        for (int pos = 15; pos >= 0; pos--) {
            bits[pos] = (word & 1u) ? '1' : '0';
            word >>= 1;
        }
        bits[16] = '\0';
        printf("%d\t%s\t%s\n", row, bits, asm_lines[row]);
    }
}
-
/*
 * Write the first `cnt` words of `binary` to `file`, one per line, each as
 * sixteen '0'/'1' characters with the most significant bit first.
 */
void write_binary(FILE *file, uint16_t *binary, int cnt) {
    char row[18];
    row[16] = '\n';
    row[17] = '\0';
    for (int n = 0; n < cnt; n++) {
        unsigned mask = 0x8000u;
        for (int col = 0; col < 16; col++) {
            row[col] = (binary[n] & mask) ? '1' : '0';
            mask >>= 1;
        }
        fputs(row, file);
    }
}
-
-uint16_t assemble_inst(char *asm_line) {
- // assemble one line of assembly, terminated with \0
- // labels and variables must be replaced with corresponding addresses beforehand
- uint16_t inst = 0;
- if (*asm_line == '@') {
- // A instruction
- char *addr_str = asm_line + 1;
- int addr = atoi(addr_str);
- if (addr < 0 || addr > MAX_ADDR) {
- fprintf(stderr, "Address out of range: %d\n", addr);
- exit(EXIT_CODE_ADDR_ERROR);
- }
- inst = (uint16_t) addr;
- } else {
- // C instruction
- inst = 0xe000; // set 3 MSBs to 1
- char *eq = asm_line;
- for (char *c = asm_line; *c != '\0'; c++) {
- // find first equal sign (eq == asm_line if not found)
- if (*c == '=') {
- eq = c;
- break;
- }
- }
-
- // slice out destination and copy to dest
- int dest_len = eq - asm_line;
- char *dest = malloc(dest_len + 1);
- strncpy(dest, asm_line, dest_len);
- dest[dest_len] = '\0';
- if (dest_len == 0) {} // ignore
- else if (strcmp(dest, "M") == 0) inst |= 0b001 << 3;
- else if (strcmp(dest, "D") == 0) inst |= 0b010 << 3;
- else if (strcmp(dest, "MD") == 0) inst |= 0b011 << 3;
- else if (strcmp(dest, "A") == 0) inst |= 0b100 << 3;
- else if (strcmp(dest, "AM") == 0) inst |= 0b101 << 3;
- else if (strcmp(dest, "AD") == 0) inst |= 0b110 << 3;
- else if (strcmp(dest, "AMD") == 0) inst |= 0b111 << 3;
- else {
- fprintf(stderr, "Invalid destination: %s\n", dest);
- exit(EXIT_CODE_SYNTAX_ERROR);
- }
- free(dest);
-
- char *semi = eq;
- for (; *semi != '\0'; semi++) {
- // find jump instruction after semicolon (;) (*semi == '\0' if not found)
- if (*semi == ';') break;
- }
- // ignore if there's no semicolon, or there's nothing after it
- if (*semi == '\0' || *(semi + 1) == '\0') {}
- else if (strcmp(semi + 1, "JGT") == 0) inst |= 0b001;
- else if (strcmp(semi + 1, "JEQ") == 0) inst |= 0b010;
- else if (strcmp(semi + 1, "JGE") == 0) inst |= 0b011;
- else if (strcmp(semi + 1, "JLT") == 0) inst |= 0b100;
- else if (strcmp(semi + 1, "JNE") == 0) inst |= 0b101;
- else if (strcmp(semi + 1, "JLE") == 0) inst |= 0b110;
- else if (strcmp(semi + 1, "JMP") == 0) inst |= 0b111;
- else {
- fprintf(stderr, "Invalid jump instruction: %s\n", semi + 1);
- exit(EXIT_CODE_SYNTAX_ERROR);
- }
-
- // slice out computation and copy to comp
- int comp_len = (*eq == '=') ? (semi - eq - 1) : (semi - eq);
- char *comp = malloc(comp_len + 1);
- strncpy(comp, (*eq == '=') ? (eq + 1) : eq, comp_len);
- comp[comp_len] = '\0';
- if (strcmp(comp, "0") == 0) inst |= 0b0101010 << 6;
- else if (strcmp(comp, "1") == 0) inst |= 0b0111111 << 6;
- else if (strcmp(comp, "-1") == 0) inst |= 0b0111010 << 6;
- else if (strcmp(comp, "D") == 0) inst |= 0b0001100 << 6;
- else if (strcmp(comp, "A") == 0) inst |= 0b0110000 << 6;
- else if (strcmp(comp, "M") == 0) inst |= 0b1110000 << 6;
- else if (strcmp(comp, "!D") == 0) inst |= 0b0001101 << 6;
- else if (strcmp(comp, "!A") == 0) inst |= 0b0110001 << 6;
- else if (strcmp(comp, "!M") == 0) inst |= 0b1110001 << 6;
- else if (strcmp(comp, "-D") == 0) inst |= 0b0001111 << 6;
- else if (strcmp(comp, "-A") == 0) inst |= 0b0110011 << 6;
- else if (strcmp(comp, "-M") == 0) inst |= 0b1110011 << 6;
- else if (strcmp(comp, "D+1") == 0) inst |= 0b0011111 << 6;
- else if (strcmp(comp, "A+1") == 0) inst |= 0b0110111 << 6;
- else if (strcmp(comp, "M+1") == 0) inst |= 0b1110111 << 6;
- else if (strcmp(comp, "D-1") == 0) inst |= 0b0001110 << 6;
- else if (strcmp(comp, "A-1") == 0) inst |= 0b0110010 << 6;
- else if (strcmp(comp, "M-1") == 0) inst |= 0b1110010 << 6;
- else if (strcmp(comp, "D+A") == 0
- || strcmp(comp, "A+D") == 0) inst |= 0b0000010 << 6;
- else if (strcmp(comp, "D+M") == 0
- || strcmp(comp, "M+D") == 0) inst |= 0b1000010 << 6;
- else if (strcmp(comp, "D-A") == 0) inst |= 0b0010011 << 6;
- else if (strcmp(comp, "D-M") == 0) inst |= 0b1010011 << 6;
- else if (strcmp(comp, "A-D") == 0) inst |= 0b0000111 << 6;
- else if (strcmp(comp, "M-D") == 0) inst |= 0b1000111 << 6;
- else if (strcmp(comp, "D&A") == 0
- || strcmp(comp, "A&D") == 0) inst |= 0b0000000 << 6;
- else if (strcmp(comp, "D&M") == 0
- || strcmp(comp, "M&D") == 0) inst |= 0b1000000 << 6;
- else if (strcmp(comp, "D|A") == 0
- || strcmp(comp, "A|D") == 0) inst |= 0b0010101 << 6;
- else if (strcmp(comp, "D|M") == 0
- || strcmp(comp, "M|D") == 0) inst |= 0b1010101 << 6;
- else {
- fprintf(stderr, "Invalid computation: %s\n", comp);
- exit(EXIT_CODE_SYNTAX_ERROR);
- }
- free(comp);
- }
- return inst;
-}
-
-size_t assembler(char *input_fn, bool verbose) {
- // open input file
- FILE *input_file = fopen(input_fn, "r");
- if (input_file == NULL) {
- fprintf(stderr, "Cannot open input file: %s\n", input_fn);
- exit(EXIT_CODE_FILE_ERROR);
- }
- // find size of input file
- fseek(input_file, 0, SEEK_END);
- size_t file_size = ftell(input_file);
- fseek(input_file, 0, SEEK_SET);
- // read input file
- char *file_content = malloc(file_size);
- fread(file_content, file_size, 1, input_file);
- fclose(input_file);
-
- // strip away comments, labels, blank lines and whitespace from file_content
- // resulting in lines of what looks like instructions in assembly but is not necessarily correct
- // labels in parentheses are assigned corresponding addresses in ROM, then collected in `symbols`
- // the strings are scattered in the heap but asm_lines collects pointers to them
- char **asm_lines = calloc(INST_CHUNK_LEN, sizeof(char*));
- int asm_line_cnt = 0; // no. of lines (metaphorically) written into asm_lines
- char *asm_line = malloc(MAX_ASM_LINE_LEN + 1); // one line of (probably) assembly
- int asm_char_cnt = 0; // no. of chars written into asm_line
- struct symbol symbols[MAX_INST_LEN] = {
- {"SP", 0}, {"LCL", 1}, {"ARG", 2}, {"THIS", 3}, {"THAT", 4},
- {"R0", 0}, {"R1", 1}, {"R2", 2}, {"R3", 3},
- {"R4", 4}, {"R5", 5}, {"R6", 6}, {"R7", 7},
- {"R8", 8}, {"R9", 9}, {"R10", 10}, {"R11", 11},
- {"R12", 12}, {"R13", 13}, {"R14", 14}, {"R15", 15},
- {"SCREEN", 16384}, {"KBD", 24576},
- };
- const int predef_symbol_cnt = 23; // no. of predefined symbols
- int user_symbol_cnt = 0;
- for (size_t i = 0; i < file_size; i++) {
- switch (file_content[i]) {
- case '\n':
- // end of line; try to figure out what's in asm_line
- if (asm_char_cnt == 0) continue; // skip blank line or comment line
- *(asm_line + asm_char_cnt) = '\0';
- if (*asm_line == '(' && *(asm_line + asm_char_cnt - 1) == ')') {
- // this line may be a label; extract label from between the parentheses
- char *label = malloc(asm_char_cnt - 1);
- strncpy(label, asm_line + 1, asm_char_cnt - 2);
- free(asm_line);
- *(label + asm_char_cnt - 2) = '\0';
- char illegal_char = find_illegal_symbol_char(label);
- if (illegal_char != -1) {
- fprintf(stderr, "Illegal character: %c\n", illegal_char);
- exit(EXIT_CODE_ILLEGAL_CHAR);
- }
- // TODO: error on repeated label
- symbols[predef_symbol_cnt + user_symbol_cnt] = (struct symbol) {label, asm_line_cnt};
- user_symbol_cnt++;
- } else {
- // this line may be an instruction
- // if we used up a chunk, realloc asm_lines
- if (asm_line_cnt > 0 && asm_line_cnt % INST_CHUNK_LEN == 0) {
- asm_lines = realloc(asm_lines, (asm_line_cnt + INST_CHUNK_LEN) * sizeof(char*));
- }
- asm_lines[asm_line_cnt] = asm_line;
- asm_line_cnt++;
- }
- // allocate memory for next line
- asm_char_cnt = 0;
- asm_line = malloc(MAX_ASM_LINE_LEN + 1);
- break;
- case '/':
- if (i + 1 < file_size && file_content[i + 1] == '/') {
- // we encountered a comment
- // skip to last char of line
- while (i + 1 < file_size && file_content[i + 1] != '\n') i++;
- } else {
- fprintf(stderr, "Illegal character: /\n");
- exit(EXIT_CODE_ILLEGAL_CHAR);
- }
- break;
- case ' ':
- case '\t':
- case '\r':
- break; // ignore whitespace and CR
- default:
- *(asm_line + asm_char_cnt) = file_content[i];
- asm_char_cnt++;
- if (asm_char_cnt > MAX_ASM_LINE_LEN) {
- fprintf(stderr, "Max assembly line length (%d) exceeded\n", MAX_ASM_LINE_LEN);
- exit(EXIT_CODE_SIZE_EXCEEDED);
- }
- }
- }
- free(asm_line);
- free(file_content);
-
- // find and assign address to variables on the fly
- int addr = 16; // variable addresses start at 16
- for (int i = 0; i < asm_line_cnt; i++) {
- if (asm_lines[i] == NULL) break; // no more instructions
- if (*(asm_lines[i]) != '@') continue; // not an A-instruction
- char *addr_str = malloc(strlen(asm_lines[i]));
- strcpy(addr_str, asm_lines[i] + 1); // whatever comes after the @
- if (strlen(addr_str) == 0) {
- fprintf(stderr, "Address cannot be empty\n");
- exit(EXIT_CODE_SYNTAX_ERROR);
- }
- bool is_symbol = false;
- for (char *c = addr_str; *c != '\0'; c++) {
- // search for non-numeric chars in addr_str
- if (*c < '0' || *c > '9') {
- is_symbol = true;
- }
- }
- if (!is_symbol) {
- free(addr_str);
- continue; // address is decimal constant
- }
- char illegal_char = find_illegal_symbol_char(addr_str);
- if (illegal_char != -1) {
- fprintf(stderr, "Illegal character: %c\n", illegal_char);
- exit(EXIT_CODE_ILLEGAL_CHAR);
- }
- // search for symbol in list
- bool found = false;
- for (int s = 0; s < predef_symbol_cnt + user_symbol_cnt; s++) {
- if (strcmp(addr_str, symbols[s].label) == 0) {
- // overwrite asm line with decimal constant
- sprintf(asm_lines[i], "@%d", symbols[s].addr);
- found = true;
- free(addr_str);
- break;
- }
- }
- if (!found) {
- // add symbol to list
- symbols[predef_symbol_cnt + user_symbol_cnt] = (struct symbol) {addr_str, addr};
- sprintf(asm_lines[i], "@%d", addr);
- user_symbol_cnt++;
- addr++;
- }
- }
-
- // start assembling
- uint16_t *binary = calloc(32768, 2);
- size_t inst_cnt = 0; // current no. of instructions in binary
- for (char **line = asm_lines; *line != NULL; line++) {
- *(binary + inst_cnt) = assemble_inst(*line);
- inst_cnt++;
- if (inst_cnt > MAX_INST_LEN) {
- fprintf(stderr, "Max number of instruction (%d) exceeded\n", MAX_INST_LEN);
- exit(EXIT_CODE_SIZE_EXCEEDED);
- }
- }
-
- if (verbose) {
- print_symbols(symbols + predef_symbol_cnt, user_symbol_cnt);
- print_binary_and_asm(binary, asm_lines, inst_cnt);
- printf("\n");
- }
-
- for (char **line = asm_lines; *line != NULL; line++) free(*line);
- free(asm_lines);
- for (int s = predef_symbol_cnt; s < predef_symbol_cnt + user_symbol_cnt; s++) {
- free(symbols[s].label);
- }
-
- // write binary
- // output_fn = input_fn[:-4] + ".hack" if input_fn.endswith(".asm") else input_fn + ".hack"
- int input_fn_len = strlen(input_fn);
- char *output_fn = malloc(input_fn_len + 6);
- strcpy(output_fn, input_fn);
- if (input_fn_len >= 4 && strcmp(input_fn + input_fn_len - 4, ".asm") == 0) {
- sprintf(output_fn + input_fn_len - 4, ".hack");
- } else {
- sprintf(output_fn + input_fn_len, ".hack");
- }
-
- FILE *output_file = fopen(output_fn, "w");
- if (output_file == NULL) {
- fprintf(stderr, "Cannot open output file: %s\n", output_fn);
- exit(EXIT_CODE_FILE_ERROR);
- }
- write_binary(output_file, binary, inst_cnt);
- fclose(output_file);
- free(binary);
- printf("Binary written to %s\n", output_fn);
- free(output_fn);
-
- return inst_cnt;
-}
-
-int main(int argc, char *argv[]) {
- char *input_fn = NULL;
- bool verbose = false;
- for (int i = 1; i < argc; i++) {
- if (strcmp(argv[i], "-h") == 0) {
- printf("Usage: %s <input.asm> [-v]\n-v -- verbose mode\n", argv[0]);
- exit(0);
- } else if (strcmp(argv[i], "-v") == 0) {
- verbose = true;
- } else {
- input_fn = argv[i];
- }
- }
-
- assembler(input_fn, verbose);
- return 0;
-}
diff --git a/projects/06/hack-as/hack-as.min.c b/projects/06/hack-as/hack-as.min.c
deleted file mode 100644
index 5d7252f..0000000
--- a/projects/06/hack-as/hack-as.min.c
+++ /dev/null
@@ -1,268 +0,0 @@
-#include <stdlib.h>
-#include <stdio.h>
-#include <stdint.h>
-#include <stdbool.h>
-#include <string.h>
-
/* Size constants of the minimal assembler. */
enum {
    MAX_ASM_LINE_LEN = 64, /* size, minus one, of the buffer that holds one stripped source line */
    INST_CHUNK_LEN = 64,   /* growth step, in entries, of the array of instruction lines */
    MAX_INST_LEN = 32768,  /* number of entries in the symbol array */
    MAX_ADDR = 32767       /* not referenced by the code of this file */
};
-
/* A named address: either a predefined symbol, a label or a variable. */
struct symbol {
    char *label; /* symbol name as a NUL-terminated string */
    int addr;    /* number that replaces "@label" in the source */
};
-
/*
 * Emit `cnt` words from `binary` to `file` as text: one line per word,
 * sixteen binary digits each, most significant bit first.
 */
void write_binary(FILE *file, uint16_t *binary, int cnt) {
    for (uint16_t *word = binary; cnt > 0; word++, cnt--) {
        char digits[17] = {0};
        for (int shift = 15; shift >= 0; shift--) {
            digits[15 - shift] = (char) ('0' + ((*word >> shift) & 1));
        }
        fprintf(file, "%s\n", digits);
    }
}
-
/* A mnemonic and the instruction bits it stands for. */
struct field_code {
    const char *text;
    uint16_t bits;
};

/* dest field: 3-bit code placed in bits 3..5 */
static const struct field_code dest_codes[] = {
    {"M", 1 << 3}, {"D", 2 << 3}, {"MD", 3 << 3}, {"A", 4 << 3},
    {"AM", 5 << 3}, {"AD", 6 << 3}, {"AMD", 7 << 3},
    {NULL, 0},
};

/* jump field: 3-bit code placed in bits 0..2 */
static const struct field_code jump_codes[] = {
    {"JGT", 1}, {"JEQ", 2}, {"JGE", 3}, {"JLT", 4},
    {"JNE", 5}, {"JLE", 6}, {"JMP", 7},
    {NULL, 0},
};

/* comp field: 7-bit code placed in bits 6..12 */
static const struct field_code comp_codes[] = {
    {"0", 0x2A << 6}, {"1", 0x3F << 6}, {"-1", 0x3A << 6},
    {"D", 0x0C << 6}, {"A", 0x30 << 6}, {"M", 0x70 << 6},
    {"!D", 0x0D << 6}, {"!A", 0x31 << 6}, {"!M", 0x71 << 6},
    {"-D", 0x0F << 6}, {"-A", 0x33 << 6}, {"-M", 0x73 << 6},
    {"D+1", 0x1F << 6}, {"A+1", 0x37 << 6}, {"M+1", 0x77 << 6},
    {"D-1", 0x0E << 6}, {"A-1", 0x32 << 6}, {"M-1", 0x72 << 6},
    {"D+A", 0x02 << 6}, {"A+D", 0x02 << 6},
    {"D+M", 0x42 << 6}, {"M+D", 0x42 << 6},
    {"D-A", 0x13 << 6}, {"D-M", 0x53 << 6},
    {"A-D", 0x07 << 6}, {"M-D", 0x47 << 6},
    {"D&A", 0x00 << 6}, {"A&D", 0x00 << 6},
    {"D&M", 0x40 << 6}, {"M&D", 0x40 << 6},
    {"D|A", 0x15 << 6}, {"A|D", 0x15 << 6},
    {"D|M", 0x55 << 6}, {"M|D", 0x55 << 6},
    {NULL, 0},
};

/*
 * Bits for the `len` characters at `text` according to the NULL-terminated
 * table `codes`; 0 when the text is not listed.
 */
static uint16_t field_bits(const struct field_code *codes, const char *text, size_t len) {
    for (; codes->text != NULL; codes++) {
        if (strlen(codes->text) == len && memcmp(codes->text, text, len) == 0) {
            return codes->bits;
        }
    }
    return 0;
}

/*
 * Assemble one line of assembly, terminated with \0, into a 16-bit word.
 * Labels and variables must be replaced with corresponding addresses beforehand.
 *
 * "@n" yields the decimal number n truncated to 16 bits. Anything else is
 * read as [dest=]comp[;jump]. This minimal version performs no validation:
 * a field that is not a known mnemonic simply contributes no bits.
 * The line is not modified and no memory is allocated.
 */
uint16_t assemble_inst(const char *asm_line) {
    if (*asm_line == '@') {
        // A instruction; strtol, unlike atoi, is well defined for any input
        return (uint16_t) strtol(asm_line + 1, NULL, 10);
    }

    // C instruction
    uint16_t inst = 0xe000; // set 3 MSBs to 1

    // dest is whatever precedes the first '=' (nothing if there is none)
    const char *eq = strchr(asm_line, '=');
    size_t dest_len = (eq != NULL) ? (size_t) (eq - asm_line) : 0;
    const char *comp = (eq != NULL) ? eq + 1 : asm_line;

    // jump is whatever follows the first ';' after that (nothing if there is none)
    const char *semi = strchr(comp, ';');
    size_t comp_len = (semi != NULL) ? (size_t) (semi - comp) : strlen(comp);
    const char *jump = (semi != NULL) ? semi + 1 : "";

    inst |= field_bits(dest_codes, asm_line, dest_len);
    inst |= field_bits(jump_codes, jump, strlen(jump));
    inst |= field_bits(comp_codes, comp, comp_len);
    return inst;
}
-
-size_t assembler(char *input_fn) {
- // open input file
- FILE *input_file = fopen(input_fn, "r");
- // find size of input file
- fseek(input_file, 0, SEEK_END);
- size_t file_size = ftell(input_file);
- fseek(input_file, 0, SEEK_SET);
- // read input file
- char *file_content = malloc(file_size);
- fread(file_content, file_size, 1, input_file);
- fclose(input_file);
-
- // strip away comments, labels, blank lines and whitespace from file_content
- // resulting in lines of what looks like instructions in assembly but is not necessarily correct
- // labels in parentheses are assigned corresponding addresses in ROM, then collected in `symbols`
- // the strings are scattered in the heap but asm_lines collects pointers to them
- char **asm_lines = calloc(INST_CHUNK_LEN, sizeof(char*));
- int asm_line_cnt = 0; // no. of lines (metaphorically) written into asm_lines
- char *asm_line = malloc(MAX_ASM_LINE_LEN + 1); // one line of (probably) assembly
- int asm_char_cnt = 0; // no. of chars written into asm_line
- struct symbol symbols[MAX_INST_LEN] = {
- {"SP", 0}, {"LCL", 1}, {"ARG", 2}, {"THIS", 3}, {"THAT", 4},
- {"R0", 0}, {"R1", 1}, {"R2", 2}, {"R3", 3},
- {"R4", 4}, {"R5", 5}, {"R6", 6}, {"R7", 7},
- {"R8", 8}, {"R9", 9}, {"R10", 10}, {"R11", 11},
- {"R12", 12}, {"R13", 13}, {"R14", 14}, {"R15", 15},
- {"SCREEN", 16384}, {"KBD", 24576},
- };
- const int predef_symbol_cnt = 23; // no. of predefined symbols
- int user_symbol_cnt = 0;
- for (size_t i = 0; i < file_size; i++) {
- switch (file_content[i]) {
- case '\n':
- // end of line; try to figure out what's in asm_line
- if (asm_char_cnt == 0) continue; // skip blank line or comment line
- *(asm_line + asm_char_cnt) = '\0';
- if (*asm_line == '(' && *(asm_line + asm_char_cnt - 1) == ')') {
- // this line may be a label; extract label from between the parentheses
- char *label = malloc(asm_char_cnt - 1);
- strncpy(label, asm_line + 1, asm_char_cnt - 2);
- free(asm_line);
- *(label + asm_char_cnt - 2) = '\0';
- symbols[predef_symbol_cnt + user_symbol_cnt] = (struct symbol) {label, asm_line_cnt};
- user_symbol_cnt++;
- } else {
- // this line may be an instruction
- // if we used up a chunk, realloc asm_lines
- if (asm_line_cnt > 0 && asm_line_cnt % INST_CHUNK_LEN == 0) {
- asm_lines = realloc(asm_lines, (asm_line_cnt + INST_CHUNK_LEN) * sizeof(char*));
- }
- asm_lines[asm_line_cnt] = asm_line;
- asm_line_cnt++;
- }
- // allocate memory for next line
- asm_char_cnt = 0;
- asm_line = malloc(MAX_ASM_LINE_LEN + 1);
- break;
- case '/':
- // we encountered a comment
- // skip to last char of line
- while (i + 1 < file_size && file_content[i + 1] != '\n') i++;
- break;
- case ' ':
- case '\t':
- case '\r':
- break; // ignore whitespace and CR
- default:
- *(asm_line + asm_char_cnt) = file_content[i];
- asm_char_cnt++;
- }
- }
- free(asm_line);
- free(file_content);
-
- // find and assign address to variables on the fly
- int addr = 16; // variable addresses start at 16
- for (int i = 0; i < asm_line_cnt; i++) {
- if (asm_lines[i] == NULL) break; // no more instructions
- if (*(asm_lines[i]) != '@') continue; // not an A-instruction
- char *addr_str = malloc(strlen(asm_lines[i]));
- strcpy(addr_str, asm_lines[i] + 1); // whatever comes after the @
- bool is_symbol = false;
- for (char *c = addr_str; *c != '\0'; c++) {
- // search for non-numeric chars in addr_str
- if (*c < '0' || *c > '9') is_symbol = true;
- }
- if (!is_symbol) {
- free(addr_str);
- continue; // address is decimal constant
- }
- // search for symbol in list
- bool found = false;
- for (int s = 0; s < predef_symbol_cnt + user_symbol_cnt; s++) {
- if (strcmp(addr_str, symbols[s].label) == 0) {
- // overwrite asm line with decimal constant
- sprintf(asm_lines[i], "@%d", symbols[s].addr);
- found = true;
- free(addr_str);
- break;
- }
- }
- if (!found) {
- // add symbol to list
- symbols[predef_symbol_cnt + user_symbol_cnt] = (struct symbol) {addr_str, addr};
- sprintf(asm_lines[i], "@%d", addr);
- user_symbol_cnt++;
- addr++;
- }
- }
-
- // start assembling
- uint16_t *binary = calloc(32768, 2);
- size_t inst_cnt = 0; // current no. of instructions in binary
- for (char **line = asm_lines; *line != NULL; line++) {
- *(binary + inst_cnt) = assemble_inst(*line);
- inst_cnt++;
- }
-
- for (char **line = asm_lines; *line != NULL; line++) free(*line);
- free(asm_lines);
- for (int s = predef_symbol_cnt; s < predef_symbol_cnt + user_symbol_cnt; s++) {
- free(symbols[s].label);
- }
-
- // write binary
- // output_fn = input_fn[:-4] + ".hack" if input_fn.endswith(".asm") else input_fn + ".hack"
- int input_fn_len = strlen(input_fn);
- char *output_fn = malloc(input_fn_len + 6);
- strcpy(output_fn, input_fn);
- if (input_fn_len >= 4 && strcmp(input_fn + input_fn_len - 4, ".asm") == 0) {
- sprintf(output_fn + input_fn_len - 4, ".hack");
- } else {
- sprintf(output_fn + input_fn_len, ".hack");
- }
-
- FILE *output_file = fopen(output_fn, "w");
- write_binary(output_file, binary, inst_cnt);
- fclose(output_file);
- free(binary);
- printf("Binary written to %s\n", output_fn);
- free(output_fn);
-
- return inst_cnt;
-}
-
/*
 * Entry point of the minimal assembler: the first argument is the input file.
 * Without one, a usage line is printed to stderr and 1 is returned, so that
 * assembler() is never called with a NULL file name.
 */
int main(int argc, char *argv[]) {
    if (argc < 2) {
        fprintf(stderr, "Usage: %s <input.asm>\n", (argc > 0 && argv[0] != NULL) ? argv[0] : "hack-as");
        return 1;
    }
    assembler(argv[1]);
    return 0;
}
diff --git a/projects/06/hack-as/hack-as.py b/projects/06/hack-as/hack-as.py
deleted file mode 100644
index bb70abc..0000000
--- a/projects/06/hack-as/hack-as.py
+++ /dev/null
@@ -1,269 +0,0 @@
-from sys import stderr
-from argparse import ArgumentParser
-
# Process exit status for each class of failure.
EXIT_CODES = dict(
    file_error=1,
    illegal_char=2,
    size_exceeded=3,
    syntax_error=4,
    addr_error=5,
    symbol_error=6,
)

PREDEFINED_SYMBOL_CNT = 23
MAX_INST_LEN = 32768  # maximum number of instructions in one program
MAX_ADDR = 32767  # largest constant an A-instruction may carry

# Symbols every program may use without defining them.
PREDEFINED_SYMBOLS = {"SP": 0, "LCL": 1, "ARG": 2, "THIS": 3, "THAT": 4}
PREDEFINED_SYMBOLS.update({f"R{n}": n for n in range(16)})
PREDEFINED_SYMBOLS.update(SCREEN=16384, KBD=24576)

# dest mnemonics; the position in the tuple is the 3-bit code, stored in bits 3..5.
DEST = {
    name: code << 3
    for code, name in enumerate(("", "M", "D", "MD", "A", "AM", "AD", "AMD"))
}

# jump mnemonics; the position in the tuple is the 3-bit code, stored in bits 0..2.
JMP = {
    name: code
    for code, name in enumerate(("", "JGT", "JEQ", "JGE", "JLT", "JNE", "JLE", "JMP"))
}

# comp mnemonics grouped by their 7-bit code; spellings that share a code sit together.
_COMP_CODES = (
    (("0",), 0b0101010),
    (("1",), 0b0111111),
    (("-1",), 0b0111010),
    (("D",), 0b0001100),
    (("A",), 0b0110000),
    (("M",), 0b1110000),
    (("!D",), 0b0001101),
    (("!A",), 0b0110001),
    (("!M",), 0b1110001),
    (("-D",), 0b0001111),
    (("-A",), 0b0110011),
    (("-M",), 0b1110011),
    (("D+1",), 0b0011111),
    (("A+1",), 0b0110111),
    (("M+1",), 0b1110111),
    (("D-1",), 0b0001110),
    (("A-1",), 0b0110010),
    (("M-1",), 0b1110010),
    (("D+A", "A+D"), 0b0000010),
    (("D+M", "M+D"), 0b1000010),
    (("D-A",), 0b0010011),
    (("D-M",), 0b1010011),
    (("A-D",), 0b0000111),
    (("M-D",), 0b1000111),
    (("D&A", "A&D"), 0b0000000),
    (("D&M", "M&D"), 0b1000000),
    (("D|A", "A|D"), 0b0010101),
    (("D|M", "M|D"), 0b1010101),
)

# The 7-bit code is stored in bits 6..12 of the instruction.
COMP = {name: code << 6 for names, code in _COMP_CODES for name in names}
-
-
def find_illegal_symbol_char(symbol):
    """Return the first character that makes `symbol` an invalid name, or None.

    A symbol may contain ASCII letters and digits plus "_", ".", "$" and ":",
    and may not begin with a digit. An empty string yields None.
    """
    # symbol should not begin with number
    # nand2tetris implementation allows it, but the standard says otherwise
    if symbol and symbol[0] in "0123456789":
        return symbol[0]

    for c in symbol:
        if (not c.isascii()) or (not (c.isalnum() or c in "_.$:")):
            return c

    return None
-
-
def print_symbols(symbols):
    """Print a header followed by one "label<TAB>addr" row per entry of `symbols`."""
    rows = ["====== SYMBOLS =====", "label\taddr"]
    rows.extend(f"{name}\t{address}" for name, address in symbols.items())
    print("\n".join(rows))
-
-
def print_binary_and_asm(binary, asm_lines):
    """Print a listing: instruction index, 16-bit encoding and source text.

    `binary` and `asm_lines` are assumed to be of the same length; if they
    are not, the listing stops at the end of the shorter one.
    """
    print("====== RESULTS =====")
    print("addr\tbinary \tinst")
    for address, (word, text) in enumerate(zip(binary, asm_lines)):
        print(f"{address}\t{word:016b}\t{text}")
-
-
def write_binary(file, binary):
    """Write every word of `binary` to `file` as a line of sixteen binary digits."""
    file.writelines(format(word, "016b") + "\n" for word in binary)
-
-
def assemble_inst(asm_line):
    """Assemble one stripped line of assembly into its 16-bit integer encoding.

    Symbols must already have been replaced by numbers. An A-instruction is
    "@" followed by ASCII decimal digits, at most MAX_ADDR. A C-instruction
    has the form [dest=]comp[;jump]; an empty dest or jump is tolerated.

    On malformed input a message is printed to stderr and the process exits
    with the "addr_error" or "syntax_error" code.
    """
    if asm_line.startswith("@"):
        # A instruction
        addr_str = asm_line[1:]
        if not (addr_str.isascii() and addr_str.isdecimal()):
            # int() would raise ValueError, or accept forms such as "+1" or "1_0"
            print(f"Invalid address: {addr_str}", file=stderr)
            exit(EXIT_CODES["addr_error"])

        addr = int(addr_str)
        if addr > MAX_ADDR:
            print(f"Address out of range: {addr}", file=stderr)
            exit(EXIT_CODES["addr_error"])

        return addr

    # dest=comp;jmp
    dest, eq, rhs = asm_line.partition("=")
    if not eq:
        # no "=" at all: the whole line is comp[;jmp]. Testing the separator
        # rather than rhs keeps "D=" an error (empty computation).
        dest, rhs = "", asm_line

    comp, _, jmp = rhs.partition(";")

    if dest not in DEST:
        print(f"Invalid destination: {dest}", file=stderr)
        exit(EXIT_CODES["syntax_error"])

    if jmp not in JMP:
        print(f"Invalid jump instruction: {jmp}", file=stderr)
        exit(EXIT_CODES["syntax_error"])

    if comp not in COMP:
        print(f"Invalid computation: {comp}", file=stderr)
        exit(EXIT_CODES["syntax_error"])

    return 0xE000 | DEST[dest] | JMP[jmp] | COMP[comp]
-
-
def assembler(input_fn, verbose):
    """Assemble the file `input_fn` into a ".hack" file next to it.

    The output name is the input name with a trailing ".asm" replaced by
    ".hack", or with ".hack" appended when there is no such suffix.

    Pass 1 strips comments, spaces and tabs, records "(LABEL)" lines as
    symbols bound to the index of the next instruction and collects the
    remaining non-empty lines. Pass 2 replaces symbolic "@name" operands by
    numbers, giving names seen for the first time addresses from 16 upward.
    Finally every line is encoded with assemble_inst().

    With `verbose` set, the user symbols and a listing are printed too. Any
    error prints a message to stderr and exits with a code from EXIT_CODES.
    """
    try:
        with open(input_fn, "r") as input_file:
            source_lines = input_file.readlines()
    except OSError:
        print(f"Cannot open input file: {input_fn}", file=stderr)
        exit(EXIT_CODES["file_error"])

    asm_lines = []
    user_symbols = {}
    for line in source_lines:
        # throw away comment and whitespace
        asm_line = line.rstrip("\n").partition("//")[0].replace(" ", "").replace("\t", "")
        if not asm_line:
            # skip blank or comment line
            continue

        if asm_line.startswith("(") and asm_line.endswith(")"):
            # asm_line may be a label
            label = asm_line[1:-1]
            illegal_char = find_illegal_symbol_char(label)
            if illegal_char is not None:
                print(f"Illegal character: {illegal_char}", file=stderr)
                exit(EXIT_CODES["illegal_char"])

            if label in user_symbols or label in PREDEFINED_SYMBOLS:
                print(f"Symbol {label} already exists", file=stderr)
                exit(EXIT_CODES["symbol_error"])

            # a label names the instruction that follows it
            user_symbols[label] = len(asm_lines)
        else:
            # asm_line may be an instruction
            if len(asm_lines) >= MAX_INST_LEN:
                print(f"Max number of instruction ({MAX_INST_LEN}) exceeded", file=stderr)
                exit(EXIT_CODES["size_exceeded"])

            asm_lines.append(asm_line)

    # find and assign address to variables on the fly
    addr = 16  # variable addresses start at 16
    for line_no, asm_line in enumerate(asm_lines):
        if not asm_line.startswith("@"):
            continue

        addr_str = asm_line[1:]  # whatever comes after the @
        if not addr_str:
            print("Address cannot be empty", file=stderr)
            exit(EXIT_CODES["addr_error"])

        if addr_str.isascii() and addr_str.isdecimal():
            # address is decimal constant
            continue

        # symbols used as operands obey the same naming rule as labels
        illegal_char = find_illegal_symbol_char(addr_str)
        if illegal_char is not None:
            print(f"Illegal character: {illegal_char}", file=stderr)
            exit(EXIT_CODES["illegal_char"])

        if addr_str in PREDEFINED_SYMBOLS:
            asm_lines[line_no] = f"@{PREDEFINED_SYMBOLS[addr_str]}"
        elif addr_str in user_symbols:
            asm_lines[line_no] = f"@{user_symbols[addr_str]}"
        else:
            user_symbols[addr_str] = addr
            asm_lines[line_no] = f"@{addr}"
            addr += 1

    binary = [assemble_inst(asm_line) for asm_line in asm_lines]

    if verbose:
        print_symbols(user_symbols)
        print_binary_and_asm(binary, asm_lines)

    output_fn = (
        input_fn[:-4] + ".hack" if input_fn.endswith(".asm") else input_fn + ".hack"
    )
    try:
        with open(output_fn, "w") as output_file:
            write_binary(output_file, binary)
    except OSError:
        print(f"Cannot open output file: {output_fn}", file=stderr)
        exit(EXIT_CODES["file_error"])

    print(f"Binary written to {output_fn}")
-
-
# Command-line entry point: hack-as.py [-v] input_fn
if __name__ == "__main__":
    cli = ArgumentParser()
    cli.add_argument("-v", "--verbose", action="store_true", help="verbose mode")
    cli.add_argument("input_fn", help="input file in assembly")
    options = cli.parse_args()
    assembler(options.input_fn, options.verbose)
diff --git a/projects/06/hack-as/test/compare.hack b/projects/06/hack-as/test/compare.hack
deleted file mode 100644
index f33a9d1..0000000
--- a/projects/06/hack-as/test/compare.hack
+++ /dev/null
@@ -1,25 +0,0 @@
-0000000000000000
-1111110000010000
-0000000000010111
-1110001100000110
-0000000000010000
-1110001100001000
-0100000000000000
-1110110000010000
-0000000000010001
-1110001100001000
-0000000000010001
-1111110000100000
-1110111010001000
-0000000000010001
-1111110000010000
-0000000000100000
-1110000010010000
-0000000000010001
-1110001100001000
-0000000000010000
-1111110010011000
-0000000000001010
-1110001100000001
-0000000000010111
-1110101010000111
diff --git a/projects/06/hack-as/test/conventional.asm b/projects/06/hack-as/test/conventional.asm
deleted file mode 100644
index 5510424..0000000
--- a/projects/06/hack-as/test/conventional.asm
+++ /dev/null
@@ -1,32 +0,0 @@
// conventional asm a sane person would write
// draw a rectangle on top left of screen
// width 16px, height specified in RAM[0]
@0
D=M                 // D = requested height
@INFINITE_LOOP
D;JLE // reject if height is zero or negative
@counter
M=D                 // counter = number of rows still to draw
@SCREEN
D=A
@address
M=D                 // address = first word of the screen
(LOOP)
    // set every bit of the word that `address` points to
    @address
    A=M
    M=-1
    // address += 32 (presumably the width of one screen row in words -- confirm)
    @address
    D=M
    @32
    D=D+A
    @address
    M=D
    // counter -= 1; repeat while rows remain
    @counter
    MD=M-1
    @LOOP
    D;JGT

// done: spin forever
(INFINITE_LOOP)
    @INFINITE_LOOP
    0;JMP
-
diff --git a/projects/06/hack-as/test/valid.asm b/projects/06/hack-as/test/valid.asm
deleted file mode 100644
index d705f1e..0000000
--- a/projects/06/hack-as/test/valid.asm
+++ /dev/null
@@ -1,29 +0,0 @@
// valid asm, but with unnecessary whitespace, = and ;
// once whitespace, empty destinations and empty jumps are dropped, this is
// the same instruction sequence as the conventionally written test program
@ 0 // unnecessary space
D=M
@INFINITE_LOOP
=D;JLE // unnecessary =
@counter
M=D; // unnecessary ;
@SCREEN
D = A // unnecessary spaces
@address
M = D; // unnecessary spaces and ;
( LOOP ) // unnecessary spaces
    @address
    A=M
    M=-1
    @address
    D=M
    @32
    D=D+A
    @address
    M=D
    @counter
    MD=M-1
    @LOOP
    D;JGT

(INFINITE_LOOP)
    @INFINITE_LOOP
    0;JMP