uxn

Varvara Ordinator, written in ANSI C(SDL2)
git clone https://git.eamoncaddigan.net/uxn.git
Log | Files | Refs | README | LICENSE

uxnasm.c (13938B)


      1 #include <stdio.h>
      2 
      3 /*
      4 Copyright (c) 2021-2023 Devine Lu Linvega, Andrew Alderwick
      5 
      6 Permission to use, copy, modify, and distribute this software for any
      7 purpose with or without fee is hereby granted, provided that the above
      8 copyright notice and this permission notice appear in all copies.
      9 
     10 THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
     11 WITH REGARD TO THIS SOFTWARE.
     12 */
     13 
/* Lowest address writebyte() accepts; main() also skips this many leading
   bytes of p.data when it writes the rom file. */
#define TRIM 0x0100
/* Size in bytes of the Program.data buffer. */
#define LENGTH 0x10000

/* Short names for the small integer types used throughout the assembler. */
typedef unsigned char Uint8;
typedef signed char Sint8;
typedef unsigned short Uint16;
     20 
/* A macro: its name plus the body tokens collected by makemacro(). */
typedef struct {
	char name[0x40], items[0x40][0x40];
	Uint8 len; /* number of items[] entries in use */
} Macro;
     25 
/* A named address. refs is bumped whenever a reference to the label is
   recorded or resolved; review() reports labels whose count is still zero. */
typedef struct {
	char name[0x40];
	Uint16 addr, refs;
} Label;
     30 
/* A label reference recorded while parsing; resolve() later patches p.data
   at addr, with the encoding selected by rune. */
typedef struct {
	char name[0x40], rune;
	Uint16 addr;
} Reference;
     35 
/* Whole assembler state. */
typedef struct {
	Uint8 data[LENGTH]; /* assembled bytes, indexed by address */
	/* lambda_stack holds the ids pushed by '{' references and popped by '}';
	   lambda_count is the id handed to the next '{'. */
	Uint8 lambda_stack[0x100], lambda_ptr, lambda_count;
	/* scope: parent label name used to expand sublabels; lambda: buffer that
	   makelambda() writes into; location: file name printed by error_asm();
	   entry: set by main() to the input file name. */
	char scope[0x40], lambda[0x10], *location, *entry;
	/* ptr: address of the next byte to write; length: set to ptr after every
	   successful writebyte(). */
	unsigned int ptr, length;
	Uint16 label_len, macro_len, refs_len; /* entries in use in the tables below */
	Label labels[0x400];
	Macro macros[0x100];
	Reference refs[0x1000];
} Program;

/* The single, file-wide assembler state. */
Program p;
     48 
     49 /* clang-format off */
     50 
/* Base opcode mnemonics. findopcode() uses the index of the matching entry
   as the opcode value before OR-ing in the mode bits. */
static char ops[][4] = {
	"LIT", "INC", "POP", "NIP", "SWP", "ROT", "DUP", "OVR",
	"EQU", "NEQ", "GTH", "LTH", "JMP", "JCN", "JSR", "STH",
	"LDZ", "STZ", "LDR", "STR", "LDA", "STA", "DEI", "DEO",
	"ADD", "SUB", "MUL", "DIV", "AND", "ORA", "EOR", "SFT"
};
     57 
/* Characters matched by isrune(); makelabel() rejects names that start with one. */
static char *runes = "|$@&,_.-;=!?#\"%~";
     59 
     60 static int   scmp(char *a, char *b, int len) { int i = 0; while(a[i] == b[i]) if(!a[i] || ++i >= len) return 1; return 0; } /* string compare */
     61 static int   sihx(char *s) { int i = 0; char c; while((c = s[i++])) if(!(c >= '0' && c <= '9') && !(c >= 'a' && c <= 'f')) return 0; return i > 1; } /* string is hexadecimal */
     62 static int   shex(char *s) { int n = 0, i = 0; char c; while((c = s[i++])) if(c >= '0' && c <= '9') n = n * 16 + (c - '0'); else if(c >= 'a' && c <= 'f') n = n * 16 + 10 + (c - 'a'); return n; } /* string to num */
     63 static int   slen(char *s) { int i = 0; while(s[i]) i++; return i; } /* string length */
     64 static int   spos(char *s, char c) { Uint8 i = 0, j; while((j = s[i++])) if(j == c) return i; return -1; } /* character position */
     65 static char *scpy(char *src, char *dst, int len) { int i = 0; while((dst[i] = src[i]) && i < len - 2) i++; dst[i + 1] = '\0'; return dst; } /* string copy */
     66 static char *scat(char *dst, const char *src) { char *ptr = dst + slen(dst); while(*src) *ptr++ = *src++; *ptr = '\0'; return dst; } /* string cat */
     67 
     68 /* clang-format on */
     69 
/* Forward declaration: doinclude() calls parse() and parse() calls doinclude(). */
static int parse(char *w, FILE *f);
     71 
     72 static int
     73 error_top(const char *name, const char *msg)
     74 {
     75 	fprintf(stderr, "%s: %s\n", name, msg);
     76 	return 0;
     77 }
     78 
     79 static int
     80 error_asm(const char *name, const char *msg)
     81 {
     82 	fprintf(stderr, "%s: %s in @%s, %s:%d.\n", name, msg, p.scope, p.location, 123);
     83 	return 0;
     84 }
     85 
     86 static char *
     87 setlocation(char *name)
     88 {
     89 	p.location = name;
     90 	return name;
     91 }
     92 
     93 static char *
     94 sublabel(char *src, char *scope, char *name)
     95 {
     96 	if(slen(scope) + slen(name) >= 0x3f) {
     97 		error_asm("Sublabel length too long", name);
     98 		return NULL;
     99 	}
    100 	return scat(scat(scpy(scope, src, 0x40), "/"), name);
    101 }
    102 
    103 static Macro *
    104 findmacro(char *name)
    105 {
    106 	int i;
    107 	for(i = 0; i < p.macro_len; i++)
    108 		if(scmp(p.macros[i].name, name, 0x40))
    109 			return &p.macros[i];
    110 	return NULL;
    111 }
    112 
    113 static Label *
    114 findlabel(char *name)
    115 {
    116 	int i;
    117 	for(i = 0; i < p.label_len; i++)
    118 		if(scmp(p.labels[i].name, name, 0x40))
    119 			return &p.labels[i];
    120 	return NULL;
    121 }
    122 
    123 static Uint8
    124 findopcode(char *s)
    125 {
    126 	int i;
    127 	for(i = 0; i < 0x20; i++) {
    128 		int m = 0;
    129 		if(!scmp(ops[i], s, 3))
    130 			continue;
    131 		if(!i) i |= (1 << 7); /* force keep for LIT */
    132 		while(s[3 + m]) {
    133 			if(s[3 + m] == '2')
    134 				i |= (1 << 5); /* mode: short */
    135 			else if(s[3 + m] == 'r')
    136 				i |= (1 << 6); /* mode: return */
    137 			else if(s[3 + m] == 'k')
    138 				i |= (1 << 7); /* mode: keep */
    139 			else
    140 				return 0; /* failed to match */
    141 			m++;
    142 		}
    143 		return i;
    144 	}
    145 	return 0;
    146 }
    147 
    148 static int
    149 makemacro(char *name, FILE *f)
    150 {
    151 	Macro *m;
    152 	char word[0x40];
    153 	if(findmacro(name))
    154 		return error_asm("Macro duplicate", name);
    155 	if(sihx(name) && slen(name) % 2 == 0)
    156 		return error_asm("Macro name is hex number", name);
    157 	if(findopcode(name) || scmp(name, "BRK", 4) || !slen(name))
    158 		return error_asm("Macro name is invalid", name);
    159 	if(p.macro_len == 0x100)
    160 		return error_asm("Macros limit exceeded", name);
    161 	m = &p.macros[p.macro_len++];
    162 	scpy(name, m->name, 0x40);
    163 	while(fscanf(f, "%63s", word) == 1) {
    164 		if(word[0] == '{') continue;
    165 		if(word[0] == '}') break;
    166 		if(word[0] == '%')
    167 			return error_asm("Macro error", name);
    168 		if(m->len >= 0x40)
    169 			return error_asm("Macro size exceeded", name);
    170 		scpy(word, m->items[m->len++], 0x40);
    171 	}
    172 	return 1;
    173 }
    174 
    175 static int
    176 isrune(char c)
    177 {
    178 	char cc, *r = runes;
    179 	while((cc = *r++))
    180 		if(c == cc) return 1;
    181 	return 0;
    182 }
    183 
    184 static int
    185 makelabel(char *name)
    186 {
    187 	Label *l;
    188 	if(findlabel(name))
    189 		return error_asm("Label duplicate", name);
    190 	if(sihx(name) && (slen(name) == 2 || slen(name) == 4))
    191 		return error_asm("Label name is hex number", name);
    192 	if(findopcode(name) || scmp(name, "BRK", 4) || !slen(name))
    193 		return error_asm("Label name is invalid", name);
    194 	if(isrune(name[0]))
    195 		return error_asm("Label name is runic", name);
    196 	if(p.label_len == 0x400)
    197 		return error_asm("Labels limit exceeded", name);
    198 	l = &p.labels[p.label_len++];
    199 	l->addr = p.ptr;
    200 	l->refs = 0;
    201 	scpy(name, l->name, 0x40);
    202 	return 1;
    203 }
    204 
    205 static char *
    206 makelambda(int id)
    207 {
    208 	scpy("lambda", p.lambda, 0x07);
    209 	p.lambda[6] = '0' + (id >> 0x4);
    210 	p.lambda[7] = '0' + (id & 0xf);
    211 	return p.lambda;
    212 }
    213 
    214 static int
    215 makereference(char *scope, char *label, char rune, Uint16 addr)
    216 {
    217 	char subw[0x40], parent[0x40];
    218 	Reference *r;
    219 	if(p.refs_len >= 0x1000)
    220 		return error_asm("References limit exceeded", label);
    221 	r = &p.refs[p.refs_len++];
    222 	if(label[0] == '{') {
    223 		p.lambda_stack[p.lambda_ptr++] = p.lambda_count;
    224 		scpy(makelambda(p.lambda_count++), r->name, 0x40);
    225 	} else if(label[0] == '&' || label[0] == '/') {
    226 		if(!sublabel(subw, scope, label + 1))
    227 			return error_asm("Invalid sublabel", label);
    228 		scpy(subw, r->name, 0x40);
    229 	} else {
    230 		int pos = spos(label, '/');
    231 		if(pos > 0) {
    232 			Label *l;
    233 			if((l = findlabel(scpy(label, parent, pos))))
    234 				l->refs++;
    235 		}
    236 		scpy(label, r->name, 0x40);
    237 	}
    238 	r->rune = rune;
    239 	r->addr = addr;
    240 	return 1;
    241 }
    242 
    243 static int
    244 writebyte(Uint8 b)
    245 {
    246 	if(p.ptr < TRIM)
    247 		return error_asm("Writing in zero-page", "");
    248 	else if(p.ptr > 0xffff)
    249 		return error_asm("Writing after the end of RAM", "");
    250 	else if(p.ptr < p.length)
    251 		return error_asm("Memory overwrite", "");
    252 	p.data[p.ptr++] = b;
    253 	p.length = p.ptr;
    254 	return 1;
    255 }
    256 
    257 static int
    258 writeopcode(char *w)
    259 {
    260 	return writebyte(findopcode(w));
    261 }
    262 
    263 static int
    264 writeshort(Uint16 s, int lit)
    265 {
    266 	if(lit)
    267 		if(!writebyte(findopcode("LIT2"))) return 0;
    268 	return writebyte(s >> 8) && writebyte(s & 0xff);
    269 }
    270 
    271 static int
    272 writelitbyte(Uint8 b)
    273 {
    274 	return writebyte(findopcode("LIT")) && writebyte(b);
    275 }
    276 
    277 static int
    278 doinclude(char *filename)
    279 {
    280 	FILE *f;
    281 	char w[0x40];
    282 	if(!(f = fopen(setlocation(filename), "r")))
    283 		return error_asm("Include missing", filename);
    284 	while(fscanf(f, "%63s", w) == 1)
    285 		if(!parse(w, f))
    286 			return error_asm("Unknown token", w);
    287 	fclose(f);
    288 	return 1;
    289 }
    290 
    291 static int
    292 parse(char *w, FILE *f)
    293 {
    294 	int i;
    295 	char word[0x40], subw[0x40], c;
    296 	Label *l;
    297 	Macro *m;
    298 	if(slen(w) >= 63)
    299 		return error_asm("Invalid token", w);
    300 	switch(w[0]) {
    301 	case '(': /* comment */
    302 		if(slen(w) != 1) fprintf(stderr, "-- Malformed comment: %s\n", w);
    303 		i = 1; /* track nested comment depth */
    304 		while(fscanf(f, "%63s", word) == 1) {
    305 			if(slen(word) != 1)
    306 				continue;
    307 			else if(word[0] == '(')
    308 				i++;
    309 			else if(word[0] == ')' && --i < 1)
    310 				break;
    311 		}
    312 		break;
    313 	case '~': /* include */
    314 		if(!doinclude(w + 1))
    315 			return error_asm("Invalid include", w);
    316 		break;
    317 	case '%': /* macro */
    318 		if(!makemacro(w + 1, f))
    319 			return error_asm("Invalid macro", w);
    320 		break;
    321 	case '|': /* pad-absolute */
    322 		if(sihx(w + 1))
    323 			p.ptr = shex(w + 1);
    324 		else if(w[1] == '&') {
    325 			if(!sublabel(subw, p.scope, w + 2) || !(l = findlabel(subw)))
    326 				return error_asm("Invalid sublabel", w);
    327 			p.ptr = l->addr;
    328 		} else {
    329 			if(!(l = findlabel(w + 1)))
    330 				return error_asm("Invalid label", w);
    331 			p.ptr = l->addr;
    332 		}
    333 		break;
    334 	case '$': /* pad-relative */
    335 		if(sihx(w + 1))
    336 			p.ptr += shex(w + 1);
    337 		else if(w[1] == '&') {
    338 			if(!sublabel(subw, p.scope, w + 2) || !(l = findlabel(subw)))
    339 				return error_asm("Invalid sublabel", w);
    340 			p.ptr += l->addr;
    341 		} else {
    342 			if(!(l = findlabel(w + 1)))
    343 				return error_asm("Invalid label", w);
    344 			p.ptr += l->addr;
    345 		}
    346 		break;
    347 	case '@': /* label */
    348 		if(!makelabel(w + 1))
    349 			return error_asm("Invalid label", w);
    350 		i = 0;
    351 		while(w[i + 1] != '/' && i < 0x3e && (p.scope[i] = w[i + 1]))
    352 			i++;
    353 		p.scope[i] = '\0';
    354 		break;
    355 	case '&': /* sublabel */
    356 		if(!sublabel(subw, p.scope, w + 1) || !makelabel(subw))
    357 			return error_asm("Invalid sublabel", w);
    358 		break;
    359 	case '#': /* literals hex */
    360 		if(sihx(w + 1) && slen(w) == 3)
    361 			return writelitbyte(shex(w + 1));
    362 		else if(sihx(w + 1) && slen(w) == 5)
    363 			return writeshort(shex(w + 1), 1);
    364 		else
    365 			return error_asm("Invalid hex literal", w);
    366 		break;
    367 	case '_': /* raw byte relative */
    368 		return makereference(p.scope, w + 1, w[0], p.ptr) && writebyte(0xff);
    369 	case ',': /* literal byte relative */
    370 		return makereference(p.scope, w + 1, w[0], p.ptr + 1) && writelitbyte(0xff);
    371 	case '-': /* raw byte absolute */
    372 		return makereference(p.scope, w + 1, w[0], p.ptr) && writebyte(0xff);
    373 	case '.': /* literal byte zero-page */
    374 		return makereference(p.scope, w + 1, w[0], p.ptr + 1) && writelitbyte(0xff);
    375 	case ':': fprintf(stderr, "Deprecated rune %s, use =%s\n", w, w + 1);
    376 	case '=': /* raw short absolute */
    377 		return makereference(p.scope, w + 1, w[0], p.ptr) && writeshort(0xffff, 0);
    378 	case ';': /* literal short absolute */
    379 		return makereference(p.scope, w + 1, w[0], p.ptr + 1) && writeshort(0xffff, 1);
    380 	case '?': /* JCI */
    381 		return makereference(p.scope, w + 1, w[0], p.ptr + 1) && writebyte(0x20) && writeshort(0xffff, 0);
    382 	case '!': /* JMI */
    383 		return makereference(p.scope, w + 1, w[0], p.ptr + 1) && writebyte(0x40) && writeshort(0xffff, 0);
    384 	case '"': /* raw string */
    385 		i = 0;
    386 		while((c = w[++i]))
    387 			if(!writebyte(c)) return 0;
    388 		break;
    389 	case '}': /* lambda end */
    390 		if(!makelabel(makelambda(p.lambda_stack[--p.lambda_ptr])))
    391 			return error_asm("Invalid label", w);
    392 		break;
    393 	case '[':
    394 	case ']':
    395 		if(slen(w) == 1) break; /* else fallthrough */
    396 	default:
    397 		/* opcode */
    398 		if(findopcode(w) || scmp(w, "BRK", 4))
    399 			return writeopcode(w);
    400 		/* raw byte */
    401 		else if(sihx(w) && slen(w) == 2)
    402 			return writebyte(shex(w));
    403 		/* raw short */
    404 		else if(sihx(w) && slen(w) == 4)
    405 			return writeshort(shex(w), 0);
    406 		/* macro */
    407 		else if((m = findmacro(w))) {
    408 			for(i = 0; i < m->len; i++)
    409 				if(!parse(m->items[i], f))
    410 					return 0;
    411 			return 1;
    412 		} else
    413 			return makereference(p.scope, w, ' ', p.ptr + 1) && writebyte(0x60) && writeshort(0xffff, 0);
    414 	}
    415 	return 1;
    416 }
    417 
    418 static int
    419 resolve(void)
    420 {
    421 	Label *l;
    422 	int i;
    423 	Uint16 a;
    424 	for(i = 0; i < p.refs_len; i++) {
    425 		Reference *r = &p.refs[i];
    426 		switch(r->rune) {
    427 		case '_':
    428 		case ',':
    429 			if(!(l = findlabel(r->name)))
    430 				return error_top("Unknown relative reference", r->name);
    431 			p.data[r->addr] = (Sint8)(l->addr - r->addr - 2);
    432 			if((Sint8)p.data[r->addr] != (l->addr - r->addr - 2))
    433 				return error_asm("Relative reference is too far", r->name);
    434 			l->refs++;
    435 			break;
    436 		case '-':
    437 		case '.':
    438 			if(!(l = findlabel(r->name)))
    439 				return error_top("Unknown zero-page reference", r->name);
    440 			p.data[r->addr] = l->addr & 0xff;
    441 			l->refs++;
    442 			break;
    443 		case ':':
    444 		case '=':
    445 		case ';':
    446 			if(!(l = findlabel(r->name)))
    447 				return error_top("Unknown absolute reference", r->name);
    448 			p.data[r->addr] = l->addr >> 0x8;
    449 			p.data[r->addr + 1] = l->addr & 0xff;
    450 			l->refs++;
    451 			break;
    452 		case '?':
    453 		case '!':
    454 		default:
    455 			if(!(l = findlabel(r->name)))
    456 				return error_top("Unknown absolute reference", r->name);
    457 			a = l->addr - r->addr - 2;
    458 			p.data[r->addr] = a >> 0x8;
    459 			p.data[r->addr + 1] = a & 0xff;
    460 			l->refs++;
    461 			break;
    462 		}
    463 	}
    464 	return 1;
    465 }
    466 
    467 static int
    468 assemble(FILE *f)
    469 {
    470 	char w[0x40];
    471 	p.ptr = 0x100;
    472 	scpy("on-reset", p.scope, 0x40);
    473 	while(fscanf(f, "%62s", w) == 1)
    474 		if(slen(w) > 0x3d || !parse(w, f))
    475 			return error_asm("Invalid token", w);
    476 	return resolve();
    477 }
    478 
    479 static void
    480 review(char *filename)
    481 {
    482 	int i;
    483 	for(i = 0; i < p.label_len; i++)
    484 		if(p.labels[i].name[0] >= 'A' && p.labels[i].name[0] <= 'Z')
    485 			continue; /* Ignore capitalized labels(devices) */
    486 		else if(!p.labels[i].refs)
    487 			fprintf(stdout, "-- Unused label: %s\n", p.labels[i].name);
    488 	fprintf(stdout,
    489 		"Assembled %s in %d bytes(%.2f%% used), %d labels, %d macros.\n",
    490 		filename,
    491 		p.length - TRIM,
    492 		(p.length - TRIM) / 652.80,
    493 		p.label_len,
    494 		p.macro_len);
    495 }
    496 
    497 static void
    498 writesym(char *filename)
    499 {
    500 	int i;
    501 	char symdst[0x60];
    502 	FILE *fp;
    503 	if(slen(filename) > 0x60 - 5)
    504 		return;
    505 	fp = fopen(scat(scpy(filename, symdst, slen(filename) + 1), ".sym"), "w");
    506 	if(fp != NULL) {
    507 		for(i = 0; i < p.label_len; i++) {
    508 			Uint8 hb = p.labels[i].addr >> 8, lb = p.labels[i].addr & 0xff;
    509 			fwrite(&hb, 1, 1, fp);
    510 			fwrite(&lb, 1, 1, fp);
    511 			fwrite(p.labels[i].name, slen(p.labels[i].name) + 1, 1, fp);
    512 		}
    513 	}
    514 	fclose(fp);
    515 }
    516 
    517 int
    518 main(int argc, char *argv[])
    519 {
    520 	FILE *src, *dst;
    521 	if(argc == 1)
    522 		return error_top("usage", "uxnasm [-v] input.tal output.rom");
    523 	if(argv[1][0] == '-' && argv[1][1] == 'v')
    524 		return !fprintf(stdout, "Uxnasm - Uxntal Assembler, 7 Mar 2024.\n");
    525 	if(!(src = fopen(setlocation(argv[1]), "r")))
    526 		return !error_top("Invalid input", argv[1]);
    527 	p.entry = argv[1];
    528 	if(!assemble(src))
    529 		return !error_top("Assembly", "Failed to assemble rom.");
    530 	if(!(dst = fopen(argv[2], "wb")))
    531 		return !error_top("Invalid Output", argv[2]);
    532 	if(p.length <= TRIM)
    533 		return !error_top("Assembly", "Output rom is empty.");
    534 	fwrite(p.data + TRIM, p.length - TRIM, 1, dst);
    535 	if(!scmp(argv[2], "-", 2)) {
    536 		review(argv[2]);
    537 		writesym(argv[2]);
    538 	}
    539 	return 0;
    540 }