/* uxnasm.c - Uxntal assembler */
#include <stdio.h>

/*
Copyright (c) 2021-2023 Devine Lu Linvega, Andrew Alderwick

Permission to use, copy, modify, and distribute this software for any
purpose with or without fee is hereby granted, provided that the above
copyright notice and this permission notice appear in all copies.

THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
WITH REGARD TO THIS SOFTWARE.
*/

#define TRIM 0x0100    /* bytes below this address are not written to the rom */
#define LENGTH 0x10000 /* size of the assembled memory image */

typedef unsigned char Uint8;
typedef signed char Sint8;
typedef unsigned short Uint16;

/* A macro: its name and up to 0x40 body tokens of up to 0x3f chars each. */
typedef struct {
	char name[0x40], items[0x40][0x40];
	Uint8 len;
} Macro;

/* A label: its name, the address it was defined at, and a usage counter. */
typedef struct {
	char name[0x40];
	Uint16 addr, refs;
} Label;

/* A pending reference: target label name, the rune that created it, and
   the address of the placeholder byte(s) patched by resolve(). */
typedef struct {
	char name[0x40], rune;
	Uint16 addr;
} Reference;

/* Whole assembler state. The single instance `p` has static storage, so
   every field starts out zeroed. */
typedef struct {
	Uint8 data[LENGTH];
	Uint8 lambda_stack[0x100], lambda_ptr, lambda_count;
	char scope[0x40], lambda[0x10], *location, *entry;
	unsigned int ptr, length;
	Uint16 label_len, macro_len, refs_len;
	Label labels[0x400];
	Macro macros[0x100];
	Reference refs[0x1000];
} Program;

Program p;

/* clang-format off */

static char ops[][4] = {
	"LIT", "INC", "POP", "NIP", "SWP", "ROT", "DUP", "OVR",
	"EQU", "NEQ", "GTH", "LTH", "JMP", "JCN", "JSR", "STH",
	"LDZ", "STZ", "LDR", "STR", "LDA", "STA", "DEI", "DEO",
	"ADD", "SUB", "MUL", "DIV", "AND", "ORA", "EOR", "SFT"
};

static char *runes = "|$@&,_.-;=!?#\"%~";

/*
String helpers.

scmp  1 when a and b are equal up to their terminator or up to len chars.
sihx  1 when s is non-empty and made only of 0-9 and lowercase a-f.
shex  value of the lowercase hex digits in s; other characters are skipped.
slen  length of s.
spos  1-based position of the first c in s, or -1 when absent.
scpy  copies at most len - 1 chars of src into dst and terminates it.
      Note the argument order (source first) and that one extra NUL is
      written right after the terminator, so dst needs at least 2 bytes.
scat  appends src to dst, without any bounds check.
*/
static int   scmp(char *a, char *b, int len) { int i = 0; while(a[i] == b[i]) if(!a[i] || ++i >= len) return 1; return 0; } /* string compare */
static int   sihx(char *s) { int i = 0; char c; while((c = s[i++])) if(!(c >= '0' && c <= '9') && !(c >= 'a' && c <= 'f')) return 0; return i > 1; } /* string is hexadecimal */
static int   shex(char *s) { int n = 0, i = 0; char c; while((c = s[i++])) if(c >= '0' && c <= '9') n = n * 16 + (c - '0'); else if(c >= 'a' && c <= 'f') n = n * 16 + 10 + (c - 'a'); return n; } /* string to num */
static int   slen(char *s) { int i = 0; while(s[i]) i++; return i; } /* string length */
static int   spos(char *s, char c) { Uint8 i = 0, j; while((j = s[i++])) if(j == c) return i; return -1; } /* character position */
static char *scpy(char *src, char *dst, int len) { int i = 0; while((dst[i] = src[i]) && i < len - 2) i++; dst[i + 1] = '\0'; return dst; } /* string copy */
static char *scat(char *dst, const char *src) { char *ptr = dst + slen(dst); while(*src) *ptr++ = *src++; *ptr = '\0'; return dst; } /* string cat */

/* clang-format on */

static int parse(char *w, FILE *f);

/* Prints "name: msg" to stderr. Always returns 0 so callers can
   `return error_top(...)` to signal failure. */
static int
error_top(const char *name, const char *msg)
{
	fprintf(stderr, "%s: %s\n", name, msg);
	return 0;
}

/* Prints an assembly error with the current scope and location to stderr.
   Always returns 0.
   NOTE(review): the line number is the hard-coded placeholder 123; the
   fscanf-based tokenizer does not track source lines. */
static int
error_asm(const char *name, const char *msg)
{
	fprintf(stderr, "%s: %s in @%s, %s:%d.\n", name, msg, p.scope, p.location, 123);
	return 0;
}

/* Records name as the location reported by error_asm() and returns it.
   The pointer is stored, not copied. */
static char *
setlocation(char *name)
{
	p.location = name;
	return name;
}

/* Builds "scope/name" into src (a buffer of 0x40 bytes) and returns it,
   or reports an error and returns NULL when the result would not fit. */
static char *
sublabel(char *src, char *scope, char *name)
{
	if(slen(scope) + slen(name) >= 0x3f) {
		error_asm("Sublabel length too long", name);
		return NULL;
	}
	return scat(scat(scpy(scope, src, 0x40), "/"), name);
}

/* Returns the macro called name, or NULL. */
static Macro *
findmacro(char *name)
{
	int i;
	for(i = 0; i < p.macro_len; i++)
		if(scmp(p.macros[i].name, name, 0x40))
			return &p.macros[i];
	return NULL;
}

/* Returns the label called name, or NULL. */
static Label *
findlabel(char *name)
{
	int i;
	for(i = 0; i < p.label_len; i++)
		if(scmp(p.labels[i].name, name, 0x40))
			return &p.labels[i];
	return NULL;
}

/* Returns the opcode byte for mnemonic s including its 2/r/k mode
   suffixes, or 0 when s is not an opcode. BRK is not in the table:
   callers test for it separately and rely on this returning 0 for it. */
static Uint8
findopcode(char *s)
{
	int i;
	for(i = 0; i < 0x20; i++) {
		int m = 0;
		if(!scmp(ops[i], s, 3))
			continue;
		if(!i) i |= (1 << 7); /* force keep for LIT */
		while(s[3 + m]) {
			if(s[3 + m] == '2')
				i |= (1 << 5); /* mode: short */
			else if(s[3 + m] == 'r')
				i |= (1 << 6); /* mode: return */
			else if(s[3 + m] == 'k')
				i |= (1 << 7); /* mode: keep */
			else
				return 0; /* failed to match */
			m++;
		}
		return i;
	}
	return 0;
}

/* Defines macro name, reading its body tokens from f up to the closing
   "}" token. Returns 1 on success, 0 after reporting an error. */
static int
makemacro(char *name, FILE *f)
{
	Macro *m;
	char word[0x40];
	if(findmacro(name))
		return error_asm("Macro duplicate", name);
	if(sihx(name) && slen(name) % 2 == 0)
		return error_asm("Macro name is hex number", name);
	if(findopcode(name) || scmp(name, "BRK", 4) || !slen(name))
		return error_asm("Macro name is invalid", name);
	if(p.macro_len == 0x100)
		return error_asm("Macros limit exceeded", name);
	m = &p.macros[p.macro_len++];
	scpy(name, m->name, 0x40);
	while(fscanf(f, "%63s", word) == 1) {
		if(word[0] == '{') continue;
		if(word[0] == '}') break;
		if(word[0] == '%')
			return error_asm("Macro error", name);
		if(m->len >= 0x40)
			return error_asm("Macro size exceeded", name);
		scpy(word, m->items[m->len++], 0x40);
	}
	return 1;
}

/* Returns 1 when c is one of the characters listed in `runes`. */
static int
isrune(char c)
{
	char cc, *r = runes;
	while((cc = *r++))
		if(c == cc) return 1;
	return 0;
}

/* Defines label name at the current write pointer.
   Returns 1 on success, 0 after reporting an error. */
static int
makelabel(char *name)
{
	Label *l;
	if(findlabel(name))
		return error_asm("Label duplicate", name);
	if(sihx(name) && (slen(name) == 2 || slen(name) == 4))
		return error_asm("Label name is hex number", name);
	if(findopcode(name) || scmp(name, "BRK", 4) || !slen(name))
		return error_asm("Label name is invalid", name);
	if(isrune(name[0]))
		return error_asm("Label name is runic", name);
	if(p.label_len == 0x400)
		return error_asm("Labels limit exceeded", name);
	l = &p.labels[p.label_len++];
	l->addr = p.ptr;
	l->refs = 0;
	scpy(name, l->name, 0x40);
	return 1;
}

/* Writes the label name for lambda number id into p.lambda and returns it.
   The two suffix characters are '0' plus each nibble of id, so nibbles
   above 9 yield the characters following '9' rather than hex letters. */
static char *
makelambda(int id)
{
	scpy("lambda", p.lambda, 0x07);
	p.lambda[6] = '0' + (id >> 0x4);
	p.lambda[7] = '0' + (id & 0xf);
	return p.lambda;
}

/* Records a reference to label, made with rune, whose placeholder lives
   at addr. "{" opens a new lambda, a leading "&" or "/" is resolved
   against scope, and "parent/child" counts as a use of parent.
   Returns 1 on success, 0 after reporting an error. */
static int
makereference(char *scope, char *label, char rune, Uint16 addr)
{
	char subw[0x40], parent[0x40];
	Reference *r;
	if(p.refs_len >= 0x1000)
		return error_asm("References limit exceeded", label);
	r = &p.refs[p.refs_len++];
	if(label[0] == '{') {
		p.lambda_stack[p.lambda_ptr++] = p.lambda_count;
		scpy(makelambda(p.lambda_count++), r->name, 0x40);
	} else if(label[0] == '&' || label[0] == '/') {
		if(!sublabel(subw, scope, label + 1))
			return error_asm("Invalid sublabel", label);
		scpy(subw, r->name, 0x40);
	} else {
		int pos = spos(label, '/');
		if(pos > 0) {
			Label *l;
			/* spos is 1-based, so scpy copies the part before the slash */
			if((l = findlabel(scpy(label, parent, pos))))
				l->refs++;
		}
		scpy(label, r->name, 0x40);
	}
	r->rune = rune;
	r->addr = addr;
	return 1;
}

/* Appends byte b at the write pointer. Fails (returns 0 after reporting)
   below TRIM, past 0xffff, or when the pointer was moved back over bytes
   that were already written. */
static int
writebyte(Uint8 b)
{
	if(p.ptr < TRIM)
		return error_asm("Writing in zero-page", "");
	else if(p.ptr > 0xffff)
		return error_asm("Writing after the end of RAM", "");
	else if(p.ptr < p.length)
		return error_asm("Memory overwrite", "");
	p.data[p.ptr++] = b;
	p.length = p.ptr;
	return 1;
}

/* Writes the opcode byte for mnemonic w (0 for BRK). */
static int
writeopcode(char *w)
{
	return writebyte(findopcode(w));
}

/* Writes s high byte first, preceded by LIT2 when lit is non-zero. */
static int
writeshort(Uint16 s, int lit)
{
	if(lit)
		if(!writebyte(findopcode("LIT2"))) return 0;
	return writebyte(s >> 8) && writebyte(s & 0xff);
}

/* Writes LIT followed by b. */
static int
writelitbyte(Uint8 b)
{
	return writebyte(findopcode("LIT")) && writebyte(b);
}

/* Assembles the tokens of filename in place.
   Returns 1 on success, 0 after reporting an error.
   The file is closed on every path, and the previous error location is
   restored before returning: filename usually points into the caller's
   token buffer, which is overwritten by the next token read. */
static int
doinclude(char *filename)
{
	FILE *f;
	char w[0x40];
	char *outer = p.location;
	int ok = 1;
	if(!(f = fopen(setlocation(filename), "r"))) {
		error_asm("Include missing", filename);
		p.location = outer;
		return 0;
	}
	while(fscanf(f, "%63s", w) == 1)
		if(!parse(w, f)) {
			ok = error_asm("Unknown token", w);
			break;
		}
	fclose(f);
	p.location = outer;
	return ok;
}

/* Assembles one token w. f is the stream the token came from, used by
   the constructs that consume further tokens (comments and macro
   definitions). Returns 1 on success, 0 after reporting an error. */
static int
parse(char *w, FILE *f)
{
	int i;
	char word[0x40], subw[0x40], c;
	Label *l;
	Macro *m;
	if(slen(w) >= 63)
		return error_asm("Invalid token", w);
	switch(w[0]) {
	case '(': /* comment */
		if(slen(w) != 1) fprintf(stderr, "-- Malformed comment: %s\n", w);
		i = 1; /* track nested comment depth */
		while(fscanf(f, "%63s", word) == 1) {
			if(slen(word) != 1)
				continue;
			else if(word[0] == '(')
				i++;
			else if(word[0] == ')' && --i < 1)
				break;
		}
		break;
	case '~': /* include */
		if(!doinclude(w + 1))
			return error_asm("Invalid include", w);
		break;
	case '%': /* macro */
		if(!makemacro(w + 1, f))
			return error_asm("Invalid macro", w);
		break;
	case '|': /* pad-absolute */
		if(sihx(w + 1))
			p.ptr = shex(w + 1);
		else if(w[1] == '&') {
			if(!sublabel(subw, p.scope, w + 2) || !(l = findlabel(subw)))
				return error_asm("Invalid sublabel", w);
			p.ptr = l->addr;
		} else {
			if(!(l = findlabel(w + 1)))
				return error_asm("Invalid label", w);
			p.ptr = l->addr;
		}
		break;
	case '$': /* pad-relative */
		if(sihx(w + 1))
			p.ptr += shex(w + 1);
		else if(w[1] == '&') {
			if(!sublabel(subw, p.scope, w + 2) || !(l = findlabel(subw)))
				return error_asm("Invalid sublabel", w);
			p.ptr += l->addr;
		} else {
			if(!(l = findlabel(w + 1)))
				return error_asm("Invalid label", w);
			p.ptr += l->addr;
		}
		break;
	case '@': /* label */
		if(!makelabel(w + 1))
			return error_asm("Invalid label", w);
		/* the scope is the label name up to its first slash */
		i = 0;
		while(w[i + 1] != '/' && i < 0x3e && (p.scope[i] = w[i + 1]))
			i++;
		p.scope[i] = '\0';
		break;
	case '&': /* sublabel */
		if(!sublabel(subw, p.scope, w + 1) || !makelabel(subw))
			return error_asm("Invalid sublabel", w);
		break;
	case '#': /* literals hex */
		if(sihx(w + 1) && slen(w) == 3)
			return writelitbyte(shex(w + 1));
		else if(sihx(w + 1) && slen(w) == 5)
			return writeshort(shex(w + 1), 1);
		else
			return error_asm("Invalid hex literal", w);
		break;
	case '_': /* raw byte relative */
		return makereference(p.scope, w + 1, w[0], p.ptr) && writebyte(0xff);
	case ',': /* literal byte relative */
		return makereference(p.scope, w + 1, w[0], p.ptr + 1) && writelitbyte(0xff);
	case '-': /* raw byte absolute */
		return makereference(p.scope, w + 1, w[0], p.ptr) && writebyte(0xff);
	case '.': /* literal byte zero-page */
		return makereference(p.scope, w + 1, w[0], p.ptr + 1) && writelitbyte(0xff);
	case ':':
		fprintf(stderr, "Deprecated rune %s, use =%s\n", w, w + 1);
		/* fallthrough */
	case '=': /* raw short absolute */
		return makereference(p.scope, w + 1, w[0], p.ptr) && writeshort(0xffff, 0);
	case ';': /* literal short absolute */
		return makereference(p.scope, w + 1, w[0], p.ptr + 1) && writeshort(0xffff, 1);
	case '?': /* JCI */
		return makereference(p.scope, w + 1, w[0], p.ptr + 1) && writebyte(0x20) && writeshort(0xffff, 0);
	case '!': /* JMI */
		return makereference(p.scope, w + 1, w[0], p.ptr + 1) && writebyte(0x40) && writeshort(0xffff, 0);
	case '"': /* raw string */
		i = 0;
		while((c = w[++i]))
			if(!writebyte(c)) return 0;
		break;
	case '}': /* lambda end */
		/* lambda_ptr is unsigned: popping an empty stack would wrap to
		   255 and define a label for a lambda that was never opened */
		if(!p.lambda_ptr)
			return error_asm("Unmatched lambda end", w);
		if(!makelabel(makelambda(p.lambda_stack[--p.lambda_ptr])))
			return error_asm("Invalid label", w);
		break;
	case '[':
	case ']':
		if(slen(w) == 1) break;
		/* fallthrough */
	default:
		/* opcode */
		if(findopcode(w) || scmp(w, "BRK", 4))
			return writeopcode(w);
		/* raw byte */
		else if(sihx(w) && slen(w) == 2)
			return writebyte(shex(w));
		/* raw short */
		else if(sihx(w) && slen(w) == 4)
			return writeshort(shex(w), 0);
		/* macro */
		else if((m = findmacro(w))) {
			for(i = 0; i < m->len; i++)
				if(!parse(m->items[i], f))
					return 0;
			return 1;
		} else /* anything else is a call to a label: 0x60 + relative short */
			return makereference(p.scope, w, ' ', p.ptr + 1) && writebyte(0x60) && writeshort(0xffff, 0);
	}
	return 1;
}

/* Patches every recorded reference with the address of its label and
   counts the use. Returns 1 on success, 0 after reporting an unknown
   label or a relative reference that does not fit in a signed byte. */
static int
resolve(void)
{
	Label *l;
	int i;
	Uint16 a;
	for(i = 0; i < p.refs_len; i++) {
		Reference *r = &p.refs[i];
		switch(r->rune) {
		case '_':
		case ',':
			if(!(l = findlabel(r->name)))
				return error_top("Unknown relative reference", r->name);
			p.data[r->addr] = (Sint8)(l->addr - r->addr - 2);
			/* the distance must survive the round trip through a signed byte */
			if((Sint8)p.data[r->addr] != (l->addr - r->addr - 2))
				return error_asm("Relative reference is too far", r->name);
			l->refs++;
			break;
		case '-':
		case '.':
			if(!(l = findlabel(r->name)))
				return error_top("Unknown zero-page reference", r->name);
			p.data[r->addr] = l->addr & 0xff;
			l->refs++;
			break;
		case ':':
		case '=':
		case ';':
			if(!(l = findlabel(r->name)))
				return error_top("Unknown absolute reference", r->name);
			p.data[r->addr] = l->addr >> 0x8;
			p.data[r->addr + 1] = l->addr & 0xff;
			l->refs++;
			break;
		case '?':
		case '!':
		default:
			if(!(l = findlabel(r->name)))
				return error_top("Unknown absolute reference", r->name);
			a = l->addr - r->addr - 2;
			p.data[r->addr] = a >> 0x8;
			p.data[r->addr + 1] = a & 0xff;
			l->refs++;
			break;
		}
	}
	return 1;
}

/* Assembles every token of f starting at address 0x100 in scope
   "on-reset", then resolves references. Returns 1 on success, 0 on
   failure. The stream is left open for the caller to close. */
static int
assemble(FILE *f)
{
	char w[0x40];
	p.ptr = 0x100;
	scpy("on-reset", p.scope, 0x40);
	while(fscanf(f, "%62s", w) == 1)
		if(slen(w) > 0x3d || !parse(w, f))
			return error_asm("Invalid token", w);
	return resolve();
}

/* Prints unused labels (skipping those starting with a capital letter)
   and a one-line summary of the assembled rom to stdout. */
static void
review(char *filename)
{
	int i;
	for(i = 0; i < p.label_len; i++)
		if(p.labels[i].name[0] >= 'A' && p.labels[i].name[0] <= 'Z')
			continue; /* Ignore capitalized labels(devices) */
		else if(!p.labels[i].refs)
			fprintf(stdout, "-- Unused label: %s\n", p.labels[i].name);
	fprintf(stdout,
		"Assembled %s in %d bytes(%.2f%% used), %d labels, %d macros.\n",
		filename,
		p.length - TRIM,
		(p.length - TRIM) / 652.80,
		p.label_len,
		p.macro_len);
}

/* Writes "<filename>.sym": for each label, its address high byte first,
   followed by its NUL-terminated name. Does nothing when the name does
   not fit the path buffer or the file cannot be created. */
static void
writesym(char *filename)
{
	int i;
	char symdst[0x60];
	FILE *fp;
	if(slen(filename) > 0x60 - 5)
		return;
	/* binary mode: the address bytes must not go through newline translation */
	fp = fopen(scat(scpy(filename, symdst, slen(filename) + 1), ".sym"), "wb");
	if(fp == NULL)
		return; /* nothing was opened, so there is nothing to close */
	for(i = 0; i < p.label_len; i++) {
		Uint8 hb = p.labels[i].addr >> 8, lb = p.labels[i].addr & 0xff;
		fwrite(&hb, 1, 1, fp);
		fwrite(&lb, 1, 1, fp);
		fwrite(p.labels[i].name, slen(p.labels[i].name) + 1, 1, fp);
	}
	fclose(fp);
}

/* Usage: uxnasm [-v] input.tal output.rom
   Returns 0 on success (and for the bare usage and -v messages), 1 on
   failure. */
int
main(int argc, char *argv[])
{
	FILE *src, *dst;
	int ok;
	size_t written;
	if(argc == 1)
		return error_top("usage", "uxnasm [-v] input.tal output.rom");
	if(argv[1][0] == '-' && argv[1][1] == 'v')
		return !fprintf(stdout, "Uxnasm - Uxntal Assembler, 7 Mar 2024.\n");
	/* argv[2] is a null pointer when no output path was given */
	if(argc < 3)
		return !error_top("usage", "uxnasm [-v] input.tal output.rom");
	if(!(src = fopen(setlocation(argv[1]), "r")))
		return !error_top("Invalid input", argv[1]);
	p.entry = argv[1];
	ok = assemble(src);
	fclose(src);
	if(!ok)
		return !error_top("Assembly", "Failed to assemble rom.");
	if(!(dst = fopen(argv[2], "wb")))
		return !error_top("Invalid Output", argv[2]);
	if(p.length <= TRIM) {
		fclose(dst);
		return !error_top("Assembly", "Output rom is empty.");
	}
	written = fwrite(p.data + TRIM, p.length - TRIM, 1, dst);
	/* always close, then report a short write or a failed flush */
	if(fclose(dst) != 0 || written != 1)
		return !error_top("Invalid Output", argv[2]);
	if(!scmp(argv[2], "-", 2)) {
		review(argv[2]);
		writesym(argv[2]);
	}
	return 0;
}