commit 6b24c002a7b1fe509d8fd6d8577d938020a15056
parent 40e5f2b539d730729dc261b22db36eade9179a7b
Author: Andrew Alderwick <andrew@alderwick.co.uk>
Date: Wed, 31 Mar 2021 23:55:02 +0100
Add beginnings of assembler project.
Diffstat:
4 files changed, 943 insertions(+), 2 deletions(-)
diff --git a/.gitignore b/.gitignore
@@ -4,4 +4,5 @@
*gif~
*bmp~
/bin
-*io.bit
-\ No newline at end of file
+*io.bit
+*.bak
+\ No newline at end of file
diff --git a/etc/assembler-trees.lua b/etc/assembler-trees.lua
@@ -0,0 +1,295 @@
+local build_dag
+build_dag = function(t, dag, i, j, level)
+ if dag == nil then
+ dag = { }
+ end
+ if i == nil then
+ i = 1
+ end
+ if j == nil then
+ j = #t
+ end
+ if level == nil then
+ level = 0
+ end
+ if i > j then
+ return
+ end
+ local mid = math.floor((i + j) / 2)
+ dag[t[mid]] = {
+ (build_dag(t, dag, i, mid - 1, level + 1)),
+ (build_dag(t, dag, mid + 1, j, level + 1))
+ }
+ return t[mid], dag
+end
+local append_dag
+append_dag = function(node, dag, k)
+ local i = k > node and 2 or 1
+ local next_node = dag[node][i]
+ if next_node then
+ return append_dag(next_node, dag, k)
+ end
+ dag[node][i] = k
+ dag[k] = { }
+end
+local build_dag_from_chars
+build_dag_from_chars = function(s, ...)
+ local t
+ do
+ local _accum_0 = { }
+ local _len_0 = 1
+ for i = 1, #s do
+ _accum_0[_len_0] = s:sub(i, i)
+ _len_0 = _len_0 + 1
+ end
+ t = _accum_0
+ end
+ table.sort(t)
+ local root, dag = build_dag(t)
+ for i = 1, select('#', ...) do
+ append_dag(root, dag, (select(i, ...)))
+ end
+ return root, dag
+end
+local check_terminals
+check_terminals = function(dag, s)
+ for i = 1, #s do
+ local k = s:sub(i, i)
+ assert(not dag[k][1], ('%s has left child node'):format(k))
+ assert(not dag[k][2], ('%s has right child node'):format(k))
+ end
+end
+local dump
+dump = function(f, root, dag, level)
+ if level == nil then
+ level = 0
+ end
+ if dag[root][1] then
+ dump(f, dag[root][1], dag, level + 1)
+ end
+ f:write((' '):rep(level))
+ f:write(root)
+ f:write('\n')
+ if dag[root][2] then
+ return dump(f, dag[root][2], dag, level + 1)
+ end
+end
+local write_opcode_tree
+do
+ local byte_to_opcode = { }
+ local byte = false
+ for l in assert(io.lines('src/assembler.c')) do
+ if l:match('^%s*char%s+ops%[%]%[4%]') then
+ byte = 0
+ elseif l:match('%}') then
+ byte = false
+ elseif byte then
+ for opcode in l:gmatch('"([A-Z-][A-Z-][A-Z-])"') do
+ byte_to_opcode[byte] = opcode
+ byte = byte + 1
+ end
+ end
+ end
+ local order_to_opcode
+ do
+ local _accum_0 = { }
+ local _len_0 = 1
+ for i = 0, #byte_to_opcode do
+ if byte_to_opcode[i] ~= '---' then
+ _accum_0[_len_0] = byte_to_opcode[i]
+ _len_0 = _len_0 + 1
+ end
+ end
+ order_to_opcode = _accum_0
+ end
+ table.sort(order_to_opcode)
+ local root, opcode_to_links = build_dag(order_to_opcode)
+ write_opcode_tree = function(f)
+ for i = 0, #byte_to_opcode do
+ local opcode = byte_to_opcode[i]
+ f:write('\t')
+ if opcode == root then
+ f:write('$root ')
+ elseif opcode ~= '---' then
+ f:write(('$op-%s '):format(opcode:lower()))
+ else
+ f:write(' ')
+ end
+ for j = 1, 2 do
+ if opcode ~= '---' and opcode_to_links[opcode][j] then
+ f:write(('.$op-%s '):format(opcode_to_links[opcode][j]:lower()))
+ else
+ f:write('[ 0000 ] ')
+ end
+ end
+ if i == 0 then
+ f:write('$disasm ')
+ else
+ f:write(' ')
+ end
+ if opcode ~= '---' then
+ f:write(('[ %s ]'):format(opcode))
+ else
+ f:write('[ ??? ]')
+ end
+ if i == 0 then
+ f:write(' $asm')
+ end
+ f:write('\n')
+ end
+ end
+end
+local type_byte
+type_byte = function(size, has_subtree)
+ local n1 = has_subtree and '8' or '0'
+ local n2
+ local _exp_0 = size
+ if '1' == _exp_0 then
+ n2 = '1'
+ elseif '2' == _exp_0 then
+ n2 = '3'
+ else
+ n2 = '0'
+ end
+ return n1 .. n2
+end
+local globals = { }
+local add_globals
+add_globals = function(root, dag, key_to_label, key_to_contents, pad_before, pad_after)
+ if pad_before == nil then
+ pad_before = ''
+ end
+ if pad_after == nil then
+ pad_after = ''
+ end
+ for k in pairs(dag) do
+ local l = ''
+ if k == root then
+ l = l .. ('@%s\n'):format(key_to_label('root'):gsub('%s', ''))
+ end
+ l = l .. ('@%s '):format(key_to_label(k))
+ for j = 1, 2 do
+ if dag[k][j] then
+ l = l .. ('.%s '):format(key_to_label(dag[k][j]))
+ else
+ l = l .. ('%s[ 0000 ]%s '):format(pad_before, pad_after)
+ end
+ end
+ l = l .. key_to_contents(k)
+ l = l .. '\n'
+ globals[key_to_label(k):gsub('%s', '')] = l
+ end
+ globals[key_to_label('root'):gsub('%s', '')] = ''
+end
+do
+ local root, dag = build_dag_from_chars('{}[]%@$;|=~,.^#"\0', '(', ')')
+ check_terminals(dag, ')')
+ local convert = {
+ ['.'] = 'dot',
+ ['\0'] = 'nul'
+ }
+ local label_name
+ label_name = function(s)
+ return ('first-char-%-3s'):format(convert[s] or s)
+ end
+ local label_value
+ label_value = function(k)
+ return ('[ %02x ]'):format(k:byte())
+ end
+ add_globals(root, dag, label_name, label_value, ' ', ' ')
+end
+local devices = { }
+local add_device
+add_device = function(name, fields)
+ local field_sizes
+ do
+ local _tbl_0 = { }
+ for k, size in fields:gmatch('(%S+) (%d+)') do
+ _tbl_0[k] = size
+ end
+ field_sizes = _tbl_0
+ end
+ field_sizes.pad = nil
+ local field_names
+ do
+ local _accum_0 = { }
+ local _len_0 = 1
+ for k in pairs(field_sizes) do
+ _accum_0[_len_0] = k
+ _len_0 = _len_0 + 1
+ end
+ field_names = _accum_0
+ end
+ table.sort(field_names)
+ local root, dag = build_dag(field_names)
+ local label_name
+ label_name = function(k)
+ return ('l-%-14s'):format(name .. '-' .. k)
+ end
+ local label_value
+ label_value = function(k)
+ return ('%-17s [ %s ] .%s.%s'):format(('[ %s 00 ]'):format(k), type_byte(field_sizes[k], false), name, k)
+ end
+ add_globals(root, dag, label_name, label_value, ' ', ' ')
+ return table.insert(devices, name)
+end
+local add_devices
+add_devices = function()
+ table.sort(devices)
+ local root, dag = build_dag(devices)
+ local label_name
+ label_name = function(k)
+ return ('l-%-14s'):format(k)
+ end
+ local label_value
+ label_value = function(k)
+ return ('%-17s [ %s ] .%s .l-%s-root'):format(('[ %s 00 ]'):format(k), type_byte(0, true), k, k)
+ end
+ return add_globals(root, dag, label_name, label_value, ' ', ' ')
+end
+local filename = 'projects/software/assembler.usm'
+local f = assert(io.open(('%s.tmp'):format(filename), 'w'))
+local state = 'normal'
+local machine = {
+ normal = function(l)
+ if l:match('%$disasm .*%$asm') then
+ write_opcode_tree(f)
+ state = 'opcode'
+ elseif l:match('^%@') then
+ if l == '@RESET' then
+ add_devices()
+ end
+ for k in l:gmatch('%@(%S+)') do
+ if globals[k] then
+ f:write(globals[k])
+ globals[k] = nil
+ return
+ end
+ end
+ f:write(l)
+ return f:write('\n')
+ else
+ if l:match('^%|%x%x%x%x %;') then
+ add_device(l:match('%;(%S+) %{ (.*) %}'))
+ end
+ f:write(l)
+ return f:write('\n')
+ end
+ end,
+ opcode = function(l)
+ if not l:match('%[') then
+ f:write(l)
+ f:write('\n')
+ state = 'normal'
+ end
+ end
+}
+for l in assert(io.lines(filename)) do
+ machine[state](l)
+end
+for _, l in pairs(globals) do
+ f:write(l)
+end
+f:close()
+assert(0 == os.execute(('mv %s %s.bak'):format(filename, filename)))
+return assert(0 == os.execute(('mv %s.tmp %s'):format(filename, filename)))
diff --git a/etc/assembler-trees.moon b/etc/assembler-trees.moon
@@ -0,0 +1,180 @@
+build_dag = (t, dag = {}, i = 1, j = #t, level = 0) ->
+ if i > j
+ return
+ mid = math.floor (i + j) / 2
+ dag[t[mid]] = {
+ (build_dag t, dag, i, mid - 1, level + 1)
+ (build_dag t, dag, mid + 1, j, level + 1)
+ }
+ t[mid], dag
+append_dag = (node, dag, k) ->
+ i = k > node and 2 or 1
+ next_node = dag[node][i]
+ if next_node
+ return append_dag next_node, dag, k
+ dag[node][i] = k
+ dag[k] = {}
+build_dag_from_chars = (s, ...) ->
+ t = [ s\sub i, i for i = 1, #s ]
+ table.sort t
+ root, dag = build_dag t
+ for i = 1, select '#', ...
+ append_dag root, dag, (select i, ...)
+ return root, dag
+check_terminals = (dag, s) ->
+ for i = 1, #s
+ k = s\sub i, i
+ assert not dag[k][1], '%s has left child node'\format k
+ assert not dag[k][2], '%s has right child node'\format k
+dump = (f, root, dag, level = 0) ->
+ if dag[root][1]
+ dump f, dag[root][1], dag, level + 1
+ f\write ' '\rep level
+ f\write root
+ f\write '\n'
+ if dag[root][2]
+ dump f, dag[root][2], dag, level + 1
+
+-- deal with opcodes
+
+write_opcode_tree = do
+ byte_to_opcode = {}
+ byte = false
+ for l in assert io.lines 'src/assembler.c'
+ if l\match '^%s*char%s+ops%[%]%[4%]'
+ byte = 0
+ elseif l\match '%}'
+ byte = false
+ elseif byte
+ for opcode in l\gmatch '"([A-Z-][A-Z-][A-Z-])"'
+ byte_to_opcode[byte] = opcode
+ byte += 1
+ order_to_opcode = [ byte_to_opcode[i] for i = 0, #byte_to_opcode when byte_to_opcode[i] != '---' ]
+ table.sort order_to_opcode
+ root, opcode_to_links = build_dag order_to_opcode
+ (f) ->
+ for i = 0, #byte_to_opcode
+ opcode = byte_to_opcode[i]
+ f\write '\t'
+ if opcode == root
+ f\write '$root '
+ elseif opcode != '---'
+ f\write '$op-%s '\format opcode\lower!
+ else
+ f\write ' '
+ for j = 1, 2
+ if opcode != '---' and opcode_to_links[opcode][j]
+ f\write '.$op-%s '\format opcode_to_links[opcode][j]\lower!
+ else
+ f\write '[ 0000 ] '
+ if i == 0
+ f\write '$disasm '
+ else
+ f\write ' '
+ if opcode != '---'
+ f\write '[ %s ]'\format opcode
+ else
+ f\write '[ ??? ]'
+ if i == 0
+ f\write ' $asm'
+ f\write '\n'
+
+type_byte = (size, has_subtree) ->
+ n1 = has_subtree and '8' or '0'
+ n2 = switch size
+ when '1'
+ '1'
+ when '2'
+ '3'
+ else
+ '0'
+ n1 .. n2
+
+globals = {}
+
+add_globals = (root, dag, key_to_label, key_to_contents, pad_before = '', pad_after = '') ->
+ for k in pairs dag
+ l = ''
+ if k == root
+ l ..= '@%s\n'\format key_to_label('root')\gsub '%s', ''
+ l ..= '@%s '\format key_to_label k
+ for j = 1, 2
+ if dag[k][j]
+ l ..= '.%s '\format key_to_label dag[k][j]
+ else
+ l ..= '%s[ 0000 ]%s '\format pad_before, pad_after
+ l ..= key_to_contents k
+ l ..= '\n'
+ globals[key_to_label(k)\gsub '%s', ''] = l
+ globals[key_to_label('root')\gsub '%s', ''] = ''
+
+do
+ root, dag = build_dag_from_chars '{}[]%@$;|=~,.^#"\0', '(', ')'
+ check_terminals dag, ')'
+-- dump io.stdout, root, dag
+ convert = {
+ ['.']: 'dot'
+ ['\0']: 'nul'
+ }
+ label_name = (s) -> 'first-char-%-3s'\format convert[s] or s
+ label_value = (k) -> '[ %02x ]'\format k\byte!
+ add_globals root, dag, label_name, label_value, ' ', ' '
+
+devices = {}
+
+add_device = (name, fields) ->
+ field_sizes = { k, size for k, size in fields\gmatch '(%S+) (%d+)' }
+ field_sizes.pad = nil
+ field_names = [ k for k in pairs field_sizes ]
+ table.sort field_names
+ root, dag = build_dag field_names
+ label_name = (k) -> 'l-%-14s'\format name .. '-' .. k
+ label_value = (k) -> '%-17s [ %s ] .%s.%s'\format '[ %s 00 ]'\format(k), type_byte(field_sizes[k], false), name, k
+ add_globals root, dag, label_name, label_value, ' ', ' '
+ table.insert devices, name
+
+add_devices = ->
+ table.sort devices
+ root, dag = build_dag devices
+ label_name = (k) -> 'l-%-14s'\format k
+ label_value = (k) -> '%-17s [ %s ] .%s .l-%s-root'\format '[ %s 00 ]'\format(k), type_byte(0, true), k, k
+ add_globals root, dag, label_name, label_value, ' ', ' '
+
+filename = 'projects/software/assembler.usm'
+
+f = assert io.open '%s.tmp'\format(filename), 'w'
+-- f = io.stdout
+state = 'normal'
+machine =
+ normal: (l) ->
+ if l\match '%$disasm .*%$asm'
+ write_opcode_tree f
+ state = 'opcode'
+ elseif l\match '^%@'
+ if l == '@RESET'
+ add_devices!
+ for k in l\gmatch '%@(%S+)'
+ if globals[k]
+ f\write globals[k]
+ globals[k] = nil
+ return
+ f\write l
+ f\write '\n'
+ else
+ if l\match '^%|%x%x%x%x %;'
+ add_device l\match '%;(%S+) %{ (.*) %}'
+ f\write l
+ f\write '\n'
+ opcode: (l) ->
+ if not l\match '%['
+ f\write l
+ f\write '\n'
+ state = 'normal'
+for l in assert io.lines filename
+ machine[state] l
+for _, l in pairs globals
+ f\write l
+f\close!
+assert 0 == os.execute 'mv %s %s.bak'\format filename, filename
+assert 0 == os.execute 'mv %s.tmp %s'\format filename, filename
+
diff --git a/projects/software/assembler.usm b/projects/software/assembler.usm
@@ -0,0 +1,465 @@
+;tree { search-key 2 max-key-len 1 }
+;assembler { pass 1 state 1 token 2 scope-len 1 scope 80 }
+
+%HCF { #0000 DIV }
+
+( devices )
+
+|0100 ;Console { pad 8 char 1 byte 1 short 2 string 2 }
+|0110 ;Screen { width 2 height 2 pad 4 x 2 y 2 color 1 }
+|0120 ;Sprite { pad 8 x 2 y 2 addr 2 color 1 }
+|0130 ;Controller { p1 1 }
+|0140 ;Keys { key 1 }
+|0150 ;Mouse { x 2 y 2 state 1 chord 1 }
+|0160 ;File { pad 8 name 2 length 2 load 2 save 2 }
+|01F0 ;System { pad 8 r 2 g 2 b 2 }
+
+( vectors )
+
+|0200 ,RESET JMP2
+|0204 BRK
+|0208 BRK
+
+@RESET
+ #b000 #c000 #0010 ,memcpy JSR2
+ HCF
+
+ ,$token ,strlen JSR2
+ HCF
+
+ #00
+ $loop
+ DUP ,highest-bit JSR2
+ ( )
+ POP
+ #01 ADD
+ DUP ^$loop JNZ
+ POP
+
+
+ ,$token ^assemble-token JSR
+ ,$token2 ^assemble-token JSR
+ ,$token3 ^assemble-token JSR
+ ~assembler.state
+ HCF
+
+ $token [ hello 00 ]
+ $token2 [ 00 ]
+ $token3 [ 00 ]
+
+@assemble-tokens ( string-ptr* -- )
+ DUP2 ^assemble-token JSR
+
+@assemble-token ( string-ptr* -- )
+ ( get location of tree )
+ DUP2
+ ,state-machine-pointers #00 ~assembler.state ,highest-bit JSR2 #0004 MUL2 ADD2
+ DUP2 STH2
+ ( see if first char is recognised )
+ SWP2 #01 ,traverse-tree JSR2
+ ^$not-found JNZ
+ ( skip first character of token )
+ SWP2 #0001 ADD2 =assembler.token
+ ( tail call handling function defined in tree )
+ POP2r JMP2
+
+ $not-found
+ ( not interested in incoming-ptr )
+ POP2
+ =assembler.token
+ ( tail call default handling function defined in state-machine-pointers )
+ LIT2r [ 0002 ] ADD2r LDR2r
+ JMP2r
+
+@parse-hex-length ( string-ptr* -- value 01 if one or two hex digits
+ OR 00 otherwise )
+ DUP2 #0001 ADD2 PEK2 ^parse-hex-string-try-two JNZ
+ PEK2 ^parse-hex-digit JSR DUP #04 SFT ^parse-hex-string-fail1 JNZ
+ #01 JMP2r
+
+@parse-hex-string ( string-ptr* -- value* 02 if four hex digits
+ OR value 01 if two hex digits
+ OR 00 otherwise )
+ DUP2 #0004 ADD2 PEK2 #00 EQU ^$try-four JNZ
+ $try-two
+ DUP2 #0002 ADD2 PEK2 ^$fail2 JNZ
+ $known-two
+ DUP2 PEK2 ^parse-hex-digit JSR DUP #04 SFT ^$fail3 JNZ ROT ROT
+ #0001 ADD2 PEK2 ^parse-hex-digit JSR DUP #04 SFT ^$fail2 JNZ
+ SWP #40 SFT ORA #01 JMP2r
+
+ $fail3 POP
+ $fail2 POP
+ $fail1 POP #00 JMP2r
+
+ $try-four
+ DUP2 #0002 ADD2 ^$known-two JSR ^$maybe-four JNZ
+ ^$try-two JMP
+
+ $maybe-four
+ ROT ROT ^$known-two JSR ^$four JNZ
+ ^$fail1 JMP
+
+ $four
+ SWP #02 JMP2r
+
+@parse-hex-digit ( charcode -- 00-0f if valid hex
+ -- 10-ff otherwise )
+ DUP #3a LTH ^$digit JNZ
+ DUP #60 GTH ^$lowercase JNZ
+ DUP #40 GTH ^$uppercase JNZ
+ JMP2r
+
+ $digit ( #30 is #00 )
+ #30 SUB JMP2r
+
+ $lowercase ( #61 is #0a )
+ #57 SUB JMP2r
+
+ $uppercase ( #41 is #0a )
+ #37 SUB JMP2r
+
+@find-opcode ( name* -- byte 00 if valid opcode name
+ OR 01 if not found )
+ ,opcodes-tree SWP2 #03 ^traverse-tree JSR
+ ^$nomatch JNZ
+ ,opcodes-asm SUB2 #0007 DIV2
+ SWP JMP2r
+
+ $nomatch
+ DUP2 EQU2 JMP2r
+
+@traverse-tree ( tree-ptr* search-key* max-key-len --
+ binary-ptr* 00 if key matched
+ OR incoming-ptr* 01 if key not found )
+ =tree.max-key-len =tree.search-key
+
+ $loop
+ DUP2 LDR2 #0000 NEQ2 ^$valid-node JNZ
+ #01 JMP2r
+
+ $valid-node
+ LDR2 DUP2 STH2 #0004 ADD2 ^strcmp-tree JSR
+ DUP ^$nomatch JNZ
+ POP2r JMP2r
+
+ $nomatch
+ #07 SFT #02 MUL #00 SWP
+ STH2r ADD2
+ ^$loop JMP
+
+@strcmp-tree ( node-key* -- order if strings differ
+ OR after-node-key* 00 if strings match )
+ ~tree.search-key STH2
+ ~tree.max-key-len
+
+ $loop ( node-key* key-len in wst, search-key* in rst )
+ DUP ^$keep-going JNZ
+
+ ( exhausted key-len, match found )
+ POP2r
+ JMP2r
+
+ $keep-going
+ #01 OVR2 PEK2 DUP2r PEK2r STHr
+ DUP2 ORA ^$not-end JNZ
+
+ ( end of C strings, match found )
+ POP2r POP ROT POP SWP ADD2 #00
+ JMP2r
+
+ $not-end
+ SUB DUP ^$nomatch JNZ
+ POP SUB
+ LIT2r [ 0001 ] ADD2r STH
+ LIT2 [ 0001 ] ADD2 STHr
+ ^$loop JMP
+
+ $nomatch
+ STH POP2 POP2 STHr POP2r
+ JMP2r
+
+@memcpy ( src-ptr* dest-ptr* length* -- )
+ SWP2 STH2
+
+ $loop
+ DUP2 ORA ^$keep-going JNZ
+ POP2 POP2 POP2r
+ JMP2r
+
+ $keep-going
+ #0001 SUB2
+ SWP2 DUP2 PEK2 DUP2r STH2r POK2
+ #0001 ADD2 SWP2
+ LIT2r [ 0001 ] ADD2r
+ ^$loop JMP
+
+@strlen ( string-ptr* -- length* )
+ DUP2 #0001 SUB2
+ $loop
+ #0001 ADD2
+ DUP2 PEK2 ^$loop JNZ
+ SWP2 SUB2
+ JMP2r
+
+
+
+
+@add-label ( string-ptr* label-flags -- )
+ ( NYI )
+ POP POP2 JMP2r
+
+@highest-bit ( n -- 00 if n is 00
+ OR 01 if n is 01
+ OR 02 if n is 02..03
+ OR 03 if n is 04..07
+ OR 04 if n is 08..0f
+ ..
+ OR 08 if n is 80..ff )
+ DUP #00 NEQ JMP JMP2r
+ DUP #01 SFT ORA
+ DUP #02 SFT ORA
+ DUP #04 SFT ORA
+ #1d MUL #05 SFT #00 SWP ,$lookup ADD2 PEK2
+ JMP2r
+
+ $lookup
+ [ 01 06 02 07 05 04 03 08 ]
+
+@opcodes
+ (
+ The code for this section is automatically generated, and needs to be
+ regenerated when the opcode list in src/assembler.c is updated.
+
+ After editing src/assembler.c, run "lua etc/assembler-trees.lua"
+ and this file will be edited automatically.
+
+ This is the first example of a binary tree in this code, so let's
+ explore them in general. The format of a tree node in memory is:
+
+ left-node* right-node* node-key-cstring binary-data
+
+ and the general algorithm is to compare the key you're looking for
+ against node-key-cstring, and move to the node pointed to by left-node*
+ or right-node* if the keys don't match. If your key sorts earlier than
+ use left-node*, otherwise go to right-node*. When you find a node that
+ matches your key, traverse-bintree gives you a pointer to the
+ binary-data straight after the node-key-cstring. This data can contain
+ anything you want: fixed length fields, executable code... in this case
+ of this opcode tree, we store nothing. traverse-bintree is passed the
+ maximum length of node-key-cstring, not including the zero, so the zero
+ can be omitted if the string is at that maximum length.
+
+ If the key isn't present in the tree, you'll eventually get to a node
+ where the left-node* or right-node* pointer you'll need to follow is
+ null (0000). traverse-bintree will give you the location of that
+ pointer, so if you want to insert another node, you can write it to the
+ heap and overwrite the pointer with the new node's location. This
+ approach works even if the tree is completely empty and the pointer
+ you've provided to the root node is null, since that pointer gets
+ updated to point to the first node without needing any special logic.
+
+ The ordering of nodes in memory is totally arbitrary, so for pre-
+ prepared trees like this one we can have our own meaning for the order
+ of the nodes. By ordering the opcodes by their byte value, we can find
+ the byte by subtracting $asm from the binary-data pointer and dividing
+ by seven (the size of each node). By multiplying the byte value by seven
+ and adding to $disasm, we get the opcode name when disassembling too.
+ )
+ $tree .$root
+ $op-brk .$op-add .$op-dup $disasm [ BRK ] $asm
+ $op-nop .$op-mul .$op-ovr [ NOP ]
+ $op-lit [ 0000 ] [ 0000 ] [ LIT ]
+ $op-pop [ 0000 ] [ 0000 ] [ POP ]
+ $op-dup .$op-div .$op-eor [ DUP ]
+ $op-swp [ 0000 ] [ 0000 ] [ SWP ]
+ $op-ovr .$op-ora .$op-pek [ OVR ]
+ $op-rot .$op-pop .$op-sft [ ROT ]
+ $op-equ .$op-brk .$op-jnz [ EQU ]
+ $op-neq [ 0000 ] [ 0000 ] [ NEQ ]
+ $op-gth [ 0000 ] [ 0000 ] [ GTH ]
+ $root .$op-equ .$op-pok [ LTH ]
+ $op-gts .$op-gth .$op-jmp [ GTS ]
+ $op-lts [ 0000 ] [ 0000 ] [ LTS ]
+ [ 0000 ] [ 0000 ] [ ??? ]
+ [ 0000 ] [ 0000 ] [ ??? ]
+ $op-pek [ 0000 ] [ 0000 ] [ PEK ]
+ $op-pok .$op-nop .$op-sth [ POK ]
+ $op-ldr .$op-jsr .$op-lit [ LDR ]
+ $op-str [ 0000 ] [ 0000 ] [ STR ]
+ $op-jmp [ 0000 ] [ 0000 ] [ JMP ]
+ $op-jnz .$op-gts .$op-ldr [ JNZ ]
+ $op-jsr [ 0000 ] [ 0000 ] [ JSR ]
+ $op-sth .$op-rot .$op-sub [ STH ]
+ $op-add [ 0000 ] .$op-and [ ADD ]
+ $op-sub .$op-str .$op-swp [ SUB ]
+ $op-mul .$op-lts .$op-neq [ MUL ]
+ $op-div [ 0000 ] [ 0000 ] [ DIV ]
+ $op-and [ 0000 ] [ 0000 ] [ AND ]
+ $op-ora [ 0000 ] [ 0000 ] [ ORA ]
+ $op-eor [ 0000 ] [ 0000 ] [ EOR ]
+ $op-sft [ 0000 ] [ 0000 ] [ SFT ]
+
+@state-machine-pointers
+( normal mode 00 )
+.first-char-root .nyi
+( FIXME 01 )
+.nyi .nyi
+( FIXME 02 )
+.nyi .nyi
+( FIXME 04 )
+.nyi .nyi
+( FIXME 08 )
+.nyi .nyi
+( FIXME 10 )
+.nyi .nyi
+( literal data 20 )
+[ 0000 ] .nyi
+( FIXME 40 )
+.nyi .nyi
+( comment 80 )
+.first-char-) .ignore
+
+(
+ Next up, we have the tree of code corresponding to each token's
+ first character. Here we do have a binary payload, which is
+ the code to run when the assembler considers the token.
+
+ Some special assembler modes have their own trees. Since comments
+ have a very simple tree that only understands the end of comments,
+ we reuse the terminal branch of the main tree as the root of
+ the comment tree.
+)
+
+(
+ Left and right parentheses start and end comment sections. They use the
+ highest bit in assembler state, so they receive highest priority: it
+ doesn't matter what other bits are set, a comment's a comment.
+)
+
+@first-char-( [ 0000 ] .first-char-) [ 28 ]
+ ~assembler.state #80 ORA =assembler.state
+JMP2r
+
+@first-char-) [ 0000 ] [ 0000 ] [ 29 ]
+ ~assembler.state #7f AND =assembler.state
+JMP2r
+
+(
+ Left and right square brackets start and end literal data sections.
+)
+
+@first-char-[ .first-char-@ .first-char-] [ 5b ]
+ ~assembler.state #20 ORA =assembler.state
+JMP2r
+
+@first-char-] [ 0000 ] [ 0000 ] [ 5d ]
+ ~assembler.state #df AND =assembler.state
+JMP2r
+
+(
+ Ampersands introduce global labels, and define the scope for any
+ local labels that follow.
+)
+
+@first-char-@ [ 0000 ] [ 0000 ] [ 40 ]
+ ~assembler.pass ^$scope JNZ
+ DUP2 #00 ,add-label JSR2
+
+ $scope
+ DUP2 ,strlen JSR2
+ DUP2 =assembler.scope-len POP
+ ,assembler.scope SWP2 JMP2
+
+@first-char-root
+@first-char-= .first-char-$ .first-char-^ [ 3d ]
+@first-char-" .first-char-nul .first-char-# [ 22 ]
+@first-char-# [ 0000 ] [ 0000 ] [ 23 ]
+@first-char-$ .first-char-" .first-char-, [ 24 ]
+@first-char-% [ 0000 ] .first-char-( [ 25 ]
+@first-char-, .first-char-% .first-char-dot [ 2c ]
+@first-char-dot [ 0000 ] .first-char-; [ 2e ]
+@first-char-; [ 0000 ] [ 0000 ] [ 3b ]
+@first-char-^ .first-char-[ .first-char-| [ 5e ]
+@first-char-{ [ 0000 ] [ 0000 ] [ 7b ]
+@first-char-| .first-char-{ .first-char-} [ 7c ]
+@first-char-} [ 0000 ] .first-char-~ [ 7d ]
+@first-char-~ [ 0000 ] [ 0000 ] [ 7e ]
+
+@first-char-nul [ 0000 ] [ 0000 ] [ 00 ]
+@ignore
+JMP2r
+
+@nyi
+ ,$string =Console.string
+ HCF
+
+ $string [ Not 20 implemented 0a 00 ]
+
+(
+ Here's the big set of trees relating to labels. Starting from l-root, all
+ the devices are stored here, perhaps some helper functions in the future,
+ too.
+
+ left-node* right-node* node-key-cstring binary-data
+
+ The node-keys are terminated with NUL since, unlike the opcodes and first
+ characters, the keys are variable length.
+
+ The binary-data is either three or five bytes long:
+ flags value* [ subtree-pointer* ]
+
+ The flags byte is divided up into bits:
+
+ bit 0: 01 means load or store helpers can be used,
+ bit 1: 02 means the helpers use STR/LDR, 00 means they use POK/PEK;
+ bits 2-6 are reserved; and
+ bit 7: 80 means there is a subtree.
+
+ If there is a subtree, it is searched when the reference contains a dot.
+)
+
+@l-Console [ 0000 ] [ 0000 ] [ Console 00 ] [ 80 ] .Console .l-Console-root
+@l-Console-byte [ 0000 ] [ 0000 ] [ byte 00 ] [ 01 ] .Console.byte
+@l-Console-root
+@l-Console-char .l-Console-byte .l-Console-short [ char 00 ] [ 01 ] .Console.char
+@l-Console-short [ 0000 ] .l-Console-string [ short 00 ] [ 03 ] .Console.short
+@l-Console-string [ 0000 ] [ 0000 ] [ string 00 ] [ 03 ] .Console.string
+@l-Controller .l-Console .l-File [ Controller 00 ] [ 80 ] .Controller .l-Controller-root
+@l-Controller-root
+@l-Controller-p1 [ 0000 ] [ 0000 ] [ p1 00 ] [ 01 ] .Controller.p1
+@l-File [ 0000 ] [ 0000 ] [ File 00 ] [ 80 ] .File .l-File-root
+@l-File-length [ 0000 ] [ 0000 ] [ length 00 ] [ 03 ] .File.length
+@l-File-root
+@l-File-load .l-File-length .l-File-name [ load 00 ] [ 03 ] .File.load
+@l-File-name [ 0000 ] .l-File-save [ name 00 ] [ 03 ] .File.name
+@l-File-save [ 0000 ] [ 0000 ] [ save 00 ] [ 03 ] .File.save
+@l-root
+@l-Keys .l-Controller .l-Screen [ Keys 00 ] [ 80 ] .Keys .l-Keys-root
+@l-Keys-root
+@l-Keys-key [ 0000 ] [ 0000 ] [ key 00 ] [ 01 ] .Keys.key
+@l-Mouse [ 0000 ] [ 0000 ] [ Mouse 00 ] [ 80 ] .Mouse .l-Mouse-root
+@l-Mouse-chord [ 0000 ] [ 0000 ] [ chord 00 ] [ 01 ] .Mouse.chord
+@l-Mouse-root
+@l-Mouse-state .l-Mouse-chord .l-Mouse-x [ state 00 ] [ 01 ] .Mouse.state
+@l-Mouse-x [ 0000 ] .l-Mouse-y [ x 00 ] [ 03 ] .Mouse.x
+@l-Mouse-y [ 0000 ] [ 0000 ] [ y 00 ] [ 03 ] .Mouse.y
+@l-Screen .l-Mouse .l-Sprite [ Screen 00 ] [ 80 ] .Screen .l-Screen-root
+@l-Screen-color [ 0000 ] .l-Screen-height [ color 00 ] [ 01 ] .Screen.color
+@l-Screen-height [ 0000 ] [ 0000 ] [ height 00 ] [ 03 ] .Screen.height
+@l-Screen-root
+@l-Screen-width .l-Screen-color .l-Screen-x [ width 00 ] [ 03 ] .Screen.width
+@l-Screen-x [ 0000 ] .l-Screen-y [ x 00 ] [ 03 ] .Screen.x
+@l-Screen-y [ 0000 ] [ 0000 ] [ y 00 ] [ 03 ] .Screen.y
+@l-Sprite [ 0000 ] .l-System [ Sprite 00 ] [ 80 ] .Sprite .l-Sprite-root
+@l-Sprite-addr [ 0000 ] [ 0000 ] [ addr 00 ] [ 03 ] .Sprite.addr
+@l-Sprite-root
+@l-Sprite-color .l-Sprite-addr .l-Sprite-x [ color 00 ] [ 01 ] .Sprite.color
+@l-Sprite-x [ 0000 ] .l-Sprite-y [ x 00 ] [ 03 ] .Sprite.x
+@l-Sprite-y [ 0000 ] [ 0000 ] [ y 00 ] [ 03 ] .Sprite.y
+@l-System [ 0000 ] [ 0000 ] [ System 00 ] [ 80 ] .System .l-System-root
+@l-System-b [ 0000 ] [ 0000 ] [ b 00 ] [ 03 ] .System.b
+@l-System-root
+@l-System-g .l-System-b .l-System-r [ g 00 ] [ 03 ] .System.g
+@l-System-r [ 0000 ] [ 0000 ] [ r 00 ] [ 03 ] .System.r
+