commit 6b24c002a7b1fe509d8fd6d8577d938020a15056
parent 40e5f2b539d730729dc261b22db36eade9179a7b
Author: Andrew Alderwick <andrew@alderwick.co.uk>
Date:   Wed, 31 Mar 2021 23:55:02 +0100
Add beginnings of assembler project.
Diffstat:
4 files changed, 943 insertions(+), 2 deletions(-)
diff --git a/.gitignore b/.gitignore
@@ -4,4 +4,5 @@
 *gif~
 *bmp~
 /bin
-*io.bit
-\ No newline at end of file
+*io.bit
+*.bak
+\ No newline at end of file
diff --git a/etc/assembler-trees.lua b/etc/assembler-trees.lua
@@ -0,0 +1,295 @@
+local build_dag
+build_dag = function(t, dag, i, j, level)
+  if dag == nil then
+    dag = { }
+  end
+  if i == nil then
+    i = 1
+  end
+  if j == nil then
+    j = #t
+  end
+  if level == nil then
+    level = 0
+  end
+  if i > j then
+    return 
+  end
+  local mid = math.floor((i + j) / 2)
+  dag[t[mid]] = {
+    (build_dag(t, dag, i, mid - 1, level + 1)),
+    (build_dag(t, dag, mid + 1, j, level + 1))
+  }
+  return t[mid], dag
+end
+local append_dag
+append_dag = function(node, dag, k)
+  local i = k > node and 2 or 1
+  local next_node = dag[node][i]
+  if next_node then
+    return append_dag(next_node, dag, k)
+  end
+  dag[node][i] = k
+  dag[k] = { }
+end
+local build_dag_from_chars
+build_dag_from_chars = function(s, ...)
+  local t
+  do
+    local _accum_0 = { }
+    local _len_0 = 1
+    for i = 1, #s do
+      _accum_0[_len_0] = s:sub(i, i)
+      _len_0 = _len_0 + 1
+    end
+    t = _accum_0
+  end
+  table.sort(t)
+  local root, dag = build_dag(t)
+  for i = 1, select('#', ...) do
+    append_dag(root, dag, (select(i, ...)))
+  end
+  return root, dag
+end
+local check_terminals
+check_terminals = function(dag, s)
+  for i = 1, #s do
+    local k = s:sub(i, i)
+    assert(not dag[k][1], ('%s has left child node'):format(k))
+    assert(not dag[k][2], ('%s has right child node'):format(k))
+  end
+end
+local dump
+dump = function(f, root, dag, level)
+  if level == nil then
+    level = 0
+  end
+  if dag[root][1] then
+    dump(f, dag[root][1], dag, level + 1)
+  end
+  f:write(('    '):rep(level))
+  f:write(root)
+  f:write('\n')
+  if dag[root][2] then
+    return dump(f, dag[root][2], dag, level + 1)
+  end
+end
+local write_opcode_tree
+do
+  local byte_to_opcode = { }
+  local byte = false
+  for l in assert(io.lines('src/assembler.c')) do
+    if l:match('^%s*char%s+ops%[%]%[4%]') then
+      byte = 0
+    elseif l:match('%}') then
+      byte = false
+    elseif byte then
+      for opcode in l:gmatch('"([A-Z-][A-Z-][A-Z-])"') do
+        byte_to_opcode[byte] = opcode
+        byte = byte + 1
+      end
+    end
+  end
+  local order_to_opcode
+  do
+    local _accum_0 = { }
+    local _len_0 = 1
+    for i = 0, #byte_to_opcode do
+      if byte_to_opcode[i] ~= '---' then
+        _accum_0[_len_0] = byte_to_opcode[i]
+        _len_0 = _len_0 + 1
+      end
+    end
+    order_to_opcode = _accum_0
+  end
+  table.sort(order_to_opcode)
+  local root, opcode_to_links = build_dag(order_to_opcode)
+  write_opcode_tree = function(f)
+    for i = 0, #byte_to_opcode do
+      local opcode = byte_to_opcode[i]
+      f:write('\t')
+      if opcode == root then
+        f:write('$root   ')
+      elseif opcode ~= '---' then
+        f:write(('$op-%s '):format(opcode:lower()))
+      else
+        f:write('        ')
+      end
+      for j = 1, 2 do
+        if opcode ~= '---' and opcode_to_links[opcode][j] then
+          f:write(('.$op-%s '):format(opcode_to_links[opcode][j]:lower()))
+        else
+          f:write('[ 0000 ] ')
+        end
+      end
+      if i == 0 then
+        f:write('$disasm ')
+      else
+        f:write('        ')
+      end
+      if opcode ~= '---' then
+        f:write(('[ %s ]'):format(opcode))
+      else
+        f:write('[ ??? ]')
+      end
+      if i == 0 then
+        f:write(' $asm')
+      end
+      f:write('\n')
+    end
+  end
+end
+local type_byte
+type_byte = function(size, has_subtree)
+  local n1 = has_subtree and '8' or '0'
+  local n2
+  local _exp_0 = size
+  if '1' == _exp_0 then
+    n2 = '1'
+  elseif '2' == _exp_0 then
+    n2 = '3'
+  else
+    n2 = '0'
+  end
+  return n1 .. n2
+end
+local globals = { }
+local add_globals
+add_globals = function(root, dag, key_to_label, key_to_contents, pad_before, pad_after)
+  if pad_before == nil then
+    pad_before = ''
+  end
+  if pad_after == nil then
+    pad_after = ''
+  end
+  for k in pairs(dag) do
+    local l = ''
+    if k == root then
+      l = l .. ('@%s\n'):format(key_to_label('root'):gsub('%s', ''))
+    end
+    l = l .. ('@%s '):format(key_to_label(k))
+    for j = 1, 2 do
+      if dag[k][j] then
+        l = l .. ('.%s '):format(key_to_label(dag[k][j]))
+      else
+        l = l .. ('%s[ 0000 ]%s '):format(pad_before, pad_after)
+      end
+    end
+    l = l .. key_to_contents(k)
+    l = l .. '\n'
+    globals[key_to_label(k):gsub('%s', '')] = l
+  end
+  globals[key_to_label('root'):gsub('%s', '')] = ''
+end
+do
+  local root, dag = build_dag_from_chars('{}[]%@$;|=~,.^#"\0', '(', ')')
+  check_terminals(dag, ')')
+  local convert = {
+    ['.'] = 'dot',
+    ['\0'] = 'nul'
+  }
+  local label_name
+  label_name = function(s)
+    return ('first-char-%-3s'):format(convert[s] or s)
+  end
+  local label_value
+  label_value = function(k)
+    return ('[ %02x ]'):format(k:byte())
+  end
+  add_globals(root, dag, label_name, label_value, '  ', '     ')
+end
+local devices = { }
+local add_device
+add_device = function(name, fields)
+  local field_sizes
+  do
+    local _tbl_0 = { }
+    for k, size in fields:gmatch('(%S+) (%d+)') do
+      _tbl_0[k] = size
+    end
+    field_sizes = _tbl_0
+  end
+  field_sizes.pad = nil
+  local field_names
+  do
+    local _accum_0 = { }
+    local _len_0 = 1
+    for k in pairs(field_sizes) do
+      _accum_0[_len_0] = k
+      _len_0 = _len_0 + 1
+    end
+    field_names = _accum_0
+  end
+  table.sort(field_names)
+  local root, dag = build_dag(field_names)
+  local label_name
+  label_name = function(k)
+    return ('l-%-14s'):format(name .. '-' .. k)
+  end
+  local label_value
+  label_value = function(k)
+    return ('%-17s [ %s ] .%s.%s'):format(('[ %s 00 ]'):format(k), type_byte(field_sizes[k], false), name, k)
+  end
+  add_globals(root, dag, label_name, label_value, ' ', '        ')
+  return table.insert(devices, name)
+end
+local add_devices
+add_devices = function()
+  table.sort(devices)
+  local root, dag = build_dag(devices)
+  local label_name
+  label_name = function(k)
+    return ('l-%-14s'):format(k)
+  end
+  local label_value
+  label_value = function(k)
+    return ('%-17s [ %s ] .%s .l-%s-root'):format(('[ %s 00 ]'):format(k), type_byte(0, true), k, k)
+  end
+  return add_globals(root, dag, label_name, label_value, ' ', '        ')
+end
+local filename = 'projects/software/assembler.usm'
+local f = assert(io.open(('%s.tmp'):format(filename), 'w'))
+local state = 'normal'
+local machine = {
+  normal = function(l)
+    if l:match('%$disasm .*%$asm') then
+      write_opcode_tree(f)
+      state = 'opcode'
+    elseif l:match('^%@') then
+      if l == '@RESET' then
+        add_devices()
+      end
+      for k in l:gmatch('%@(%S+)') do
+        if globals[k] then
+          f:write(globals[k])
+          globals[k] = nil
+          return 
+        end
+      end
+      f:write(l)
+      return f:write('\n')
+    else
+      if l:match('^%|%x%x%x%x %;') then
+        add_device(l:match('%;(%S+) %{ (.*) %}'))
+      end
+      f:write(l)
+      return f:write('\n')
+    end
+  end,
+  opcode = function(l)
+    if not l:match('%[') then
+      f:write(l)
+      f:write('\n')
+      state = 'normal'
+    end
+  end
+}
+for l in assert(io.lines(filename)) do
+  machine[state](l)
+end
+for _, l in pairs(globals) do
+  f:write(l)
+end
+f:close()
+assert(0 == os.execute(('mv %s %s.bak'):format(filename, filename)))
+return assert(0 == os.execute(('mv %s.tmp %s'):format(filename, filename)))
diff --git a/etc/assembler-trees.moon b/etc/assembler-trees.moon
@@ -0,0 +1,180 @@
+build_dag = (t, dag = {}, i = 1, j = #t, level = 0) ->
+	if i > j
+		return
+	mid = math.floor (i + j) / 2
+	dag[t[mid]] = {
+		(build_dag t, dag, i, mid - 1, level + 1)
+		(build_dag t, dag, mid + 1, j, level + 1)
+	}
+	t[mid], dag
+append_dag = (node, dag, k) ->
+	i = k > node and 2 or 1
+	next_node = dag[node][i]
+	if next_node
+		return append_dag next_node, dag, k
+	dag[node][i] = k
+	dag[k] = {}
+build_dag_from_chars = (s, ...) ->
+	t = [ s\sub i, i for i = 1, #s ]
+	table.sort t
+	root, dag = build_dag t
+	for i = 1, select '#', ...
+		append_dag root, dag, (select i, ...)
+	return root, dag
+check_terminals = (dag, s) ->
+	for i = 1, #s
+		k = s\sub i, i
+		assert not dag[k][1], '%s has left child node'\format k
+		assert not dag[k][2], '%s has right child node'\format k
+dump = (f, root, dag, level = 0) ->
+	if dag[root][1]
+		dump f, dag[root][1], dag, level + 1
+	f\write '    '\rep level
+	f\write root
+	f\write '\n'
+	if dag[root][2]
+		dump f, dag[root][2], dag, level + 1
+
+-- deal with opcodes
+
+write_opcode_tree = do
+	byte_to_opcode = {}
+	byte = false
+	for l in assert io.lines 'src/assembler.c'
+		if l\match '^%s*char%s+ops%[%]%[4%]'
+			byte = 0
+		elseif l\match '%}'
+			byte = false
+		elseif byte
+			for opcode in l\gmatch '"([A-Z-][A-Z-][A-Z-])"'
+				byte_to_opcode[byte] = opcode
+				byte += 1
+	order_to_opcode = [ byte_to_opcode[i] for i = 0, #byte_to_opcode when byte_to_opcode[i] != '---' ]
+	table.sort order_to_opcode
+	root, opcode_to_links = build_dag order_to_opcode
+	(f) ->
+		for i = 0, #byte_to_opcode
+			opcode = byte_to_opcode[i]
+			f\write '\t'
+			if opcode == root
+				f\write '$root   '
+			elseif opcode != '---'
+				f\write '$op-%s '\format opcode\lower!
+			else
+				f\write '        '
+			for j = 1, 2
+				if opcode != '---' and opcode_to_links[opcode][j]
+					f\write '.$op-%s '\format opcode_to_links[opcode][j]\lower!
+				else
+					f\write '[ 0000 ] '
+			if i == 0
+				f\write '$disasm '
+			else
+				f\write '        '
+			if opcode != '---'
+				f\write '[ %s ]'\format opcode
+			else
+				f\write '[ ??? ]'
+			if i == 0
+				f\write ' $asm'
+			f\write '\n'
+
+type_byte = (size, has_subtree) ->
+	n1 = has_subtree and '8' or '0'
+	n2 = switch size
+		when '1'
+			'1'
+		when '2'
+			'3'
+		else
+			'0'
+	n1 .. n2
+
+globals = {}
+
+add_globals = (root, dag, key_to_label, key_to_contents, pad_before = '', pad_after = '') ->
+	for k in pairs dag
+		l = ''
+		if k == root
+			l ..= '@%s\n'\format key_to_label('root')\gsub '%s', ''
+		l ..= '@%s '\format key_to_label k
+		for j = 1, 2
+			if dag[k][j]
+				l ..= '.%s '\format key_to_label dag[k][j]
+			else
+				l ..= '%s[ 0000 ]%s '\format pad_before, pad_after
+		l ..= key_to_contents k
+		l ..= '\n'
+		globals[key_to_label(k)\gsub '%s', ''] = l
+	globals[key_to_label('root')\gsub '%s', ''] = ''
+
+do
+	root, dag = build_dag_from_chars '{}[]%@$;|=~,.^#"\0', '(', ')'
+	check_terminals dag, ')'
+-- 	dump io.stdout, root, dag
+	convert = {
+		['.']: 'dot'
+		['\0']: 'nul'
+	}
+	label_name = (s) -> 'first-char-%-3s'\format convert[s] or s
+	label_value = (k) -> '[ %02x ]'\format k\byte!
+	add_globals root, dag, label_name, label_value, '  ', '     '
+
+devices = {}
+
+add_device = (name, fields) ->
+	field_sizes = { k, size for k, size in fields\gmatch '(%S+) (%d+)' }
+	field_sizes.pad = nil
+	field_names = [ k for k in pairs field_sizes ]
+	table.sort field_names
+	root, dag = build_dag field_names
+	label_name = (k) -> 'l-%-14s'\format name .. '-' .. k
+	label_value = (k) -> '%-17s [ %s ] .%s.%s'\format '[ %s 00 ]'\format(k), type_byte(field_sizes[k], false), name, k
+	add_globals root, dag, label_name, label_value, ' ', '        '
+	table.insert devices, name
+
+add_devices = ->
+	table.sort devices
+	root, dag = build_dag devices
+	label_name = (k) -> 'l-%-14s'\format k
+	label_value = (k) -> '%-17s [ %s ] .%s .l-%s-root'\format '[ %s 00 ]'\format(k), type_byte(0, true), k, k
+	add_globals root, dag, label_name, label_value, ' ', '        '
+
+filename = 'projects/software/assembler.usm'
+
+f = assert io.open '%s.tmp'\format(filename), 'w'
+-- f = io.stdout
+state = 'normal'
+machine =
+	normal: (l) ->
+		if l\match '%$disasm .*%$asm'
+			write_opcode_tree f
+			state = 'opcode'
+		elseif l\match '^%@'
+			if l == '@RESET'
+				add_devices!
+			for k in l\gmatch '%@(%S+)'
+				if globals[k]
+					f\write globals[k]
+					globals[k] = nil
+					return
+			f\write l
+			f\write '\n'
+		else
+			if l\match '^%|%x%x%x%x %;'
+				add_device l\match '%;(%S+) %{ (.*) %}'
+			f\write l
+			f\write '\n'
+	opcode: (l) ->
+		if not l\match '%['
+			f\write l
+			f\write '\n'
+			state = 'normal'
+for l in assert io.lines filename
+	machine[state] l
+for _, l in pairs globals
+	f\write l
+f\close!
+assert 0 == os.execute 'mv %s %s.bak'\format filename, filename
+assert 0 == os.execute 'mv %s.tmp %s'\format filename, filename
+
diff --git a/projects/software/assembler.usm b/projects/software/assembler.usm
@@ -0,0 +1,465 @@
+;tree { search-key 2 max-key-len 1 }
+;assembler { pass 1 state 1 token 2 scope-len 1 scope 80 }
+
+%HCF { #0000 DIV }
+
+( devices )
+
+|0100 ;Console { pad 8 char 1 byte 1 short 2 string 2 }
+|0110 ;Screen { width 2 height 2 pad 4 x 2 y 2 color 1 }
+|0120 ;Sprite { pad 8 x 2 y 2 addr 2 color 1 }
+|0130 ;Controller { p1 1 }
+|0140 ;Keys { key 1 }
+|0150 ;Mouse { x 2 y 2 state 1 chord 1 }
+|0160 ;File { pad 8 name 2 length 2 load 2 save 2 }
+|01F0 ;System { pad 8 r 2 g 2 b 2 }
+
+( vectors )
+
+|0200 ,RESET JMP2
+|0204 BRK
+|0208 BRK
+
+@RESET
+	#b000 #c000 #0010 ,memcpy JSR2
+	HCF
+
+	,$token ,strlen JSR2
+	HCF
+
+	#00
+	$loop
+	DUP ,highest-bit JSR2
+	( )
+	POP
+	#01 ADD
+	DUP ^$loop JNZ
+	POP
+
+
+	,$token ^assemble-token JSR
+	,$token2 ^assemble-token JSR
+	,$token3 ^assemble-token JSR
+	~assembler.state
+	HCF
+
+	$token [ hello 00 ]
+	$token2 [ 00 ]
+	$token3 [ 00 ]
+
+@assemble-tokens ( string-ptr* -- )
+	DUP2 ^assemble-token JSR
+
+@assemble-token ( string-ptr* -- )
+	( get location of tree )
+	DUP2
+	,state-machine-pointers #00 ~assembler.state ,highest-bit JSR2 #0004 MUL2 ADD2
+	DUP2 STH2
+	( see if first char is recognised )
+	SWP2 #01 ,traverse-tree JSR2
+	^$not-found JNZ
+	( skip first character of token )
+	SWP2 #0001 ADD2 =assembler.token
+	( tail call handling function defined in tree )
+	POP2r JMP2
+
+	$not-found
+	( not interested in incoming-ptr )
+	POP2
+	=assembler.token
+	( tail call default handling function defined in state-machine-pointers )
+	LIT2r [ 0002 ] ADD2r LDR2r
+	JMP2r
+
+@parse-hex-length ( string-ptr* -- value 01 if one or two hex digits
+                                OR 00 otherwise )
+	DUP2 #0001 ADD2 PEK2 ^parse-hex-string-try-two JNZ
+	PEK2 ^parse-hex-digit JSR DUP #04 SFT ^parse-hex-string-fail1 JNZ
+	#01 JMP2r
+
+@parse-hex-string ( string-ptr* -- value* 02 if four hex digits
+                                OR value 01 if two hex digits
+                                OR 00 otherwise )
+	DUP2 #0004 ADD2 PEK2 #00 EQU ^$try-four JNZ
+	$try-two
+	DUP2 #0002 ADD2 PEK2 ^$fail2 JNZ
+	$known-two
+	DUP2 PEK2 ^parse-hex-digit JSR DUP #04 SFT ^$fail3 JNZ ROT ROT
+	#0001 ADD2 PEK2 ^parse-hex-digit JSR DUP #04 SFT ^$fail2 JNZ
+	SWP #40 SFT ORA #01 JMP2r
+
+	$fail3 POP
+	$fail2 POP
+	$fail1 POP #00 JMP2r
+
+	$try-four
+	DUP2 #0002 ADD2 ^$known-two JSR ^$maybe-four JNZ
+	^$try-two JMP
+
+	$maybe-four
+	ROT ROT ^$known-two JSR ^$four JNZ
+	^$fail1 JMP
+
+	$four
+	SWP #02 JMP2r
+
+@parse-hex-digit ( charcode -- 00-0f if valid hex
+                            -- 10-ff otherwise )
+	DUP #3a LTH ^$digit JNZ
+	DUP #60 GTH ^$lowercase JNZ
+	DUP #40 GTH ^$uppercase JNZ
+	JMP2r
+
+	$digit ( #30 is #00 )
+	#30 SUB JMP2r
+
+	$lowercase ( #61 is #0a )
+	#57 SUB JMP2r
+
+	$uppercase ( #41 is #0a )
+	#37 SUB JMP2r
+
+@find-opcode ( name* -- byte 00 if valid opcode name
+                     OR 01 if not found )
+	,opcodes-tree SWP2 #03 ^traverse-tree JSR
+	^$nomatch JNZ
+	,opcodes-asm SUB2 #0007 DIV2
+	SWP JMP2r
+
+	$nomatch
+	DUP2 EQU2 JMP2r
+
+@traverse-tree ( tree-ptr* search-key* max-key-len --
+		binary-ptr* 00 if key matched
+		OR incoming-ptr* 01 if key not found )
+	=tree.max-key-len =tree.search-key
+
+	$loop
+	DUP2 LDR2 #0000 NEQ2 ^$valid-node JNZ
+	#01 JMP2r
+
+	$valid-node
+	LDR2 DUP2 STH2 #0004 ADD2 ^strcmp-tree JSR
+	DUP ^$nomatch JNZ
+	POP2r JMP2r
+
+	$nomatch
+	#07 SFT #02 MUL #00 SWP
+	STH2r ADD2
+	^$loop JMP
+
+@strcmp-tree ( node-key* -- order if strings differ
+                         OR after-node-key* 00 if strings match )
+	~tree.search-key STH2
+	~tree.max-key-len
+
+	$loop ( node-key* key-len in wst, search-key* in rst )
+	DUP ^$keep-going JNZ
+
+	( exhausted key-len, match found )
+	POP2r
+	JMP2r
+
+	$keep-going
+	#01 OVR2 PEK2 DUP2r PEK2r STHr
+	DUP2 ORA ^$not-end JNZ
+
+	( end of C strings, match found )
+	POP2r POP ROT POP SWP ADD2 #00
+	JMP2r
+
+	$not-end
+	SUB DUP ^$nomatch JNZ
+	POP SUB
+	LIT2r [ 0001 ] ADD2r STH
+	LIT2  [ 0001 ] ADD2  STHr
+	^$loop JMP
+
+	$nomatch
+	STH POP2 POP2 STHr POP2r
+	JMP2r
+
+@memcpy ( src-ptr* dest-ptr* length* -- )
+	SWP2 STH2
+
+	$loop
+	DUP2 ORA ^$keep-going JNZ
+	POP2 POP2 POP2r
+	JMP2r
+
+	$keep-going
+	#0001 SUB2
+	SWP2 DUP2 PEK2 DUP2r STH2r POK2
+	#0001 ADD2 SWP2
+	LIT2r [ 0001 ] ADD2r
+	^$loop JMP
+
+@strlen ( string-ptr* -- length* )
+	DUP2 #0001 SUB2
+	$loop
+	#0001 ADD2
+	DUP2 PEK2 ^$loop JNZ
+	SWP2 SUB2
+	JMP2r
+
+
+
+
+@add-label ( string-ptr* label-flags -- )
+	( NYI )
+	POP POP2 JMP2r
+
+@highest-bit ( n -- 00 if n is 00
+                 OR 01 if n is 01
+                 OR 02 if n is 02..03
+                 OR 03 if n is 04..07
+                 OR 04 if n is 08..0f
+                 ..
+                 OR 08 if n is 80..ff )
+	DUP #00 NEQ JMP JMP2r
+	DUP #01 SFT ORA
+	DUP #02 SFT ORA
+	DUP #04 SFT ORA
+	#1d MUL #05 SFT #00 SWP ,$lookup ADD2 PEK2
+	JMP2r
+
+	$lookup
+	[ 01 06 02 07 05 04 03 08 ]
+
+@opcodes
+	(
+		The code for this section is automatically generated, and needs to be
+		regenerated when the opcode list in src/assembler.c is updated.
+
+		After editing src/assembler.c, run "lua etc/assembler-trees.lua"
+		and this file will be edited automatically.
+
+		This is the first example of a binary tree in this code, so let's
+		explore them in general. The format of a tree node in memory is:
+
+		left-node* right-node* node-key-cstring binary-data
+
+		and the general algorithm is to compare the key you're looking for
+		against node-key-cstring, and move to the node pointed to by left-node*
+		or right-node* if the keys don't match. If your key sorts earlier than
+		use left-node*, otherwise go to right-node*. When you find a node that
+		matches your key, traverse-bintree gives you a pointer to the
+		binary-data straight after the node-key-cstring. This data can contain
+		anything you want: fixed length fields, executable code... in this case
+		of this opcode tree, we store nothing. traverse-bintree is passed the
+		maximum length of node-key-cstring, not including the zero, so the zero
+		can be omitted if the string is at that maximum length.
+
+		If the key isn't present in the tree, you'll eventually get to a node
+		where the left-node* or right-node* pointer you'll need to follow is
+		null (0000). traverse-bintree will give you the location of that
+		pointer, so if you want to insert another node, you can write it to the
+		heap and overwrite the pointer with the new node's location. This
+		approach works even if the tree is completely empty and the pointer
+		you've provided to the root node is null, since that pointer gets
+		updated to point to the first node without needing any special logic.
+
+		The ordering of nodes in memory is totally arbitrary, so for pre-
+		prepared trees like this one we can have our own meaning for the order
+		of the nodes. By ordering the opcodes by their byte value, we can find
+		the byte by subtracting $asm from the binary-data pointer and dividing
+		by seven (the size of each node). By multiplying the byte value by seven
+		and adding to $disasm, we get the opcode name when disassembling too.
+	)
+	$tree   .$root
+	$op-brk .$op-add .$op-dup $disasm [ BRK ] $asm
+	$op-nop .$op-mul .$op-ovr         [ NOP ]
+	$op-lit [ 0000 ] [ 0000 ]         [ LIT ]
+	$op-pop [ 0000 ] [ 0000 ]         [ POP ]
+	$op-dup .$op-div .$op-eor         [ DUP ]
+	$op-swp [ 0000 ] [ 0000 ]         [ SWP ]
+	$op-ovr .$op-ora .$op-pek         [ OVR ]
+	$op-rot .$op-pop .$op-sft         [ ROT ]
+	$op-equ .$op-brk .$op-jnz         [ EQU ]
+	$op-neq [ 0000 ] [ 0000 ]         [ NEQ ]
+	$op-gth [ 0000 ] [ 0000 ]         [ GTH ]
+	$root   .$op-equ .$op-pok         [ LTH ]
+	$op-gts .$op-gth .$op-jmp         [ GTS ]
+	$op-lts [ 0000 ] [ 0000 ]         [ LTS ]
+	        [ 0000 ] [ 0000 ]         [ ??? ]
+	        [ 0000 ] [ 0000 ]         [ ??? ]
+	$op-pek [ 0000 ] [ 0000 ]         [ PEK ]
+	$op-pok .$op-nop .$op-sth         [ POK ]
+	$op-ldr .$op-jsr .$op-lit         [ LDR ]
+	$op-str [ 0000 ] [ 0000 ]         [ STR ]
+	$op-jmp [ 0000 ] [ 0000 ]         [ JMP ]
+	$op-jnz .$op-gts .$op-ldr         [ JNZ ]
+	$op-jsr [ 0000 ] [ 0000 ]         [ JSR ]
+	$op-sth .$op-rot .$op-sub         [ STH ]
+	$op-add [ 0000 ] .$op-and         [ ADD ]
+	$op-sub .$op-str .$op-swp         [ SUB ]
+	$op-mul .$op-lts .$op-neq         [ MUL ]
+	$op-div [ 0000 ] [ 0000 ]         [ DIV ]
+	$op-and [ 0000 ] [ 0000 ]         [ AND ]
+	$op-ora [ 0000 ] [ 0000 ]         [ ORA ]
+	$op-eor [ 0000 ] [ 0000 ]         [ EOR ]
+	$op-sft [ 0000 ] [ 0000 ]         [ SFT ]
+
+@state-machine-pointers
+( normal mode 00 )
+.first-char-root .nyi
+( FIXME 01 )
+.nyi .nyi
+( FIXME 02 )
+.nyi .nyi
+( FIXME 04 )
+.nyi .nyi
+( FIXME 08 )
+.nyi .nyi
+( FIXME 10 )
+.nyi .nyi
+( literal data 20 )
+[ 0000 ] .nyi
+( FIXME 40 )
+.nyi .nyi
+( comment 80 )
+.first-char-) .ignore
+
+(
+	Next up, we have the tree of code corresponding to each token's
+	first character. Here we do have a binary payload, which is
+	the code to run when the assembler considers the token.
+
+	Some special assembler modes have their own trees. Since comments
+	have a very simple tree that only understands the end of comments,
+	we reuse the terminal branch of the main tree as the root of
+	the comment tree.
+)
+
+(
+	Left and right parentheses start and end comment sections. They use the
+	highest bit in assembler state, so they receive highest priority: it
+	doesn't matter what other bits are set, a comment's a comment.
+)
+
+@first-char-(     [ 0000 ]      .first-char-)   [ 28 ]
+	~assembler.state #80 ORA =assembler.state
+JMP2r
+
+@first-char-)     [ 0000 ]        [ 0000 ]      [ 29 ]
+	~assembler.state #7f AND =assembler.state
+JMP2r
+
+(
+	Left and right square brackets start and end literal data sections.
+)
+
+@first-char-[   .first-char-@   .first-char-]   [ 5b ]
+	~assembler.state #20 ORA =assembler.state
+JMP2r
+
+@first-char-]     [ 0000 ]        [ 0000 ]      [ 5d ]
+	~assembler.state #df AND =assembler.state
+JMP2r
+
+(
+	Ampersands introduce global labels, and define the scope for any
+	local labels that follow.
+)
+
+@first-char-@     [ 0000 ]        [ 0000 ]      [ 40 ]
+	~assembler.pass ^$scope JNZ
+	DUP2 #00 ,add-label JSR2
+
+	$scope
+	DUP2 ,strlen JSR2
+	DUP2 =assembler.scope-len POP
+	,assembler.scope SWP2 JMP2
+
+@first-char-root
+@first-char-=   .first-char-$   .first-char-^   [ 3d ]
+@first-char-"   .first-char-nul .first-char-#   [ 22 ]
+@first-char-#     [ 0000 ]        [ 0000 ]      [ 23 ]
+@first-char-$   .first-char-"   .first-char-,   [ 24 ]
+@first-char-%     [ 0000 ]      .first-char-(   [ 25 ]
+@first-char-,   .first-char-%   .first-char-dot [ 2c ]
+@first-char-dot   [ 0000 ]      .first-char-;   [ 2e ]
+@first-char-;     [ 0000 ]        [ 0000 ]      [ 3b ]
+@first-char-^   .first-char-[   .first-char-|   [ 5e ]
+@first-char-{     [ 0000 ]        [ 0000 ]      [ 7b ]
+@first-char-|   .first-char-{   .first-char-}   [ 7c ]
+@first-char-}     [ 0000 ]      .first-char-~   [ 7d ]
+@first-char-~     [ 0000 ]        [ 0000 ]      [ 7e ]
+
+@first-char-nul   [ 0000 ]        [ 0000 ]      [ 00 ]
+@ignore
+JMP2r
+
+@nyi
+	,$string =Console.string
+	HCF
+
+	$string [ Not 20 implemented 0a 00 ]
+
+(
+	Here's the big set of trees relating to labels. Starting from l-root, all
+	the devices are stored here, perhaps some helper functions in the future,
+	too.
+
+	left-node* right-node* node-key-cstring binary-data
+
+	The node-keys are terminated with NUL since, unlike the opcodes and first
+	characters, the keys are variable length.
+
+	The binary-data is either three or five bytes long:
+		flags value* [ subtree-pointer* ]
+
+	The flags byte is divided up into bits:
+
+	bit 0: 01 means load or store helpers can be used,
+	bit 1: 02 means the helpers use STR/LDR, 00 means they use POK/PEK;
+	bits 2-6 are reserved; and
+	bit 7: 80 means there is a subtree.
+
+	If there is a subtree, it is searched when the reference contains a dot.
+)
+
+@l-Console         [ 0000 ]          [ 0000 ]         [ Console 00 ]    [ 80 ] .Console .l-Console-root
+@l-Console-byte    [ 0000 ]          [ 0000 ]         [ byte 00 ]       [ 01 ] .Console.byte
+@l-Console-root
+@l-Console-char   .l-Console-byte   .l-Console-short  [ char 00 ]       [ 01 ] .Console.char
+@l-Console-short   [ 0000 ]         .l-Console-string [ short 00 ]      [ 03 ] .Console.short
+@l-Console-string  [ 0000 ]          [ 0000 ]         [ string 00 ]     [ 03 ] .Console.string
+@l-Controller     .l-Console        .l-File           [ Controller 00 ] [ 80 ] .Controller .l-Controller-root
+@l-Controller-root
+@l-Controller-p1   [ 0000 ]          [ 0000 ]         [ p1 00 ]         [ 01 ] .Controller.p1
+@l-File            [ 0000 ]          [ 0000 ]         [ File 00 ]       [ 80 ] .File .l-File-root
+@l-File-length     [ 0000 ]          [ 0000 ]         [ length 00 ]     [ 03 ] .File.length
+@l-File-root
+@l-File-load      .l-File-length    .l-File-name      [ load 00 ]       [ 03 ] .File.load
+@l-File-name       [ 0000 ]         .l-File-save      [ name 00 ]       [ 03 ] .File.name
+@l-File-save       [ 0000 ]          [ 0000 ]         [ save 00 ]       [ 03 ] .File.save
+@l-root
+@l-Keys           .l-Controller     .l-Screen         [ Keys 00 ]       [ 80 ] .Keys .l-Keys-root
+@l-Keys-root
+@l-Keys-key        [ 0000 ]          [ 0000 ]         [ key 00 ]        [ 01 ] .Keys.key
+@l-Mouse           [ 0000 ]          [ 0000 ]         [ Mouse 00 ]      [ 80 ] .Mouse .l-Mouse-root
+@l-Mouse-chord     [ 0000 ]          [ 0000 ]         [ chord 00 ]      [ 01 ] .Mouse.chord
+@l-Mouse-root
+@l-Mouse-state    .l-Mouse-chord    .l-Mouse-x        [ state 00 ]      [ 01 ] .Mouse.state
+@l-Mouse-x         [ 0000 ]         .l-Mouse-y        [ x 00 ]          [ 03 ] .Mouse.x
+@l-Mouse-y         [ 0000 ]          [ 0000 ]         [ y 00 ]          [ 03 ] .Mouse.y
+@l-Screen         .l-Mouse          .l-Sprite         [ Screen 00 ]     [ 80 ] .Screen .l-Screen-root
+@l-Screen-color    [ 0000 ]         .l-Screen-height  [ color 00 ]      [ 01 ] .Screen.color
+@l-Screen-height   [ 0000 ]          [ 0000 ]         [ height 00 ]     [ 03 ] .Screen.height
+@l-Screen-root
+@l-Screen-width   .l-Screen-color   .l-Screen-x       [ width 00 ]      [ 03 ] .Screen.width
+@l-Screen-x        [ 0000 ]         .l-Screen-y       [ x 00 ]          [ 03 ] .Screen.x
+@l-Screen-y        [ 0000 ]          [ 0000 ]         [ y 00 ]          [ 03 ] .Screen.y
+@l-Sprite          [ 0000 ]         .l-System         [ Sprite 00 ]     [ 80 ] .Sprite .l-Sprite-root
+@l-Sprite-addr     [ 0000 ]          [ 0000 ]         [ addr 00 ]       [ 03 ] .Sprite.addr
+@l-Sprite-root
+@l-Sprite-color   .l-Sprite-addr    .l-Sprite-x       [ color 00 ]      [ 01 ] .Sprite.color
+@l-Sprite-x        [ 0000 ]         .l-Sprite-y       [ x 00 ]          [ 03 ] .Sprite.x
+@l-Sprite-y        [ 0000 ]          [ 0000 ]         [ y 00 ]          [ 03 ] .Sprite.y
+@l-System          [ 0000 ]          [ 0000 ]         [ System 00 ]     [ 80 ] .System .l-System-root
+@l-System-b        [ 0000 ]          [ 0000 ]         [ b 00 ]          [ 03 ] .System.b
+@l-System-root
+@l-System-g       .l-System-b       .l-System-r       [ g 00 ]          [ 03 ] .System.g
+@l-System-r        [ 0000 ]          [ 0000 ]         [ r 00 ]          [ 03 ] .System.r
+