commit 1a34fcefa9d967d35254ed22d057b3800e6c1672
parent 3496a38606ea497e6649059ade32d9d9eb39d528
Author: neauoire <aliceffekt@gmail.com>
Date: Mon, 7 Feb 2022 15:52:22 -0800
Added libraries for math32
Diffstat:
4 files changed, 815 insertions(+), 0 deletions(-)
diff --git a/projects/examples/demos/mandelbrot.tal b/projects/examples/demos/mandelbrot.tal
@@ -0,0 +1,137 @@
+( mandelbrot )
+
+%+ { ADD } %- { SUB } %* { MUL } %/ { DIV }
+%< { LTH } %> { GTH } %= { EQU } %! { NEQ }
+%++ { ADD2 } %-- { SUB2 } %** { MUL2 } %// { DIV2 }
+%<< { LTH2 } %>> { GTH2 } %== { EQU2 } %!! { NEQ2 }
+%AUTO-X { #01 .Screen/auto DEO }
+%NEXT-LINE { #0000 .Screen/x DEO2 .Screen/y DEI2k INC2 ROT DEO2 }
+
+%XMIN { #de69 } ( -8601 )
+%XMAX { #0b33 } ( 2867 )
+%YMIN { #ecc7 } ( -4915 )
+%YMAX { #1333 } ( 4915 )
+%MAXI { #20 } ( 32 )
+%DX { XMAX XMIN -- #004f // } ( (XMAX-XMIN)/79 )
+%DY { YMAX YMIN -- #0018 // } ( (YMAX-YMIN)/24 )
+%X { .x LDZ2 } %Y { .y LDZ2 }
+%X2 { .x2 LDZ2 } %Y2 { .y2 LDZ2 }
+
+%GTS2 { #8000 ++ SWP2 #8000 ++ << }
+
+%HALT { #010f DEO }
+%EMIT { #18 DEO }
+%PRINT { ;print-str JSR2 #0a EMIT }
+%DEBUG { ;print-hex/byte JSR2 #0a EMIT }
+%DEBUG2 { ;print-hex JSR2 #0a EMIT }
+
+|00 @System &vector $2 &wst $1 &rst $1 &eaddr $2 &ecode $1 &pad $1 &r $2 &g $2 &b $2 &debug $1 &halt $1
+|20 @Screen &vector $2 &width $2 &height $2 &auto $1 &pad $1 &x $2 &y $2 &addr $2 &pixel $1 &sprite $1
+
+|0000 ( zero-page )
+
+@x $2 @y $2
+@x2 $2 @y2 $2
+
+|0100 ( -> )
+
+ ( theme )
+ #048c .System/r DEO2
+ #048c .System/g DEO2
+ #048c .System/b DEO2
+
+ #0280 .Screen/width DEO2 ( 640 )
+ #01e0 .Screen/height DEO2 ( 480 )
+
+ #0000 .Screen/x DEO2
+ #0000 .Screen/y DEO2
+
+ AUTO-X
+ ;draw-mandel JSR2
+
+BRK
+
+@draw-mandel ( -- )
+
+ YMAX YMIN
+ &ver
+ DUP2 ,&y STR2
+ XMAX XMIN
+ &hor
+ DUP2 ,&x STR2
+ #0000 DUP2 DUP2 DUP2 .x STZ2 .y STZ2 .x2 STZ2 .y2 STZ2
+ MAXI #00
+ &loop
+ X Y ;smul2 JSR2 #0b SFT2 [ LIT2 &y $2 ] ++ .y STZ2
+ X2 Y2 -- [ LIT2 &x $2 ] ++ .x STZ2
+ X X ;smul2 JSR2 #0c SFT2 .x2 STZ2
+ Y Y ;smul2 JSR2 #0c SFT2 .y2 STZ2
+ X2 Y2 ++ >> #4000 ,&end JCN
+ INC GTHk ,&loop JCN
+ &end
+ NIP POP #03 .Screen/pixel DEO
+ DX ++ OVR2 OVR2 GTS2 ;&hor JCN2
+ POP2 POP2
+ NEXT-LINE
+ DY ++ OVR2 OVR2 GTS2 ;&ver JCN2
+ POP2 POP2
+
+JMP2r
+
+@print-hex ( value* -- )
+
+ SWP ,&byte JSR
+ &byte ( byte -- )
+ STHk #04 SFT ,&parse JSR #18 DEO
+ STHr #0f AND ,&parse JSR #18 DEO
+ JMP2r
+ &parse ( byte -- char ) DUP #09 GTH ,&above JCN #30 ADD JMP2r
+ &above #57 ADD JMP2r
+
+JMP2r
+
+@smul2 ( a* b* -- c* )
+
+ OVR2 POP #80 AND #07 SFT STH
+ OVR #80 AND #07 SFT STHr ADD #01 AND ,&sign STR
+ #10 SFT2 #01 SFT2
+ SWP2
+ #10 SFT2 #01 SFT2
+ MUL2
+ ,&sign LDR ,&flip JCN
+ JMP2r
+ &flip
+ #0000 SWP2 --
+
+JMP2r
+ &sign $1
+
+@sprites
+ 0000 0000 0000 0000 0000 0000 0000 0000
+ 0000 0018 1800 0000 0000 0000 0000 0000
+ 0000 183c 3c18 0000 0000 0000 0000 0000
+ 0018 3c7e 7e3c 1800 0000 0000 0000 0000
+ 183c 7eff ff7e 3c18 0000 0000 0000 0000
+ 3c7e ffff ffff 7e3c 0000 0000 0000 0000
+ 7eff ffff ffff ff7e 0000 0000 0000 0000
+ ffff ffff ffff ffff 0000 0000 0000 0000
+ ffff ffe7 e7ff ffff 0000 0018 1800 0000
+ ffff e7c3 c3e7 ffff 0000 183c 3c18 0000
+ ffe7 c381 81c3 e7ff 0018 3c7e 7e3c 1800
+ e7c3 8100 0081 c3e7 183c 7eff ff7e 3c18
+ c381 0000 0000 81c3 3c7e ffff ffff 7e3c
+ 8100 0000 0000 0081 7eff ffff ffff ff7e
+ 0000 0000 0000 0000 ffff ffff ffff ffff
+ 0000 0018 1800 0000 ffff ffff ffff ffff
+ 0000 183c 3c18 0000 ffff ffff ffff ffff
+ 0018 3c7e 7e3c 1800 ffff ffff ffff ffff
+ 183c 7eff ff7e 3c18 ffff ffff ffff ffff
+ 3c7e ffff ffff 7e3c ffff ffff ffff ffff
+ 7eff ffff ffff ff7e ffff ffff ffff ffff
+ ffff ffff ffff ffff ffff ffff ffff ffff
+ ffff ffe7 e7ff ffff ffff ffe7 e7ff ffff
+ ffff e7c3 c3e7 ffff ffff e7c3 c3e7 ffff
+ ffe7 c381 81c3 e7ff ffe7 c381 81c3 e7ff
+ e7c3 8100 0081 c3e7 e7c3 8100 0081 c3e7
+ c381 0000 0000 81c3 c381 0000 0000 81c3
+ 8100 0000 0000 0081 8100 0000 0000 0081
diff --git a/projects/library/helpers.tal b/projects/library/helpers.tal
@@ -0,0 +1,243 @@
+%BYE { #01 .System/halt DEO BRK }
+%DEBUG { #ab .System/debug DEO }
+%IN-RANGE { ROT INCk SWP SUB2 GTH }
+%MOD { DIVk MUL SUB }
+%MOD2 { DIV2k MUL2 SUB2 }
+%NL { #0a .Console/write DEO }
+%SP { #20 .Console/write DEO }
+
+@print-string ( string* -- )
+ LDAk ,¬-end JCN
+ POP2 JMP2r
+ ¬-end
+ LDAk .Console/write DEO
+ INC2
+ ,print-string JMP
+
+@print-short-decimal ( short* -- )
+ #03e8 DIV2k
+ DUP ,print-byte-decimal/second JSR
+ MUL2 SUB2
+ #0064 DIV2k
+ DUP ,print-byte-decimal/third JSR
+ MUL2 SUB2
+ NIP ,print-byte-decimal/second JMP
+
+@print-byte-decimal ( byte -- )
+ #64 DIVk DUP #30 ADD .Console/write DEO MUL SUB
+ &second
+ #0a DIVk DUP #30 ADD .Console/write DEO MUL SUB
+ &third
+ #30 ADD .Console/write DEO
+ JMP2r
+
+@print-32z-hex ( 32-zp -- )
+ #00 SWP
+ ,print-32-hex JMP
+
+@print-64z-hex ( 64-zp -- )
+ #00 SWP
+ ( fall through )
+
+@print-64-hex ( 64-ptr* -- )
+ DUP2 #0004 ADD2 SWP2 ( lo32-ptr* hi32-ptr* )
+ ,print-32-hex JSR
+ ( fall through )
+
+@print-32-hex ( 32-ptr* -- )
+ INC2k INC2 SWP2 ( lo-ptr* hi-ptr* )
+ LDA2 ,print-short-hex JSR
+ LDA2 ( fall through )
+
+@print-short-hex ( short* -- )
+ SWP ,print-byte-hex JSR
+ ( fall through )
+
+@print-byte-hex ( byte -- )
+ DUP #04 SFT ,print-nibble-hex JSR
+ #0f AND ( fall through )
+
+@print-nibble-hex ( nibble -- )
+ #30 ADD DUP #39 GTH #07 MUL ADD .Console/write DEO
+ JMP2r
+
+@next-input-byte ( -- number 00
+ OR 01 at end of file )
+ ,next-input-short JSR ,&eof JCN
+ NIP #00
+ JMP2r
+
+ &eof
+ #01
+ JMP2r
+
+@next-input-short ( -- number* 00
+ OR 01 at end of file )
+ LIT2 &ptr :heap
+ LIT2r 0000
+ &ffwd
+ LDAk #3039 IN-RANGE ,&number JCN
+ INC2k SWP2 LDA ,&ffwd JCN
+ ( eof )
+ POP2 POP2r
+ ;heap ,&ptr STR2
+ #01 JMP2r
+
+ &number
+ LIT2r 000a MUL2r
+ LDAk #30 SUB #00 STH STH ADD2r
+ INC2
+ LDAk #3039 IN-RANGE ,&number JCN
+
+ ,&ptr STR2
+ STH2r #00
+ JMP2r
+
+@add64 ( dest-ptr* src-ptr* -- carry )
+ OVR2 #0004 ADD2 OVR2 #0004 ADD2
+ ,add32 JSR
+ ( fall through )
+
+@adc32 ( dest-ptr* src-ptr* carry -- carry )
+ STH
+ OVR2 #0002 ADD2 OVR2 #0002 ADD2
+ STHr ,adc16 JSR
+ ,adc16 JMP ( tail call )
+
+@add64z ( dest-zp src-zp -- carry )
+ OVR #04 ADD OVR #04 ADD
+ ,add32z JSR
+ ( fall through )
+
+@adc32z ( dest-zp src-zp carry -- carry )
+ STH
+ OVR #02 ADD OVR #02 ADD
+ STHr ,adc16z JSR
+ ,adc16z JMP ( tail call )
+
+@add32z-short ( dest-zp src* -- carry )
+ #00 SWP SWP2 ROT
+ ( fall through )
+
+@add32-short ( dest-ptr* src* -- carry )
+ ,&short STR2
+ ;&src ,add32 JMP ( tail call )
+
+ &src 0000 &short 0000
+
+@add32 ( dest-ptr* src-ptr* -- carry )
+ OVR2 #0002 ADD2 OVR2 #0002 ADD2
+ ,add16 JSR
+ ( fall through )
+
+@adc16 ( dest-ptr* src-ptr* carry -- carry )
+ #00 EQU ,add16 JCN
+ OVR2 ;&one ,add16 JSR STH
+ ,add16 JSR
+ STHr ORA
+ JMP2r
+
+ &one 0001
+
+@add16 ( dest-ptr* src-ptr* -- carry )
+ OVR2 LDA2 DUP2 ROT2 LDA2 ( dest-ptr* dest* dest* src* )
+ ADD2 GTH2k STH NIP2 ( dest-ptr* sum* / carry )
+ SWP2 STA2 STHr ( carry )
+ JMP2r
+
+@add32z ( dest-zp src-zp -- carry )
+ OVR #02 ADD OVR #02 ADD
+ ,add16z JSR
+ ( fall through )
+
+@adc16z ( dest-zp src-zp carry -- carry )
+ #00 EQU ,add16z JCN
+ OVR #00 SWP ;adc16/one ,add16 JSR STH
+ ,add16z JSR
+ STHr ORA
+ JMP2r
+
+@add16z ( dest-zp src-zp -- carry )
+ OVR LDZ2 ROT LDZ2 OVR2 ( dest-zp dest* src* dest* )
+ ADD2 GTH2k STH NIP2 ( dest-zp sum* / carry )
+ ROT STZ2 STHr ( carry )
+ JMP2r
+
+@gth64 ( left-ptr* right-ptr* -- 01 if left > right
+ OR 00 otherwise )
+ OVR2 OVR2 ,gth32 JSR ,&greater JCN
+ OVR2 OVR2 SWP2 ,gth32 JSR ,&less JCN
+ #0004 ADD2 SWP2 #0004 ADD2 SWP2 ,gth32 JMP ( tail call )
+
+ &greater POP2 POP2 #01 JMP2r
+ &less POP2 POP2 #00 JMP2r
+
+@gth32z ( left-zp* right-zp* -- 01 if left > right
+ OR 00 otherwise )
+ #00 ROT ROT #00 SWP
+ ( fall through )
+
+@gth32 ( left-ptr* right-ptr* -- 01 if left > right
+ OR 00 otherwise )
+ OVR2 LDA2 OVR2 LDA2 ( left-ptr* right-ptr* left* right* )
+ EQU2k ,&lo JCN
+ GTH2 NIP2 NIP NIP
+ JMP2r
+
+ &lo
+ POP2 POP2
+ INC2 INC2 LDA2 SWP2 INC2 INC2 LDA2 ( right-lo* left-lo* )
+ LTH2
+ JMP2r
+
+@add32z-short-short-mul ( dest-zp a* b* -- carry )
+ STH2 STH2 #00 SWP STH2r STH2r
+ ( fall through )
+
+@add32-short-short-mul ( dest-ptr* a* b* -- carry )
+ LITr 00 STH LITr 00 STH ( dest-ptr* a* / blo* bhi* )
+ #00 ROT ROT #00 SWP ( dest-ptr* ahi* alo* / blo* bhi* )
+ STH2kr OVR2 MUL2 ,&alo-bhi STR2
+ OVR2 STH2r MUL2 ,&ahi-bhi STR2 ( dest-ptr ahi* alo* / blo* )
+ STH2kr MUL2 ,&alo-blo STR2 ( dest-ptr* ahi* / blo* )
+ STH2r MUL2 ,&ahi-blo STR2 ( dest-ptr* )
+ DUP2 ;&sum1 ;add32 JSR2 STH
+ DUP2 ;&sum2 ;add32 JSR2 STH
+ ;&sum3 ;add32 JSR2
+ STH2r ORA ORA
+ JMP2r
+
+ &sum1 &ahi-bhi 0000 &alo-blo 0000
+ &sum2 00 &ahi-blo 0000 00
+ &sum3 00 &alo-bhi 0000 00
+
+@zero64 ( ptr* -- )
+ #08 ,zero JMP ( tail call )
+
+@zero32z ( zp -- )
+ #00 SWP
+ ( fall through )
+
+@zero32 ( ptr* -- )
+ #04
+ ( fall through )
+
+@zero ( ptr* len -- )
+ #00 SWP ADD2k NIP2 SWP2
+ &loop
+ DUP2 #00 ROT ROT STA
+ INC2
+ GTH2k ,&loop JCN
+ POP2 POP2
+ JMP2r
+
+@is-nonzero64 ( ptr* -- flag )
+ DUP2 ,is-nonzero32 JSR STH
+ #0004 ADD2 ,is-nonzero32 JSR STHr ORA
+ JMP2r
+
+@is-nonzero32 ( ptr* -- flag )
+ LDA2k ORA STH
+ INC2 INC2 LDA2 ORA STHr ORA
+ JMP2r
+
diff --git a/projects/library/math32.tal b/projects/library/math32.tal
@@ -0,0 +1,435 @@
+( math32.tal )
+( )
+( This library supports arithmetic on 32-bit unsigned integers, )
+( also known as long values. )
+( )
+( 32-bit long values are represented by two 16-bit short values: )
+( )
+( decimal hexadecimal uxn literals )
+( 0 0x00000000 #0000 #0000 )
+( 1 0x00000001 #0000 #0001 )
+( 4660 0x00001234 #0000 #1234 )
+( 65535 0x0000ffff #0000 #ffff )
+( 65536 0x00010000 #0001 #0000 )
+( 16777215 0x00ffffff #00ff #ffff )
+( 4294967295 0xffffffff #ffff #ffff )
+( )
+( The most significant 16-bit, the "high bits", are stored first. )
+( We document long values as x** -- equivalent to xhi* xlo*. )
+( )
+( Operations supported: )
+( )
+( NAME STACK EFFECT DEFINITION )
+( add32 x** y** -> z** x + y )
+( sub32 x** y** -> z** x - y )
+( mul16 x* y* -> z** x * y )
+( mul32 x** y** -> z** x * y )
+( div32 x** y** -> q** x / y )
+( mod32 x** y** -> r** x % y )
+( divmod32 x** y** -> q** r** x / y, x % y )
+( gcd32 x** y** -> z** gcd(x, y) )
+( negate32 x** -> z** -x )
+( lshift32 x** n^ -> z** x<<n )
+( rshift32 x** n^ -> z** x>>n )
+( and32 x** y** -> z** x & y )
+( or32 x** y** -> z** x | y )
+( xor32 x** y** -> z** x ^ y )
+( complement32 x** -> z** ~x )
+( eq32 x** y** -> bool^ x == y )
+( ne32 x** y** -> bool^ x != y )
+( is-zero32 x** -> bool^ x == 0 )
+( non-zero32 x** -> bool^ x != 0 )
+( lt32 x** y** -> bool^ x < y )
+( gt32 x** y** -> bool^ x > y )
+( lteq32 x** y** -> bool^ x <= y )
+( gteq32 x** y** -> bool^ x >= y )
+( bitcount8 x^ -> bool^ floor(log2(x))+1 )
+( bitcount16 x* -> bool^ floor(log2(x))+1 )
+( bitcount32 x** -> bool^ floor(log2(x))+1 )
+( )
+( In addition to the code this file uses 44 bytes of registers )
+( to store temporary state: )
+( )
+( - shared memory, 16 bytes )
+( - mul32 memory, 12 bytes )
+( - _divmod32 memory, 16 bytes )
+
+%DEBUG { #ff #0e DEO }
+%RTN { JMP2r }
+%TOR { ROT ROT } ( a b c -> c a b )
+%COMPLEMENT32 { SWP2 #ffff EOR2 SWP2 #ffff EOR2 }
+%DUP4 { OVR2 OVR2 }
+%POP4 { POP2 POP2 }
+
+( bitcount: number of bits needed to represent number )
+( equivalent to floor[log2[x]] + 1 )
+
+@bitcount8 ( x^ -> n^ )
+ #00 SWP ( n x )
+ &loop
+ DUP #00 EQU ( n x x=0 )
+ ,&done JCN ( n x )
+ #01 SFT ( n x>>1 )
+ SWP INC SWP ( n+1 x>>1 )
+ ,&loop JMP
+ &done
+ POP ( n )
+ RTN
+
+@bitcount16 ( x* -> n^ )
+ SWP ( xlo xhi )
+ ;bitcount8 JSR2 ( xlo nhi )
+ DUP #00 NEQ ( xlo nhi nhi!=0 )
+ ,&hi-set JCN ( xlo nhi )
+ SWP ;bitcount8 JSR2 ADD ( nhi+nlo )
+ RTN
+ &hi-set
+ SWP POP #08 ADD ( nhi+8 )
+ RTN
+
+@bitcount32 ( x** -> n^ )
+ SWP2 ( xlo* xhi* )
+ ;bitcount16 JSR2 ( xlo* nhi )
+ DUP #00 NEQ ( xlo* nhi nhi!=0 )
+ ,&hi-set JCN ( xlo* nhi )
+ TOR ;bitcount16 JSR2 ADD RTN ( nhi+nlo )
+ &hi-set
+ TOR POP2 #10 ADD ( nhi+16 )
+ RTN
+
+( equality )
+
+( x == y )
+@eq32 ( xhi* xlo* yhi* ylo* -> bool^ )
+ ROT2 EQU2 STH
+ EQU2 STHr AND RTN
+
+( x != y )
+@ne32 ( xhi* xlo* yhi* ylo* -> bool^ )
+ ROT2 NEQ2 STH
+ NEQ2 STHr ORA RTN
+
+( x == 0 )
+@is-zero32 ( x** -> bool^ )
+ ORA2 #0000 EQU2 RTN
+
+( x != 0 )
+@non-zero32 ( x** -> bool^ )
+ ORA2 #0000 NEQ2 RTN
+
+( comparisons )
+
+( x < y )
+@lt32 ( x** y** -> bool^ )
+ ROT2 SWP2 ( xhi yhi xlo ylo )
+ LTH2 ,<-lo JCN ( xhi yhi )
+ LTH2 RTN
+ <-lo
+ GTH2 #00 EQU RTN
+
+( x <= y )
+@lteq32 ( x** y** -> bool^ )
+ ROT2 SWP2 ( xhi yhi xlo ylo )
+ GTH2 ,>-lo JCN ( xhi yhi )
+ GTH2 #00 EQU RTN
+ >-lo
+ LTH2 RTN
+
+( x > y )
+@gt32 ( x** y** -> bool^ )
+ ROT2 SWP2 ( xhi yhi xlo ylo )
+ GTH2 ,>-lo JCN ( xhi yhi )
+ GTH2 RTN
+ >-lo
+ LTH2 #00 EQU RTN
+
+( x > y )
+@gteq32 ( x** y** -> bool^ )
+ ROT2 SWP2 ( xhi yhi xlo ylo )
+ LTH2 ,<-lo JCN ( xhi yhi )
+ LTH2 #00 EQU RTN
+ <-lo
+ GTH2 RTN
+
+( bitwise operations )
+
+( x & y )
+@and32 ( xhi* xlo* yhi* ylo* -> xhi|yhi* xlo|ylo* )
+ ROT2 AND2 STH2 AND2 STH2r RTN
+
+( x | y )
+@or32 ( xhi* xlo* yhi* ylo* -> xhi|yhi* xlo|ylo* )
+ ROT2 ORA2 STH2 ORA2 STH2r RTN
+
+( x ^ y )
+@xor32 ( xhi* xlo* yhi* ylo* -> xhi|yhi* xlo|ylo* )
+ ROT2 EOR2 STH2 EOR2 STH2r RTN
+
+( ~x )
+@complement32 ( x** -> ~x** )
+ COMPLEMENT32 RTN
+
+( temporary registers )
+( shared by most operations, except mul32 and div32 )
+[ @x0 $1 @x1 $1 @x2 $1 @x3 $1
+ @y0 $1 @y1 $1 @y2 $1 @y3 $1
+ @z0 $1 @z1 $1 @z2 $1 @z3 $1
+ @w0 $1 @w1 $1 @w2 $2 ]
+
+( bit shifting )
+
+( x >> n )
+@rshift32 ( x** n^ -> x<<n )
+ DUP #08 LTH ;rshift32-0 JCN2 ( x n )
+ DUP #10 LTH ;rshift32-1 JCN2 ( x n )
+ DUP #18 LTH ;rshift32-2 JCN2 ( x n )
+ ;rshift32-3 JMP2 ( x n )
+ RTN
+
+( shift right by 0-7 bits )
+@rshift32-0 ( x** n^ -> x<<n )
+ STHk SFT ;z3 STA ( write z3 )
+ #00 STHkr SFT2 #00 ;z3 LDA ORA2 ;z2 STA2 ( write z2,z3 )
+ #00 STHkr SFT2 #00 ;z2 LDA ORA2 ;z1 STA2 ( write z1,z2 )
+ #00 STHr SFT2 #00 ;z1 LDA ORA2 ( compute z0,z1 )
+ ;z2 LDA2
+ RTN
+
+( shift right by 8-15 bits )
+@rshift32-1 ( x** n^ -> x<<n )
+ #08 SUB STH POP
+ STHkr SFT ;z3 STA ( write z3 )
+ #00 STHkr SFT2 #00 ;z3 LDA ORA2 ;z2 STA2 ( write z2,z3 )
+ #00 STHr SFT2 #00 ;z2 LDA ORA2 ( compute z1,z2 )
+ #00 TOR ;z3 LDA
+ RTN
+
+( shift right by 16-23 bits )
+@rshift32-2 ( x** n^ -> x<<n )
+ #10 SUB STH POP2
+ STHkr SFT ;z3 STA ( write z3 )
+ #00 STHr SFT2 #00 ;z3 LDA ORA2 ( compute z2,z3 )
+ #0000 SWP2
+ RTN
+
+( shift right by 16-23 bits )
+@rshift32-3 ( x** n^ -> x<<n )
+ #18 SUB STH POP2 POP ( x0 )
+ #00 SWP #0000 SWP2 ( 00 00 00 x0 )
+ STHr SFT
+ RTN
+
+( x << n )
+@lshift32 ( x** n^ -> x<<n )
+ DUP #08 LTH ;lshift32-0 JCN2 ( x n )
+ DUP #10 LTH ;lshift32-1 JCN2 ( x n )
+ DUP #18 LTH ;lshift32-2 JCN2 ( x n )
+ ;lshift32-3 JMP2 ( x n )
+ RTN
+
+( shift left by 0-7 bits )
+@lshift32-0 ( x** n^ -> x<<n )
+ #40 SFT STH ( stash n<<4 )
+ #00 SWP STHkr SFT2 ;z2 STA2 ( store z2,z3 )
+ #00 SWP STHkr SFT2 #00 ;z2 LDA ORA2 ;z1 STA2 ( store z1,z2 )
+ #00 SWP STHkr SFT2 #00 ;z1 LDA ORA2 ;z0 STA2 ( store z0,z1 )
+ STHr SFT ;z0 LDA ORA ( calculate z0 )
+ ;z1 LDA ;z2 LDA2
+ RTN
+
+( shift left by 8-15 bits )
+@lshift32-1 ( x** n^ -> x<<n )
+ #08 SUB #40 SFT STH ( stash [n-8]<<4 )
+ #00 SWP STHkr SFT2 ;z1 STA2 ( store z1,z2 )
+ #00 SWP STHkr SFT2 #00 ;z1 LDA ORA2 ;z0 STA2 ( store z0,z1 )
+ STHr SFT ;z0 LDA ORA ( calculate z0 )
+ SWP POP ( x0 unused )
+ ;z1 LDA2 #00
+ RTN
+
+( shift left by 16-23 bits )
+@lshift32-2 ( x** n^ -> x<<n )
+ #10 SUB #40 SFT STH ( stash [n-16]<<4 )
+ #00 SWP STHkr SFT2 ;z0 STA2 ( store z0,z1 )
+ STHr SFT ;z0 LDA ORA ( calculate z0 )
+ STH POP2 STHr
+ ;z1 LDA #0000
+ RTN
+
+( shift left by 24-31 bits )
+@lshift32-3 ( x** n^ -> x<<n )
+ #18 SUB #40 SFT ( x0 x1 x2 x3 r=[n-24]<<4 )
+ SFT ( x0 x1 x2 x3<<r )
+ SWP2 POP2 SWP POP #0000 #00
+ RTN
+
+( arithmetic )
+
+( x + y )
+@add32 ( xhi* xlo* yhi* ylo* -> zhi* zlo* )
+ ;y2 STA2 ;y0 STA2 ( save ylo, yhi )
+ ;x2 STA2 ;x0 STA2 ( save xlo, xhi )
+ #0000 #0000 ;z0 STA2 ;z2 STA2 ( reset zhi, zlo )
+
+ ( x3 + y3 => z2z3 )
+ #00 ;x3 LDA #00 ;y3 LDA ADD2 ;z2 STA2
+
+ ( x2 + y2 + z2 => z1z2 )
+ #00 ;x2 LDA ;z1 LDA2 ADD2 ;z1 STA2
+ #00 ;y2 LDA ;z1 LDA2 ADD2 ;z1 STA2
+
+ ( x1 + y1 + z1 => z0z1 )
+ #00 ;x1 LDA ;z0 LDA2 ADD2 ;z0 STA2
+ #00 ;y1 LDA ;z0 LDA2 ADD2 ;z0 STA2
+
+ ( x0 + y0 + z0 => z0 )
+ ;x0 LDA ;z0 LDA ADD ;z0 STA
+ ;y0 LDA ;z0 LDA ADD ;z0 STA
+
+ ( load zhi,zlo )
+ ;z0 LDA2 ;z2 LDA2
+ RTN
+
+( -x )
+@negate32 ( x** -> -x** )
+ COMPLEMENT32
+ INC2 ( ~xhi -xlo )
+ DUP2 #0000 NEQ2 ( ~xhi -xlo non-zero? )
+ ,&done JCN ( xlo non-zero => don't inc hi )
+ SWP2 INC2 SWP2 ( -xhi -xlo )
+ &done
+ RTN
+
+( x - y )
+@sub32 ( x** y** -> z** )
+ ;negate32 JSR2 ;add32 JSR2 RTN
+
+( 16-bit multiplication )
+@mul16 ( x* y* -> z** )
+ ;y1 STA ;y0 STA ( save ylo, yhi )
+ ;x1 STA ;x0 STA ( save xlo, xhi )
+ #0000 #00 ;z1 STA2 ;z3 STA ( reset z1,z2,z3 )
+ #0000 #00 ;w0 STA2 ;w2 STA ( reset w0,w1,w2 )
+
+ ( x1 * y1 => z1z2 )
+ #00 ;x1 LDA #00 ;y1 LDA MUL2 ;z2 STA2
+
+ ( x0 * y1 => z0z1 )
+ #00 ;x0 LDA #00 ;y1 LDA MUL2 ;z1 LDA2 ADD2 ;z1 STA2
+
+ ( x1 * y0 => w1w2 )
+ #00 ;x1 LDA #00 ;y0 LDA MUL2 ;w1 STA2
+
+ ( x0 * y0 => w0w1 )
+ #00 ;x0 LDA #00 ;y0 LDA MUL2 ;w0 LDA2 ADD2 ;w0 STA2
+
+ ( add z and a<<8 )
+ #00 ;z1 LDA2 ;z3 LDA
+ ;w0 LDA2 ;w2 LDA #00
+ ;add32 JSR2
+ RTN
+
+( x * y )
+@mul32 ( x** y** -> z** )
+ ,&y1 STR2 ,&y0 STR2 ( save ylo, yhi )
+ ,&x1 STR2 ,&x0 STR2 ( save xlo, xhi )
+ ,&y1 LDR2 ,&x1 LDR2 ;mul16 JSR2 ( [x1*y1] )
+ ,&z1 STR2 ,&z0 STR2 ( sum = x1*y1, save zlo, zhi )
+ ,&y1 LDR2 ,&x0 LDR2 MUL2 ( [x0*y1]<<16 )
+ ,&y0 LDR2 ,&x1 LDR2 MUL2 ( [x1*y0]<<16 )
+ ( [x0*y0]<<32 will completely overflow )
+ ADD2 ,&z0 LDR2 ADD2 ( sum += x0*y1<<16 + x1*y0<<16 )
+ ,&z1 LDR2
+ RTN
+[ &x0 $2 &x1 $2
+ &y0 $2 &y1 $2
+ &z0 $2 &z1 $2 ]
+
+@div32 ( x** y** -> q** )
+ ;_divmod32 JSR2
+ ;_divmod32/quo0 LDA2 ;_divmod32/quo1 LDA2
+ RTN
+
+@mod32 ( x** y** -> r** )
+ ;_divmod32 JSR2
+ ;_divmod32/rem0 LDA2 ;_divmod32/rem1 LDA2
+ RTN
+
+@divmod32 ( x** y** -> q** r** )
+ ;_divmod32 JSR2
+ ;_divmod32/quo0 LDA2 ;_divmod32/quo1 LDA2
+ ;_divmod32/rem0 LDA2 ;_divmod32/rem1 LDA2
+ RTN
+
+( calculate and store x / y and x % y )
+@_divmod32 ( x** y** -> )
+ ( store y and x for repeated use )
+ ,&div1 STR2 ,&div0 STR2 ( y -> div )
+ ,&rem1 STR2 ,&rem0 STR2 ( x -> rem )
+
+ ( if x < y then the answer is 0 )
+ ,&rem0 LDR2 ,&rem1 LDR2
+ ,&div0 LDR2 ,&div1 LDR2
+ ;lt32 JSR2 ,&is-zero JCN ,¬-zero JMP
+ &is-zero
+ #0000 ,&quo0 STR2 #0000 ,&quo1 STR2 RTN
+
+ ( x >= y so the answer is >= 1 )
+ ¬-zero
+ #0000 ,&quo0 STR2 #0000 ,&quo1 STR2 ( 0 -> quo )
+
+ ( bitcount[x] - bitcount[y] determines the largest multiple of y to try )
+ ,&rem0 LDR2 ,&rem1 LDR2 ;bitcount32 JSR2 ( rbits^ )
+ ,&div0 LDR2 ,&div1 LDR2 ;bitcount32 JSR2 ( rbits^ dbits^ )
+ SUB ( shift=rbits-dits )
+ #00 DUP2 ( shift 0 shift 0 )
+
+ ( 1<<shift -> cur )
+ #0000 #0001 ROT2 POP
+ ;lshift32 JSR2 ,&cur1 STR2 ,&cur0 STR2
+
+ ( div<<shift -> div )
+ ,&div0 LDR2 ,&div1 LDR2 ROT2 POP
+ ;lshift32 JSR2 ,&div1 STR2 ,&div0 STR2
+
+ ,&loop JMP
+
+ [ &div0 $2 &div1 $2
+ &rem0 $2 &rem1 $2
+ &quo0 $2 &quo1 $2
+ &cur0 $2 &cur1 $2 ]
+
+ &loop
+ ( if rem >= the current divisor, we can subtract it and add to quotient )
+ ,&rem0 LDR2 ,&rem1 LDR2 ,&div0 LDR2 ,&div1 LDR2 ;lt32 JSR2 ( is rem < div? )
+ ,&rem-lt JCN ( if rem < div skip this iteration )
+
+ ( since rem >= div, we have found a multiple of y that divides x )
+ ,&rem0 LDR2 ,&rem1 LDR2 ,&div0 LDR2 ,&div1 LDR2 ;sub32 JSR2 ,&rem1 STR2 ,&rem0 STR2 ( rem -= div )
+ ,&quo0 LDR2 ,&quo1 LDR2 ,&cur0 LDR2 ,&cur1 LDR2 ;add32 JSR2 ,&quo1 STR2 ,&quo0 STR2 ( quo += cur )
+
+ &rem-lt
+ ,&div0 LDR2 ,&div1 LDR2 #01 ;rshift32 JSR2 ,&div1 STR2 ,&div0 STR2 ( div >>= 1 )
+ ,&cur0 LDR2 ,&cur1 LDR2 #01 ;rshift32 JSR2 ,&cur1 STR2 ,&cur0 STR2 ( cur >>= 1 )
+ ,&cur0 LDR2 ,&cur1 LDR2 ;non-zero32 JSR2 ,&loop JCN ( if cur>0, loop. else we're done )
+ RTN
+
+( greatest common divisor - euclidean algorithm )
+@gcd32 ( x** y** -> z** )
+ &loop ( x y )
+ DUP4 ( x y y )
+ ;is-zero32 JSR2 ( x y y=0? )
+ ,&done JCN ( x y )
+ DUP4 ( x y y )
+ STH2 STH2 ( x y [y] )
+ ;mod32 JSR2 ( r=x%y [y] )
+ STH2r ( rhi rlo yhi [ylo] )
+ ROT2 ( rlo yhi rhi [ylo] )
+ ROT2 ( yhi rhi rlo [ylo] )
+ STH2r ( yhi rhi rlo ylo )
+ ROT2 ( yhi rlo ylo rhi )
+ ROT2 ( yhi ylo rhi rlo )
+ ,&loop JMP
+ &done
+ POP4 ( x )
+ RTN
diff --git a/untitled.chr b/untitled.chr
Binary files differ.