uxn

Varvara Ordinator, written in ANSI C(SDL2)
git clone https://git.eamoncaddigan.net/uxn.git
Log | Files | Refs | README | LICENSE

math32.tal (12649B)


      1 ( math32.tal )
      2 ( )
      3 ( This library supports arithmetic on 32-bit unsigned integers, )
      4 ( also known as long values. )
      5 ( )
      6 ( 32-bit long values are represented by two 16-bit short values: )
      7 ( )
      8 (      decimal  hexadecimal  uxn literals )
      9 (            0   0x00000000   #0000 #0000 )
     10 (            1   0x00000001   #0000 #0001 )
     11 (         4660   0x00001234   #0000 #1234 )
     12 (        65535   0x0000ffff   #0000 #ffff )
     13 (        65536   0x00010000   #0001 #0000 )
     14 (     16777215   0x00ffffff   #00ff #ffff )
     15 (   4294967295   0xffffffff   #ffff #ffff )
     16 ( )
     17 ( The most significant 16-bit, the "high bits", are stored first. )
     18 ( We document long values as x** -- equivalent to xhi* xlo*. )
     19 ( )
     20 ( Operations supported: )
     21 ( )
     22 (   NAME            STACK EFFECT        DEFINITION       )
     23 (   add32           x** y** -> z**      x + y            )
     24 (   sub32           x** y** -> z**      x - y            )
     25 (   mul16           x*  y*  -> z**      x * y            )
     26 (   mul32           x** y** -> z**      x * y            )
     27 (   div32           x** y** -> q**      x / y            )
     28 (   mod32           x** y** -> r**      x % y            )
     29 (   divmod32        x** y** -> q** r**  x / y, x % y     )
     30 (   gcd32           x** y** -> z**      gcd(x, y)        )
     31 (   negate32        x**     -> z**      -x               )
     32 (   lshift32        x** n^  -> z**      x<<n             )
     33 (   rshift32        x** n^  -> z**      x>>n             )
     34 (   and32           x** y** -> z**      x & y            )
     35 (   or32            x** y** -> z**      x | y            )
     36 (   xor32           x** y** -> z**      x ^ y            )
     37 (   complement32    x**     -> z**      ~x               )
     38 (   eq32            x** y** -> bool^    x == y           )
     39 (   ne32            x** y** -> bool^    x != y           )
     40 (   is-zero32       x**     -> bool^    x == 0           )
     41 (   non-zero32      x**     -> bool^    x != 0           )
     42 (   lt32            x** y** -> bool^    x < y            )
     43 (   gt32            x** y** -> bool^    x > y            )
     44 (   lteq32          x** y** -> bool^    x <= y           )
     45 (   gteq32          x** y** -> bool^    x >= y           )
     46 (   bitcount8       x^      -> bool^    floor(log2(x))+1 )
     47 (   bitcount16      x*      -> bool^    floor(log2(x))+1 )
     48 (   bitcount32      x**     -> bool^    floor(log2(x))+1 )
     49 ( )
     50 ( In addition to the code this file uses 44 bytes of registers )
     51 ( to store temporary state: )
     52 ( )
     53 (   - shared memory, 16 bytes )
     54 (   - mul32 memory, 12 bytes )
     55 (   - _divmod32 memory, 16 bytes )
     56 
     57 ( bitcount: number of bits needed to represent number )
     58 ( equivalent to floor[log2[x]] + 1 )
     59 
     60 @bitcount8 ( x^ -> n^ )
     61     #00 SWP ( n x )
     62     &loop
     63     DUP #00 EQU ( n x x=0 )
     64     ,&done JCN ( n x )
     65     #01 SFT ( n x>>1 )
     66     SWP INC SWP ( n+1 x>>1 )
     67     ,&loop JMP
     68     &done
     69     POP ( n )
     70     JMP2r
     71 
     72 @bitcount16 ( x* -> n^ )
     73     SWP ( xlo xhi )
     74     ;bitcount8 JSR2 ( xlo nhi )
     75     DUP #00 NEQ ( xlo nhi nhi!=0 )
     76     ,&hi-set JCN ( xlo nhi )
     77     SWP ;bitcount8 JSR2 ADD ( nhi+nlo )
     78     JMP2r 
     79     &hi-set
     80     SWP POP #08 ADD ( nhi+8 )
     81     JMP2r
     82 
     83 @bitcount32 ( x** -> n^ )
     84     SWP2 ( xlo* xhi* )
     85     ;bitcount16 JSR2 ( xlo* nhi )
     86     DUP #00 NEQ ( xlo* nhi nhi!=0 )
     87     ,&hi-set JCN ( xlo* nhi )
     88     ROT ROT ;bitcount16 JSR2 ADD JMP2r ( nhi+nlo )
     89     &hi-set
     90     ROT ROT POP2 #10 ADD ( nhi+16 )    
     91     JMP2r
     92 
     93 ( equality )
     94 
     95 ( x == y )
     96 @eq32 ( xhi* xlo* yhi* ylo* -> bool^ )
     97     ROT2 EQU2 STH
     98     EQU2 STHr AND JMP2r
     99 
    100 ( x != y )
    101 @ne32 ( xhi* xlo* yhi* ylo* -> bool^ )
    102     ROT2 NEQ2 STH
    103     NEQ2 STHr ORA JMP2r
    104 
    105 ( x == 0 )
    106 @is-zero32 ( x** -> bool^ )
    107     ORA2 #0000 EQU2 JMP2r
    108 
    109 ( x != 0 )
    110 @non-zero32 ( x** -> bool^ )
    111     ORA2 #0000 NEQ2 JMP2r
    112 
    113 ( comparisons )
    114 
    115 ( x < y )
    116 @lt32 ( x** y** -> bool^ )
    117     ROT2 SWP2 ( xhi yhi xlo ylo )
    118     LTH2 ,&lt-lo JCN ( xhi yhi )
    119     LTH2 JMP2r
    120     &lt-lo
    121     GTH2 #00 EQU JMP2r
    122 
    123 ( x <= y )
    124 @lteq32 ( x** y** -> bool^ )
    125     ROT2 SWP2 ( xhi yhi xlo ylo )
    126     GTH2 ,&gt-lo JCN ( xhi yhi )
    127     GTH2 #00 EQU JMP2r
    128     &gt-lo
    129     LTH2 JMP2r
    130 
    131 ( x > y )
    132 @gt32 ( x** y** -> bool^ )
    133     ROT2 SWP2 ( xhi yhi xlo ylo )
    134     GTH2 ,&gt-lo JCN ( xhi yhi )
    135     GTH2 JMP2r
    136     &gt-lo
    137     LTH2 #00 EQU JMP2r
    138 
    139 ( x > y )
    140 @gteq32 ( x** y** -> bool^ )
    141     ROT2 SWP2 ( xhi yhi xlo ylo )
    142     LTH2 ,&lt-lo JCN ( xhi yhi )
    143     LTH2 #00 EQU JMP2r
    144     &lt-lo
    145     GTH2 JMP2r
    146 
    147 ( bitwise operations )
    148 
    149 ( x & y )
    150 @and32 ( xhi* xlo* yhi* ylo* -> xhi|yhi* xlo|ylo* )
    151     ROT2 AND2 STH2 AND2 STH2r JMP2r
    152 
    153 ( x | y )
    154 @or32 ( xhi* xlo* yhi* ylo* -> xhi|yhi* xlo|ylo* )
    155     ROT2 ORA2 STH2 ORA2 STH2r JMP2r
    156 
    157 ( x ^ y )
    158 @xor32 ( xhi* xlo* yhi* ylo* -> xhi|yhi* xlo|ylo* )
    159     ROT2 EOR2 STH2 EOR2 STH2r JMP2r
    160 
    161 ( ~x )
    162 @complement32 ( x** -> ~x** )
    163     SWP2 #ffff EOR2 SWP2 #ffff EOR2 JMP2r
    164 
    165 ( temporary registers )
    166 ( shared by most operations, except mul32 and div32 )
    167 @m32 [ &x0 $1 &x1 $1 &x2 $1 &x3 $1
    168        &y0 $1 &y1 $1 &y2 $1 &y3 $1
    169        &z0 $1 &z1 $1 &z2 $1 &z3 $1
    170        &w0 $1 &w1 $1 &w2 $2 ]
    171 
    172 ( bit shifting )
    173 
    174 ( x >> n )
    175 @rshift32 ( x** n^ -> x<<n )
    176     DUP #08 LTH ;rshift32-0 JCN2 ( x n )
    177     DUP #10 LTH ;rshift32-1 JCN2 ( x n )
    178     DUP #18 LTH ;rshift32-2 JCN2 ( x n )
    179     ;rshift32-3 JMP2 ( x n )
    180     JMP2r
    181 
    182 ( shift right by 0-7 bits )
    183 @rshift32-0 ( x** n^ -> x<<n )
    184         STHk  SFT                      ;m32/z3 STA  ( write z3 )
    185     #00 STHkr SFT2 #00 ;m32/z3 LDA ORA2 ;m32/z2 STA2 ( write z2,z3 )
    186     #00 STHkr SFT2 #00 ;m32/z2 LDA ORA2 ;m32/z1 STA2 ( write z1,z2 )
    187     #00 STHr  SFT2 #00 ;m32/z1 LDA ORA2             ( compute z0,z1 )
    188     ;m32/z2 LDA2
    189     JMP2r
    190 
    191 ( shift right by 8-15 bits )
    192 @rshift32-1 ( x** n^ -> x<<n )
    193     #08 SUB STH POP 
    194         STHkr SFT                      ;m32/z3 STA  ( write z3 )
    195     #00 STHkr SFT2 #00 ;m32/z3 LDA ORA2 ;m32/z2 STA2 ( write z2,z3 )
    196     #00 STHr  SFT2 #00 ;m32/z2 LDA ORA2             ( compute z1,z2 )
    197     #00 ROT ROT ;m32/z3 LDA
    198     JMP2r
    199 
    200 ( shift right by 16-23 bits )
    201 @rshift32-2 ( x** n^ -> x<<n )
    202     #10 SUB STH POP2
    203         STHkr SFT                      ;m32/z3 STA ( write z3 )
    204     #00 STHr  SFT2 #00 ;m32/z3 LDA ORA2            ( compute z2,z3 )
    205     #0000 SWP2
    206     JMP2r
    207 
    208 ( shift right by 16-23 bits )
    209 @rshift32-3 ( x** n^ -> x<<n )
    210     #18 SUB STH POP2 POP ( x0 )
    211     #00 SWP #0000 SWP2 ( 00 00 00 x0 )
    212     STHr SFT
    213     JMP2r
    214 
    215 ( x << n )
    216 @lshift32 ( x** n^ -> x<<n )
    217     DUP #08 LTH ;lshift32-0 JCN2 ( x n )
    218     DUP #10 LTH ;lshift32-1 JCN2 ( x n )
    219     DUP #18 LTH ;lshift32-2 JCN2 ( x n )
    220     ;lshift32-3 JMP2 ( x n )
    221     JMP2r
    222 
    223 ( shift left by 0-7 bits )
    224 @lshift32-0 ( x** n^ -> x<<n )
    225     #40 SFT STH ( stash n<<4 )
    226     #00 SWP STHkr SFT2                     ;m32/z2 STA2 ( store z2,z3 )
    227     #00 SWP STHkr SFT2 #00 ;m32/z2 LDA ORA2 ;m32/z1 STA2 ( store z1,z2 )
    228     #00 SWP STHkr SFT2 #00 ;m32/z1 LDA ORA2 ;m32/z0 STA2 ( store z0,z1 )
    229             STHr  SFT      ;m32/z0 LDA ORA              ( calculate z0 )
    230     ;m32/z1 LDA ;m32/z2 LDA2
    231     JMP2r
    232 
    233 ( shift left by 8-15 bits )
    234 @lshift32-1 ( x** n^ -> x<<n )
    235     #08 SUB #40 SFT STH ( stash [n-8]<<4 )
    236     #00 SWP STHkr SFT2                     ;m32/z1 STA2 ( store z1,z2 )
    237     #00 SWP STHkr SFT2 #00 ;m32/z1 LDA ORA2 ;m32/z0 STA2 ( store z0,z1 )
    238             STHr  SFT      ;m32/z0 LDA ORA              ( calculate z0 )
    239     SWP POP ( x0 unused )
    240     ;m32/z1 LDA2 #00
    241     JMP2r
    242 
    243 ( shift left by 16-23 bits )
    244 @lshift32-2 ( x** n^ -> x<<n )
    245     #10 SUB #40 SFT STH ( stash [n-16]<<4 )
    246     #00 SWP STHkr SFT2                ;m32/z0 STA2 ( store z0,z1 )
    247             STHr  SFT  ;m32/z0 LDA ORA             ( calculate z0 )
    248     STH POP2 STHr
    249     ;m32/z1 LDA #0000
    250     JMP2r
    251 
    252 ( shift left by 24-31 bits )
    253 @lshift32-3 ( x** n^ -> x<<n )
    254     #18 SUB #40 SFT ( x0 x1 x2 x3 r=[n-24]<<4 )
    255     SFT ( x0 x1 x2 x3<<r )
    256     SWP2 POP2 SWP POP #0000 #00
    257     JMP2r
    258 
    259 ( arithmetic )
    260 
    261 ( x + y )
    262 @add32 ( xhi* xlo* yhi* ylo* -> zhi* zlo* )
    263     ;m32/y2 STA2 ;m32/y0 STA2 ( save ylo, yhi )
    264     ;m32/x2 STA2 ;m32/x0 STA2 ( save xlo, xhi )
    265     #0000 #0000 ;m32/z0 STA2 ;m32/z2 STA2 ( reset zhi, zlo )
    266 
    267     ( x3 + y3 => z2z3 )
    268     #00 ;m32/x3 LDA #00 ;m32/y3 LDA ADD2 ;m32/z2 STA2
    269 
    270     ( x2 + y2 + z2 => z1z2 )
    271     #00 ;m32/x2 LDA ;m32/z1 LDA2 ADD2 ;m32/z1 STA2
    272     #00 ;m32/y2 LDA ;m32/z1 LDA2 ADD2 ;m32/z1 STA2
    273 
    274     ( x1 + y1 + z1 => z0z1 )
    275     #00 ;m32/x1 LDA ;m32/z0 LDA2 ADD2 ;m32/z0 STA2
    276     #00 ;m32/y1 LDA ;m32/z0 LDA2 ADD2 ;m32/z0 STA2
    277 
    278     ( x0 + y0 + z0 => z0 )
    279     ;m32/x0 LDA ;m32/z0 LDA ADD ;m32/z0 STA
    280     ;m32/y0 LDA ;m32/z0 LDA ADD ;m32/z0 STA
    281 
    282     ( load zhi,zlo )
    283     ;m32/z0 LDA2 ;m32/z2 LDA2
    284     JMP2r
    285 
    286 ( -x )
    287 @negate32 ( x** -> -x** )
    288     ;complement32 JSR2
    289     INC2 ( ~xhi -xlo )
    290     DUP2 #0000 NEQ2 ( ~xhi -xlo non-zero? )
    291     ,&done JCN ( xlo non-zero => don't inc hi )
    292     SWP2 INC2 SWP2 ( -xhi -xlo )
    293     &done
    294     JMP2r
    295 
    296 ( x - y )
    297 @sub32 ( x** y** -> z** )
    298     ;negate32 JSR2 ;add32 JSR2 JMP2r
    299 
    300 ( 16-bit multiplication )
    301 @mul16 ( x* y* -> z** )
    302     ;m32/y1 STA ;m32/y0 STA ( save ylo, yhi )
    303     ;m32/x1 STA ;m32/x0 STA ( save xlo, xhi )
    304     #0000 #00 ;m32/z1 STA2 ;m32/z3 STA ( reset z1,z2,z3 )
    305     #0000 #00 ;m32/w0 STA2 ;m32/w2 STA ( reset w0,w1,w2 )
    306 
    307     ( x1 * y1 => z1z2 )
    308     #00 ;m32/x1 LDA #00 ;m32/y1 LDA MUL2 ;m32/z2 STA2
    309 
    310     ( x0 * y1 => z0z1 )
    311     #00 ;m32/x0 LDA #00 ;m32/y1 LDA MUL2 ;m32/z1 LDA2 ADD2 ;m32/z1 STA2
    312 
    313     ( x1 * y0 => w1w2 )
    314     #00 ;m32/x1 LDA #00 ;m32/y0 LDA MUL2 ;m32/w1 STA2
    315 
    316     ( x0 * y0 => w0w1 )
    317     #00 ;m32/x0 LDA #00 ;m32/y0 LDA MUL2 ;m32/w0 LDA2 ADD2 ;m32/w0 STA2
    318 
    319     ( add z and a<<8 )
    320     #00 ;m32/z1 LDA2 ;m32/z3 LDA
    321     ;m32/w0 LDA2 ;m32/w2 LDA #00
    322     ;add32 JSR2
    323     JMP2r
    324 
    325 ( x * y )
    326 @mul32 ( x** y** -> z** ) 
    327     ,&y1 STR2 ,&y0 STR2 ( save ylo, yhi )
    328     ,&x1 STR2 ,&x0 STR2 ( save xlo, xhi )
    329     ,&y1 LDR2 ,&x1 LDR2 ;mul16 JSR2 ( [x1*y1] )
    330     ,&z1 STR2 ,&z0 STR2 ( sum = x1*y1, save zlo, zhi )
    331     ,&y1 LDR2 ,&x0 LDR2 MUL2 ( [x0*y1]<<16 )
    332     ,&y0 LDR2 ,&x1 LDR2 MUL2 ( [x1*y0]<<16 )
    333     ( [x0*y0]<<32 will completely overflow )
    334     ADD2 ,&z0 LDR2 ADD2 ( sum += x0*y1<<16 + x1*y0<<16 )
    335     ,&z1 LDR2
    336     JMP2r
    337 [ &x0 $2 &x1 $2
    338   &y0 $2 &y1 $2
    339   &z0 $2 &z1 $2 ]
    340 
    341 @div32 ( x** y** -> q** )
    342     ;_divmod32 JSR2
    343     ;_divmod32/quo0 LDA2 ;_divmod32/quo1 LDA2
    344     JMP2r
    345 
    346 @mod32 ( x** y** -> r** )
    347     ;_divmod32 JSR2
    348     ;_divmod32/rem0 LDA2 ;_divmod32/rem1 LDA2
    349     JMP2r
    350 
    351 @divmod32 ( x** y** -> q** r** )
    352     ;_divmod32 JSR2
    353     ;_divmod32/quo0 LDA2 ;_divmod32/quo1 LDA2
    354     ;_divmod32/rem0 LDA2 ;_divmod32/rem1 LDA2
    355     JMP2r
    356 
    357 ( calculate and store x / y and x % y )
    358 @_divmod32 ( x** y** -> )
    359     ( store y and x for repeated use )
    360     ,&div1 STR2 ,&div0 STR2 ( y -> div )
    361     ,&rem1 STR2 ,&rem0 STR2 ( x -> rem )
    362 
    363     ( if x < y then the answer is 0 )
    364     ,&rem0 LDR2 ,&rem1 LDR2
    365     ,&div0 LDR2 ,&div1 LDR2
    366     ;lt32 JSR2 ,&is-zero JCN ,&not-zero JMP
    367     &is-zero
    368     #0000 ,&quo0 STR2 #0000 ,&quo1 STR2 JMP2r
    369 
    370     ( x >= y so the answer is >= 1 )
    371     &not-zero
    372     #0000 ,&quo0 STR2 #0000 ,&quo1 STR2 ( 0 -> quo )
    373 
    374     ( bitcount[x] - bitcount[y] determines the largest multiple of y to try )
    375     ,&rem0 LDR2 ,&rem1 LDR2 ;bitcount32 JSR2 ( rbits^ )
    376     ,&div0 LDR2 ,&div1 LDR2 ;bitcount32 JSR2 ( rbits^ dbits^ )
    377     SUB ( shift=rbits-dits )
    378     #00 DUP2 ( shift 0 shift 0 )
    379 
    380     ( 1<<shift -> cur )
    381     #0000 #0001 ROT2 POP
    382     ;lshift32 JSR2 ,&cur1 STR2 ,&cur0 STR2
    383     
    384     ( div<<shift -> div )
    385     ,&div0 LDR2 ,&div1 LDR2 ROT2 POP
    386     ;lshift32 JSR2 ,&div1 STR2 ,&div0 STR2 
    387 
    388     ,&loop JMP
    389 
    390     [ &div0 $2 &div1 $2
    391       &rem0 $2 &rem1 $2
    392       &quo0 $2 &quo1 $2
    393       &cur0 $2 &cur1 $2 ]
    394 
    395     &loop
    396     ( if rem >= the current divisor, we can subtract it and add to quotient )
    397     ,&rem0 LDR2 ,&rem1 LDR2 ,&div0 LDR2 ,&div1 LDR2 ;lt32 JSR2 ( is rem < div? )
    398     ,&rem-lt JCN ( if rem < div skip this iteration )
    399 
    400     ( since rem >= div, we have found a multiple of y that divides x )
    401     ,&rem0 LDR2 ,&rem1 LDR2 ,&div0 LDR2 ,&div1 LDR2 ;sub32 JSR2 ,&rem1 STR2 ,&rem0 STR2 ( rem -= div )
    402     ,&quo0 LDR2 ,&quo1 LDR2 ,&cur0 LDR2 ,&cur1 LDR2 ;add32 JSR2 ,&quo1 STR2 ,&quo0 STR2 ( quo += cur )
    403 
    404     &rem-lt
    405     ,&div0 LDR2 ,&div1 LDR2 #01 ;rshift32 JSR2 ,&div1 STR2 ,&div0 STR2 ( div >>= 1 )
    406     ,&cur0 LDR2 ,&cur1 LDR2 #01 ;rshift32 JSR2 ,&cur1 STR2 ,&cur0 STR2 ( cur >>= 1 )
    407     ,&cur0 LDR2 ,&cur1 LDR2 ;non-zero32 JSR2 ,&loop JCN ( if cur>0, loop. else we're done )
    408     JMP2r
    409 
    410 ( greatest common divisor - euclidean algorithm )
    411 @gcd32 ( x** y** -> z** )
    412     &loop ( x y )
    413     OVR2 OVR2 ( x y y )
    414     ;is-zero32 JSR2 ( x y y=0? )
    415     ,&done JCN ( x y )
    416     OVR2 OVR2 ( x y y )
    417     STH2 STH2 ( x y [y] )
    418     ;mod32 JSR2 ( r=x%y [y] )
    419     STH2r ( rhi rlo yhi [ylo] )
    420     ROT2 ( rlo yhi rhi [ylo] )
    421     ROT2 ( yhi rhi rlo [ylo] )
    422     STH2r ( yhi rhi rlo ylo )
    423     ROT2 ( yhi rlo ylo rhi )
    424     ROT2 ( yhi ylo rhi rlo )
    425     ,&loop JMP
    426     &done
    427     POP2 POP2 ( x )
    428     JMP2r