;===========================================================
; fmult -- single-precision floating-point multiply
;
; Entry:  R0:R1:R2:R3 = operand A  (R0 = most significant byte)
;         R4:R5:R6:R7 = operand B  (R4 = most significant byte)
; Exit:   R4:R5:R6:R7 = A * B
;
; Byte layout the code relies on (same as IEEE-754 single):
;   byte 0 = sign        | exponent[7:1]
;   byte 1 = exponent[0] | mantissa[22:16]
;   byte 2 = mantissa[15:8]
;   byte 3 = mantissa[7:0]
;
; Not handled: 0, -0, INF, -INF, NaN, denormalized numbers.
; The 8-bit exponent sum is never range-checked, so a result
; whose exponent leaves the representable range wraps silently.
;
; Rounding: half of the last kept bit is added before the low
; bits are dropped (halves round up in magnitude).
;
; Destroys: ACC, B, carry, PSW.5 (holds the result sign; this
;           is F0 in the standard 8051 PSW), other PSW flags
;           touched by ADD/MUL/CJNE,
;           R0 (left holding A's biased exponent),
;           R1 (bit 7 forced to 1),
;           MA, MB, MC, MCNT1.
; Keeps:    R2, R3.
; Stack:    at most 2 bytes in use at any moment.
;
; Accelerator usage, as implied by the original annotations
; (confirm against the data sheet of the target part):
;   - MB then MA are each loaded low byte first; the product is
;     added into the 40-bit accumulator MC.
;   - MC is written low byte first and read high byte first.
;   - The six NOPs after each MA load are kept exactly as in the
;     original -- presumably the wait for the multiply to
;     finish.  Do not remove or reorder them.
;
; [n] = machine cycles as annotated by the original author.
; Stated totals: min [141] = 14.1 us, max [168] = 16.8 us
; at 40 MHz.  NOTE(review): "mov a, r5" after the final push
; was annotated [2] although every other "mov a, Rn" is [1];
; the totals include that 2, so they may be one cycle high.
;===========================================================

FM_CLM      equ     10h                 ; MCNT1 mask: CLM=1 clears MA, MB, MC
FM_UNBIAS   equ     81h                 ; -127 mod 256: removes one exponent bias
FM_PRERND   equ     40h                 ; half an lsb for the "shift left" case
FM_RNDCHK   equ     0C0h                ; guard >= this -> round, "no shift" case

;---- 1. sign of the result = sign(A) xor sign(B) -----------
fmult:      mov     a, r0               ; [1]
            xrl     a, r4               ; [1] bit 7 = product sign
            rlc     a                   ; [1] move it into carry
            mov     psw.5, c            ; [2] park the sign in PSW.5

;---- 2. exponents: unpack, add, remove one bias ------------
; While unpacking, bit 7 of R1 and R5 is replaced by the
; implied leading 1 so R1:R2:R3 / R5:R6:R7 become the full
; 24-bit significands A2:A1:A0 / B2:B1:B0.
            mov     a, r1               ; [1]
            setb    acc.7               ; [2] insert the hidden 1
            xch     a, r1               ; [1] R1 = A2, ACC = original byte
            rlc     a                   ; [1] carry = exponent bit 0
            mov     a, r0               ; [1] sign + exponent[7:1]
            rlc     a                   ; [1] ACC = 8-bit exponent of A
            mov     r0, a               ; [1] keep it in R0

            mov     a, r5               ; [1]
            setb    acc.7               ; [2] insert the hidden 1
            xch     a, r5               ; [1] R5 = B2, ACC = original byte
            rlc     a                   ; [1] carry = exponent bit 0
            mov     a, r4               ; [1] sign + exponent[7:1]
            rlc     a                   ; [1] ACC = 8-bit exponent of B

            add     a, r0               ; [1] expA + expB
            add     a, #FM_UNBIAS       ; [2] - 127 (mod 256)
            mov     r4, a               ; [1] R4 = provisional exponent

;---- 3. low partial products, summed in units of 2^8 -------
;        MC = hi(A0*B0) + A0*B2B1 + B0*A2A1
            orl     mcnt1, #FM_CLM      ; [3] clear MA, MB, MC
            mov     a, r7               ; [1] B0
            mov     b, r3               ; [2] A0
            mul     ab                  ; [5] B:A = A0 * B0
            mov     mc, b               ; [3] only the high byte is kept
            clr     a                   ; [1] ACC = 0, used as a zero high byte

            mov     mb, r3              ; [2] MB = 00:A0
            mov     mb, a               ; [2]
            mov     ma, r6              ; [2] MA = B2:B1
            mov     ma, r5              ; [2]
            nop                         ; [1] wait: A0 * B2B1
            nop                         ; [1]
            nop                         ; [1]
            nop                         ; [1]
            nop                         ; [1]
            nop                         ; [1]

            mov     mb, r2              ; [2] MB = A2:A1
            mov     mb, r1              ; [2]
            mov     ma, r7              ; [2] MA = 00:B0
            mov     ma, a               ; [2]
            nop                         ; [1] wait: A2A1 * B0
            nop                         ; [1]
            nop                         ; [1]
            nop                         ; [1]
            nop                         ; [1]
            nop                         ; [1]

;---- 4. drop the low byte of MC and pre-round --------------
; MC is read out, its lowest byte is never fetched, and the
; rest is written back one byte lower with FM_PRERND added.
            mov     a, mc               ; [2] top byte, discarded
            push    mc                  ; [2] next byte  -> stack
            push    mc                  ; [2] next byte  -> stack
            mov     a, mc               ; [2] next byte  -> ACC
            add     a, #FM_PRERND       ; [2] pre-round; carry = overflow
            orl     mcnt1, #FM_CLM      ; [3] clear MA, MB, MC (carry untouched)
            mov     mc, a               ; [2] new lowest byte
            jnc     fm_ld2              ; [3] no carry: reload the rest as-is

            pop     acc                 ; [2] carry: bump the second byte
            inc     a                   ; [1]
            mov     mc, a               ; [2]
            jnz     fm_ld1              ; [3] did not wrap: third byte as-is
            pop     acc                 ; [2] wrapped: bump the third byte too
            inc     a                   ; [1]
            mov     mc, a               ; [2]
            sjmp    fm_hi               ; [3]

fm_ld2:     pop     mc                  ; [2] second byte unchanged
fm_ld1:     pop     mc                  ; [2] third byte unchanged

;---- 5. high partial product A2A1 * B2B1 -------------------
fm_hi:      mov     mb, r2              ; [2] MB = A2:A1
            mov     mb, r1              ; [2]
            mov     ma, r6              ; [2] MA = B2:B1
            mov     ma, r5              ; [2]
            nop                         ; [1] wait: A2A1 * B2B1
            nop                         ; [1]
            nop                         ; [1]
            nop                         ; [1]
            nop                         ; [1]
            nop                         ; [1]

            mov     a, mc               ; [2] top byte, discarded
            mov     r5, mc              ; [2] R5:R6:R7 = next three bytes
            mov     r6, mc              ; [2]
            mov     r7, mc              ; [2]
            push    mc                  ; [2] lowest byte = guard bits
            mov     a, r5               ; [2]
            jnb     acc.7, fm_norm      ; [4] top bit clear: shift left needed

;---- 6a. top bit already set: round, exponent + 1 ----------
; The guard byte already contains +40h, so "guard >= C0h"
; means the bits below the result were at least one half.
            pop     acc                 ; [2] guard byte
            cjne    a, #FM_RNDCHK, fm_rchk ; [4] carry = (guard < C0h)
fm_rchk:    jc      fm_pack             ; [3] below one half: no increment
            inc     r7                  ; [1] add one lsb ...
            mov     a, r7               ; [1]
            jnz     fm_pack             ; [3] ... and ripple while bytes wrap
            inc     r6                  ; [1]
            mov     a, r6               ; [1]
            jnz     fm_pack             ; [3]
            inc     r5                  ; [1]

fm_pack:    inc     r4                  ; [1] exponent + 1
            mov     a, r4               ; [1]
            mov     c, psw.5            ; [2] fetch the sign
            rrc     a                   ; [1] sign -> bit 7, exp bit 0 -> carry
            mov     r4, a               ; [1] R4 = sign | exponent[7:1]
            jnc     fm_exp0             ; [3] exp bit 0 = 0: clear R5.7
            ret                         ; [4] exp bit 0 = 1: R5.7 is already 1

;---- 6b. top bit clear: shift significand left one bit -----
; Rounding for this case was already done by FM_PRERND.
fm_norm:    pop     acc                 ; [2] guard byte
            rlc     a                   ; [1] carry = guard bit 7
            mov     a, r7               ; [1]
            rlc     a                   ; [1]
            mov     r7, a               ; [1] R7 <<= 1, guard bit shifted in
            mov     a, r6               ; [1]
            rlc     a                   ; [1]
            mov     r6, a               ; [1]
            mov     a, r5               ; [1]
            rlc     a                   ; [1]
            mov     r5, a               ; [1] leading 1 now in R5.7
            mov     a, r4               ; [1] exponent unchanged
            mov     c, psw.5            ; [2] fetch the sign
            rrc     a                   ; [1] sign -> bit 7, exp bit 0 -> carry
            mov     r4, a               ; [1] R4 = sign | exponent[7:1]
            jc      fm_done             ; [3] exp bit 0 = 1: R5.7 is already 1

;---- 7. exponent bit 0 is 0: replace the leading 1 by 0 ----
fm_exp0:    mov     a, r5               ; [1]
            cpl     acc.7               ; [2] R5.7 was 1 -> becomes 0
            mov     r5, a               ; [1]
fm_done:    ret                         ; [4]