Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions src/crt/i48mulhu.src
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,8 @@ __i48mulhu:
; minimum: 92F + 42R + 42W + 2
; maximum: 94F + 42R + 42W + 4
; including __i48mulu:
; minimum: 900F + 246R + 182W + 342
; maximum: 902F + 246R + 182W + 344
; minimum: 896F + 246R + 182W + 342
; maximum: 898F + 246R + 182W + 344
push ix
push iy
push bc
Expand Down
6 changes: 3 additions & 3 deletions src/crt/i48mulu.src
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@
.type __i48mulu, @function

__i48mulu:
; CC: 202*r(PC)+51*r(SPL)+35*w(SPL)+85
; CC: 201 bytes | 202F + 51R + 35W + 85
; CC: 201*r(PC)+51*r(SPL)+35*w(SPL)+85
; CC: 200 bytes | 201F + 51R + 35W + 85
; backup af
push af
push ix
Expand All @@ -32,7 +32,7 @@ __i48mulu:
ld a, l ; a = b[0]
ld iy, (ix - 5) ; iy = b[1], b[2]

or a, a
; or a, a ; carry is already cleared
sbc hl, hl
push hl ; upper bytes of sum at -15
; Stack Use:
Expand Down
148 changes: 76 additions & 72 deletions src/crt/imulhu.src
Original file line number Diff line number Diff line change
Expand Up @@ -9,138 +9,142 @@
__imulhu:
; TODO: Optimize this routine; it is mostly a copy-paste of __i48mulu with some parts removed.
;
; CC: 118*r(PC)+39*r(SPL)+38*w(SPL)+37
; CC: 117 bytes | 118F + 39R + 38W + 37
push de
; CC: 113*r(PC)+41*r(SPL)+34*w(SPL)+37
; CC: 112 bytes | 113F + 41R + 34W + 37

; backup af
push af
push ix
ld ix, 0
add ix, sp

; Pushed onto the stack so the upper byte can be read back when needed
push de ; de will also be used to perform the actual multiplication
push hl
push iy
push bc

; bc = a[0], a[1]
ld a, l ; a = b[0]
ld iy, (ix - 5) ; iy = b[1], b[2]
ld ix, 0
push ix ; upper bytes of sum at (ix + 0)
add ix, sp

; or a, a ; carry is already cleared
sbc hl, hl
push hl ; upper bytes of sum at -15
; Stack Use:
; ix-1 : deu b[5]
; ix-2 : d b[4]
; ix-3 : e b[3]
; ix-4 : hlu b[2]
; ix-5 : h b[1]
; ix-6 : l b[0]
; ix-7 : iyu a[5]
; ix-8 : iyh a[4]
; ix-9 : iyl a[3]
; ix-10 : bcu a[2]
; ix-11 : b a[1]
; ix-12 : c a[0]
; ix-13 : sum[5]
; ix-14 : sum[4]
; ix-15 : sum[3]
; ix-16 : sum[2]
; ix-17 : sum[1]
; ix-18 : sum[0]
; ix + 14 : deu X[5]
; ix + 13 : d X[4]
; ix + 12 : e X[3]
; ix + 11 : hlu X[2]
; ix + 10 : h X[1]
; ix + 9 : l X[0]
; ix + 8 : iyu Y[5]
; ix + 7 : iyh Y[4]
; ix + 6 : iyl Y[3]
; ix + 5 : bcu Y[2]
; ix + 4 : b Y[1]
; ix + 3 : c Y[0]
; ix + 2 : sum[5]
; ix + 1 : sum[4]
; ix + 0 : sum[3]
; ix - 1 : sum[2]
; ix - 2 : sum[1]
; ix - 3 : sum[0]

ld iy, (ix + 10) ; iy = X[1], X[2]
; bc = Y[0], Y[1]
ld a, l ; a = X[0]

; ======================================================================
; sum[0-1]

; a[0]*b[0]
ld d, c ; d = a[0]
ld e, a ; e = b[0]
mlt de
push de ; lower bytes of sum at -18
; X[0]*Y[0]
; l = X[0]
ld h, c ; h = Y[0]
mlt hl
push hl ; lower bytes of sum at (ix - 3)

; ======================================================================
; sum[1-2]
ld l, d ; hl will store current partial sum
ld l, h ; hl will store current partial sum
ld h, 0

; a[1]*b[0]
ld d, b ; d = a[1]
ld e, a ; e = b[0]
; X[0]*Y[1]
ld e, a ; e = X[0]
ld d, b ; d = Y[1]
mlt de
add hl, de

; a[0]*b[1]
ld d, c ; d = a[0]
ld e, iyl ; e = b[1]
; X[1]*Y[0]
ld e, iyl ; e = X[1]
ld d, c ; d = Y[0]
mlt de
add hl, de

ld (ix - 17), hl
ld (ix - 2), hl

; ======================================================================
; sum[2-3]
ld hl, (ix - 16) ; hl will store current partial sum
ld hl, (ix - 1) ; hl will store current partial sum

; a[0]*b[2]
ld d, c ; d = a[0]
ld e, iyh ; e = b[2]
; X[2]*Y[0]
ld e, iyh ; e = X[2]
ld d, c ; d = Y[0]
mlt de
add hl, de

; a[1]*b[1]
ld d, b ; d = a[1]
ld e, iyl ; e = b[1]
; X[1]*Y[1]
ld e, iyl ; e = X[1]
ld d, b ; d = Y[1]
mlt de
add hl, de

; a[2]*b[0]
ld d, (ix - 10) ; d = a[2]
ld e, a ; e = b[0]
; X[0]*Y[2]
ld e, a ; e = X[0]
ld d, (ix + 5) ; d = Y[2]
ld c, d ; c = Y[2]
mlt de
add hl, de
ld d, c ; d = Y[2]

ld (ix - 16), hl
ld (ix - 1), hl

; ======================================================================
; sum[3-4]
ld hl, (ix - 15) ; hl will store current partial sum
ld hl, (ix + 0) ; hl will store current partial sum

; a[1]*b[2]
ld d, b ; d = a[1]
ld e, iyh ; e = b[2]
mlt de
add hl, de
; X[2]*Y[1]
ld c, iyh ; c = X[2]
; b = Y[1]
mlt bc
add hl, bc

; a[2]*b[1]
ld d, (ix - 10) ; d = a[2]
ld e, iyl ; e = b[1]
; X[1]*Y[2]
ld e, iyl ; e = X[1]
; d = Y[2]
mlt de
add hl, de

ld (ix - 15), hl
ld (ix + 0), hl

; ======================================================================
; sum[4-5]
ld hl, (ix - 14) ; hl will store current partial sum
ld hl, (ix + 1) ; hl will store current partial sum

; a[2]*b[2]
ld d, (ix - 10) ; d = a[2]
ld e, iyh ; e = b[2]
; X[2]*Y[2]
ld e, iyh ; e = X[2]
ld d, (ix + 5) ; d = Y[2]
mlt de
add hl, de

ld (ix - 14), l
ld (ix - 13), h
ld a, l ; ld (ix + 1), l
ld (ix + 2), h

; clean up stack and restore registers
pop de
pop hl ; reset SP
pop hl
ld h, a
pop bc
pop iy

ld sp, ix
pop de ; reset SP
pop de ; restore DE

pop ix
pop af
pop de
ret
20 changes: 10 additions & 10 deletions src/crt/llmulhu.src
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@ __llmulhu:
ld h, b
ld.s sp, hl

exx

lea hl, iy + 21
ld b, 8
.L.push_loop:
Expand All @@ -39,18 +41,17 @@ __llmulhu:
or a, a ; cf=0
djnz .L.push_loop

sbc hl, hl
ld e, l
ld d, h

exx
ld c, b
sbc hl, hl
ex de, hl
sbc hl, hl
ld c, l
ld b, l

exx

sbc hl, hl
ld e, l
ld d, h

.L.byte_loop:
scf
adc a, a
Expand All @@ -60,9 +61,8 @@ __llmulhu:

add ix, ix
adc hl, hl
ex de, hl
adc.s hl, hl
ex de, hl
rl e
rl d

exx
adc hl, hl
Expand Down
18 changes: 10 additions & 8 deletions src/crt/lmulhu.src
Original file line number Diff line number Diff line change
Expand Up @@ -14,20 +14,22 @@ __lmulhu:
ld iyl, a
push iy
push bc
ld iyl, iyh ; ld iy, 0
lea bc, iy + 0
inc de
dec.s de
ld d, b
ld c, e
lea de, iy + 0 ; UDE = 0, D = 0
ld e, c
ld c, d
ld b, d
call __llmulu
; E = B
; UHL = C
; H = UDE
; L = D
add iy, sp
scf
sbc hl, hl
add hl, sp
push de
ld e, (iy - 1) ; H = UDE
ld (iy - 1), c ; UHL = C
ld e, (hl) ; H = UDE
ld (hl), c ; UHL = C
pop hl ; UHL = C
ld h, e ; H = UDE
ld l, d ; L = D
Expand Down
Loading