From: Alan Cox Date: Mon, 11 Feb 2019 23:31:10 +0000 (+0000) Subject: 8080: 'fast' fork copier: 24 clocks/byte X-Git-Url: https://git.ndcode.org/public/gitweb.cgi?a=commitdiff_plain;h=600551efd9aea7cd1c6bd83caa064df4c9ef0762;p=FUZIX.git 8080: 'fast' fork copier: 24 clocks/byte Don't read this code after a large lunch --- diff --git a/Kernel/platform-v8080/tricks.s b/Kernel/platform-v8080/tricks.s index bf9250ae..cc070e9b 100644 --- a/Kernel/platform-v8080/tricks.s +++ b/Kernel/platform-v8080/tricks.s @@ -4,86 +4,81 @@ #include "../lib/8080fixedbank.s" .sect .common -! -! Copy all the user memory from bank a to bank c -! + +.define bankfork bankfork: - lxi d,0x18FE - lxi h,0 - mov b,a - ! We do D loops of E blocks. 8080 hasn't quite got enough - ! registers to do it in one go so we have to push/pop d thus - ! resetting E each cycle -outer: - push d -inner: - ! We do 8 bytes per loop and 254 loops per inner loop, so - ! 24 inner loops per run copies the needed space and a tiny shade - ! over (which in this case is fine as it's udata which we will - ! copy from common to common so do nothing to) - mov a,b - out 21 - mov d,m - mov a,c - out 21 - mov m,d - inx h - mov a,b - out 21 - mov d,m - mov a,c - out 21 - mov m,d - inx h - mov a,b - out 21 - mov d,m - mov a,c - out 21 - mov m,d - inx h - mov a,b - out 21 - mov d,m - mov a,c - out 21 - mov m,d - inx h - mov a,b - out 21 - mov d,m - mov a,c - out 21 - mov m,d - inx h - mov a,b - out 21 - mov d,m - mov a,c - out 21 - mov m,d - inx h - mov a,b - out 21 - mov d,m + sta patch1+1 mov a,c - out 21 - mov m,d - inx h - mov a,b - out 21 - mov d,m - mov a,c - out 21 - mov m,d - inx h - dcr e - jnz inner - mvi a,'@' - out 1 - pop d - dcr d - jnz outer + sta patch2+1 + lxi h,0 + dad sp + shld copy_done+1 ! patch stack restore in + ! Go from the break to 0-5 + lhld U_DATA__U_BREAK + lxi d,-6 ! move down 6 for the copier loop + dad d + sphl + mvi a,0xff ! 
end between 5 and 0 (which is fine) + sta patch3+1 + lxi h,copy_stack + jmp copier + ! + ! Go from BE00 to the stack pointer + ! +copy_stack: + lxi sp,0xBE00-6 + ! Trickier .. need to work out where to stop + lhld U_DATA__U_SYSCALL_SP + lxi d,-0x0106 ! 6 for the underrun 0x100 for the round down + dad d + mov a,h + sta patch3+1 + lxi h,copy_done + jmp copier +copy_done: + lxi h,0 + sphl ret + +copier: + shld patch4+1 +loop: + ! sp points to top of block +patch1: + mvi a,0 ! 7 + out 21 ! source bank 10 + pop h ! 10 + pop d ! 10 + pop b ! 10 +patch2: + mvi a,0 ! 7 + out 21 ! dest bank 10 + push b ! 11 + push d ! 11 + push h ! sp now back where it started 11 + lxi h,-6 ! 10 + dad sp ! 10 + sphl ! sp ready for next burst 5 + mov a,h ! 5 +patch3: + cpi 0 ! wrapped to FFFx 7 + jnz loop ! 10 + +! +! 144 cycles per 6 bytes = 24 per byte which is actually not far off +! a naive Z80 implementation and about half a good one. Still means +! a second to do the fork() bank copy on a 1MHz 8080. Not quite so bad +! on a 6MHz 8085 though 8) +! +! We halt at somewhere around xx05-xx00 so we have to tidy up by hand +! or accept an underrun. We go the overlap approach on the grounds +! it's cheap and our main overcopy is at most 5 bytes in common, +! whilst the bank to bank overcopy is harmless and small +! +! + xra a + out 21 +patch4: + jmp 0