From f01c20021c7d7942c2d0b9b221675ca7d139267b Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Tue, 12 Mar 2019 00:07:14 +0000 Subject: [PATCH] 8085: some basic 8085 optimizing work --- Kernel/cpu-8085/cpu.h | 1 + Kernel/cpu-8085/image.mk | 7 + Kernel/cpu-8085/rules.mk | 21 ++ Kernel/cpu-8085/std-commonmem.s | 45 +++ Kernel/cpu-8085/stdarg.h | 22 ++ Kernel/cpu-8085/stdbool.h | 8 + Kernel/lowlevel-8085.s | 597 ++++++++++++++++++++++++++++++++ Kernel/usermem_std-8085.s | 236 +++++++++++++ 8 files changed, 937 insertions(+) create mode 100644 Kernel/cpu-8085/cpu.h create mode 100644 Kernel/cpu-8085/image.mk create mode 100644 Kernel/cpu-8085/rules.mk create mode 100644 Kernel/cpu-8085/std-commonmem.s create mode 100644 Kernel/cpu-8085/stdarg.h create mode 100644 Kernel/cpu-8085/stdbool.h create mode 100644 Kernel/lowlevel-8085.s create mode 100644 Kernel/usermem_std-8085.s diff --git a/Kernel/cpu-8085/cpu.h b/Kernel/cpu-8085/cpu.h new file mode 100644 index 00000000..63eefc17 --- /dev/null +++ b/Kernel/cpu-8085/cpu.h @@ -0,0 +1 @@ +#include <../cpu-8080/cpu.h> diff --git a/Kernel/cpu-8085/image.mk b/Kernel/cpu-8085/image.mk new file mode 100644 index 00000000..d1053325 --- /dev/null +++ b/Kernel/cpu-8085/image.mk @@ -0,0 +1,7 @@ + +tools/8080map: tools/8080map.c + +tools/ack2kernel: tools/ack2kernel.c + +fuzix.bin: target $(OBJS) tools/8080map tools/ack2kernel + +$(MAKE) -C platform-$(TARGET) image diff --git a/Kernel/cpu-8085/rules.mk b/Kernel/cpu-8085/rules.mk new file mode 100644 index 00000000..1ed5489e --- /dev/null +++ b/Kernel/cpu-8085/rules.mk @@ -0,0 +1,21 @@ +export ACK_ROOT=$(shell tools/findack) + +export CROSS_AS=ack -mfuzix +export CROSS_LD=$(ACK_ROOT)/lib/ack/em_led +export CROSS_CC=ack +export CROSS_CCOPTS= -mfuzix -c -O2 -I$(ROOT_DIR)/cpu-$(CPU) -I$(ROOT_DIR)/platform-$(TARGET) -I$(ROOT_DIR)/include +export CROSS_CC_SEG2= +export CROSS_CC_SEG3= +export CROSS_CC_SEG4= +export CROSS_CC_SEGDISC= +export CROSS_CC_FONT= +export CROSS_CC_VIDEO= +export 
CROSS_CC_SYS1= +export CROSS_CC_SYS2= +export CROSS_CC_SYS3= +export CROSS_CC_SYS4= +export CROSS_CC_SYS5= +export ASOPTS=-c +export ASMEXT = .s +export BINEXT = .o +export BITS=16 diff --git a/Kernel/cpu-8085/std-commonmem.s b/Kernel/cpu-8085/std-commonmem.s new file mode 100644 index 00000000..0ec44574 --- /dev/null +++ b/Kernel/cpu-8085/std-commonmem.s @@ -0,0 +1,45 @@ +! +! +! Standard 8080 common memory area blocks. +! +! Must remain a multiple of 256 bytes +! +.define _ub +.define _udata +.define kstack_top +.define istack_top +.define istack_switched_sp + +_ub: ! first 512 bytes: starts with struct u_block, with the kernel stack working down from above +_udata: +kstack_base: + .data1 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + .data1 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + .data1 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + .data1 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + .data1 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + .data1 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + .data1 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + .data1 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + .data1 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + .data1 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + .data1 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + .data1 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + .data1 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + .data1 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + .data1 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + .data1 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +kstack_top: + + ! next 256 bytes: 254 byte interrupt stack, then 2 byte saved stack pointer +istack_base: + .data1 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + .data1 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + .data1 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + .data1 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + .data1 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + .data1 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + .data1 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + .data1 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +istack_top: +istack_switched_sp: .data2 0 diff --git a/Kernel/cpu-8085/stdarg.h b/Kernel/cpu-8085/stdarg.h new file mode 100644 index 00000000..dc36919d --- /dev/null +++ b/Kernel/cpu-8085/stdarg.h @@ -0,0 +1,22 @@ +/* + * stdarg.h - variable arguments + * + * From the ack cc + * + * (c) copyright 1987 by the Vrije Universiteit, Amsterdam, The Netherlands. + * See the copyright notice in the ACK home directory, in the file "Copyright". 
+ */
+/* $Id$ */
+
+#ifndef _STDARG_H
+#define _STDARG_H
+
+typedef char* va_list;	/* cursor that walks the stacked arguments */
+
+#define __vasz(x) ((sizeof(x)+sizeof(int)-1) & ~(sizeof(int) -1))	/* sizeof(x) rounded up to a multiple of sizeof(int) */
+
+#define va_start(ap, parmN) (ap = (va_list)&parmN + __vasz(parmN))	/* point ap just past the last named parameter */
+#define va_arg(ap, type) (*((type *)(void *)((ap += __vasz(type)) - __vasz(type))))	/* fetch one argument and step ap over it */
+#define va_end(ap)	/* expands to nothing */
+
+#endif
diff --git a/Kernel/cpu-8085/stdbool.h b/Kernel/cpu-8085/stdbool.h
new file mode 100644
index 00000000..0172aefd
--- /dev/null
+++ b/Kernel/cpu-8085/stdbool.h
@@ -0,0 +1,8 @@
+#ifndef __STDBOOL_H
+#define __STDBOOL_H
+
+typedef unsigned char bool;	/* one byte boolean */
+
+#define true 1
+#define false 0
+#endif
diff --git a/Kernel/lowlevel-8085.s b/Kernel/lowlevel-8085.s
new file mode 100644
index 00000000..7e920adc
--- /dev/null
+++ b/Kernel/lowlevel-8085.s
@@ -0,0 +1,597 @@
+#
+!
+!	8085 low level code
+!
+!	Much the same as the Z80 code except we don't provide in and out
+!	helpers because it's impossible to make them re-entrant for any port
+!
+
+#include "kernel-8080.def"
+
+.sect .common
+
+deliver_signals:		! Run the handler for the pending signal, return at once if none
+	lda U_DATA__U_CURSIG
+	ora a
+	rz
+deliver_signals_2:		! Entry for callers that already hold the non-zero signal in A
+	mov l,a
+	mvi h,0
+	dad h			! HL = signal * 2, the vectors are 16 bit
+	lxi d,U_DATA__U_SIGVEC
+	dad d
+	mov e,m
+	inx h
+	mov d,m			! DE = handler vector for this signal
+
+	! Build the return frame first: lxi b overwrites both B and C
+	lxi b,signal_return
+	push b
+
+	mov c,a			! BC = signal number to pass into the helper
+	mvi b,0
+	xra a
+	sta U_DATA__U_CURSIG	! the signal is no longer pending
+	!
+	!	Do we need to zero check de here ?
+	!
+	mov a,d
+	ora e
+	jz signal_return		! raced
+	!
+	!	Off we go. DE = vector BC = signal
+	!
+	!	FIXME: if we ever have 8080 binaries with different load
+	!	addresses we will need to fix this
+	!
+	EI
+	lhld PROGLOAD+16		! signal vector
+	pchl			! the helper returns through the frame pushed above
+signal_return:			! Back from the handler: recheck for further signals
+	DI
+	lxi h,0
+	dad sp			! HL = SP, there is no direct way to store SP
+	shld U_DATA__U_SYSCALL_SP
+	lxi sp,kstack_top	! borrow the kernel stack for the C call
+	mvi a,1
+	sta _int_disabled
+	call map_kernel_di
+	call _chksigs
+	call map_process_always_di
+	lhld U_DATA__U_SYSCALL_SP
+	sphl			! back on the stack we arrived with
+	jmp deliver_signals	! loop until no signal is pending
+
+.define unix_syscall_entry
+
+unix_syscall_entry:		! System call entry: call number in A, arguments on the stack
+	push b		! Must preserve the frame pointer
+
+
+	sta U_DATA__U_CALLNO	! A is still the caller's syscall number
+	! Oh for LDIR
+	! Unroll this for speed. Syscall arguments into constant locations
+	ldsi 6		! Find arguments on stack frame into DE
+	lhlx			! HL = word at (DE)
+	shld U_DATA__U_ARGN
+	ldsi 8
+	lhlx
+	shld U_DATA__U_ARGN+2
+	ldsi 10
+	lhlx
+	shld U_DATA__U_ARGN+4
+	ldsi 12
+	lhlx
+	shld U_DATA__U_ARGN+6
+
+	DI
+
+	! We are now in kernel space
+	mvi a,1
+	sta U_DATA__U_INSYS
+	! Switch stacks
+	! On 8080 this is a bit more long winded as we have to go via HL
+	ldsi 0		! effectively DE = SP
+	xchg
+	shld U_DATA__U_SYSCALL_SP
+	lxi sp, kstack_top
+	!
+	!	Now map the kernel and call it
+	!
+	call map_kernel_di
+	EI
+	call _unix_syscall
+	xchg
+	!
+	!	Remember fork and execve don't necessarily return this way and fork
+	!	can do it twice
+	!
+	DI
+	call map_process_always
+	xra a
+	sta U_DATA__U_INSYS
+	! Switch stack back
+	lhld U_DATA__U_SYSCALL_SP
+	sphl
+	lhld U_DATA__U_RETVAL
+	xchg			! DE = return value
+	lhld U_DATA__U_ERROR	! HL = error code, 0 when the call succeeded
+	!
+	!	Signal check
+	!
+	lda U_DATA__U_CURSIG
+	ora a
+	jnz via_signal
+unix_return:			! Expects DE = return value, HL = error code
+	mov a,h
+	ora l			! also clears carry for the success path
+	jz not_error
+	stc
+	! Carry and errno in HL as expected
+	jmp unix_pop
+not_error:
+	! Retval in HL as the Z80 kernel returns it
+	xchg
+unix_pop:
+	pop b			! the frame pointer saved on entry
+	! ret must directly follow the ei
+	EI
+	ret
+via_signal:			! A = pending signal number on arrival
+	!
+	!	Stack the state (a signal doing a syscall will change the
+	!	U_DATA fields but we must return the old error/status)
+	!
+	lhld U_DATA__U_ERROR
+	push h
+	lhld U_DATA__U_RETVAL
+	push h
+	!
+	!	And into the signal delivery path
+	!
+	call deliver_signals_2
+	pop d			! saved return value
+	pop h			! saved error code
+	jmp unix_return
+
+!
+!	Called when execve() completes to transition to the user, as we
+!	don't return from execve() via the syscall path
+!
+!
+.define _doexec
+
+_doexec:
+	DI
+	call map_process_always
+	pop b			! discard our return address, we never go back
+	pop d			! the stacked argument: address to start at
+	lhld U_DATA__U_ISP
+	sphl			! switch to the initial user stack
+	xra a
+	sta U_DATA__U_INSYS
+	xchg			! HL = start address
+	lxi d,PROGLOAD		! handed to the program in DE
+	EI
+	pchl
+!
+!	NULL trap. Must live in common space
+!
+!	FIXME: Rewrite 68000 style as a synchronous trap
+!
+.define null_handler
+null_handler:			! Signal 7 to the current process, or trap if the kernel faulted
+	lda U_DATA__U_INSYS
+	ora a
+	jnz trap_illegal	! inside a system call: nobody to signal
+	lda _inint
+	ora a
+	jnz trap_illegal	! likewise inside an interrupt
+	lxi h,7			! second argument: the signal number
+	push h
+	lhld U_DATA__U_PTAB
+	ldhi P_TAB__P_PID_OFFSET	! DE = HL + offset of the pid field
+	lhlx			! HL = (DE) = our pid, the first argument
+	push h
+	lxi h,39		! fills the stack word the entry code skips over
+	push h
+	mvi a,39		! kill: the entry code takes the call number from A
+	call unix_syscall_entry
+	lxi h,0xffff		! exit -1
+	push h
+	dcx h			! any value will do, this word is skipped too
+	push h
+	xra a			! call number 0 is _exit
+	call unix_syscall_entry	! Never returns
+
+trap_illegal:
+	lxi h,illegalmsg
+traphl:				! Print the string at HL and enter the monitor
+	call outstring
+	call _platform_monitor	! NOTE(review): falls into nmi_handler if this ever returns
+
+.define nmi_handler
+
+nmi_handler:
+	call map_kernel_di
+	lxi h,nmimsg
+	jmp traphl
+
+illegalmsg:
+	.asciz '[illegal]'
+nmimsg:
+	.asciz '[NMI]'
+
+!
+!	Interrupts are similar to Z80 but we have a lot less state
+!	to store, and rather trickier juggling to get signals nice
+!
+.define interrupt_handler
+
+interrupt_handler:
+	push psw
+	INT_ENTER
+	push b
+	push d
+	push h
+	call platform_interrupt_all
+	! Switch stacks
+	ldsi 0			! DE = SP
+	xchg
+	shld istack_switched_sp	! remember the interrupted stack
+	lxi sp,istack_top
+
+	!
+	!	Map the kernel
+	!
+	lda 0			! sample address 0 before the mapping changes
+	call map_save_kernel	! presumably preserves A - confirm in the platform code
+	cpi 0xC3		! 0xC3 is the JMP opcode expected at address 0
+	cnz null_pointer_trap	! something overwrote it
+	!
+	!	Set up state and enter kernel
+	!
+	mvi a,1
+	sta _inint
+	sta U_DATA__U_ININTERRUPT
+	sta _int_disabled
+	!
+	!	What we avoid in register saves over Z80 we make up for in
+	!	runtime stuff
+	!
+	lhld .retadr
+	push h
+	lhld .bcreg
+	push h
+	lhld .tmp1
+	push h
+	lhld .areg		! 16 bit load of a one byte cell, see the FIXME below
+	push h
+	call _platform_interrupt
+	pop h
+	mov a,l			! only the low byte belongs to .areg
+	sta .areg		! FIXME: add a pad byte to .areg instead
+	pop h
+	shld .tmp1
+	pop h
+	shld .bcreg
+	pop h
+	shld .retadr
+	!
+	!	Undo state
+	!
+	xra a
+	sta _inint
+	!
+	!	Do we need to task switch ?
+	!
+	lda _need_resched
+	ora a
+	jnz preemption
+	!
+	!	Switch stacks back
+	!
+	call map_restore
+	lhld istack_switched_sp
+	sphl
+intout:
+	xra a
+	sta U_DATA__U_ININTERRUPT
+	lda U_DATA__U_INSYS
+	ora a
+	jnz interrupt_pop	! interrupted a system call: no signal delivery here
+	call deliver_signals
+	!
+	!	Restore registers and done
+	!
+interrupt_pop:			! Common exit: unwind the pushes made on interrupt entry
+	xra a
+	sta _int_disabled
+	pop h
+	pop d
+	pop b
+	INT_EXIT
+	pop psw
+	ei
+	ret
+
+null_pointer_trap:		! Put the 0xC3 byte back at address 0 and raise signal 11
+	mvi a,0xc3
+	sta 0
+	lxi h,11
+trap_signal:			! Call _ssig with the current process and the signal in HL
+	push h			! second argument: signal number
+	lhld U_DATA__U_PTAB
+	push h			! first argument: current process
+	call _ssig
+	pop h			! drop both arguments
+	pop h
+	ret
+
+!
+!	Now the scary stuff - preempting
+!
+preemption:
+	xra a
+	sta _need_resched
+	!
+	!	Save our original stack in syscall_sp
+	!	Move to our kernel stack (free because we don't preempt
+	!	in kernel)
+	!
+	lhld istack_switched_sp
+	shld U_DATA__U_SYSCALL_SP
+	lxi sp,kstack_top
+	!
+	!	Mark ourselves as in a system call
+	!
+	mvi a,1
+	sta U_DATA__U_INSYS
+	call _chksigs
+	lhld U_DATA__U_PTAB
+	mvi a,P_RUNNING
+	cmp m			! compares the byte the process pointer addresses
+	jnz not_running
+	mvi m,P_READY		! still runnable, so only demote it to ready
+not_running:
+	!
+	!	We will disappear into this and reappear somewhere else. In
+	!	time we will reappear here
+	!
+	call _platform_switchout
+	!
+	!	We are back in the land of the living so no longer in
+	!	syscall or interrupt state
+	!
+	xra a
+	sta U_DATA__U_ININTERRUPT
+	sta U_DATA__U_INSYS
+	!
+	!	Get our mapping back
+	!
+	call map_process_always_di
+	!
+	!	And our stack
+	!
+	lhld U_DATA__U_SYSCALL_SP
+	sphl
+	lda U_DATA__U_CURSIG
+	ora a
+	cnz deliver_signals_2	! A holds the pending signal number
+	jmp interrupt_pop
+
+!
+!	Debug code
+!
+.define outstring
+
+outstring:			! Print the zero terminated string at HL using outchar
+	mov a,m
+	ora a
+	rz			! stop at the terminating zero byte
+	call outchar
+	inx h
+	jmp outstring
+
+.define outstringhex
+
+outstringhex:			! As outstring, but each byte as two hex digits and a space
+	mov a,m
+	ora a
+	rz
+	call outcharhex
+	mvi a,0x20
+	call outchar
+	inx h
+	jmp outstringhex
+
+.define outnewline
+
+outnewline:			! Print CR then LF
+	mvi a,0x0d
+	call outchar
+	mvi a,0x0a
+	jmp outchar
+
+.define outhl
+
+outhl:				! Print HL as four hex digits, A and flags preserved
+	push psw
+	mov a,h
+	call outcharhex
+	mov a,l
+	call outcharhex
+	pop psw
+	ret
+
+.define outde
+
+outde:				! Print DE as four hex digits, A and flags preserved
+	push psw
+	mov a,d
+	call outcharhex
+	mov a,e
+	call outcharhex
+	pop psw
+	ret
+
+.define outbc
+
+outbc:				! Print BC as four hex digits, A and flags preserved
+	push psw
+	mov a,b
+	call outcharhex
+	mov a,c
+	call outcharhex
+	pop psw
+	ret
+
+.define outcharhex
+
+outcharhex:			! Print A as two hex digits, BC, A and flags preserved
+	push b
+	push psw
+	mov c,a			! keep the byte for the low digit
+	rar			! four rotates bring the high nibble down; whatever
+	rar			! is rotated in at the top is masked by outnibble
+	rar
+	rar
+	call outnibble
+	mov a,c
+	call outnibble
+	pop psw
+	pop b
+	ret
+
+outnibble:			! Print the low four bits of A as one hex digit
+	ani 0x0f
+	cpi 10
+	jc numeral
+	adi 7			! skip from '9' up to 'A'
+numeral:
+	adi 0x30		! '0'
+	jmp outchar
+
+
+.define ___hard_ei
+
+___hard_ei:			! Enable interrupts and clear the software disabled flag
+	xra a
+	sta _int_disabled
+	EI
+	ret
+
+.define ___hard_di
+
+___hard_di:			! Disable interrupts, previous _int_disabled value back in E
+	lxi h, _int_disabled
+	di
+	mov a,m
+	mvi m,1
+	mov e,a			! D is not written here
+	ret
+
+.define ___hard_irqrestore
+
+___hard_irqrestore:		! Restore the state a previous ___hard_di returned
+	ldsi 2			! DE = address of the stacked argument
+	di
+	ldax d
+	sta _int_disabled
+	ora a
+	rnz			! non-zero: interrupts were off, leave them off
+	ei
+	ret
+
+!
+!	Identify 8080 variants. We don't worry about Z80 variants. The 8080
+!	kernel doesn't work on a Z80 so we don't care which one we have.
+!
+.define _cpu_detect
+
+_cpu_detect:			! Result in DE: 0 unsupported, else 0x8080, 0x9080 or 0x8085
+	! Ok start with the flags
+	mvi a,255
+	inr a
+	push psw
+	pop h			! L = flags byte
+	mov a,l
+	ani 0x82
+	cpi 0x80
+	jz lr35902
+	ora a
+	jnz is808x
+	lxi d,0		! Z80: we don't care which kind. It's simply not allowed
+	ret
+lr35902:
+	lxi d,0		! also not allowed
+	ret
+is808x:
+	xra a
+	rim		! no-op on 8080
+	ora a
+	jnz is8085	! it changed must be an 8085
+	!
+	!	But it could really be 0
+	!
+	inr a
+	rim
+	ora a
+	jz is8085
+	!
+	!	TODO: check for KP580M1
+	!
+	lxi d,0x8080		! hex like the others so that only D differs below
+	!
+	!	But wait it might be a 9080
+	!
+	mvi a,255
+	ani 255
+	push psw
+	pop h
+	mov a,l
+	ani 0x10		! half carry is zero on AMD
+	rnz
+	mvi d,0x90		! DE = 0x9080
+	ret
+is8085:
+	lxi d,0x8085
+	ret
+
+
+!
+!	We need to worry about bits of this in interrupt save and restore
+!
+
+.define .trapproc, .retadr,.bcreg,.areg,.tmp1
+
+.trapproc: .data2 0
+.retadr: .data2 0
+.bcreg: .data2 0
+.areg: .data1 0
+.tmp1: .data2 0
+
+!
+!	Errors from the runtime
+!
+.define eunimpl,eoddz,ecase,eidivz
+
+eunimpl:
+	lxi h,unimp
+	jmp kerboom
+eoddz:
+	lxi h,ddz
+	jmp kerboom
+ecase:
+	lxi h,case
+	jmp kerboom
+eidivz:
+	lxi h,divz
+kerboom:			! Print the message at HL and enter the monitor
+	call outstring
+	call _platform_monitor	! NOTE(review): string data follows, this must not return
+
+unimp: .asciz 'rt:unimp'
+ddz: .asciz 'rt:ddz'
+case: .asciz 'rt:case'
+divz: .asciz 'rt:div0'
diff --git a/Kernel/usermem_std-8085.s b/Kernel/usermem_std-8085.s
new file mode 100644
index 00000000..82a89f3b
--- /dev/null
+++ b/Kernel/usermem_std-8085.s
@@ -0,0 +1,236 @@
+#include "kernel-8080.def"
+
+!
+!	Simple implementation for now. Should be optimized
+!
+
+.sect .common
+
+.define __uputc
+
+!	Store the byte stacked at SP+2 to the user address stacked at SP+4.
+!	Like the other store helpers this leaves 0 in DE.
+__uputc:
+	ldsi 2
+	ldax d			! A = the byte to store
+	ldsi 4
+	lhlx			! HL = user address
+	mov d,a			! park the byte, A may not survive the map call
+	call map_process_always
+	mov m,d
+	lxi d,0
+	jmp map_kernel
+
+.define __uputw
+
+__uputw:			! Store the word at SP+2 to the user address at SP+4
+	ldsi 2
+	lhlx		! data into HL
+	push h
+	ldsi 6		! 4 but we added a new 2 byte push
+	lhlx		! address into HL
+	xchg		! address now DE
+	pop h		! data now HL
+	call map_process_always
+	shlx		! (DE)=HL
+	lxi d,0
+	jmp map_kernel
+
+.define __ugetc
+
+__ugetc:			! Fetch the byte at the user address at SP+2 into DE
+	ldsi 2
+	lhlx
+	call map_process_always
+	mov e,m
+	mvi d,0			! zero extend
+	jmp map_kernel
+
+.define __ugetw
+
+__ugetw:			! Fetch the word at the user address at SP+2 into DE
+	ldsi 2
+	lhlx		! address into HL
+	call map_process_always
+	xchg		! address now DE
+	lhlx		! data into HL
+	xchg		! and into DE
+	jmp map_kernel
+
+.define __uget
+
+!
+!	Stacked arguments are src.w, dst.w, count.w
+!
+__uget:				! Copy count bytes from user src to kernel dst
+	push b
+	lxi h,9		! End of count argument
+	dad sp
+	mov b,m
+	dcx h
+	mov c,m
+	mov a,c
+	ora b
+	jz nowork
+	dcx h
+	mov d,m		! Destination
+	dcx h
+	mov e,m
+	dcx h
+	mov a,m			! source, high byte first
+	dcx h
+	mov l,m
+	mov h,a
+	!
+	!	So after all that work we have HL=src DE=dst BC=count
+	!	and we know count != 0.
+	!
+	!	Simple unoptimized copy loop for now. Horribly slow for
+	!	things like 512 byte disk blocks
+	!
+ugetcopy:
+	call map_process_always
+	mov a,m			! read with the process mapped
+	call map_kernel
+	stax d			! write with the kernel mapped
+	inx h
+	inx d
+	dcx b
+	mov a,b
+	ora c
+	jnz ugetcopy
+nowork:				! Shared exit: restore BC and leave 0 in DE
+	pop b
+	lxi d,0
+	ret
+
+
+.define __uput
+
+__uput:				! Copy count bytes from kernel src to user dst
+	push b
+	lxi h,9		! End of count argument
+	dad sp
+	mov b,m
+	dcx h
+	mov c,m
+	mov a,c
+	ora b
+	jz nowork
+	dcx h
+	mov d,m		! Destination
+	dcx h
+	mov e,m
+	dcx h
+	mov a,m			! source, high byte first
+	dcx h
+	mov l,m
+	mov h,a
+	!
+	!	So after all that work we have HL=src DE=dst BC=count
+	!	and we know count != 0.
+	!
+	!	Simple unoptimized copy loop for now. Horribly slow for
+	!	things like 512 byte disk blocks
+	!
+uputcopy:
+	mov a,m			! read with the kernel mapped
+	call map_process_always
+	stax d			! write with the process mapped
+	call map_kernel
+	inx h
+	inx d
+	dcx b
+	mov a,b
+	ora c
+	jnz uputcopy
+	pop b
+	lxi d,0
+	ret
+
+
+.define __uzero
+
+__uzero:			! Zero the user memory at SP+2 for the length at SP+4
+	push b
+	ldsi 6		! length
+	xchg
+	mov c,m		! check timing versus lhlx and moves
+	inx h
+	mov b,m
+	ldsi 4
+	lhlx		! HL is now pointer
+
+	mov a,b
+	ora c
+	jz nowork
+
+	call map_process_always
+
+	xchg		! pointer into DE so we can use SHLX
+
+	lxi h,0
+
+	mov a,b		! Divide length by 2
+	rar
+	mov b,a
+	mov a,c
+	rar
+	mov c,a
+	jnc nosingle	! If we have an odd byte zero it
+	xra a
+	stax d
+	inx d
+nosingle:		! Divide from words to dwords
+	mov a,b
+	rar
+	mov b,a
+	mov a,c
+	rar
+	mov c,a
+	jnc nopair
+	shlx		! Zero the odd word
+	inx d
+	inx d
+nopair:
+	mov a,b		! Divide from dwords to 8 bytes
+	rar
+	mov b,a
+	mov a,c
+	rar
+	mov c,a
+	jnc noquad
+	shlx		! Zero the odd dword
+	inx d
+	inx d
+	shlx
+	inx d
+	inx d
+noquad:
+	mov a,b		! Strip any rotated in carry bits
+	ani 0x1F
+	mov b,a
+	ora c		! Check if there is any work left after we did the
+			! small bits
+	jz noquads
+	dcx b		! NK works for FFFF not 0000
+loop:
+	shlx		! Finally let rip and zero each 8 byte block
+	inx d
+	inx d
+	shlx
+	inx d
+	inx d
+	shlx
+	inx d
+	inx d
+	shlx
+	inx d
+	inx d
+	dcx b
+	jnk loop		! until the count wraps below zero
+noquads:
+	call map_kernel
+	pop b
+	lxi d,0			! return 0 as the zero length path through nowork does
+	ret
-- 
2.34.1