From: Alan Cox <alan@linux.intel.com>
Date: Sat, 14 Jul 2018 00:30:37 +0000 (+0100)
Subject: trs80m1: remove double buffering from buffers, speed up user copies
X-Git-Url: https://git.ndcode.org/public/gitweb.cgi?a=commitdiff_plain;h=cc6f3193819dd86c06aa1c802c75cf254096fa87;p=FUZIX.git

trs80m1: remove double buffering from buffers, speed up user copies
---

diff --git a/Kernel/platform-trs80m1/buffers.c b/Kernel/platform-trs80m1/buffers.c
index dc2f6721..25c7e415 100644
--- a/Kernel/platform-trs80m1/buffers.c
+++ b/Kernel/platform-trs80m1/buffers.c
@@ -17,20 +17,19 @@ void blkfromk(void *kaddr, struct blkbuf *buf, uint16_t off, uint16_t len)
 }
 
 /*
- *	Slow crap implementation purely for testing. We need to go direct
- *	between bank 2 and user
+ *	This works because our uput and uget (see trs80-bank.s) switch to
+ *	kernel logical bank 2 when copying. Kernel bank 1 holds only code,
+ *	so any copy must be to or from common or bank 2. Without that
+ *	switch this would need a double buffer.
  */
-static uint8_t scratch2[512];
 void blktou(void *uaddr, struct blkbuf *buf, uint16_t off, uint16_t len)
 {
-    __builtin_memcpy(scratch2, buf->__bf_data + off, len);
-    uput(scratch2, uaddr, len);
+    uput(buf->__bf_data + off, uaddr, len);	/* copy len bytes from offset off of the buffer data straight to uaddr */
 }
 
 void blkfromu(void *uaddr, struct blkbuf *buf, uint16_t off, uint16_t len)
 {
-    uget(uaddr, scratch2, len);
-    __builtin_memcpy(buf->__bf_data + off, scratch2, len);
+    uget(uaddr, buf->__bf_data + off , len);	/* copy len bytes from uaddr straight into the buffer data at offset off */
 }
 
 static uint8_t scratchbuf[64];
diff --git a/Kernel/platform-trs80m1/fuzix.lnk b/Kernel/platform-trs80m1/fuzix.lnk
index 0c4157dc..56474c15 100644
--- a/Kernel/platform-trs80m1/fuzix.lnk
+++ b/Kernel/platform-trs80m1/fuzix.lnk
@@ -15,7 +15,6 @@ start.rel
 version.rel
 lowlevel-z80-banked.rel
 usermem.rel
-usermem_std-z80-banked.rel
 platform-trs80m1/tricks.rel
 platform-trs80m1/main.rel
 timer.rel
diff --git a/Kernel/platform-trs80m1/trs80-bank.s b/Kernel/platform-trs80m1/trs80-bank.s
index 542615b0..09b2010d 100644
--- a/Kernel/platform-trs80m1/trs80-bank.s
+++ b/Kernel/platform-trs80m1/trs80-bank.s
@@ -25,6 +25,8 @@
             .globl _procmem
 	    .globl vtbufinit
 	    .globl _trs80_mapper
+	    .globl ___hard_di
+	    .globl ___hard_irqrestore
 
 	    .globl bankpatch1
 	    .globl bankpatch2
@@ -32,6 +34,15 @@
 	    .globl s__COMMONMEM
 	    .globl l__COMMONMEM
 
+
+	    .globl __uget
+            .globl __ugetc
+	    .globl __ugetw
+            .globl __uput
+            .globl __uputc
+            .globl __uputw
+            .globl __uzero
+
             .include "kernel.def"
             .include "../kernel.def"
 
@@ -147,6 +158,8 @@ mapper_selector:
 	    ld a,#0x1f
 	    ld (bankpatch1 + 1),a
 	    ld (bankpatch2 + 1),a
+	    ld (bankpatch3 + 1),a
+	    ld (bankpatch4 + 1),a
 	    ret
 
 ;------------------------------------------------------------------------------
@@ -393,3 +406,184 @@ stub_ret:
 	push bc
 	ret			; and ret - can't use jp (ix) or jp (hl) here
 callhl:	jp (hl)
+
+;
+;	Fast inter-bank copier. Copies between kernel common, data or banked
+;	data (logical bank 2) and userspace, in either direction.
+;
+	.area _COMMONMEM
+
+;
+;	We could use sp and make this faster still but it gets ugly and
+;	complicated. Might be worth doing pairs of bytes though.
+;
+b2bcopy:			; in: B = source bank, C = destination bank
+	ld a, b
+	ld (patch0 + 1), a	; source bank
+	ld a, c
+	ld (patch1 + 1), a	; destination bank
+	exx			; swap in the other register set: HL source, DE dest, B/C loop counts
+b2b_loop:
+patch0:
+	ld a,#0			; operand overwritten above with the source bank
+bankpatch3:
+	out (0x43),a		; select the source bank (port operand may be patched via bankpatch3 + 1)
+	ld a,(hl)		; fetch one byte from the source
+	inc hl
+	ex af,af'		; keep the byte in A' while A is used for the bank select
+patch1:
+	ld a,#0			; operand overwritten above with the destination bank
+bankpatch4:
+	out (0x43),a		; select the destination bank (port operand may be patched via bankpatch4 + 1)
+	ex af,af'		; bring the byte back into A
+	ld (de),a		; store it at the destination
+	inc de
+	djnz b2b_loop		; inner loop counted by B
+	dec c			; outer loop counted by C
+	jr nz, b2b_loop
+	ret			; note: the register set swapped in by exx above is still active
+
+uputget:		; fetch count, source and destination from the caller's IX frame
+        ; load DE with the byte count
+        ld e, 10(ix) ; byte count
+        ld d, 11(ix)
+	ld a, d
+	or e
+	ret z		; no work (Z is left set for the caller's jr z)
+	dec de		; we return BC as a count for two 8bit loops
+	ld b, e		; not a 16bit value
+	inc b		; See http://map.grauw.nl/articles/fast_loops.php
+	inc d
+	ld c, d		; B = inner loop count, C = outer loop count
+        ; load HL with the source address
+        ld l, 6(ix) ; src address
+        ld h, 7(ix)
+        ; load DE with destination address (userspace for uput, kernel for uget)
+        ld e, 8(ix)
+        ld d, 9(ix)
+	ld a, b		; set Z from B|C so the caller can test for "nothing to do"
+	or c
+	ret
+
+__uput:
+	push ix			; build an IX frame so uputget can read the arguments
+	ld ix,#0
+	add ix,sp
+	call uputget
+	jr z, uput_out		; nothing to copy
+	exx			; park pointers and counts in the other register set
+	; Disable interrupts, returns old state in HL
+	; Use helper as NMOS Z80 has bugs in this area
+	call ___hard_di
+	; Our banks
+	ld a, (U_DATA__U_PAGE)	; presumably the current process page - confirm
+	ld c, a			; C = destination bank for b2bcopy
+	ld a, (map_bank2 + 1)	; kernel is source
+	ld b, a			; B = source bank for b2bcopy
+ucopier:			; shared tail, also entered from __uget
+	; Save the mapping
+	ld de, (map_reg)	; save old mapping
+	push hl			; old interrupt state returned by ___hard_di
+	push de
+	call b2bcopy
+	pop bc			; port and value
+	out (c), b		; put the saved mapping back
+	call ___hard_irqrestore	; NOTE(review): saved state is the word on top of the stack here - confirm helper's argument layout
+	pop hl			; drop the saved interrupt state
+uput_out:
+	pop ix
+	ld hl,#0		; return 0
+	ret
+
+__uget:
+	push ix			; build an IX frame so uputget can read the arguments
+	ld ix, #0
+	add ix, sp
+	call uputget
+	jr z, uput_out		; nothing to copy
+	exx			; park pointers and counts in the other register set
+	call ___hard_di		; per the note in __uput, old interrupt state comes back in HL
+	ld a, (U_DATA__U_PAGE)	; presumably the current process page - confirm
+	ld b, a			; B = source bank for b2bcopy
+	ld a, (map_bank2 + 1)	; kernel is destination
+	ld c, a			; C = destination bank for b2bcopy
+	jr ucopier		; share the copy and restore tail with __uput
+
+__uputc:
+	pop iy	;	bank
+	pop bc	;	return
+	pop de	;	char
+	pop hl	;	dest
+	push hl
+	push de
+	push bc
+	push iy			; stack is back as the caller left it
+	call map_process_save	; presumably maps the process in and remembers the old mapping - confirm
+	ld (hl), e		; store the low byte of the char argument at dest
+uputc_out:			; shared exit, also used by __uzero
+	jp map_kernel_restore			; map the kernel back below common
+
+__uputw:
+	pop iy			; same stack slot __uputc labels as the bank
+	pop bc	;	return
+	pop de	;	word
+	pop hl	;	dest
+	push hl
+	push de
+	push bc
+	push iy			; stack is back as the caller left it
+	call map_process_save	; presumably maps the process in and remembers the old mapping - confirm
+	ld (hl), e		; low byte first
+	inc hl
+	ld (hl), d		; then the high byte
+	jp map_kernel_restore	; tail jump, as in __uputc
+
+__ugetc:
+	pop de			; same stack slot __uputc labels as the bank
+	pop bc	; return
+	pop hl	; address
+	push hl
+	push bc
+	push de			; stack is back as the caller left it
+	call map_process_save	; presumably maps the process in and remembers the old mapping - confirm
+        ld l, (hl)
+	ld h, #0		; HL = the byte read, zero extended
+	jp map_kernel_restore	; tail jump, as in __uputc
+
+__ugetw:
+	pop de			; same stack slot __uputc labels as the bank
+	pop bc	; return
+	pop hl	; address
+	push hl
+	push bc
+	push de			; stack is back as the caller left it
+	call map_process_save	; presumably maps the process in and remembers the old mapping - confirm
+        ld a, (hl)
+	inc hl
+	ld h, (hl)		; high byte
+	ld l, a			; HL = the word read (low byte was held in A)
+	jp map_kernel_restore	; tail jump, as in __uputc
+
+__uzero:
+	pop iy			; same stack slot __uputc labels as the bank
+	pop de	; return
+	pop hl	; address
+	pop bc	; size
+	push bc
+	push hl
+	push de
+	push iy			; stack is back as the caller left it
+	ld a, b	; check for 0 copy
+	or c
+	ret z			; size 0: return without touching the mapping
+	call map_process_save	; presumably maps the process in and remembers the old mapping - confirm
+	ld (hl), #0		; zero the first byte
+	dec bc
+	ld a, b
+	or c
+	jp z, uputc_out		; only one byte was requested
+	ld e, l
+	ld d, h
+	inc de			; DE = HL + 1
+	ldir			; overlapping copy spreads the zero over the remaining BC bytes
+	jp uputc_out