--- /dev/null
+/*
+ * This module provides bank support for the WDC65C816
+ *
+ * The low 64K is assumed to contain the I/O space, some RAM used
+ * for stacks and DP areas. We don't make any direct attempt to use the
+ * rest of the low 64K, although on a box with lots of low RAM you could
+ * put the kernel there too.
+ *
+ * Banks 1...n hold processes (each has 64K) or the kernel. We could but
+ * don't support split I/D (different code/data page) at this point.
+ *
+ * Beyond that it's a normal banked platform. The oddities are that we
+ * have no common, and that we must also swap the ZP/Stack separately
+ * from bank 0, as well as the program bank.
+ *
+ * Define MAP_SIZE for the space in the 64K available to the process,
+ * this needs to be no more than 63.5K (udata and kstack copies...)
+ */
+
+#include <kernel.h>
+#include <kdata.h>
+#include <printf.h>
+
+#ifdef CONFIG_BANK_65C812
+
+/* Indexed by process table slot (p - ptab). The value is handed to
+   swapwrite/swapread as the address, in page 0, of the 512 bytes the swap
+   code below describes as "the kstack and zero page" */
+uint16_t pzero[NPROC];
+/* Kernel is 0, apps 1,2,3 etc */
+/* Stack of free banks: entries 0..pfptr-1 are free */
+static unsigned char pfree[MAX_MAPS];
+static unsigned char pfptr = 0;
+/* Value of pfptr after the most recent pagemap_add() */
+static unsigned char pfmax;
+
+/*
+ *	Hand a bank to the allocator: push it on the free stack and
+ *	remember the new stack depth as the total.
+ */
+void pagemap_add(uint8_t page)
+{
+	pfree[pfptr] = page;
+	pfptr++;
+	pfmax = pfptr;
+}
+
+/*
+ *	Return the bank owned by p to the free stack. A process with
+ *	page 0 (the kernel's bank) has nothing to free, so that panics.
+ */
+void pagemap_free(ptptr p)
+{
+	if (!p->p_page)
+		panic(PANIC_FREE0);
+	pfree[pfptr] = p->p_page;
+	pfptr++;
+}
+
+/*
+ *	Give p a bank from the free stack.
+ *	Returns 0 on success, ENOMEM if no bank is free.
+ */
+int pagemap_alloc(ptptr p)
+{
+#ifdef SWAPDEV
+	/* Nothing free: call swapneeded() (presumably to evict something)
+	   and then look again */
+	if (!pfptr)
+		swapneeded(p, 1);
+#endif
+	if (!pfptr)
+		return ENOMEM;
+	pfptr--;
+	p->p_page = pfree[pfptr];
+	return 0;
+}
+
+/* Realloc is trivial - nothing is moved or resized, the request is only
+   checked against MAP_SIZE. Returns 0 if it fits, ENOMEM otherwise */
+int pagemap_realloc(usize_t size)
+{
+	return (size > MAP_SIZE) ? ENOMEM : 0;
+}
+
+/*
+ *	Memory in use: banks handed out (total added minus those on the
+ *	free stack) times the per-bank size, MAP_SIZE >> 10.
+ */
+usize_t pagemap_mem_used(void)
+{
+	int banks_in_use = pfmax - pfptr;
+
+	return banks_in_use * (MAP_SIZE >> 10);
+}
+
+#ifdef SWAPDEV
+/*
+ *	Swap out the memory of a process to make room
+ *	for something else
+ *
+ *	Layout of a swap slot, in blocks from map * SWAP_SIZE:
+ *	  +0	the 512 bytes of kstack and zero page taken from page 0
+ *	  +1..	the process bank from SWAPBASE to SWAPTOP
+ *	swapin() below reads the slot back using the same layout.
+ *
+ *	Returns 0 on success, ENOMEM if no swap slot could be allocated.
+ *	Panics if the process has no page (already swapped).
+ *
+ *	NOTE(review): this assumes swap blocks are 512 bytes and that
+ *	SWAP_SIZE leaves room for the extra leading block - confirm against
+ *	the platform configuration.
+ *
+ *	FIXME: we can write out base - p_top, then the udata providing
+ *	we also modify our read logic here as well
+ */
+int swapout(ptptr p)
+{
+	uint16_t page = p->p_page;
+	uint16_t blk;
+	uint16_t map;
+
+	if (!page)
+		panic(PANIC_ALREADYSWAP);
+#ifdef DEBUG
+	kprintf("Swapping out %x (%d)\n", p, p->p_page);
+#endif
+	/* Are we out of swap ? */
+	map = swapmap_alloc();
+	if (map == 0)
+		return ENOMEM;
+	blk = map * SWAP_SIZE;
+	/* Write the kstack and zero page to disk */
+	swapwrite(SWAPDEV, blk, 512, pzero[p-ptab], 0);
+	/* Write the app (and possibly the uarea etc..) to disk, after the
+	   block used above so that the two do not overwrite each other */
+	swapwrite(SWAPDEV, blk + 1, SWAPTOP - SWAPBASE, SWAPBASE, p->p_page);
+	pagemap_free(p);
+	p->p_page = 0;
+	/* Remember which swap slot holds the process */
+	p->p_page2 = map;
+#ifdef DEBUG
+	kprintf("%x: swapout done %d\n", p, p->p_page);
+#endif
+	return 0;
+}
+
+/*
+ *	Swap ourself in: must be on the swap stack when we do this
+ *
+ *	p must already have been given a page; map is the swap slot that
+ *	swapout() stored the process in. If p has no page a message is
+ *	printed and nothing is read.
+ */
+void swapin(ptptr p, uint16_t map)
+{
+	uint16_t blk = map * SWAP_SIZE;
+
+#ifdef DEBUG
+	kprintf("Swapin %x, %d\n", p, p->p_page);
+#endif
+	if (!p->p_page) {
+		kprintf("%x: nopage!\n", p);
+		return;
+	}
+	/* Read the kstack and zero page from disk */
+	swapread(SWAPDEV, blk, 512, pzero[p-ptab], 0);
+	/* Read the process back in from the blocks that follow */
+	swapread(SWAPDEV, blk + 1, SWAPTOP - SWAPBASE, SWAPBASE, p->p_page);
+#ifdef DEBUG
+	kprintf("%x: swapin done %d\n", p, p->p_page);
+#endif
+}
+
+#endif
+
+#endif
--- /dev/null
+/*
+ * We treat the 65c816 as a glorified 6502.
+ * Processes are executed in 65c816 mode with their own 64K bank set by
+ * the bank registers. The CPU stack is 256 bytes in low 64K, and the
+ * direct page likewise.
+ *
+ * Because cc65 stores temporaries and return addresses on the CPU stack,
+ * and uses ZP for register variables (which in C you cannot take the
+ * address of), this works fine for cc65 apps. The kernel likewise only
+ * has to worry about 16bit pointers except for user copies and asm bits.
+ */
+
+/* Fixed width integer types, spelled out in terms of this compiler's
+   basic types */
+typedef unsigned long uint32_t;
+typedef signed long int32_t;
+typedef unsigned short uint16_t;
+typedef signed short int16_t;
+typedef unsigned char uint8_t;
+typedef signed char int8_t;
+typedef unsigned int size_t;
+typedef signed int ssize_t;
+
+/* Type used to hold saved interrupt state */
+typedef uint8_t irqflags_t;
+
+/* Types used at the user/kernel boundary: everything is 16 bits */
+typedef int16_t arg_t;
+typedef uint16_t uarg_t; /* Holds arguments */
+typedef uint16_t usize_t; /* Largest value passed by userspace */
+typedef int16_t susize_t;
+typedef uint16_t uaddr_t;
+typedef uint16_t uptr_t; /* User pointer equivalent */
+
+/* The pointer and int user copy helpers are aliases of the word helpers */
+#define uputp uputw /* Copy user pointer type */
+#define ugetp ugetw /* between user and kernel */
+#define uputi uputw /* Copy user int type */
+#define ugeti ugetw /* between user and kernel */
+
+/* String/memory helpers, declared with the __fastcall__ convention */
+extern void * __fastcall__ memcpy(void *, void *, size_t);
+extern void * __fastcall__ memset(void *, int, size_t);
+extern size_t __fastcall__ strlen(const char *);
+
+#define EMAGIC 0x4C /* Header of executable (JMP) */
+#define EMAGIC_2 0x38 /* SEC BCS foo */
+/* We use SEC BCS not CLC BCC because CLC is 0x18 which is the Z80 JR header
+ so the two would be identical - not good! */
+
+
+/* High byte is saved, low byte is a mystery so take worst case. Also allow
+ a bit less as C stack is not return stack */
+/* That is: round the saved syscall stack pointer up to the last byte of
+   its 256 byte page, then keep 384 bytes clear below it */
+#define brk_limit() ((((uint16_t)udata.u_syscall_sp) | 0xFF) - 384)
+
+/* Expands to plain static on this CPU */
+#define staticfast static
+
+/* User's structure for times() system call */
+typedef unsigned long clock_t;
+
+/* Time value held as two 32-bit halves */
+typedef struct {
+ uint32_t low;
+ uint32_t high;
+} time_t;
+
+/* Tick counter that can be read whole or as two 16-bit halves */
+typedef union { /* this structure is endian dependent */
+ clock_t full; /* 32-bit count of ticks since boot */
+ struct {
+ uint16_t low; /* 16-bit count of ticks since boot */
+ uint16_t high;
+ } h;
+} ticks_t;
+
+/* Sane behaviour for unused parameters */
+/* (expands to nothing, so the argument is never evaluated) */
+#define used(x)
+
+/* No support for inline */
+/* (the keyword is defined away) */
+#define inline
+
+/* FIXME: should swap a/b inline ??? */
+/* Swap the two bytes of a 16-bit value. Note that x is evaluated twice,
+   so do not pass an expression with side effects */
+#define ntohs(x) ((((x) & 0xFF) << 8) | (((x) & 0xFF00) >> 8))
--- /dev/null
+# Final image: once the objects are built, hand over to the "image" target
+# in the selected platform's directory
+fuzix.bin: target $(OBJS)
+ +make -C platform-$(TARGET) image
--- /dev/null
+# Cross toolchain for the 65c816 build: ca65 assembles, cl65 is used for
+# both compiling and linking. Headers are searched in cpu-65c816 first,
+# then cpu-6502, the platform directory and the common include directory.
+export CROSS_AS=ca65
+export CROSS_LD=cl65
+export CROSS_CC=cl65
+export CROSS_CCOPTS=-c -O -t none -I$(ROOT_DIR)/cpu-65c816 -I$(ROOT_DIR)/cpu-6502 -I$(ROOT_DIR)/platform-$(TARGET) -I$(ROOT_DIR)/include
+#
+# The 6502 compiler produces what is mostly threadcode and is quite determined
+# that the runtime lives in the code segment. As we want the runtime in common
+# memory we use SEG1/SEG2 names for all the kernel code.
+#
+export CROSS_CC_SEG1=--code-name SEG1
+export CROSS_CC_SEG2=--code-name SEG2
+# 6502 we need a real SEG3 to make it fit
+export CROSS_CC_SEG3=--code-name SEG3
+export CROSS_CC_SYS1=--code-name SYS1
+export CROSS_CC_SYS2=--code-name SYS2
+export CROSS_CC_SYS3=--code-name SYS3
+export CROSS_CC_SYS4=--code-name SYS4
+export CROSS_CC_SYS5=--code-name SYS5
+# Video code shares SEG3
+export CROSS_CC_VIDEO=--code-name SEG3
+# Discardable code and its read-only data get their own segments
+export CROSS_CC_SEGDISC=--code-name DISCARD --rodata-name DISCARDDATA
+# File extensions used for assembler sources and for objects
+export ASMEXT = .s
+export BINEXT = .o
+export BITS=16
--- /dev/null
+; Assembler view of the kernel's C structures and constants. Nothing here
+; is generated: every value has to be kept in step with the C side by hand.
+;
+; Addresses of the struct u_data fields, as offsets from U_DATA
+; Keep these in sync with struct u_data!!
+U_DATA__U_PTAB .set (U_DATA+0) ; struct p_tab*
+U_DATA__U_PAGE .set (U_DATA+2) ; uint16_t
+U_DATA__U_PAGE2 .set (U_DATA+4) ; uint16_t
+U_DATA__U_INSYS .set (U_DATA+6) ; bool
+U_DATA__U_CALLNO .set (U_DATA+7) ; uint8_t
+U_DATA__U_SYSCALL_SP .set (U_DATA+8) ; void *
+U_DATA__U_RETVAL .set (U_DATA+10) ; int16_t
+U_DATA__U_ERROR .set (U_DATA+12) ; int16_t
+U_DATA__U_SP .set (U_DATA+14) ; void *
+U_DATA__U_ININTERRUPT .set (U_DATA+16) ; bool
+U_DATA__U_CURSIG .set (U_DATA+17) ; int8_t
+U_DATA__U_ARGN .set (U_DATA+18) ; uint16_t
+U_DATA__U_ARGN1 .set (U_DATA+20) ; uint16_t
+U_DATA__U_ARGN2 .set (U_DATA+22) ; uint16_t
+U_DATA__U_ARGN3 .set (U_DATA+24) ; uint16_t
+U_DATA__U_ISP .set (U_DATA+26) ; void * (initial stack pointer when _exec()ing)
+U_DATA__U_TOP .set (U_DATA+28) ; uint16_t
+U_DATA__U_BREAK .set (U_DATA+30) ; uint16_t
+U_DATA__U_SIGVEC .set (U_DATA+32) ; table of function pointers (void *)
+
+; Byte offsets of fields within a struct p_tab entry
+; Keep these in sync with struct p_tab!!
+P_TAB__P_STATUS_OFFSET .set 0
+P_TAB__P_TTY_OFFSET .set 1
+P_TAB__P_PID_OFFSET .set 2
+P_TAB__P_PAGE_OFFSET .set 14
+
+P_RUNNING .set 1 ; value from include/kernel.h
+P_READY .set 2 ; value from include/kernel.h
+
+OS_BANK .set 0 ; value from include/kernel.h
+
+EAGAIN .set 11 ; value from include/kernel.h
--- /dev/null
+
+
+ .P816
+ .I8
+ .A8
+
+	; Entry points provided by this file
+	.export unix_syscall_entry
+	.export _doexec
+	.export interrupt_handler
+	.export nmi_handler
+
+	; Debug output helpers
+	.export outstring
+	.export outstringhex
+	.export outnewline
+	.export outcharhex
+	.export outxa
+	.export stash_zp
+
+	.export _need_resched
+
+	; The generic 65c816 map routines are defined in this file and
+	; imported by the user copy code, so they must be exported here.
+	; (Importing a symbol that the same file defines is an error.)
+	.export map_process_always
+	.export map_kernel
+
+	.import outchar
+	.import _kernel_flag
+	.import _unix_syscall_i
+	.import map_restore
+	.import map_save
+	.import _platform_interrupt_i
+	.import platform_doexec
+	.import _inint
+	.import CTemp
+	.import _trap_monitor
+
+ .include "platform/zeropage.inc"
+ .include "platform/kernel.def"
+ .include "kernel816.def"
+
+ .segment "COMMONMEM"
+;
+;	Unlike Z80 we need to deal with systems that have no overlapping
+;	memory banks. We pass the arguments as a single pointer, therefore
+;	we expect the platform code to have copied the syscall arguments into
+;	udata and then called us. It also saves any registers etc for us (as
+;	it will need them too)
+;
+;	Called with interrupts off, on the kernel stack
+;	On completion U_DATA__U_ERROR and U_DATA__U_RETVAL hold the returns
+;
+;	Caller is expected to set 65C816 to I8A8
+;
+;	Entry:	X = value stored as the syscall number
+;		Y = zero if there are no arguments to fetch
+;	Exit:	Y/X = U_RETVAL low/high, A = U_ERROR (Z set if zero)
+;		If a signal is pending we leave via signal_out instead.
+;
+;	The .i8/.i16/.a8/.a16 directives below must always mirror the
+;	preceding rep/sep or immediates are assembled at the wrong width.
+;
+;	FIXME: do we want 6502 binaries syscall or to implement a cleaner
+;	65c816 brk based syscall ?
+;
+unix_syscall_entry:
+	php
+	sei
+	cld
+	sep	#$30
+	.i8
+	.a8
+	lda	#KERNEL_BANK
+	pha
+	plb			; data bank register = kernel bank
+	stx	U_DATA__U_CALLNO	; single byte field, so store while 8bit
+	cpy	#0
+	beq	noargs
+
+	; FIXME: inter bank move the arguments
+
+noargs:
+	rep	#$10
+	.i16
+	ldx	sp
+	stx	U_DATA__U_SYSCALL_SP
+	tsx
+	stx	U_DATA__U_PAGE+2	; ewww.. FIXME
+	; FIXME: kstack actually depends on process number so needs to be
+	; a look up
+	ldx	#kstack_top
+	stx	sp
+
+	sep	#$10
+	.i8
+	lda	#1
+	sta	_kernel_flag	; In kernel mode
+	cli			; Interrupts now ok
+	jsr	_unix_syscall_i	; Enter C space via the __interrupt wrapper
+	sei			; Interrupts back off
+	stz	_kernel_flag
+	rep	#$10
+	.i16
+	ldx	U_DATA__U_PAGE+2	; CPU stack saved on entry
+	txs
+	ldx	U_DATA__U_SYSCALL_SP	; C stack saved on entry
+	stx	sp
+	sep	#$10
+	.i8
+	lda	U_DATA__U_CURSIG
+	bne	signal_out
+	plp
+	; We may now be in decimal !
+	ldy	U_DATA__U_RETVAL
+	ldx	U_DATA__U_RETVAL+1
+	; also sets z for us
+	lda	U_DATA__U_ERROR
+	rts
+
+;
+;	Syscall return with a signal pending: build a frame for the
+;	handler and return into the user bank with rtl.
+;
+;	NOTE(review): sigret is not defined or imported in this file, and
+;	rtl resumes at the pushed address plus one - both need checking
+;	against the user side stub.
+;
+signal_out:
+	tay			; Y = signal number
+	stz	U_DATA__U_CURSIG
+	rep	#$10
+	.i16
+	tsx
+	dex			; move past existing frame
+	dex
+	dex
+	dex			; FIXME check is 4 bytes
+	txs
+	sep	#$10
+	.i8
+	; Stack the signal return (the signal itself can cause syscalls)
+	lda	U_DATA__U_ERROR
+	pha
+	lda	U_DATA__U_RETVAL
+	pha
+	lda	U_DATA__U_RETVAL+1
+	pha
+	lda	#>sigret	; needs to be a fixed address in user
+	pha
+	lda	#<sigret	; FIXME
+	pha
+	phy			; signal number for the handler
+	tya			; A held <sigret: reload the signal number
+	asl	a		; vectors are two bytes each
+	tay
+	rep	#$30
+	.i16
+	.a16
+	lda	U_DATA__U_SIGVEC,y
+	stz	U_DATA__U_SIGVEC,y	; clear both bytes of the vector
+	pha			; handler address
+	sep	#$20
+	.a8
+	lda	U_DATA__U_PAGE
+	pha			; bank for rtl
+	ldx	#PROGLOAD+20
+	phx			; address for rtl
+	sep	#$10
+	.i8
+	rtl			; return into user app
+
+;
+; On 6502 the platform code is responsible for invoking the
+; signal dispatch (as it may have to be in the stub in the
+; process space if we have no common)
+;
+
+
+;
+;	doexec is a special case syscall exit path. As we may have no
+;	common we have to hand the last bits off to the platform code
+;	x,a holds the target address. This routine is in common and is the
+;	one case we can and do want to have fastcall.
+;
+;	Switches to the CPU stack and direct page derived from
+;	U_DATA__U_PAGE+2, sets the C stack pointer from U_DATA__U_ISP and
+;	then uses rtl to enter the bank held in U_DATA__U_PAGE.
+;
+;	NOTE(review): the passed address is currently ignored and zero is
+;	pushed instead; rtl resumes at the pushed address plus one.
+;
+_doexec:
+	; FIXME set up U_DATA__U_PAGE+2 here by lookup
+	sei
+	stz	_kernel_flag
+	; where to save address ... ? (or is it always bank:0 anyway ?)
+	rep	#$30
+	.i16
+	.a16
+	lda	U_DATA__U_PAGE+2	; CPU stack
+	tcs			; Set CPU stack at xxFF
+	ina			; Zp follows
+	xba			; now in the form 00xx as we need
+	pha
+	pld
+	; We are now on the correct DP and CPU stack
+	ldy	U_DATA__U_ISP
+	sty	sp		; sp is in DP so we write user version
+	sep	#$30		; A must go back to 8bit too, not just X/Y
+	.i8
+	.a8
+	lda	U_DATA__U_PAGE	; bank
+	pha
+	lda	#0		; should be passed address but will
+	pha			; I think always be zero !
+	pha
+	cli
+	tax			; bank is 0
+	tay			; ZP base is 0
+	rtl
+
+;
+;	The C world here is fairly ugly. We have to stash various bits of
+;	zero page magic because it's not re-entrant.
+;
+;	Saves A/X/Y (16bit), swaps the C zero page temporaries out, moves
+;	to the interrupt stack, calls the platform handler via its C
+;	wrapper, then undoes all of that. If a signal is pending on the
+;	way out we build a frame for it in signal_exit rather than
+;	returning directly.
+;
+interrupt_handler:
+	rep	#$30
+	.i16
+	.a16
+	pha
+	phx
+	phy
+	sep	#$30		; 8i8a
+	.i8
+	.a8
+	cld
+
+	; FIXME: rewrite this in 16i mode
+	; Save our ZP in case we are in kernel mode and using it
+	; (we could sacrifice 256 bytes to IRQ handling instead which
+	; might be smarter FIXME)
+	jsr	stash_zp	; side effect saves sp
+
+	rep	#$10
+	.i16
+	tsx
+	stx	istack_switched_sp
+	ldx	istack
+	txs
+	ldx	#istack
+	stx	sp		; C stack is now right
+
+	sep	#$10
+	.i8
+
+	lda	#1
+	sta	_inint
+	jsr	_platform_interrupt_i	; call via C int wrapper
+	stz	_inint
+	jsr	map_process_always	; may have switched task
+	jmp	int_switch
+int_switch:
+	stz	_inint
+
+	; Restore the stack we arrived on
+
+	rep	#$10
+	.i16
+	ldx	istack_switched_sp
+	txs
+	sep	#$10
+	.i8
+	jsr	stash_zp	; swap the C temporaries back
+	; TODO .. pre-emption
+
+	; Signal return path
+	; The basic idea here is that if a signal is pending we
+	; build a new stack frame under the real one and rti to that. The
+	; hook code in low user memory will then clean up the real frame
+	lda	U_DATA__U_CURSIG
+	stz	U_DATA__U_CURSIG	; stz leaves the flags from the lda alone
+	bne	signal_exit
+	rep	#$30
+	.i16
+	.a16
+	ply
+	plx
+	pla
+	rti
+
+signal_exit:
+	.i8
+	.a8
+	tay			; save signal 8bits
+
+
+	; Move down the stack frame
+	; FIXME: we need to mangle the frame to take out the page
+	; so we can match syscall (or modify syscall!)
+	rep	#$10
+	.i16
+	tsx
+	dex			; 7 bytes FIXME check
+	dex
+	dex
+	dex
+	dex
+	dex
+	dex
+	txs
+	ldx	#irqout
+	phx			; return vector
+	sep	#$10
+	.i8
+	phy			; signal code
+	tya
+	asl	a		; vectors are two bytes each
+	tay
+	rep	#$30
+	.i16
+	.a16
+	lda	U_DATA__U_SIGVEC,y
+	stz	U_DATA__U_SIGVEC,y
+	pha			; handler address
+	; Build the frame rti expects. In native mode rti pulls P, then
+	; PC low, PC high and finally the bank, so the bank has to be
+	; pushed first and P last.
+	sep	#$20
+	.a8
+	lda	U_DATA__U_PAGE
+	pha			; bank
+	rep	#$20
+	.a16
+	lda	#PROGLOAD+20
+	pha			; PC (high byte then low byte)
+	sep	#$30
+	.i8
+	.a8
+	lda	#$30		; i8a8
+	pha			; P
+	rti
+;
+;	We can make the map routines generic as all 65c816 are the same
+;	mapping model. We don't change anything but instead track the page
+;	we need to use for the 'far' operations.
+;
+;	map_process_always	record the current process page from udata
+;	map_process		X/A = pointer to the page to record, or
+;				zero for the kernel
+;	map_kernel		nothing to do
+;
+map_process_always:
+	lda	U_DATA__U_PAGE
+	sta	userpage
+	rts
+map_process:
+	cmp	#0
+	bne	map_process_2
+	cpx	#0
+	bne	map_process_2
+map_kernel:
+	rts
+map_process_2:
+	sta	ptr1
+	stx	ptr1+1
+	lda	(ptr1)		; 4 bytes if needed
+	sta	userpage
+	rts
+; Page recorded by the routines above
+userpage:
+	.byte	0
+
+;
+; The following is taken from the debugger example as referenced in
+; the compiler documentation. We swap a stashed ZP in our commondata
+; with an IRQ handler one. The commondata is per process and we depend
+; upon this to make it all work
+;
+; Swap the C temporaries
+;
+; Exchanges the zpsavespace bytes starting at sp with the CTemp buffer,
+; working from the last byte down to the first. Calling it a second time
+; puts everything back. Uses A, X and Y.
+;
+stash_zp:
+ ldy #zpsavespace-1
+Swap1: ldx CTemp,y
+ lda <sp,y
+ sta CTemp,y
+ txa
+ sta sp,y
+ dey
+ bpl Swap1 ; loop until Y goes below zero
+ rts
+
+; NMI entry: print "NMI!" and drop into the monitor trap. Does not return
+; to the interrupted code.
+nmi_handler:
+ ldx #>nmi_trap ; X/A = address of the message
+ lda #<nmi_trap
+ jsr outstring
+nmi_stop:
+ jmp _trap_monitor
+nmi_trap:
+ .byte "NMI!", 0
+
+;
+;	Debug output helpers. All output goes through the platform's
+;	outchar. Both string routines take the address of a zero
+;	terminated string in X (high) / A (low) and use ptr1 and Y.
+;
+;	NOTE(review): both loops rely on outchar preserving Y - confirm.
+;
+
+; Print the string at X/A
+outstring:
+	sta	ptr1
+	stx	ptr1+1
+	ldy	#0
+outstringl:
+	lda	(ptr1),y
+	cmp	#0
+	beq	outdone1
+	jsr	outchar
+	iny
+	jmp	outstringl
+
+; Print each byte of the string at X/A as two hex digits
+outstringhex:			; string in X,A
+	sta	ptr1
+	stx	ptr1+1
+	ldy	#0
+outstringhexl:
+	lda	(ptr1),y
+	cmp	#0
+	beq	outdone1
+	jsr	outcharhex
+	iny
+	jmp	outstringhexl
+
+; Print a line feed and carriage return. A is preserved.
+outnewline:
+	pha
+	lda	#10		; LF
+	jsr	outchar
+	lda	#13		; CR (was a second LF)
+	jsr	outchar
+	pla
+outdone1:
+	rts
+
+; Print the byte in A as two hex digits (upper case A-F), high nibble
+; first. A is not preserved.
+outcharhex:
+ pha
+ and #$f0
+ lsr a
+ lsr a
+ lsr a
+ lsr a ; high nibble now in the low four bits
+ cmp #10
+ bcc deci1
+ clc
+ adc #7 ; skip the gap between '9' and 'A'
+deci1:
+ clc
+ adc #48 ; ascii zero
+ jsr outchar
+ pla
+ and #$0f ; now the low nibble
+ cmp #10
+ bcc deci2
+ clc
+ adc #7
+deci2:
+ clc
+ adc #48
+ jmp outchar ; tail call: outchar returns for us
+
+; Print X then A as four hex digits
+outxa: pha
+ txa
+ jsr outcharhex ; X first
+ pla
+ jmp outcharhex ; then the original A
+
+; Exported byte, initially zero. Nothing in this file reads or writes it;
+; presumably a "reschedule needed" flag for the C side - TODO confirm.
+_need_resched:
+ .byte 0
--- /dev/null
+ .include "platform/kernel.def"
+ .include "kernel02.def"
+ .include "platform/zeropage.inc"
+
+ .export __uget, __ugetc, __ugetw, __ugets
+ .export __uput, __uputc, __uputw, __uzero
+
+ .import map_kernel, map_process_always
+ .import outxa, popax
+ .importzp ptr2, tmp2
+;
+; 65c816 specific usermem access functions. These should use and know we
+; are using the processor bank register but are not yet optimized for
+; this.
+;
+; ptr1 and tmp1 are reserved for map_* functions
+;
+;
+ .segment "COMMONMEM"
+
+; user, dst, count(count in ax)
+;
+; Copy count bytes from the user address to the kernel buffer dst.
+; Always returns 0 in A/X.
+;
+; Decidedly unoptimised (even the 6502 could manage a word a switch)
+;
+__uget: sta tmp2
+ stx tmp2+1 ; save the count
+ jsr popax ; pop the destination
+ sta ptr2 ; (ptr2) is our target
+ stx ptr2+1
+ jsr popax ; pop the user source
+ sta ptr3 ; (ptr3) is our source
+ stx ptr3+1
+
+ ldy #0 ; counter
+
+ ldx tmp2+1 ; how many 256 byte blocks
+ beq __uget_tail ; if none skip to the tail
+
+__uget_blk:
+ jsr map_process_always ; map the user process in
+ lda (ptr3), y ; get a byte of user data
+ jsr map_kernel ; map the kernel back in
+ sta (ptr2), y ; save it to the kernel buffer
+ iny ; move on one
+ bne __uget_blk ; not finished a block ?
+ inc ptr2+1 ; move dst ptr 256 bytes on
+ inc ptr3+1 ; move src ptr the same
+ dex ; one less block to do
+ bne __uget_blk ; more blocks to do ?
+
+__uget_tail: cpy tmp2 ; finished ?
+ beq __uget_done
+
+ jsr map_process_always ; map the user process
+ lda (ptr3),y ; get a byte of user data
+ jsr map_kernel ; map the kernel back in
+ sta (ptr2),y ; save it to the kernel buffer
+ iny ; move on
+ bne __uget_tail ; always taken (y will be non zero)
+
+__uget_done:
+ lda #0
+ tax
+ rts
+
+;
+;	user, dst, count (count in ax)
+;
+;	Copy a zero terminated string from the user address to the kernel
+;	buffer dst, which holds count bytes.
+;
+;	Returns 0 in A/X once the terminator has been copied. If no
+;	terminator is found within count bytes the last byte of dst is
+;	overwritten with a terminator and $FFFF is returned. With a count
+;	of zero nothing is written at all.
+;
+__ugets:	sta tmp2
+	stx tmp2+1		; save the count
+	jsr popax		; pop the destination
+	sta ptr2		; (ptr2) is our target
+	stx ptr2+1
+	jsr popax		; pop the user source
+	sta ptr3		; (ptr3) is our source
+	stx ptr3+1
+
+	ldy #0			; counter
+
+	ldx tmp2+1		; how many 256 byte blocks
+	beq __ugets_tail	; if none skip to our own tail (the uget
+				; tail would not look for the terminator)
+
+__ugets_blk:
+	jsr map_process_always	; map the user process in
+	lda (ptr3), y		; get a byte of user data
+	beq __ugets_end
+	jsr map_kernel		; map the kernel back in
+	sta (ptr2), y		; save it to the kernel buffer
+	iny			; move on one
+	bne __ugets_blk		; not finished a block ?
+	inc ptr3+1		; move src ptr 256 bytes on
+	inc ptr2+1		; move dst ptr the same
+	dex			; one less block to do
+	bne __ugets_blk		; more blocks to do ?
+
+__ugets_tail:	cpy tmp2	; finished ?
+	beq __ugets_bad
+
+	jsr map_process_always	; map the user process
+	lda (ptr3),y		; get a byte of user data
+	beq __ugets_end
+	jsr map_kernel		; map the kernel back in
+	sta (ptr2),y		; save it to the kernel buffer
+	iny			; move on
+	bne __ugets_tail	; always taken (y will be non zero)
+
+__ugets_bad:
+	cpy #0			; is the last byte in the current page ?
+	bne __ugets_term
+	lda tmp2+1		; no whole blocks either means count was
+	beq __ugets_toolong	; zero: there is no buffer to terminate
+	dec ptr2+1		; last byte is at the end of the previous page
+__ugets_term:
+	dey
+	lda #0
+	sta (ptr2), y		; terminate kernel buffer
+__ugets_toolong:
+	lda #$FF		; string too large
+	tax			; return $FFFF
+	rts
+
+__ugets_end:
+	jsr map_kernel
+	lda #0
+	sta (ptr2), y		; copy the terminator
+	tax			; return 0
+	rts
+
+; Fetch one byte from the user address in A/X. The byte is returned in A.
+; NOTE(review): X still holds the high byte of the address on return -
+; confirm callers only use the low byte of the result.
+__ugetc: sta ptr2
+ stx ptr2+1
+__uget_ptr2:
+ jsr map_process_always
+ ldy #0
+ lda (ptr2),y
+ jmp map_kernel ; tail call, relies on map_kernel preserving A
+
+; Fetch a 16-bit word from the user address in A/X. Returns the low byte
+; in A and the high byte in X.
+__ugetw: sta ptr2
+ stx ptr2+1
+ jsr map_process_always
+ ldy #1
+ lda (ptr2),y ; high byte first, parked in X
+ tax
+ dey
+ lda (ptr2),y ; then the low byte
+ jmp map_kernel ; tail call, relies on map_kernel preserving A and X
+
+
+; source, dest, count (count in ax)
+;
+; Copy count bytes from the kernel buffer source to the user address dest.
+; Always returns 0 in A/X. Same structure as __uget with the mapping
+; calls the other way around.
+__uput: sta tmp2
+ stx tmp2+1 ; save the count
+ jsr popax ; dest
+ sta ptr2
+ stx ptr2+1
+ jsr popax ; source
+ sta ptr3
+ stx ptr3+1
+
+ ldy #0
+
+ ldx tmp2+1 ; how many 256 byte blocks
+ beq __uput_tail
+__uput_blk:
+ jsr map_kernel
+ lda (ptr3), y ; read from the kernel buffer
+ jsr map_process_always
+ sta (ptr2), y ; write to the user address
+ iny
+ bne __uput_blk
+ inc ptr2+1 ; both pointers move on 256 bytes
+ inc ptr3+1
+ dex
+ bne __uput_blk
+
+__uput_tail: cpy tmp2 ; finished ?
+ beq __uput_done
+ jsr map_kernel
+ lda (ptr3),y
+ jsr map_process_always
+ sta (ptr2),y
+ iny
+ bne __uput_tail
+
+__uput_done:
+ jsr map_kernel
+ lda #0
+ tax
+ rts
+
+; value, address (address in ax)
+;
+; Store the low byte of value at the user address.
+__uputc: sta ptr2
+ stx ptr2+1
+ jsr map_process_always
+ jsr popax ; the value to store
+ ldy #0
+ sta (ptr2),y
+ jmp map_kernel
+
+; value, address (address in ax)
+;
+; Store the 16-bit value at the user address, low byte first.
+__uputw: sta ptr2
+ stx ptr2+1
+ jsr map_process_always
+ jsr popax ; the value to store
+ ldy #0
+ sta (ptr2),y ; low byte
+ txa
+ iny
+ sta (ptr2),y ; high byte
+ jmp map_kernel
+
+; address, count (count in ax)
+;
+; Write count zero bytes starting at the user address.
+__uzero: sta tmp2
+ stx tmp2+1 ; save the count
+ jsr map_process_always
+ jsr popax ; ax is now the usermode address
+ sta ptr2
+ stx ptr2+1
+
+ ldy #0
+ tya ; A = 0, the value we store throughout
+
+ ldx tmp2+1 ; more than 256 bytes
+ beq __uzero_tail ; no - just do dribbles
+__uzero_blk:
+ sta (ptr2),y
+ iny
+ bne __uzero_blk
+ inc ptr2+1 ; next 256 bytes
+ dex ; are we done with whole blocks ?
+ bne __uzero_blk
+
+__uzero_tail:
+ cpy tmp2 ; finished ?
+ beq __uzero_done
+ sta (ptr2),y
+ iny
+ bne __uzero_tail
+__uzero_done: jmp map_kernel
+
+