From 36765fc8ae1dc3fae66d8e17a02d48343bff52a9 Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Mon, 2 Oct 2017 00:48:17 +0100 Subject: [PATCH] 65c816: assorted low level fixes With these fixed we get to a shell prompt and can run some commands. To get further is going to need cc65 modifications as cc65 generates the following code in /bin/ls STA $08 3 STX $09 3 JMP $0008 5 which of course goes castors up on the 65C816 set up because DP is not B:0000 nor can it be. So far this seems to be the only problem case and it looks as if generating a PHX 3 PHA 3 RTS 6 would be within one clock for 65C02 or later. 6502 compatible code will need an uglier fixup I fear. STA $08 3 TXA 2 PHA 3 LDA $08 3 PHA 3 RTS 6 or similar which isn't pretty but works (self modifying code isn't on because we might re-enter mid execution) --- Kernel/lib/65c816.s | 59 +++++++++++++---------- Kernel/lowlevel-65c816.s | 95 +++++++++++++++++++++++++++++++++---- Kernel/usermem_std-65c816.s | 25 ++++++---- 3 files changed, 137 insertions(+), 42 deletions(-) diff --git a/Kernel/lib/65c816.s b/Kernel/lib/65c816.s index fcfef3b6..4ee07213 100644 --- a/Kernel/lib/65c816.s +++ b/Kernel/lib/65c816.s @@ -79,7 +79,7 @@ slow_path: .i8 .a8 lda U_DATA__U_PAGE - sta switch_patch_1+2 ; target bank of save + sta switch_patch_1+1 ; target bank of save rep #$30 .i16 .a16 @@ -100,37 +100,39 @@ switch_patch_1: ; FIXME: add swap support ; _switchin: + ; not in theory needed + sep #$30 + .i8 + .a8 + sei sta ptr1 stx ptr1+1 - sep #$30 - .i8 - .a8 ldy #P_TAB__P_PAGE_OFFSET lda (ptr1),y ; bank of target ; If this is zero we need swapping so the swapper checks go here ; FIXME - sta switch_patch_2+1 ; source bank of retrieve + sta switch_patch_2+2 ; source bank of retrieve rep #$30 .i16 .a16 ; Set our stack pointer. 
We must not use it until the mvn ; is completed - ldx U_DATA__U_SP ; correct stack pointer - txs ldx #U_DATA_STASH - ldx #U_DATA + ldy #U_DATA lda #U_DATA__TOTALSIZE-1 switch_patch_2: ; FIXME check syntax required for bank value ?? - mvn 0,KERNEL_FAR + mvn KERNEL_FAR,0 ; after the MVN our data bank is KERNEL_DATA ; Our stack is now valid and we may use it again, our UDATA ; is for the new process + ldx U_DATA__U_SP ; correct stack pointer + txs ldx U_DATA__U_PTAB cpx ptr1 bne switchinfail ; wrong process !! @@ -142,7 +144,7 @@ switch_patch_2: ; This will only be needed once we swap, and we will need to ; do a few other fixups too lda P_TAB__P_PAGE_OFFSET,x - sta U_DATA__U_PAGE,x + sta U_DATA__U_PAGE plx ; stacked kernel space C sp stx sp sep #$10 @@ -151,10 +153,13 @@ switch_patch_2: beq notisr cli ; interrupts back on notisr: - plx ; return code - pla + pla ; return code + plx rts switchinfail: + sep #$30 + .a8 + .i8 lda ptr1+1 jsr outcharhex lda ptr1 @@ -176,19 +181,19 @@ _dofork: stx ptr1+1 lda U_DATA__U_PAGE sta fork_patch+2 ; source bank (parent) + sta fork_patch_2+1 ; destination udata stash asl a adc #STACK_BANKOFF - sta tmp1+1 ; source for S and DP - stz tmp1 + sta ptr2+1 ; source for S and DP + stz ptr2 ldy #P_TAB__P_PAGE_OFFSET lda (ptr1),y sta fork_patch+1 ; destination bank (child) - sta fork_patch_2+1 asl a adc #STACK_BANKOFF ; find our S and DP banks as ; those need copying too - sta tmp2+1 ; dest for S and DP - stz tmp2 + sta ptr3+1 ; dest for S and DP + stz ptr3 rep #$20 .a16 @@ -203,7 +208,7 @@ _dofork: tsx stx U_DATA__U_SP ; Stack pointer in udata - ; Our context is now a valid child stack frame so we can save stuff + ; Our context is now a valid stack frame so we can save stuff ldx #0 txy lda #MAP_SIZE ; 64K - udata shadow @@ -217,8 +222,8 @@ fork_patch_2: mvn KERNEL_FAR,0 plb ; back to kernel bank - ldx tmp1 - ldy tmp2 + ldx ptr2 + ldy ptr3 lda #$01FF ; DP and stack mvn 0,0 @@ -227,16 +232,22 @@ fork_patch_2: ; versus parent so we restore it 
correctly ; + sep #$30 + .a8 + .i8 lda U_DATA__U_SYSCALL_SP+1 - clc - adc tmp1 sec - sbc tmp2 + sbc ptr2+1 + clc + adc ptr3+1 sta U_DATA__U_SYSCALL_SP+1 + rep #$10 + .i16 + ; At this point we have copied the parent into the child bank ; and copied the current uarea into the child uarea - plx ; discard frame we build for child + plx ; discard frame we build for parent plx sep #$30 ; back to 8bit mode for C diff --git a/Kernel/lowlevel-65c816.s b/Kernel/lowlevel-65c816.s index 1c134280..8a10c9dd 100644 --- a/Kernel/lowlevel-65c816.s +++ b/Kernel/lowlevel-65c816.s @@ -327,8 +327,8 @@ signal_out: ; registers and return directly to the start of the user process ; _doexec: - sta tmp1 - stx tmp1+1 ; address to execute from + sta ptr1 + stx ptr1+1 ; address to execute from sei stz _kernel_flag @@ -336,7 +336,7 @@ _doexec: .i16 .a16 - ldx tmp1 ; target address + ldx ptr1 ; target address sep #$20 .a8 @@ -403,6 +403,8 @@ _doexec: ; caller already saved the CPU state. Interrupts are off at this point ; as it we got here via a trap. ; +; FIXME save the right registers to return correctly if caught +; shoot_myself: sep #$30 .a8 @@ -450,6 +452,8 @@ interrupt_handler: pha phx phy + phb + phd cld ; no funnies with decimal ; Now switch our data and DP to the kernel ones @@ -495,12 +499,11 @@ join_interrupt_path: lda _kernel_flag beq ret_to_user - lda #KERNEL_BANK - pha - plb - ldx #KERNEL_DP - phx + ; Kernel interrupt path may change B and D itself so we must + ; preserve them + pld + plb rep #$20 .a16 @@ -516,6 +519,13 @@ join_interrupt_path: ret_to_user: ; TODO .. 
pre-emption + ; Discard saved B and D - for user we will compute the correct + ; one (we could optimize this a shade and only throw on a + ; pre-empt FIXME) + + plx + pla + ; Signal return path ; The basic idea here is that if a signal is pending we @@ -855,3 +865,76 @@ syscall_vector: jsl KERNEL_FAR+syscall_entry rts +; +; Relocation stub (as it's easier to do relocation when in the user +; bank than bouncing around and we have no common!). Called with i16 +; and X holding the binary start, tmp1 the shift and Y the code to +; process +; +; Returns with X pointing to end zero (so we can run it twice to do +; ZP). We wipe the data as we go since it will become BSS. +; +; Our relocation table lives in BSS start and is a byte table in the +; format +; 0,0 end +; 0,n skip n bytes and read next table entry +; 1-255 skip 1-255 bytes and relocate the byte now pointed at +; then read next table entry +; +; Our first table is high bytes of 16bit addresses to relocate (we +; keep page alignment). Our second is ZP addresses to relocate. +; +; Call with interrupts off until sure our irq code will get odd +; code banks right. 
+; +; FIXME: move out of stubs - run with code = normal dp = kernel +; b = user and it'll work better +; +; FIXME: consider checking if run off end with either X or Y +; +relocate: + .i16 + .a8 + stz tmp2+1 ; FIXME +reloc_loop: + lda 0,x + stz 0,x + beq multi + inx ; step X past the entry just consumed + sta tmp2 + ; skip 1-255 bytes and then relocate one byte + rep #$20 + .a16 + tya + clc ; carry is stale here (caller / previous adc) + adc tmp2 + tay + sep #$20 + .a8 + ; now relocate + lda 0,y + clc + adc tmp1 + sta 0,y + bra reloc_loop +multi: + .i16 + .a8 + inx + lda 0,x + stz 0,x + beq endofreloc ; (0,0 -> end) + inx ; step X past the skip count + sta tmp2 + rep #$20 + .a16 + tya + clc ; plain 16bit add, no carry in + adc tmp2 + tay + sep #$20 + .a8 + bra reloc_loop +endofreloc: + rtl ; as called from bank KERNEL + diff --git a/Kernel/usermem_std-65c816.s b/Kernel/usermem_std-65c816.s index 103745b1..65a1405c 100644 --- a/Kernel/usermem_std-65c816.s +++ b/Kernel/usermem_std-65c816.s @@ -14,8 +14,7 @@ ; ; ptr1 and tmp1 are reserved for map_* functions in 6502 but -; are actually free here. We keep the convention however in case -; of future changes +; are actually free here. ; .code @@ -24,23 +23,23 @@ ; Compiler glue is not pretty - might be worth having some optimized ; 16bit aware stack handlers ; -__uget: sta tmp2 - stx tmp2+1 ; save the count +__uget: sta ptr1 + stx ptr1+1 ; save the count jsr popax ; pop the destination sta ptr2 ; (ptr2) is our target stx ptr2+1 - jsr popax ; (ptr2) is our source + jsr popax ; (ptr3) is our source sta ptr3 stx ptr3+1 lda U_DATA__U_PAGE - sta ugetpatch+1 + sta ugetpatch+2 phb .i16 .a16 rep #$30 ldx ptr3 ; source ldy ptr2 ; destination - lda tmp2 + lda ptr1 beq ug_nomov ; 0 means 64K! dec ; need 1 less than size ugetpatch: @@ -145,8 +144,8 @@ __ugetw: __uput: - sta tmp2 - stx tmp2+1 + sta ptr1 + stx ptr1+1 jsr popax ; dest sta ptr2 stx ptr2+1 @@ -161,7 +160,7 @@ __uput: rep #$30 ldx ptr3 ; source ldy ptr2 ; destination - lda tmp2 + lda ptr1 beq up_nomov ; 0 means 64K!
dec ; need 1 less than size uputpatch: @@ -180,11 +179,15 @@ __uputc: stx ptr2+1 jsr popax phb + pha lda U_DATA__U_PAGE pha plb + pla sta (ptr2) plb + lda #0 + tax rts __uputw: @@ -202,6 +205,8 @@ __uputw: ldy #1 sta (ptr2),y plb + lda #0 + tax rts __uzero: -- 2.34.1