Use .los4 in lar 4 and .sts4 in sar 4.
author George Koehler <xkernigh@netscape.net>
Mon, 13 Feb 2017 20:22:00 +0000 (15:22 -0500)
committer George Koehler <xkernigh@netscape.net>
Mon, 13 Feb 2017 20:22:00 +0000 (15:22 -0500)
Our libem had two implementations of loading a block onto the stack,
one for lar 4 and one for los 4.  Now lar 4 and los 4 share the code in
.los4.  Likewise, sar 4 and sts 4 share the code in .sts4.

Rename .los to .los4 and .sts to .sts4, because they implement los 4
and sts 4.  Remove the special case for loading or storing 4 bytes,
because we can do it with 1 iteration of the loop.  Remove the
instructions that "align size", because the size must already be a
multiple of 4.

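Fix the upper bound check in .aar4.  The old code compared the adjusted
index against r3 (the descriptor pointer) instead of the upper bound it
had just loaded into r0, and it trapped on >= where it should trap
only on >.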

Change .aar4, .lar4, .los4, .sar4, .sts4 to pass all operands on the
real stack, except that .los4 and .sts4 take the size in register r3.
Have .aar4 set r3 to the size of the array element.  So lar 4 is just
.aar4 then .los4, and sar 4 is just .aar4 then .sts4.

ncg no longer calls .lar4 and .sar4 in libem, because it inlines the
code; but I keep .lar4 and .sar4 in libem, because mcg references
them.  They might or might not work in mcg.

mach/powerpc/libem/aar4.s
mach/powerpc/libem/build.lua
mach/powerpc/libem/lar4.s
mach/powerpc/libem/los.s [deleted file]
mach/powerpc/libem/los4.s [new file with mode: 0644]
mach/powerpc/libem/sar4.s
mach/powerpc/libem/sts.s [deleted file]
mach/powerpc/libem/sts4.s [new file with mode: 0644]
mach/powerpc/ncg/table

diff --git a/mach/powerpc/libem/aar4.s b/mach/powerpc/libem/aar4.s
index 5e41550..fc8620d 100644 (file)
@@ -1,14 +1,9 @@
 .sect .text
 
-! Index into a bounds-checked array.
+! Get address of element of bounds-checked array.
 !
-! On entry:
-!    r3 = ptr to descriptor
-!    r4 = index
-!    r5 = address of array
-! Yields:
-!    r3 = address of element
-!    r0 = size of element (used by .lar4, .sar4)
+! Stack: ( array-adr index descriptor-adr -- element-adr )
+! Sets r3 = size of element for .los4, .sts4
 ! Preserves r10 for .lar4, .sar4
 
 .define .aar4
        ori r0, r0, lo16[.trap_earray]
        mtspr ctr, r0            ! load CTR with trap address
 
-       lwz r0, 0(r3)
-       subf. r4, r0, r4         ! adjust range
-       bltctr                   ! check lower bound
+       lwz r4, 0(sp)            ! r4 = address of descriptor
+       lwz r5, 4(sp)            ! r5 = index
+       lwz r6, 8(sp)            ! r6 = address of array
 
-       lwz r0, 4(r3)
-       cmplw r4, r3
-       bgectr                   ! check upper bound
+       lwz r0, 0(r4)
+       subf. r5, r0, r5         ! subtract lower bound from index
+       bltctr                   ! check lower bound
 
-       lwz r0, 8(r3)
-       mullw r4, r4, r0         ! scale index
-       add r3, r4, r5           ! calculate element address
+       lwz r0, 4(r4)
+       cmplw r5, r0
+       bgtctr                   ! check upper bound
 
+       lwz r3, 8(r4)            ! r3 = size of element
+       mullw r5, r5, r3         ! scale index by size
+       add r6, r6, r5
+       stw r6, 8(sp)            ! push address of element
+       addi sp, sp, 8
        blr
diff --git a/mach/powerpc/libem/build.lua b/mach/powerpc/libem/build.lua
index 466a28f..3d1a106 100644 (file)
@@ -6,7 +6,7 @@ for _, plat in ipairs(vars.plats) do
        acklibrary {
                name = "lib_"..plat,
                srcs = {
-                       "./*.s",
+                       "./*.s", -- los4.s, sts4.s
                },
                vars = { plat = plat },
                deps = {
diff --git a/mach/powerpc/libem/lar4.s b/mach/powerpc/libem/lar4.s
index 2f5c368..27ae5a6 100644 (file)
@@ -2,39 +2,13 @@
 
 ! Load from bounds-checked array.
 !
-! On entry:
-!    r3 = ptr to descriptor
-!    r4 = index
-!    r5 = address of array
+! Stack: ( array-adr index descriptor-adr -- element )
 
 .define .lar4
 .lar4:
        mfspr r10, lr
        bl .aar4
+       ! pass r3 = size from .aar4 to .los4
+       bl .los4
        mtspr lr, r10
-       ! r3 = ptr to element
-       ! r0 = size of element
-
-       cmpwi r0, 1
-       bne 1f
-       ! Load 1 byte.
-       lbz r4, 0(r3)
-       stwu r4, -4(sp)
-       blr
-1:
-       cmpwi r0, 2
-       bne 2f
-       ! Load 2 bytes.
-       lhz r4, 0(r3)
-       stwu r4, -4(sp)
-       blr
-2:
-       ! Load r0 bytes, where r0 must be a positive multiple of 4.
-       subf sp, r0, sp         ! move stack pointer down
-       or r5, r0, r0           ! index r5 = length r0
-3:
-       addic. r5, r5, -4       ! r5 -= 4
-       lwzx r4, r5, r3
-       stwx r4, r5, sp
-       bgt 3b                  ! loop if r5 > 0
        blr
diff --git a/mach/powerpc/libem/los.s b/mach/powerpc/libem/los.s
deleted file mode 100644 (file)
index 2d412bc..0000000
+++ /dev/null
@@ -1,47 +0,0 @@
-.sect .text
-
-! Loads a variable-sized structure onto the stack.
-!
-!     r3 = size
-!     r4 = address
-
-.define .los
-.los:
-       ! These sizes are handled specially.
-
-       cmplwi r3, 1
-       ble size1
-
-       cmplwi r3, 2
-       ble size2
-
-       cmplwi r3, 4
-       ble size4
-
-       ! Variable-sized structure.
-
-       addi r3, r3, 3
-       clrrwi r3, r3, 2         ! align size
-
-       add r4, r4, r3           ! adjust address to top of block
-
-       srwi r3, r3, 2           ! convert size to the number of words
-       mtspr ctr, r3
-
-1:
-       lwzu r5, -4(r4)
-       stwu r5, -4(sp)
-       bdnz 1b                  ! decrement CTR, jump if non-zero
-       blr
-
-size1:
-       lbz r3, 0(r4)
-       b 1f
-size2:
-       lhz r3, 0(r4)
-       b 1f
-size4:
-       lwz r3, 0(r4)
-1:
-       stwu r3, -4(sp)
-       blr
diff --git a/mach/powerpc/libem/los4.s b/mach/powerpc/libem/los4.s
new file mode 100644 (file)
index 0000000..9b53d11
--- /dev/null
@@ -0,0 +1,34 @@
+.sect .text
+
+! Loads a variable-sized block onto the stack.
+!
+! On entry: r3 = size
+! Stack: ( address -- block )
+! Preserves r10 for .lar4
+
+.define .los4
+.los4:
+       lwz r4, 0(sp)            ! r4 = address
+
+       ! Sizes 1 and 2 are handled specially.
+       cmplwi r3, 1
+       ble 1f
+       cmplwi r3, 2
+       ble 2f
+
+       ! Else the size must be a multiple of 4.
+       srwi r5, r3, 2
+       mtspr ctr, r5            ! ctr = number of words
+       addi sp, sp, 4
+       add r4, r4, r3           ! adjust address to end of block
+4:     lwzu r5, -4(r4)
+       stwu r5, -4(sp)
+       bdnz 4b                  ! decrement ctr, jump if non-zero
+       blr
+
+1:     lbz r5, 0(r4)
+       stw r5, 0(sp)
+       blr
+2:     lhz r5, 0(r4)
+       stw r5, 0(sp)
+       blr
diff --git a/mach/powerpc/libem/sar4.s b/mach/powerpc/libem/sar4.s
index 7c97789..d8917ab 100644 (file)
@@ -2,41 +2,13 @@
 
 ! Store to bounds-checked array.
 !
-! On entry:
-!    r3 = ptr to descriptor
-!    r4 = index
-!    r5 = address of array
+! Stack: ( element array-adr index descriptor-adr -- )
 
 .define .sar4
 .sar4:
        mfspr r10, lr
        bl .aar4
+       ! pass r3 = size from .aar4 to .sts4
+       bl .sts4
        mtspr lr, r10
-       ! r3 = ptr to element
-       ! r0 = size of element
-
-       cmpwi r0, 1
-       bne 1f
-       ! Store 1 byte.
-       lwz r4, 0(sp)
-       addi sp, sp, 4
-       stb r4, 0(r3)
-       blr
-1:
-       cmpwi r0, 2
-       bne 2f
-       ! Store 2 bytes.
-       lwz r4, 0(sp)
-       addi sp, sp, 4
-       sth r4, 0(r3)
-       blr
-2:
-       ! Store r0 bytes, where r0 must be a positive multiple of 4.
-       or r5, r0, r0           ! index r5 = length r0
-3:
-       addic. r5, r5, -4       ! r5 -= 4
-       lwzx r4, r5, sp
-       stwx r4, r5, r3
-       bgt 3b                  ! loop if r5 > 0
-       add sp, r0, sp          ! move stack pointer up
        blr
diff --git a/mach/powerpc/libem/sts.s b/mach/powerpc/libem/sts.s
deleted file mode 100644 (file)
index 411b0fb..0000000
+++ /dev/null
@@ -1,50 +0,0 @@
-.sect .text
-
-! Stores a variable-sized structure from the stack.
-!
-!     r3 = size
-!     r4 = address
-
-.define .sts
-.sts:
-       ! These sizes are handled specially.
-
-       lwz r5, 0(sp)
-
-       cmplwi r3, 1
-       ble size1
-
-       cmplwi r3, 2
-       ble size2
-
-       cmplwi r3, 4
-       ble size4
-
-       ! Variable-sized structure.
-
-       addi r3, r3, 3
-       clrrwi r3, r3, 2         ! align size
-
-       srwi r3, r3, 2           ! convert size to the number of words
-       mtspr ctr, r3
-
-1:
-       lwz r5, 0(sp)
-       addi sp, sp, 4
-       stw r5, 0(r4)
-       addi r4, r4, 4
-
-       bdnz 1b                  ! decrement CTR, jump if non-zero
-       blr
-
-size1:
-       stb r5, 0(r4)
-       b 1f
-size2:
-       sth r5, 0(r4)
-       b 1f
-size4:
-       stw r5, 0(r4)
-1:
-       addi sp, sp, 4
-       blr
diff --git a/mach/powerpc/libem/sts4.s b/mach/powerpc/libem/sts4.s
new file mode 100644 (file)
index 0000000..82bdc4d
--- /dev/null
@@ -0,0 +1,35 @@
+.sect .text
+
+! Stores a variable-sized block from the stack.
+!
+! On entry: r3 = size
+! Stack: ( block address -- )
+! Preserves r10 for .sar4
+
+.define .sts4
+.sts4:
+       lwz r4, 0(sp)            ! r4 = address
+
+       ! Sizes 1 and 2 are handled specially.
+       cmplwi r3, 1
+       ble 1f
+       cmplwi r3, 2
+       ble 2f
+
+       ! Else the size must be a multiple of 4.
+       srwi r5, r3, 2
+       mtspr ctr, r5            ! ctr = number of words
+       addi r4, r4, -4          ! adjust address to before block
+4:     lwzu r5, 4(sp)
+       stwu r5, 4(r4)
+       bdnz 4b                  ! decrement ctr, jump if non-zero
+       addi sp, sp, 4
+       blr
+
+1:     lwz r5, 4(sp)
+       stb r5, 0(r4)
+       b 3f
+2:     lwz r5, 4(sp)
+       sth r5, 0(r4)
+3:     addi sp, sp, 8
+       blr
diff --git a/mach/powerpc/ncg/table b/mach/powerpc/ncg/table
index a5ac862..5dda77c 100644 (file)
@@ -1042,13 +1042,13 @@ PATTERNS
        pat loi                            /* Load arbitrary size */
                leaving
                        loc $1
-                       los INT32
+                       los 4
 
-       pat los $1==INT32                  /* Load arbitrary size */
-               with GPR3 GPR4 STACK
+       pat los $1==4                      /* Load arbitrary size */
+               with GPR3 STACK
                        kills ALL
                        gen
-                               bl {LABEL, ".los"}
+                               bl {LABEL, ".los4"}
 
        pat sti $1==INT8                   /* Store byte indirect */
                with REG REG
@@ -1141,13 +1141,13 @@ PATTERNS
        pat sti                            /* Store arbitrary size */
                leaving
                        loc $1
-                       sts INT32
+                       sts 4
 
-       pat sts $1==INT32                  /* Store arbitrary size */
-               with GPR3 GPR4 STACK
+       pat sts $1==4                      /* Store arbitrary size */
+               with GPR3 STACK
                        kills ALL
                        gen
-                               bl {LABEL, ".sts"}
+                               bl {LABEL, ".sts4"}
 
 
 /* Arithmetic wrappers */
@@ -1459,40 +1459,40 @@ PATTERNS
                        yields %a
 
 
-
 /* Arrays */
 
-       pat aar $1==INT32                  /* Index array */
-               with GPR3 GPR4 GPR5
+       pat aar $1==4                      /* Address of array element */
+               leaving
+                       cal ".aar4"
+
+       pat lar $1==4                      /* Load from array */
+               with STACK
                        kills ALL
                        gen
                                bl {LABEL, ".aar4"}
-                       yields R3
+                               /* pass r3 = size from .aar4 to .los4 */
+                               bl {LABEL, ".los4"}
 
-       pat lae lar $2==INT32 && nicesize(rom($1, 3)) /* Load array */
+       pat lae lar $2==4 && nicesize(rom($1, 3))
                leaving
                        lae $1
-                       aar INT32
+                       aar 4
                        loi rom($1, 3)
 
-       pat lar $1==INT32                  /* Load array */
-               with GPR3 GPR4 GPR5 STACK
+       pat sar $1==4                      /* Store to array */
+               with STACK
                        kills ALL
                        gen
-                               bl {LABEL, ".lar4"}
+                               bl {LABEL, ".aar4"}
+                               /* pass r3 = size from .aar4 to .sts4 */
+                               bl {LABEL, ".sts4"}
 
-       pat lae sar $2==INT32 && nicesize(rom($1, 3)) /* Store array */
+       pat lae sar $2==4 && nicesize(rom($1, 3))
                leaving
                        lae $1
-                       aar INT32
+                       aar 4
                        sti rom($1, 3)
 
-       pat sar $1==INT32                  /* Store array */
-               with GPR3 GPR4 GPR5 STACK
-                       kills ALL
-                       gen
-                               bl {LABEL, ".sar4"}
-
 
 /* Sets */