In /vm_asm.py implement imm.iXX and imm.fXX constant optimization using cvt.XX
author Nick Downing <nick@ndcode.org>
Mon, 18 Dec 2023 10:16:31 +0000 (21:16 +1100)
committer Nick Downing <nick@ndcode.org>
Mon, 18 Dec 2023 11:03:17 +0000 (22:03 +1100)
.gitignore
Makefile
f128_limits.c [moved from quadmath.c with 88% similarity]
fxx_limits.c [new file with mode: 0644]
vm_asm.py

index f3ef142..e1f947f 100644 (file)
@@ -4,9 +4,10 @@
 *.xml
 __pycache__
 /element.py
+/f128_limits
+/fxx_limits
 /lex_yy.py
 /pretty_print.py
-/quadmath
 /t_def.py
 /vm
 /y_tab.py
index 5695af2..6214a09 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -1,6 +1,15 @@
 CFLAGS=-Wall
 
-all: element.py lex_yy.py t_def.py y_tab.py pretty_print.py vm vm_test.bin quadmath
+all: \
+element.py \
+lex_yy.py \
+t_def.py \
+y_tab.py \
+pretty_print.py \
+vm \
+vm_test.bin \
+fxx_limits \
+f128_limits
 
 element.py:
        pitree --install-element
@@ -25,13 +34,17 @@ vm: vm.o
        ${CC} -o $@ $<
 
 vm_test.bin: vm_test.asm
-       ./vm_asm.py -l vm_test.lst -o vm_test.bin vm_test.asm
+       ./vm_asm.py -l vm_test_128.lst -o $@ $^
 
-quadmath.o: quadmath.c
-quadmath: quadmath.o
+fxx_limits.o: fxx_limits.c
+fxx_limits: fxx_limits.o
+       ${CC} -o $@ $<
+
+f128_limits.o: f128_limits.c
+f128_limits: f128_limits.o
        ${CC} -o $@ $< -lquadmath
 
 clean:
        rm -f \
 element.py lex_yy.py t_def.py y_tab.py pretty_print.py \
-vm quadmath *.o *.bin
+vm fxx_limits f128_limits *.o *.bin
similarity index 88%
rename from quadmath.c
rename to f128_limits.c
index 33365fb..2995fb7 100644 (file)
@@ -51,5 +51,10 @@ int main ()
   if ((size_t) n < sizeof buf)
     printf ("%s\n", buf);
 
+  uint64_t d[2] = {0xffffffffffffffffUL, 0x7ffeffffffffffffUL}; // largest representable number
+  n = quadmath_snprintf (buf, sizeof buf, "%.36Qe", *(__float128 *)d);
+  if ((size_t) n < sizeof buf)
+    printf ("%s\n", buf);
+
   return 0;
 }
diff --git a/fxx_limits.c b/fxx_limits.c
new file mode 100644 (file)
index 0000000..bc2a1f1
--- /dev/null
@@ -0,0 +1,15 @@
+#include <stdio.h>
+// Print most negative finite, largest finite and smallest subnormal float/double.
+int main(void) {
+  unsigned int a = 0xff7fffffU; // -FLT_MAX: sign set, exponent 0xfe, mantissa all ones
+  unsigned int b = 0x7f7fffffU; // FLT_MAX: exponent 0xfe (0xff is reserved for inf/NaN)
+  unsigned int c = 0x00000001U; // smallest positive subnormal float
+  printf("%.8e %.8e %.8e\n", *(float *)&a, *(float *)&b, *(float *)&c);
+
+  unsigned long long d = 0xffefffffffffffffULL; // -DBL_MAX: exponent 0x7fe, mantissa all ones
+  unsigned long long e = 0x7fefffffffffffffULL; // DBL_MAX: exponent 0x7fe (0x7ff is inf/NaN)
+  unsigned long long f = 0x0000000000000001ULL; // smallest positive subnormal double
+  printf("%.16e %.16e %.16e\n", *(double *)&d, *(double *)&e, *(double *)&f);
+
+  return 0;
+}
index 0de9eaa..d4551c5 100755 (executable)
--- a/vm_asm.py
+++ b/vm_asm.py
@@ -195,48 +195,97 @@ mnemonics = {
   'trap':              (0x8a, OPERAND_TYPE_NONE),
 }
 
-# these opcodes can be automatically generated by an OPERAND_TYPE_AUTO
+# for optimizing immediates
+OP_IMM_I8 = 0x00
+OP_IMM_F32 = 0x05
+OP_CVT_I8 = 0x08
+OP_CVT_F32 = 0x12
+
+# these opcodes can be automatically generated for optimizations
 op_to_mnemonic = {
+  0x00: 'imm.i8',
+  0x01: 'imm.i16',
+  0x02: 'imm.i32',
+  0x03: 'imm.i64',
+  0x04: 'imm.i128',
+  0x05: 'imm.f32',
+  0x06: 'imm.f64',
+  0x07: 'imm.f128',
+
+  0x08: 'cvt.i8',
+  0x09: 'cvt.u.i8',
+  0x0a: 'cvt.i16',
+  0x0b: 'cvt.u.i16',
+  0x0c: 'cvt.i32',
+  0x0d: 'cvt.u.i32',
+  0x0e: 'cvt.i64',
+  0x0f: 'cvt.u.i64',
+  0x10: 'cvt.i128',
+  0x11: 'cvt.u.i128',
+  0x12: 'cvt.f32',
+  0x13: 'cvt.u.f32',
+  0x14: 'cvt.f64',
+  0x15: 'cvt.u.f64',
+  0x16: 'cvt.f128',
+  0x17: 'cvt.u.f128',
+
   0x23: 'offpc.i8',
   0x24: 'offpc.i16',
   0x25: 'offpc.i32',
+
   0x26: 'adjpc.i8',
   0x27: 'adjpc.i16',
   0x28: 'adjpc.i32',
+
   0x2b: 'offsp.i8',
   0x2c: 'offsp.i16',
   0x2d: 'offsp.i32',
+
   0x2e: 'adjsp.i8',
   0x2f: 'adjsp.i16',
   0x30: 'adjsp.i32',
+
   0x33: 'offix.i8',
   0x34: 'offix.i16',
   0x35: 'offix.i32',
+
   0x36: 'adjix.i8',
   0x37: 'adjix.i16',
   0x38: 'adjix.i32',
+
   0x3b: 'offiy.i8',
   0x3c: 'offiy.i16',
   0x3d: 'offiy.i32',
+
   0x3e: 'adjiy.i8',
   0x3f: 'adjiy.i16',
   0x40: 'adjiy.i32',
+
   0x7f: 'adjpc.f.i8',
   0x80: 'adjpc.f.i16',
   0x81: 'adjpc.f.i32',
+
   0x82: 'adjpc.t.i8',
   0x83: 'adjpc.t.i16',
   0x84: 'adjpc.t.i32',
+
   0x86: 'call.i8',
   0x87: 'call.i16',
   0x88: 'call.i32',
 }
 
-# for auto operands
-size_to_operand_type = {
+size_to_operand_type_i = {
   1: OPERAND_TYPE_I8,
   2: OPERAND_TYPE_I16,
   4: OPERAND_TYPE_I32,
+  8: OPERAND_TYPE_I64,
+  0x10: OPERAND_TYPE_I128,
+}
+
+size_to_operand_type_f = {
+  4: OPERAND_TYPE_F32,
+  8: OPERAND_TYPE_F64,
+  0x10: OPERAND_TYPE_F128,
 }
 
 operand_type_to_size = {
@@ -250,6 +299,24 @@ operand_type_to_size = {
   OPERAND_TYPE_F128: 0x10,
 }
 
+F32_CONTEXT = gmpy2.ieee(32)
+F32_MIN = gmpy2.mpfr('-1.70141173e+38', context = F32_CONTEXT)
+F32_MAX = gmpy2.mpfr('1.70141173e+38', context = F32_CONTEXT)
+
+F64_CONTEXT = gmpy2.ieee(64)
+F64_MIN = gmpy2.mpfr('-5.4861240687936881e+303', context = F64_CONTEXT)
+F64_MAX = gmpy2.mpfr('5.4861240687936881e+303', context = F64_CONTEXT)
+
+F128_CONTEXT = gmpy2.ieee(128)
+F128_MIN = gmpy2.mpfr(
+  '-1.189731495357231765085759326628007016e+4932',
+  context = F128_CONTEXT
+)
+F128_MAX = gmpy2.mpfr(
+  '1.189731495357231765085759326628007016e+4932',
+  context = F128_CONTEXT
+)
+
 list_file = None
 out_file = None
 while True:
@@ -392,11 +459,14 @@ while True:
             operand_mpfr = None
             operand_int = 0
             if operand_type == OPERAND_TYPE_F32:
-              operand_mpfr = gmpy2.mpfr(operand_str, context = gmpy2.ieee(32))
+              operand_mpfr = gmpy2.mpfr(operand_str, context = F32_CONTEXT)
+              assert operand_mpfr >= F32_MIN and operand_mpfr <= F32_MAX
             elif operand_type == OPERAND_TYPE_F64:
-              operand_mpfr = gmpy2.mpfr(operand_str, context = gmpy2.ieee(64))
+              operand_mpfr = gmpy2.mpfr(operand_str, context = F64_CONTEXT)
+              assert operand_mpfr >= F64_MIN and operand_mpfr <= F64_MAX
             elif operand_type == OPERAND_TYPE_F128:
-              operand_mpfr = gmpy2.mpfr(operand_str, context = gmpy2.ieee(128))
+              operand_mpfr = gmpy2.mpfr(operand_str, context = F128_CONTEXT)
+              assert operand_mpfr >= F128_MIN and operand_mpfr <= F128_MAX
             else:
               pc_rel = False
               operand_str2 = operand_str
@@ -454,7 +524,7 @@ while True:
                   autos[auto] = size
                 auto += 1
 
-                operand_type = size_to_operand_type[size]
+                operand_type = size_to_operand_type_i[size]
                 op += operand_type - OPERAND_TYPE_I8
                 mnemonic = op_to_mnemonic[op]
 
@@ -468,9 +538,112 @@ while True:
                   next_pc += size
                   operand_int -= next_pc
 
-            if op == -1: # .type pseudo-op
+            cvt_op = -1
+            if op < OP_IMM_I8: # .type pseudo-op
+              # we will not write an op, only the operand
+              # therefore, put the mnemonic on the same line as the operand
               operand_str = f'{mnemonic:s} {operand_str:s}'
             else:
+              # we will write an op, then the operand and possible conversion
+              if op < OP_IMM_F32 : # imm.iXX op
+                size = operand_type_to_size[operand_type]
+                mask = (1 << (size << 3)) - 1
+                offset = 1 << ((size << 3) - 1)
+                operand_int = ((operand_int + offset) & mask) - offset
+                operand_int_u = operand_int & mask
+                if operand_int >= -0x80 and operand_int < 0x80:
+                  new_size = 1
+                  unsigned = False
+                elif operand_int_u < 0x100:
+                  new_size = 1
+                  unsigned = True
+                elif operand_int >= -0x8000 and operand_int < 0x8000:
+                  new_size = 2
+                  unsigned = False
+                elif operand_int_u < 0x10000:
+                  new_size = 2
+                  unsigned = True
+                elif operand_int >= -0x80000000 and operand_int < 0x80000000:
+                  new_size = 4
+                  unsigned = False
+                elif operand_int_u < 0x100000000:
+                  new_size = 4
+                  unsigned = True
+                elif (
+                  operand_int >= -0x8000000000000000 and
+                  operand_int < 0x8000000000000000
+                ):
+                  new_size = 8
+                  unsigned = False
+                elif operand_int_u < 0x10000000000000000:
+                  new_size = 8
+                  unsigned = True
+                else:
+                  new_size = 0x10
+                  unsigned = False # won't require conversion, so ignored
+                assert new_size <= size
+
+                if _pass == 0:
+                  autos.append(new_size)
+                elif new_size < autos[auto]:
+                  # auto immediates cannot get smaller
+                  # this prevents infinite passes due to alignment requirements
+                  new_size = autos[auto]
+                elif new_size != autos[auto]:
+                  modified = True
+                  fuzz += new_size - autos[auto]
+                  autos[auto] = new_size
+                auto += 1
+
+                if new_size < size:
+                  cvt_op = OP_CVT_I8 + int(unsigned) + (
+                    operand_type - OPERAND_TYPE_I8
+                  ) * 2
+                  operand_type = size_to_operand_type_i[new_size]
+                  op = OP_IMM_I8 + operand_type - OPERAND_TYPE_I8
+                  mnemonic = op_to_mnemonic[op]
+              elif op < OP_CVT_I8: # imm.fXX op
+                size = operand_type_to_size[operand_type]
+                if (
+                  operand_mpfr >= F32_MIN and
+                  operand_mpfr <= F32_MAX and
+                  struct.unpack(
+                    '<f',
+                    struct.pack('<f', float(operand_mpfr))
+                  )[0] == operand_mpfr
+                ):
+                  new_size = 4
+                elif (
+                  operand_mpfr >= F64_MIN and
+                  operand_mpfr <= F64_MAX and
+                  struct.unpack(
+                    '<d',
+                    struct.pack('<d', float(operand_mpfr))
+                  )[0] == operand_mpfr
+                ):
+                  new_size = 8
+                else:
+                  new_size = 16
+                assert new_size <= size
+
+                if _pass == 0:
+                  autos.append(new_size)
+                elif new_size < autos[auto]:
+                  # auto immediates cannot get smaller
+                  # this prevents infinite passes due to alignment requirements
+                  new_size = autos[auto]
+                elif new_size != autos[auto]:
+                  modified = True
+                  fuzz += new_size - autos[auto]
+                  autos[auto] = new_size
+                auto += 1
+
+                if new_size < size:
+                  cvt_op = OP_CVT_F32 + (operand_type - OPERAND_TYPE_F32) * 2
+                  operand_type = size_to_operand_type_f[new_size]
+                  op = OP_IMM_F32 + operand_type - OPERAND_TYPE_F32
+                  mnemonic = op_to_mnemonic[op]
+
               if flist is not None:
                 flist.write(f'{pc:08x} {op:02x}\t\t\t\t\t{mnemonic:s}\n')
               if fout is not None:
@@ -533,7 +706,7 @@ while True:
               pc += 0x10
             elif operand_type == OPERAND_TYPE_F32:
               data = struct.pack('<f', float(operand_mpfr))
-              operand_int = struct.unpack('<I', data)
+              operand_int, = struct.unpack('<I', data)
               if flist is not None:
                 flist.write(f'{pc:08x} {operand_int:08x}\t\t\t\t{operand_str:s}\n')
               if fout is not None:
@@ -541,7 +714,7 @@ while True:
               pc += 4
             elif operand_type == OPERAND_TYPE_F64:
               data = struct.pack('<d', float(operand_mpfr))
-              operand_int = struct.unpack('<Q', data)
+              operand_int, = struct.unpack('<Q', data)
               if flist is not None:
                 flist.write(f'{pc:08x} {operand_int:016x}\t\t\t{operand_str:s}\n')
               if fout is not None:
@@ -552,7 +725,8 @@ while True:
               exponent, mantissa = gmpy2.frexp(
                 gmpy2.set_sign(operand_mpfr, False)
               )
-              assert exponent >= -111 - 0x3ffe and exponent < 0x8000 - 0x3ffe
+              # note: exponent value 0x7fff is reserved for NaNs, infinities
+              assert exponent >= -111 - 0x3ffe and exponent < 0x7fff - 0x3ffe
               mantissa = int(mantissa * (1 << 113))
               if mantissa == 0:
                 assert exponent == 0
@@ -579,6 +753,14 @@ while True:
             else:
               assert False
 
+            if cvt_op >= 0:
+              mnemonic = op_to_mnemonic[cvt_op]
+              if flist is not None:
+                flist.write(f'{pc:08x} {cvt_op:02x}\t\t\t\t\t{mnemonic:s}\n')
+              if fout is not None:
+                fout.write(bytes([cvt_op]))
+              pc += 1
+
   if flist is not None:
     flist.close()
   if fout is not None: