From 3cc9bad5f94e08ade54ffc18ab521b8c343ffae4 Mon Sep 17 00:00:00 2001
From: Andrea Palmate <andrea.palmate@gmail.com>
Date: Fri, 28 Jul 2023 17:49:03 +0200
Subject: [PATCH] More SPE changes and tests..

---
 GNUmakefile.os4           |  5 +-
 README.md                 |  2 +-
 library/include/fenv.h    | 28 +++++------
 library/include/math.h    |  8 ++--
 library/math/e_sqrt.c     |  4 ++
 library/math/e_sqrtf.c    | 98 ++++++++++++++++++++++++++++++++++++++-
 library/math/finite.c     | 21 ++++++++-
 library/math/init_exit.c  |  2 +-
 library/math/s_isfinite.c |  1 +
 library/math/s_nanf.c     |  2 +-
 library/math/s_signbit.c  | 39 ----------------
 library/stdio/vfprintf.c  |  9 +++-
 12 files changed, 153 insertions(+), 66 deletions(-)

diff --git a/GNUmakefile.os4 b/GNUmakefile.os4
index c0bdf460..d5e14f63 100644
--- a/GNUmakefile.os4
+++ b/GNUmakefile.os4
@@ -119,9 +119,10 @@ AFLAGS     := -Wa,-mregnames -mstrict-align
 ifdef SPE
     CC := ppc-amigaos-gcc-6.4.0
     AS := ppc-amigaos-as-6.4.0
-    CFLAGS := $(CFLAGS) -D__SPE__ -mspe -mcpu=8540 -mabi=spe -mfloat-gprs=double -ffloat-store -fno-fast-math -fno-inline-functions -fno-partial-inlining \
+    CMATH := -mfpu=sp_full
+    CFLAGS := $(CFLAGS) -D__SPE__ -mspe -mcpu=8540 -mabi=spe -mfloat-gprs=double $(CMATH) -fno-inline-functions -fno-partial-inlining \
                 -fno-align-functions -fno-align-jumps -fno-align-loops -fno-align-labels -fno-inline-small-functions -fno-indirect-inlining -Wno-overflow -Wno-unused-but-set-variable -Wno-uninitialized #-Wdouble-promotion
-    AFLAGS := $(AFLAGS) -mvrsave -D__SPE__ -mspe -mcpu=8540 -ffloat-store -mfloat-gprs=double -fno-fast-math -Wno-overflow
+    AFLAGS := $(AFLAGS) -mvrsave -D__SPE__ -mspe -mcpu=8540 -mfloat-gprs=double $(CMATH) -Wno-overflow
 endif
 
 VERBOSE    ?= @
diff --git a/README.md b/README.md
index a5c51dec..b0a4fdfd 100755
--- a/README.md
+++ b/README.md
@@ -1,4 +1,4 @@
-A C runtime library for AmigaOS4
+ C runtime library for AmigaOS4
 
 [![Build Status](https://travis-ci.com/afxgroup/clib2.svg?branch=master)](https://travis-ci.org/afxgroup/clib2)
 [![License](https://img.shields.io/badge/License-BSD%203--Clause-blue.svg)](https://opensource.org/licenses/BSD-3-Clause)
diff --git a/library/include/fenv.h b/library/include/fenv.h
index 7a4c8676..3ebe466e 100755
--- a/library/include/fenv.h
+++ b/library/include/fenv.h
@@ -8,6 +8,7 @@
 #include <stdint.h>
 #include <features.h>
 #include <endian.h>
+#include <stdbool.h>
 
 #ifndef	__fenv_static
 #define	__fenv_static	static
@@ -63,20 +64,16 @@ extern const fenv_t __fe_dfl_env;
 #define _ENABLE_MASK    ((FE_DIVBYZERO | FE_INEXACT | FE_INVALID | FE_OVERFLOW | FE_UNDERFLOW) >> _FPUSW_SHIFT)
 
 #ifndef _SOFT_FLOAT
-#ifdef __SPE__
-#define	__mffs(__env) \
-	__asm __volatile("mfspr %0, 512" : "=r" ((__env)->__bits.__reg))
-#define	__mtfsf(__env) \
-	__asm __volatile("mtspr 512,%0;isync" :: "r" ((__env).__bits.__reg))
-#else
-#define	__mffs(__env) \
-	__asm __volatile("mffs %0" : "=f" ((__env)->__d))
-#define	__mtfsf(__env) \
-	__asm __volatile("mtfsf 255,%0" :: "f" ((__env).__d))
-#endif
+    #ifdef __SPE__
+        #define	__mffs(__env) __asm __volatile("mfspr %0, 512" : "=r" ((__env)->__bits.__reg))
+        #define	__mtfsf(__env) __asm __volatile("mtspr 512,%0;isync" :: "r" ((__env).__bits.__reg))
+    #else
+        #define	__mffs(__env) __asm __volatile("mffs %0" : "=f" ((__env)->__d))
+        #define	__mtfsf(__env) __asm __volatile("mtfsf 255,%0" :: "f" ((__env).__d))
+    #endif
 #else
-#define	__mffs(__env)
-#define	__mtfsf(__env)
+    #define	__mffs(__env)
+    #define	__mtfsf(__env)
 #endif
 
 union __fpscr {
@@ -92,6 +89,11 @@ union __fpscr {
     } __bits;
 };
 
+struct rm_ctx {
+    fenv_t env;
+    bool updated_status;
+};
+
 __fenv_static inline int
 feclearexcept(int __excepts)
 {
diff --git a/library/include/math.h b/library/include/math.h
index 3326bded..b7628027 100755
--- a/library/include/math.h
+++ b/library/include/math.h
@@ -166,10 +166,10 @@ extern int *__signgam(void);
 #define    isnan(x) __isnan(x)
 #endif
 
-extern int finitef (float);
-extern int finitel (long double);
-extern int isinff (float);
-extern int isnanf (float);
+#define finitef(x) __isfinite_float(x)
+#define finitel(x) __isfinite_long_double(x)
+#define isinff(x)  __isinff(x)
+#define isnanf(x)  __isnanf(x)
 
 extern float acosf(float x);
 extern float asinf(float x);
diff --git a/library/math/e_sqrt.c b/library/math/e_sqrt.c
index 8de59b66..9e88ebcd 100644
--- a/library/math/e_sqrt.c
+++ b/library/math/e_sqrt.c
@@ -11,6 +11,9 @@ static	const float	one	= 1.0, tiny = 1.0e-30;
 double
 __ieee754_sqrt(double x) {
     double z;
+#ifndef __SPE__
+    asm ("fsqrt %0,%1\n" :"=f" (z):"f" (x));
+#else
     int32_t sign = (int) 0x80000000;
     int32_t ix0, s0, q, m, t, i;
     uint32_t r, t1, s1, ix1, q1;
@@ -107,5 +110,6 @@ __ieee754_sqrt(double x) {
     if ((q & 1) == 1) ix1 |= sign;
     ix0 += (m << 20);
     INSERT_WORDS(z, ix0, ix1);
+#endif
     return z;
 }
diff --git a/library/math/e_sqrtf.c b/library/math/e_sqrtf.c
index f5a1e94d..c5d281c3 100644
--- a/library/math/e_sqrtf.c
+++ b/library/math/e_sqrtf.c
@@ -6,11 +6,30 @@
 #include "math_headers.h"
 #endif /* _MATH_HEADERS_H */
 
+#ifdef __SPE__
+/* CN = 1+2**27 = '41a0000002000000' IEEE double format.  Use it to split a
+   double for better accuracy.  */
+#define  CN   134217729.0
+#define  EMULV(x, y, z, zz)          \
+    ({  __typeof__ (x) __p, hx, tx, hy, ty;          \
+        __p = CN * (x);  hx = ((x) - __p) + __p;  tx = (x) - hx; \
+        __p = CN * (y);  hy = ((y) - __p) + __p;  ty = (y) - hy; \
+        z = (x) * (y); zz = (((hx * hy - z) + hx * ty) + tx * hy) + tx * ty; \
+    })
+
+
+#include "root.tbl"
+#else
 static const float one = 1.0, tiny = 1.0e-30;
+#endif
 
 float
 __ieee754_sqrtf(float x) {
     float z;
+#ifndef __SPE__
+    asm ("fsqrts %0,%1\n" :"=f" (z):"f" (x));
+#else
+#if 0
     int32_t sign = (int) 0x80000000;
     int32_t ix, s, q, m, t, i;
     uint32_t r;
@@ -24,7 +43,8 @@ __ieee754_sqrtf(float x) {
     }
     /* take care of zero */
     if (ix <= 0) {
-        if ((ix & (~sign)) == 0) return x;/* sqrt(+-0) = +-0 */
+        if ((ix & (~sign)) == 0)
+            return x;/* sqrt(+-0) = +-0 */
         else if (ix < 0)
             return (x - x) / (x - x);        /* sqrt(-ve) = sNaN */
     }
@@ -70,5 +90,81 @@ __ieee754_sqrtf(float x) {
     ix = (q >> 1) + 0x3f000000;
     ix += (m << 23);
     SET_FLOAT_WORD(z, ix);
+#else
+    /* Use generic implementation.  */
+    static const double
+            rt0 = 9.99999999859990725855365213134618E-01,
+            rt1 = 4.99999999495955425917856814202739E-01,
+            rt2 = 3.75017500867345182581453026130850E-01,
+            rt3 = 3.12523626554518656309172508769531E-01;
+    static const double big = 134217728.0;
+    double y, t, del, res, res1, hy, z1, zz, s;
+    mynumber a, c = {{0, 0}};
+    int4 k;
+
+    a.x = x;
+    k = a.i[HIGH_HALF];
+    a.i[HIGH_HALF] = (k & 0x001fffff) | 0x3fe00000;
+    t = inroot[(k & 0x001fffff) >> 14];
+    s = a.x;
+    /*----------------- 2^-1022  <= | x |< 2^1024  -----------------*/
+    if (k > 0x000fffff && k < 0x7ff00000) {
+        int rm = fegetround();
+        fenv_t env;
+        feholdexcept(&env);
+        fesetround(FE_TONEAREST);
+        double ret;
+        y = 1.0 - t * (t * s);
+        t = t * (rt0 + y * (rt1 + y * (rt2 + y * rt3)));
+        c.i[HIGH_HALF] = 0x20000000 + ((k & 0x7fe00000) >> 1);
+        y = t * s;
+        hy = (y + big) - big;
+        del = 0.5 * t * ((s - hy * hy) - (y - hy) * (y + hy));
+        res = y + del;
+        if (res == (res + 1.002 * ((y - res) + del)))
+            ret = res * c.x;
+        else {
+            res1 = res + 1.5 * ((y - res) + del);
+            EMULV(res, res1, z1, zz); /* (z1+zz)=res*res1 */
+            res = ((((z1 - s) + zz) < 0) ? max (res, res1) :
+                   min (res, res1));
+            ret = res * c.x;
+        }
+        math_force_eval(ret);
+        fesetenv(&env);
+        double dret = x / ret;
+        if (dret != ret) {
+            double force_inexact = 1.0 / 3.0;
+            math_force_eval(force_inexact);
+            /* The square root is inexact, ret is the round-to-nearest
+               value which may need adjusting for other rounding
+               modes.  */
+            switch (rm) {
+                case FE_UPWARD:
+                  if (dret > ret)
+                ret = (res + 0x1p-1022) * c.x;
+                  break;
+                case FE_DOWNWARD:
+                case FE_TOWARDZERO:
+                    if (dret < ret)
+                        ret = (res - 0x1p-1022) * c.x;
+                    break;
+                default:
+                    break;
+            }
+        }
+        /* Otherwise (x / ret == ret), either the square root was exact or the division was inexact.  */
+        return ret;
+    } else {
+        if ((k & 0x7ff00000) == 0x7ff00000)
+            return x * x + x; /* sqrt(NaN)=NaN, sqrt(+inf)=+inf, sqrt(-inf)=sNaN */
+        if (x == 0)
+            return x;       /* sqrt(+0)=+0, sqrt(-0)=-0 */
+        if (k < 0)
+            return (x - x) / (x - x); /* sqrt(-ve)=sNaN */
+        return 0x1p-256 * __ieee754_sqrt(x * 0x1p512);
+    }
+#endif
+#endif
     return z;
 }
diff --git a/library/math/finite.c b/library/math/finite.c
index b7e7629a..b669991e 100644
--- a/library/math/finite.c
+++ b/library/math/finite.c
@@ -7,8 +7,25 @@
 #endif /* _MATH_HEADERS_H */
 
 int
-finite(double x) {
+finite(double number) {
+#ifndef __SPE__
     int32 hx;
-    GET_HIGH_WORD(hx, x);
+    GET_HIGH_WORD(hx, number);
     return (int) ((uint32)((hx & 0x7fffffff) - 0x7ff00000) >> 31);
+#else
+    union ieee_double x;
+	int result;
+
+	x.value = number;
+
+	if(((x.raw[0] & 0x7ff00000) == 0x7ff00000) && ((x.raw[0] & 0x000fffff) != 0 || (x.raw[1] != 0)))
+		result = 0; /* Exponent = 2047 and fraction != 0.0 -> not a number */
+	else if (((x.raw[0] & 0x7fffffff) == 0x7ff00000) && (x.raw[1] == 0))
+		result = 0; /* Exponent = 2047 and fraction = 0.0 -> infinity */
+	else
+		result = 1;
+    //Printf("finite %ld\n", result);
+
+	return(result);
+#endif
 }
\ No newline at end of file
diff --git a/library/math/init_exit.c b/library/math/init_exit.c
index 72ce1ad4..53d7e107 100644
--- a/library/math/init_exit.c
+++ b/library/math/init_exit.c
@@ -45,7 +45,7 @@ MATH_CONSTRUCTOR(math_init) {
     single_x->raw[0] = 0x7f800000;
 
     single_x = (union ieee_single *) &__clib2->__nan;
-    single_x->raw[0] = 0x7fc00001;
+    single_x->raw[0] = 0x7fc00000;
 
     success = TRUE;
 
diff --git a/library/math/s_isfinite.c b/library/math/s_isfinite.c
index c7885a55..6828b824 100644
--- a/library/math/s_isfinite.c
+++ b/library/math/s_isfinite.c
@@ -37,6 +37,7 @@ __isfinite_double(double d) {
 	int result;
 
 	x.value = d;
+    //Printf("isfinite %ld %ld %ld\n", d, x.raw[0], x.raw[1]);
 
 	if(((x.raw[0] & 0x7ff00000) == 0x7ff00000) && ((x.raw[0] & 0x000fffff) != 0 || (x.raw[1] != 0)))
 		result = 0; /* Exponent = 2047 and fraction != 0.0 -> not a number */
diff --git a/library/math/s_nanf.c b/library/math/s_nanf.c
index 4286924c..f552905b 100644
--- a/library/math/s_nanf.c
+++ b/library/math/s_nanf.c
@@ -52,7 +52,7 @@ nanf(const char *s) {
     union ieee_single x;
 
 	/* Exponent = 255 and fraction != 0.0; this must be a quiet nan. */
-	x.raw[0] = 0x7fc00001;
+	x.raw[0] = 0x7fc00000;
 
 	return x.value;
 #endif
diff --git a/library/math/s_signbit.c b/library/math/s_signbit.c
index f83e0fe6..e565543b 100644
--- a/library/math/s_signbit.c
+++ b/library/math/s_signbit.c
@@ -6,62 +6,23 @@
 #include "math_headers.h"
 #endif /* _MATH_HEADERS_H */
 
-#ifndef __SPE__
 int
 __signbit_double(double d) {
-#ifndef __SPE__
     union IEEEd2bits u;
 
     u.d = d;
     return (u.bits.sign);
-#else
-    union ieee_double x;
-	int result;
-
-	x.value = d;
-
-	result = ((x.raw[0] = 0x80000000) != 0);
-
-	return(result);
-#endif
 }
 
 int
 __signbit_float(float f) {
-#ifndef __SPE__
     union IEEEf2bits u;
 
     u.f = f;
     return (u.bits.sign);
-#else
-    union ieee_single x;
-	int result;
-
-	x.value = f;
-
-	result = ((x.raw[0] = 0x80000000) != 0);
-
-	return(result);
-#endif
 }
 
 int
 __signbit_long_double(long double e) {
     return __signbit_double(e);
 }
-#else
-int
-__signbit_double(double d) {
-    return __builtin_signbit(d);
-}
-
-int
-__signbit_float(float f) {
-    return __builtin_signbitf(f);
-}
-
-int
-__signbit_long_double(long double e) {
-    return __builtin_signbitl(e);
-}
-#endif
\ No newline at end of file
diff --git a/library/stdio/vfprintf.c b/library/stdio/vfprintf.c
index 1f65c920..48c28b84 100644
--- a/library/stdio/vfprintf.c
+++ b/library/stdio/vfprintf.c
@@ -109,6 +109,7 @@ static int fmt_fp(Out *f, long double y, int w, int p, int fl, int t) {
 
     pl = 1;
     if (signbit(y)) {
+        //Printf("negative\n");
         y = -y;
     } else if (fl & __U_MARK_POS) {
         prefix += 3;
@@ -117,15 +118,19 @@ static int fmt_fp(Out *f, long double y, int w, int p, int fl, int t) {
     } else prefix++, pl = 0;
 
     if (!isfinite(y)) {
+        //Printf("!isfinite\n");
         char *s = (t & 32) ? "inf" : "INF";
-        if (y != y) s = (t & 32) ? "nan" : "NAN";
+        if (y != y)
+            s = (t & 32) ? "nan" : "NAN";
         pad(f, ' ', w, 3 + pl, fl & ~__U_ZERO_PAD);
         out(f, prefix, pl);
         out(f, s, 3);
         pad(f, ' ', w, 3 + pl, fl ^ __S_LEFT_ADJ);
         return MAX(w, 3 + pl);
     }
-
+    else {
+        //Printf("isfinite\n");
+    }
     y = frexpl(y, &e2) * 2;
 
     if (y)