From 3cc9bad5f94e08ade54ffc18ab521b8c343ffae4 Mon Sep 17 00:00:00 2001 From: Andrea Palmate Date: Fri, 28 Jul 2023 17:49:03 +0200 Subject: [PATCH] More SPE changes and tests.. --- GNUmakefile.os4 | 5 +- README.md | 2 +- library/include/fenv.h | 28 +++++------ library/include/math.h | 8 ++-- library/math/e_sqrt.c | 4 ++ library/math/e_sqrtf.c | 98 ++++++++++++++++++++++++++++++++++++++- library/math/finite.c | 21 ++++++++- library/math/init_exit.c | 2 +- library/math/s_isfinite.c | 1 + library/math/s_nanf.c | 2 +- library/math/s_signbit.c | 39 ---------------- library/stdio/vfprintf.c | 9 +++- 12 files changed, 153 insertions(+), 66 deletions(-) diff --git a/GNUmakefile.os4 b/GNUmakefile.os4 index c0bdf460..d5e14f63 100644 --- a/GNUmakefile.os4 +++ b/GNUmakefile.os4 @@ -119,9 +119,10 @@ AFLAGS := -Wa,-mregnames -mstrict-align ifdef SPE CC := ppc-amigaos-gcc-6.4.0 AS := ppc-amigaos-as-6.4.0 - CFLAGS := $(CFLAGS) -D__SPE__ -mspe -mcpu=8540 -mabi=spe -mfloat-gprs=double -ffloat-store -fno-fast-math -fno-inline-functions -fno-partial-inlining \ + CMATH := -mfpu=sp_full + CFLAGS := $(CFLAGS) -D__SPE__ -mspe -mcpu=8540 -mabi=spe -mfloat-gprs=double $(CMATH) -fno-inline-functions -fno-partial-inlining \ -fno-align-functions -fno-align-jumps -fno-align-loops -fno-align-labels -fno-inline-small-functions -fno-indirect-inlining -Wno-overflow -Wno-unused-but-set-variable -Wno-uninitialized #-Wdouble-promotion - AFLAGS := $(AFLAGS) -mvrsave -D__SPE__ -mspe -mcpu=8540 -ffloat-store -mfloat-gprs=double -fno-fast-math -Wno-overflow + AFLAGS := $(AFLAGS) -mvrsave -D__SPE__ -mspe -mcpu=8540 -mfloat-gprs=double $(CMATH) -Wno-overflow endif VERBOSE ?= @ diff --git a/README.md b/README.md index a5c51dec..b0a4fdfd 100755 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -A C runtime library for AmigaOS4 + C runtime library for AmigaOS4 [![Build Status](https://travis-ci.com/afxgroup/clib2.svg?branch=master)](https://travis-ci.org/afxgroup/clib2) [![License](https://img.shields.io/badge/License-BSD%203--Clause-blue.svg)](https://opensource.org/licenses/BSD-3-Clause) diff --git a/library/include/fenv.h b/library/include/fenv.h index 7a4c8676..3ebe466e 100755 --- a/library/include/fenv.h +++ b/library/include/fenv.h @@ -8,6 +8,7 @@ #include #include #include +#include #ifndef __fenv_static #define __fenv_static static @@ -63,20 +64,16 @@ extern const fenv_t __fe_dfl_env; #define _ENABLE_MASK ((FE_DIVBYZERO | FE_INEXACT | FE_INVALID | FE_OVERFLOW | FE_UNDERFLOW) >> _FPUSW_SHIFT) #ifndef _SOFT_FLOAT -#ifdef __SPE__ -#define __mffs(__env) \ - __asm __volatile("mfspr %0, 512" : "=r" ((__env)->__bits.__reg)) -#define __mtfsf(__env) \ - __asm __volatile("mtspr 512,%0;isync" :: "r" ((__env).__bits.__reg)) -#else -#define __mffs(__env) \ - __asm __volatile("mffs %0" : "=f" ((__env)->__d)) -#define __mtfsf(__env) \ - __asm __volatile("mtfsf 255,%0" :: "f" ((__env).__d)) -#endif + #ifdef __SPE__ + #define __mffs(__env) __asm __volatile("mfspr %0, 512" : "=r" ((__env)->__bits.__reg)) + #define __mtfsf(__env) __asm __volatile("mtspr 512,%0;isync" :: "r" ((__env).__bits.__reg)) + #else + #define __mffs(__env) __asm __volatile("mffs %0" : "=f" ((__env)->__d)) + #define __mtfsf(__env) __asm __volatile("mtfsf 255,%0" :: "f" ((__env).__d)) + #endif #else -#define __mffs(__env) -#define __mtfsf(__env) + #define __mffs(__env) + #define __mtfsf(__env) #endif union __fpscr { @@ -92,6 +89,11 @@ union __fpscr { } __bits; }; +struct rm_ctx { + fenv_t env; + bool updated_status; +}; + __fenv_static inline int feclearexcept(int __excepts) { diff --git a/library/include/math.h b/library/include/math.h index 3326bded..b7628027 100755 --- a/library/include/math.h +++ b/library/include/math.h @@ -166,10 +166,10 @@ extern int *__signgam(void); #define isnan(x) __isnan(x) #endif -extern int finitef (float); -extern int finitel (long double); -extern int isinff (float); -extern int isnanf (float); +#define finitef(x) __isfinite_float(x) +#define finitel(x) __isfinite_long_double(x) +#define isinff(x) __isinff(x) +#define isnanf(x) __isnanf(x) extern float acosf(float x); extern float asinf(float x); diff --git a/library/math/e_sqrt.c b/library/math/e_sqrt.c index 8de59b66..9e88ebcd 100644 --- a/library/math/e_sqrt.c +++ b/library/math/e_sqrt.c @@ -11,6 +11,9 @@ static const float one = 1.0, tiny = 1.0e-30; double __ieee754_sqrt(double x) { double z; +#ifndef __SPE__ + asm ("fsqrt %0,%1\n" :"=f" (z):"f" (x)); +#else int32_t sign = (int) 0x80000000; int32_t ix0, s0, q, m, t, i; uint32_t r, t1, s1, ix1, q1; @@ -107,5 +110,6 @@ __ieee754_sqrt(double x) { if ((q & 1) == 1) ix1 |= sign; ix0 += (m << 20); INSERT_WORDS(z, ix0, ix1); +#endif return z; } diff --git a/library/math/e_sqrtf.c b/library/math/e_sqrtf.c index f5a1e94d..c5d281c3 100644 --- a/library/math/e_sqrtf.c +++ b/library/math/e_sqrtf.c @@ -6,11 +6,30 @@ #include "math_headers.h" #endif /* _MATH_HEADERS_H */ +#ifdef __SPE__ +/* CN = 1+2**27 = '41a0000002000000' IEEE double format. Use it to split a + double for better accuracy. */ +#define CN 134217729.0 +#define EMULV(x, y, z, zz) \ + ({ __typeof__ (x) __p, hx, tx, hy, ty; \ + __p = CN * (x); hx = ((x) - __p) + __p; tx = (x) - hx; \ + __p = CN * (y); hy = ((y) - __p) + __p; ty = (y) - hy; \ + z = (x) * (y); zz = (((hx * hy - z) + hx * ty) + tx * hy) + tx * ty; \ + }) + + +#include "root.tbl" +#else static const float one = 1.0, tiny = 1.0e-30; +#endif float __ieee754_sqrtf(float x) { float z; +#ifndef __SPE__ + asm ("fsqrts %0,%1\n" :"=f" (z):"f" (x)); +#else +#if 0 int32_t sign = (int) 0x80000000; int32_t ix, s, q, m, t, i; uint32_t r; @@ -24,7 +43,8 @@ __ieee754_sqrtf(float x) { } /* take care of zero */ if (ix <= 0) { - if ((ix & (~sign)) == 0) return x;/* sqrt(+-0) = +-0 */ + if ((ix & (~sign)) == 0) + return x;/* sqrt(+-0) = +-0 */ else if (ix < 0) return (x - x) / (x - x); /* sqrt(-ve) = sNaN */ } @@ -70,5 +90,81 @@ __ieee754_sqrtf(float x) { ix = (q >> 1) + 0x3f000000; ix += (m << 23); SET_FLOAT_WORD(z, ix); +#else + /* Use generic implementation. */ + static const double + rt0 = 9.99999999859990725855365213134618E-01, + rt1 = 4.99999999495955425917856814202739E-01, + rt2 = 3.75017500867345182581453026130850E-01, + rt3 = 3.12523626554518656309172508769531E-01; + static const double big = 134217728.0; + double y, t, del, res, res1, hy, z1, zz, s; + mynumber a, c = {{0, 0}}; + int4 k; + + a.x = x; + k = a.i[HIGH_HALF]; + a.i[HIGH_HALF] = (k & 0x001fffff) | 0x3fe00000; + t = inroot[(k & 0x001fffff) >> 14]; + s = a.x; + /*----------------- 2^-1022 <= | x |< 2^1024 -----------------*/ + if (k > 0x000fffff && k < 0x7ff00000) { + int rm = fegetround(); + fenv_t env; + feholdexcept(&env); + fesetround(FE_TONEAREST); + double ret; + y = 1.0 - t * (t * s); + t = t * (rt0 + y * (rt1 + y * (rt2 + y * rt3))); + c.i[HIGH_HALF] = 0x20000000 + ((k & 0x7fe00000) >> 1); + y = t * s; + hy = (y + big) - big; + del = 0.5 * t * ((s - hy * hy) - (y - hy) * (y + hy)); + res = y + del; + if (res == (res + 1.002 * ((y - res) + del))) + ret = res * c.x; + else { + res1 = res + 1.5 * ((y - res) + del); + EMULV(res, res1, z1, zz); /* (z1+zz)=res*res1 */ + res = ((((z1 - s) + zz) < 0) ? max (res, res1) : + min (res, res1)); + ret = res * c.x; + } + math_force_eval(ret); + fesetenv(&env); + double dret = x / ret; + if (dret != ret) { + double force_inexact = 1.0 / 3.0; + math_force_eval(force_inexact); + /* The square root is inexact, ret is the round-to-nearest + value which may need adjusting for other rounding + modes. */ + switch (rm) { + case FE_UPWARD: + if (dret > ret) + ret = (res + 0x1p-1022) * c.x; + break; + case FE_DOWNWARD: + case FE_TOWARDZERO: + if (dret < ret) + ret = (res - 0x1p-1022) * c.x; + break; + default: + break; + } + } + /* Otherwise (x / ret == ret), either the square root was exact or the division was inexact. */ + return ret; + } else { + if ((k & 0x7ff00000) == 0x7ff00000) + return x * x + x; /* sqrt(NaN)=NaN, sqrt(+inf)=+inf, sqrt(-inf)=sNaN */ + if (x == 0) + return x; /* sqrt(+0)=+0, sqrt(-0)=-0 */ + if (k < 0) + return (x - x) / (x - x); /* sqrt(-ve)=sNaN */ + return 0x1p-256 * __ieee754_sqrt(x * 0x1p512); + } +#endif +#endif return z; } diff --git a/library/math/finite.c b/library/math/finite.c index b7e7629a..b669991e 100644 --- a/library/math/finite.c +++ b/library/math/finite.c @@ -7,8 +7,25 @@ #endif /* _MATH_HEADERS_H */ int -finite(double x) { +finite(double number) { +#ifndef __SPE__ int32 hx; - GET_HIGH_WORD(hx, x); + GET_HIGH_WORD(hx, number); return (int) ((uint32)((hx & 0x7fffffff) - 0x7ff00000) >> 31); +#else + union ieee_double x; + int result; + + x.value = number; + + if(((x.raw[0] & 0x7ff00000) == 0x7ff00000) && ((x.raw[0] & 0x000fffff) != 0 || (x.raw[1] != 0))) + result = 0; /* Exponent = 2047 and fraction != 0.0 -> not a number */ + else if (((x.raw[0] & 0x7fffffff) == 0x7ff00000) && (x.raw[1] == 0)) + result = 0; /* Exponent = 2047 and fraction = 0.0 -> infinity */ + else + result = 1; + //Printf("finite %ld\n", result); + + return(result); +#endif } \ No newline at end of file diff --git a/library/math/init_exit.c b/library/math/init_exit.c index 72ce1ad4..53d7e107 100644 --- a/library/math/init_exit.c +++ b/library/math/init_exit.c @@ -45,7 +45,7 @@ MATH_CONSTRUCTOR(math_init) { single_x->raw[0] = 0x7f800000; single_x = (union ieee_single *) &__clib2->__nan; - single_x->raw[0] = 0x7fc00001; + single_x->raw[0] = 0x7fc00000; success = TRUE; diff --git a/library/math/s_isfinite.c b/library/math/s_isfinite.c index c7885a55..6828b824 100644 --- a/library/math/s_isfinite.c +++ b/library/math/s_isfinite.c @@ -37,6 +37,7 @@ __isfinite_double(double d) { int result; x.value = d; + //Printf("isfinite %ld %ld %ld\n", d, x.raw[0], x.raw[1]); if(((x.raw[0] & 0x7ff00000) == 0x7ff00000) && ((x.raw[0] & 0x000fffff) != 0 || (x.raw[1] != 0))) result = 0; /* Exponent = 2047 and fraction != 0.0 -> not a number */ diff --git a/library/math/s_nanf.c b/library/math/s_nanf.c index 4286924c..f552905b 100644 --- a/library/math/s_nanf.c +++ b/library/math/s_nanf.c @@ -52,7 +52,7 @@ nanf(const char *s) { union ieee_single x; /* Exponent = 255 and fraction != 0.0; this must be a quiet nan. */ - x.raw[0] = 0x7fc00001; + x.raw[0] = 0x7fc00000; return x.value; #endif diff --git a/library/math/s_signbit.c b/library/math/s_signbit.c index f83e0fe6..e565543b 100644 --- a/library/math/s_signbit.c +++ b/library/math/s_signbit.c @@ -6,62 +6,23 @@ #include "math_headers.h" #endif /* _MATH_HEADERS_H */ -#ifndef __SPE__ int __signbit_double(double d) { -#ifndef __SPE__ union IEEEd2bits u; u.d = d; return (u.bits.sign); -#else - union ieee_double x; - int result; - - x.value = d; - - result = ((x.raw[0] = 0x80000000) != 0); - - return(result); -#endif } int __signbit_float(float f) { -#ifndef __SPE__ union IEEEf2bits u; u.f = f; return (u.bits.sign); -#else - union ieee_single x; - int result; - - x.value = f; - - result = ((x.raw[0] = 0x80000000) != 0); - - return(result); -#endif } int __signbit_long_double(long double e) { return __signbit_double(e); } -#else -int -__signbit_double(double d) { - return __builtin_signbit(d); -} - -int -__signbit_float(float f) { - return __builtin_signbitf(f); -} - -int -__signbit_long_double(long double e) { - return __builtin_signbitl(e); -} -#endif \ No newline at end of file diff --git a/library/stdio/vfprintf.c b/library/stdio/vfprintf.c index 1f65c920..48c28b84 100644 --- a/library/stdio/vfprintf.c +++ b/library/stdio/vfprintf.c @@ -109,6 +109,7 @@ static int fmt_fp(Out *f, long double y, int w, int p, int fl, int t) { pl = 1; if (signbit(y)) { + //Printf("negative\n"); y = -y; } else if (fl & __U_MARK_POS) { prefix += 3; @@ -117,15 +118,19 @@ static int fmt_fp(Out *f, long double y, int w, int p, int fl, int t) { } else prefix++, pl = 0; if (!isfinite(y)) { + //Printf("!isfinite\n"); char *s = (t & 32) ? "inf" : "INF"; - if (y != y) s = (t & 32) ? "nan" : "NAN"; + if (y != y) + s = (t & 32) ? "nan" : "NAN"; pad(f, ' ', w, 3 + pl, fl & ~__U_ZERO_PAD); out(f, prefix, pl); out(f, s, 3); pad(f, ' ', w, 3 + pl, fl ^ __S_LEFT_ADJ); return MAX(w, 3 + pl); } - + else { + //Printf("isfinite\n"); + } y = frexpl(y, &e2) * 2; if (y)