Some profiling changes

AmigaLabs · Sep 12, 2023 · d362aac · d362aac
1 parent 7c8458f
commit d362aac
Show file tree

Hide file tree

Showing 6 changed files with 136 additions and 12 deletions.
diff --git a/GNUmakefile.os4 b/GNUmakefile.os4
@@ -30,6 +30,7 @@ CC := ppc-amigaos-gcc
 AR := ppc-amigaos-ar -q
 LD := ppc-amigaos-ld
 RANLIB := ppc-amigaos-ranlib
+STRIP := ppc-amigaos-strip
 HOST_CXX := g++
 
 # On AmigaOS use native commands
@@ -101,9 +102,11 @@ STATIC   := $(if $(STATIC),$(STATIC),yes)
 LARGEDATA :=
 OPTIONS  += $(LARGEDATA) -D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE -D__CLIB2__ -Wa,-mregnames -fno-builtin -nostdlib -D_GNU_SOURCE -D_XOPEN_SOURCE -D_USE_GNU -pipe
 OPTIMIZE := -O3 -mregnames -mmultiple -mupdate -mstrict-align
+PROFILE_P :=
 
 STABS :=
 DLIBS :=
+PLIBS :=
 
 ifndef DEBUG
     OPTIMIZE += $(STABS) -DNDEBUG
@@ -112,9 +115,14 @@ else
     DLIBS += $(BUILD_DIR)/lib/libdebug.a
 endif
 
-CFLAGS     := $(WARNINGS) $(OPTIMIZE) $(OPTIONS) $(INCLUDES) -D__USE_INLINE__
-CFLAGS_N   := $(WARNINGS) $(OPTIMIZE) $(OPTIONS) $(INCLUDES)
-AFLAGS     := -Wa,-mregnames -mstrict-align
+ifdef PROFILE # profiling build: PROFILE_P feeds CFLAGS, CFLAGS_N and AFLAGS; PLIBS is appended to the clib2.library.debug link line
+    PROFILE_P := -pg -Ttext=0x00000000
+    PLIBS += $(BUILD_DIR)/lib/libprofile.a $(BUILD_DIR)/lib/libm.a
+endif # PROFILE (the -Ttext=0 above is what the FIXME in library/profile/gmon.c moncleanup() assumes)
+
+CFLAGS     := $(WARNINGS) $(OPTIMIZE) $(OPTIONS) $(INCLUDES) $(PROFILE_P) -D__USE_INLINE__
+CFLAGS_N   := $(WARNINGS) $(OPTIMIZE) $(OPTIONS) $(INCLUDES) $(PROFILE_P)
+AFLAGS     := -Wa,-mregnames -mstrict-align $(PROFILE_P)
 
 ifdef SPE
     CC := ppc-amigaos-gcc-6.4.0
@@ -212,11 +220,11 @@ version:
 # Shared clib2.library rules
 
 clib2.library.debug: $(OBJ_C_LIB)
-	$(VERBOSE)$(CC) -o $(BUILD_DIR)/$@ $(CFLAGS) $(BUILD_DIR)/shared/shared_library/clib2.o $(BUILD_DIR)/shared/shared_library/math.o $(OBJ_C_LIB) $(DLIBS) -Wl,--cref,-M,-Map=$(BUILD_DIR)/$@.map -nostartfiles -nostdlib
+	$(VERBOSE)$(CC) -o $(BUILD_DIR)/$@ $(CFLAGS) $(BUILD_DIR)/shared/shared_library/clib2.o $(BUILD_DIR)/shared/shared_library/math.o $(OBJ_C_LIB) $(DLIBS) $(PLIBS) -Wl,--cref,-M,-Map=$(BUILD_DIR)/$@.map -nostartfiles -nostdlib
 
 clib2.library: clib2.library.debug
 	cp $(BUILD_DIR)/$< $(BUILD_DIR)/$@
-	ppc-amigaos-strip -R.comment -R.sdata2 --strip-unneeded-rel-relocs $(BUILD_DIR)/$@
+	$(STRIP) -R.comment -R.sdata2 --strip-unneeded-rel-relocs $(BUILD_DIR)/$@
 
 ##############################################################################
 

diff --git a/library/profile/gmon.c b/library/profile/gmon.c
@@ -175,7 +175,7 @@ void moncleanup(void) {
         if (p->froms[fromindex] == 0)
             continue;
 
-        frompc = p->lowpc; //0x01000000; /* FIXME: was p->lowpc; needs to be 0 and assumes -Ttext=0 on compile. Better idea? */
+        frompc = 0; /* FIXME: was p->lowpc; needs to be 0 and assumes -Ttext=0 on compile. Better idea? */
         frompc += fromindex * p->hashfraction * sizeof(*p->froms);
         for (toindex = p->froms[fromindex]; toindex != 0;
              toindex = p->tos[toindex].link) {

diff --git a/library/profile/profil.c b/library/profile/profil.c
@@ -8,6 +8,8 @@
 #include <resources/performancemonitor.h>
 #include <unistd.h>
 
+#include "profile_gmon.h"
+
 static struct Interrupt CounterInt;
 static struct PerformanceMonitorIFace *IPM;
 
@@ -91,6 +93,7 @@ profil(unsigned short *buffer, size_t bufSize, size_t offset, unsigned int scale
 
     IPM = (struct PerformanceMonitorIFace *) OpenResource("performancemonitor.resource");
     if (!IPM || IPM->Obtain() != 1) {
+        dprintf("Cannot open performancemonitor.resource\n");
         return 0;
     }
 
@@ -127,4 +130,4 @@ profil(unsigned short *buffer, size_t bufSize, size_t offset, unsigned int scale
         UserState(Stack);
 
     return 0;
-}
+}
diff --git a/library/profile/profile_gmon.h b/library/profile/profile_gmon.h
@@ -6,6 +6,10 @@
 #define _GMON_H
 
 #include <exec/types.h>
+#include "../shared_library/math.h"
+
+#undef DebugPrintF /* dprintf(format, args...): debug-print helper for the profiling code.
+ * Expands to a DebugPrintF call made through the MainInterface of the
+ * ExecBase whose pointer is read from absolute address 4, with the message
+ * prefixed by "[<__PRETTY_FUNCTION__>] ".
+ * - format must be a string literal: it is pasted onto the "[%s] " prefix.
+ * - Uses the GNU named-variadic form (args... / ##args).
+ * - DebugPrintF is #undef'ed first, presumably so the member access in the
+ *   macro is not itself macro-expanded - TODO confirm.
+ * NOTE(review): dprintf is also the name of a POSIX <stdio.h> function;
+ * confirm no file including this header needs that one.
+ */
+#define dprintf(format, args...) ((struct ExecIFace *)((*(struct ExecBase **)4)->MainInterface))->DebugPrintF("[%s] " format, __PRETTY_FUNCTION__, ##args)
 
 struct gmonhdr {
     uint32 lpc;

diff --git a/library/shared_library/math.c b/library/shared_library/math.c
@@ -392,7 +392,55 @@ __unorddf2 (double a, double b) {
     return isnan(a) || isnan(b);
 }
 
-#ifndef __SOFT_FP__
+#ifndef __SOFTFP__
+long double
+__floatunditf(uint64_t a) {
+    /* Convert the unsigned 64-bit integer a to long double. The value is
+     * assembled as two doubles (hi, lo) through the DD union and returned
+     * via its ld member.
+     *
+     * Begins with an exact copy of the code from __floatundidf
+     */
+
+    static const double twop52 = 0x1.0p52;
+    static const double twop84 = 0x1.0p84;
+    static const double twop84_plus_twop52 = 0x1.00000001p84;
+
+    doublebits high = { .d = twop84 };
+    doublebits low  = { .d = twop52 };
+
+    high.x |= a >> 32;							/* 0x1.0p84 + high 32 bits of a */
+    low.x |= a & UINT64_C(0x00000000ffffffff);	/* 0x1.0p52 + low 32 bits of a */
+
+    const double high_addend = high.d - twop84_plus_twop52; /* (2^84 + hi*2^32) - (2^84 + 2^52): leaves hi*2^32 - 2^52, so adding low.d (2^52 + lo) cancels the remaining bias */
+
+    /* At this point, we have two double precision numbers
+     * high_addend and low.d, and we wish to return their sum
+     * as a canonicalized long double:
+     */
+    /* This implementation sets the inexact flag spuriously. */
+    /* This could be avoided, but at some substantial cost. */
+
+    DD result;
+
+    result.s.hi = high_addend + low.d; /* rounded double sum */
+    result.s.lo = (high_addend - result.s.hi) + low.d; /* residual left after subtracting the rounded sum; statement order matters here */
+
+    return result.ld;
+}
+
+double
+__floatundidf(du_int a) { /* Convert the unsigned 64-bit integer a to double
+     * using floating-point arithmetic: the upper and lower 32-bit halves of a
+     * are OR-ed into the bit patterns of 2^84 and 2^52, both biases are
+     * removed with one subtraction, and the two parts are then added.
+     */
+    static const double twop52 = 4503599627370496.0;           // 0x1.0p52
+    static const double twop84 = 19342813113834066795298816.0; // 0x1.0p84
+    static const double twop84_plus_twop52 =
+            19342813118337666422669312.0; // 0x1.00000001p84
+
+    doublebits high = {.d = twop84};
+    doublebits low = {.d = twop52};
+
+    high.x |= a >> 32;                          // high.d becomes 2^84 + (upper half of a) * 2^32
+    low.x |= a & UINT64_C(0x00000000ffffffff);  // low.d becomes 2^52 + (lower half of a)
+
+    const double result = (high.d - twop84_plus_twop52) + low.d; // (hi*2^32 - 2^52) + (2^52 + lo)
+    return result;
+}
+
 /* Support for systems that have hardware floating-point; we'll set the inexact flag
  * as a side-effect of this computation.
  */
@@ -401,10 +449,7 @@ __floatdidf(di_int a) {
     static const double twop52 = 0x1.0p52;
     static const double twop32 = 0x1.0p32;
 
-    union {
-        int64_t x;
-        double d;
-    } low = {.d = twop52};
+    doublebits low = {.d = twop52};
 
     const double high = (int32_t)(a >> 32) * twop32;
     low.x |= a & INT64_C(0x00000000ffffffff);
@@ -468,4 +513,50 @@ __floatdidf(di_int a) {
     fb.u.low = (su_int)a;                         /* mantissa-low */
     return fb.f;
 }
+
+double
+__floatundidf(du_int a) { /* Convert the unsigned 64-bit integer a to double
+   * using integer operations only: the value is normalised to DBL_MANT_DIG
+   * significant bits (rounding when it has more), and the exponent and
+   * mantissa fields are then written directly through a double_bits union.
+   * Relies on CHAR_BIT and DBL_MANT_DIG being in scope (normally
+   * <limits.h> / <float.h>); the includes are not visible in this hunk.
+   */
+  if (a == 0) // handled up front; this also keeps 0 away from __builtin_clzll below
+    return 0.0;
+  const unsigned N = sizeof(du_int) * CHAR_BIT;
+  int sd = N - __builtin_clzll(a); // number of significant digits
+  int e = sd - 1;                  // exponent
+  if (sd > DBL_MANT_DIG) {
+    //  start:  0000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQxxxxxxxxxxxxxxxxxx
+    //  finish: 000000000000000000000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQR
+    //                                                12345678901234567890123456
+    //  1 = msb 1 bit
+    //  P = bit DBL_MANT_DIG-1 bits to the right of 1
+    //  Q = bit DBL_MANT_DIG bits to the right of 1
+    //  R = "or" of all bits to the right of Q
+    switch (sd) {
+    case DBL_MANT_DIG + 1:
+      a <<= 1; // make room so that Q and R occupy the two lowest bits
+      break;
+    case DBL_MANT_DIG + 2:
+      break; // already in the "finish" layout
+    default:
+      a = (a >> (sd - (DBL_MANT_DIG + 2))) |
+          ((a & ((du_int)(-1) >> ((N + DBL_MANT_DIG + 2) - sd))) != 0); // shift down and fold every discarded bit into R
+    };
+    // finish:
+    a |= (a & 4) != 0; // Or P into R
+    ++a;               // round - this step may add a significant bit
+    a >>= 2;           // dump Q and R
+    // a is now rounded to DBL_MANT_DIG or DBL_MANT_DIG+1 bits
+    if (a & ((du_int)1 << DBL_MANT_DIG)) {
+      a >>= 1;
+      ++e;
+    }
+    // a is now rounded to DBL_MANT_DIG bits
+  } else {
+    a <<= (DBL_MANT_DIG - sd);
+    // a is now rounded to DBL_MANT_DIG bits
+  }
+  double_bits fb; // NOTE(review): the preceding __floatdidf assigns fb.u.low, while this function uses fb.u.s.high / fb.u.s.low - confirm which spelling matches the udwords definition
+  fb.u.s.high = ((su_int)(e + 1023) << 20) |      // exponent
+                ((su_int)(a >> 32) & 0x000FFFFF); // mantissa-high
+  fb.u.s.low = (su_int)a;                         // mantissa-low
+  return fb.f;
+}
 #endif
diff --git a/library/shared_library/math.h b/library/shared_library/math.h
@@ -1,6 +1,8 @@
 #ifndef _MATH_H__
 #define _MATH_H__
 
+#include <stdint.h>
+
 typedef int si_int;
 typedef unsigned su_int;
 typedef long long di_int;
@@ -25,11 +27,25 @@ typedef union {
     su_int u;
     float f;
 } float_bits;
+
 typedef union {
     udwords u;
     double f;
 } double_bits;
 
+typedef union { /* DD: a long double sharing storage with a pair of doubles,
+                 * so a long double can be assembled from, or split into,
+                 * its hi and lo members (used by __floatunditf). */
+    long double ld;
+    struct {
+        double hi; /* first double of the pair */
+        double lo; /* second double of the pair */
+    }s;
+} DD;
+
+typedef union { /* doublebits: a double sharing storage with an int64_t, so
+                 * integer bit operations can be applied to the double's
+                 * representation. Not to be confused with double_bits
+                 * (udwords/double pair) declared earlier in this header. */
+    int64_t x; /* integer view of the same storage */
+    double d;  /* floating-point view */
+} doublebits;
+
 typedef struct {
     udwords high;
     udwords low;
@@ -51,5 +67,7 @@ di_int __moddi3(di_int a, di_int b);
 di_int __divdi3(di_int a, di_int b);
 double __floatdidf(di_int a);
 di_int __unorddf2 (double a, double b);
+long double __floatunditf(uint64_t a);
+double __floatundidf(du_int a);
 
 #endif /* _MATH_H__ */