diff --git a/library/profile/_mcount.c b/library/profile/_mcount.c index ed4705ed..a26c1856 100644 --- a/library/profile/_mcount.c +++ b/library/profile/_mcount.c @@ -2,12 +2,11 @@ * $Id: profile__mcount.c,v 1.0 2022-08-06 10:36:26 clib2devs Exp $ */ -#include "profile_gmon.h" +#include "gmon.h" #include #include #include - void __mcount(uint32 frompc, uint32 selfpc); void diff --git a/library/profile/gmon.c b/library/profile/gmon.c index 0beb48b5..4c5f8e5c 100644 --- a/library/profile/gmon.c +++ b/library/profile/gmon.c @@ -10,14 +10,17 @@ #include #include #include +#include +#include #define SCALE_1_TO_1 0x10000L #define MIN_OS_VERSION 52 -#include "profile_gmon.h" +#include "gmon.h" +#include "gmon_out.h" -#undef DebugPrintF -#define dprintf(format, args...) ((struct ExecIFace *)((*(struct ExecBase **)4)->MainInterface))->DebugPrintF("[%s] " format, __PRETTY_FUNCTION__, ##args) +/* Head of basic-block list or NULL. */ +struct __bb *__bb_head __attribute__ ((visibility ("hidden"))); struct gmonparam _gmonparam = { state : kGmonProfOn @@ -25,16 +28,126 @@ struct gmonparam _gmonparam = { static unsigned int s_scale; -void moncontrol(int); -void monstartup(uint32, uint32); -void moncleanup(void); -void mongetpcs(uint32 *lowpc, uint32 *highpc); +void +write_hist(int fd) { + u_char tag = GMON_TAG_TIME_HIST; + + if (_gmonparam.kcountsize > 0) { + struct iovec iov[3] = { + { &tag, sizeof(tag) }, + { &thdr, sizeof(struct gmon_hist_hdr) }, + { _gmonparam.kcount, _gmonparam.kcountsize } + }; + + if (sizeof(thdr) != sizeof(struct gmon_hist_hdr) || (offsetof(struct real_gmon_hist_hdr, low_pc) != offsetof(struct gmon_hist_hdr, low_pc)) || (offsetof(struct real_gmon_hist_hdr, high_pc) != offsetof(struct gmon_hist_hdr, high_pc)) || (offsetof(struct real_gmon_hist_hdr, hist_size) != offsetof(struct gmon_hist_hdr, hist_size)) || (offsetof(struct real_gmon_hist_hdr, prof_rate) != offsetof(struct gmon_hist_hdr, prof_rate)) || (offsetof(struct real_gmon_hist_hdr, dimen) != offsetof(struct gmon_hist_hdr, dimen)) || (offsetof(struct real_gmon_hist_hdr, dimen_abbrev) != offsetof(struct gmon_hist_hdr, dimen_abbrev))) + return; + + thdr.low_pc = (char *)_gmonparam.lowpc; + thdr.high_pc = (char *)_gmonparam.highpc; + thdr.hist_size = _gmonparam.kcountsize / sizeof(HISTCOUNTER); + thdr.prof_rate = TICKS_PER_SECOND; + strncpy(thdr.dimen, "seconds", sizeof(thdr.dimen)); + thdr.dimen_abbrev = 's'; + + writev(fd, iov, 3); + } +} + +void +write_call_graph(int fd) { + u_char tag = GMON_TAG_CG_ARC; + struct gmon_cg_arc_record raw_arc[NARCS_PER_WRITEV] __attribute__((aligned(__alignof__(char *)))); + ARCINDEX from_index, to_index; + u_long from_len; + u_long frompc; + struct iovec iov[2 * NARCS_PER_WRITEV]; + int nfilled; + + for (nfilled = 0; nfilled < NARCS_PER_WRITEV; ++nfilled) { + iov[2 * nfilled].iov_base = &tag; + iov[2 * nfilled].iov_len = sizeof(tag); + + iov[2 * nfilled + 1].iov_base = &raw_arc[nfilled]; + iov[2 * nfilled + 1].iov_len = sizeof(struct gmon_cg_arc_record); + } + + nfilled = 0; + from_len = _gmonparam.fromssize / sizeof(*_gmonparam.froms); + for (from_index = 0; from_index < from_len; ++from_index) { + if (_gmonparam.froms[from_index] == 0) + continue; + + frompc = _gmonparam.lowpc; + frompc += (from_index * _gmonparam.hashfraction * sizeof(*_gmonparam.froms)); + for (to_index = _gmonparam.froms[from_index]; + to_index != 0; + to_index = _gmonparam.tos[to_index].link) { + struct arc { + char *frompc; + char *selfpc; + int32_t count; + } arc; + + arc.frompc = (char *)frompc; + arc.selfpc = (char *)_gmonparam.tos[to_index].selfpc; + arc.count = _gmonparam.tos[to_index].count; + memcpy(raw_arc + nfilled, &arc, sizeof(raw_arc[0])); + + if (++nfilled == NARCS_PER_WRITEV) { + writev(fd, iov, 2 * nfilled); + nfilled = 0; + } + } + } + if (nfilled > 0) + writev(fd, iov, 2 * nfilled); +} + +void +write_bb_counts(int fd) { + struct __bb *grp; + u_char tag = GMON_TAG_BB_COUNT; + size_t ncounts; + size_t i; + + struct iovec bbhead[2] = { + {&tag, sizeof(tag)}, + {&ncounts, sizeof(ncounts)} + }; + struct iovec bbbody[8]; + size_t nfilled; + + for (i = 0; i < (sizeof(bbbody) / sizeof(bbbody[0])); i += 2) { + bbbody[i].iov_len = sizeof(grp->addresses[0]); + bbbody[i + 1].iov_len = sizeof(grp->counts[0]); + } + + /* Write each group of basic-block info (all basic-blocks in a + compilation unit form a single group). */ + + for (grp = __bb_head; grp; grp = grp->next) { + ncounts = grp->ncounts; + writev(fd, bbhead, 2); + for (nfilled = i = 0; i < ncounts; ++i) { + if (nfilled > (sizeof(bbbody) / sizeof(bbbody[0])) - 2) { + writev(fd, bbbody, nfilled); + nfilled = 0; + } + + bbbody[nfilled++].iov_base = (char *)&grp->addresses[i]; + bbbody[nfilled++].iov_base = &grp->counts[i]; + } + if (nfilled > 0) + writev(fd, bbbody, nfilled); + } +} void monstartup(uint32 low_pc, uint32 high_pc) { uint8 *cp; uint32 lowpc, highpc; struct gmonparam *p = &_gmonparam; dprintf("in monstartup)\n"); + /* * If we don't get proper lowpc and highpc, then * we'll try to get them from the elf handle. @@ -101,13 +214,22 @@ void monstartup(uint32 low_pc, uint32 high_pc) { p->tos[0].link = 0; /* Verify granularity for sampling */ - if (p->kcountsize < p->textsize) - /* FIXME Avoid floating point */ - s_scale = ((float) p->kcountsize / p->textsize) * SCALE_1_TO_1; + if (p->kcountsize < p->textsize) { + /* avoid floating point operations */ + int quot = p->textsize / p->kcountsize; + + if (quot >= 0x10000) + s_scale = 1; + else if (quot >= 0x100) + s_scale = 0x10000 / quot; + else if (p->textsize >= 0x800000) + s_scale = 0x1000000 / (p->textsize / (p->kcountsize >> 8)); + else + s_scale = 0x1000000 / ((p->textsize << 8) / p->kcountsize); + } else s_scale = SCALE_1_TO_1; - s_scale >>= 1; dprintf("Enabling monitor\n"); moncontrol(1); } @@ -115,6 +237,10 @@ void monstartup(uint32 low_pc, uint32 high_pc) { void moncontrol(int mode) { struct gmonparam *p = &_gmonparam; + /* Don't change the state if we ran into an error. */ + if (p->state == kGmonProfError) + return; + if (mode) { /* Start profiling. */ profil((uint16 *) p->kcount, (size_t) p->kcountsize, p->lowpc, s_scale); @@ -128,16 +254,7 @@ void moncontrol(int mode) { void moncleanup(void) { BPTR fd; - int fromindex; - int endfrom; - uint32 frompc; - int toindex; - struct rawarc rawarc; struct gmonparam *p = &_gmonparam; - struct gmonhdr gmonhdr, *hdr; -#ifdef DEBUG - FILE *log; -#endif moncontrol(0); @@ -145,63 +262,47 @@ void moncleanup(void) { fprintf(stderr, "WARNING: Overflow during profiling\n"); } - fd = Open("gmon.out", MODE_NEWFILE); - if (!fd) { - fprintf(stderr, "ERROR: could not open gmon.out\n"); - return; - } + if (_gmonparam.kcountsize > 0) { + fd = open("gmon.out", O_CREAT | O_TRUNC | O_WRONLY); + if (!fd) { + fprintf(stderr, "ERROR: could not open gmon.out\n"); + return; + } - hdr = (struct gmonhdr *) &gmonhdr; + /* write gmon.out header: */ + struct real_gmon_hdr + { + char cookie[4]; + int32_t version; + char spare[3 * 4]; + } ghdr; - hdr->lpc = 0; //p->lowpc; - hdr->hpc = p->highpc - p->lowpc; - hdr->ncnt = (int) p->kcountsize + sizeof(gmonhdr); - hdr->version = GMONVERSION; - hdr->profrate = 100; //FIXME:!! + if (sizeof(ghdr) != sizeof(struct gmon_hdr) || (offsetof(struct real_gmon_hdr, cookie) != offsetof(struct gmon_hdr, cookie)) || (offsetof(struct real_gmon_hdr, version) != offsetof(struct gmon_hdr, version))) + return; - Write(fd, hdr, sizeof(*hdr)); - Write(fd, p->kcount, p->kcountsize); + memcpy(&ghdr.cookie[0], GMON_MAGIC, sizeof(ghdr.cookie)); + ghdr.version = GMON_VERSION; + memset(ghdr.spare, '\0', sizeof(ghdr.spare)); + write(fd, &ghdr, sizeof(struct gmon_hdr)); - endfrom = p->fromssize / sizeof(*p->froms); + /* write PC histogram: */ + write_hist(fd); -#ifdef DEBUG - log = fopen("gmon.log", "w"); -#endif + /* write call-graph: */ + write_call_graph(fd); - for (fromindex = 0; fromindex < endfrom; fromindex++) { - if (p->froms[fromindex] == 0) - continue; + /* write basic-block execution counts: */ + write_bb_counts(fd); - frompc = 0; /* FIXME: was p->lowpc; needs to be 0 and assumes -Ttext=0 on compile. Better idea? */ - frompc += fromindex * p->hashfraction * sizeof(*p->froms); - for (toindex = p->froms[fromindex]; toindex != 0; - toindex = p->tos[toindex].link) { -#ifdef DEBUG - if (log) - fprintf(log, "%p called from %p: %d times\n", frompc, - p->tos[toindex].selfpc, - p->tos[toindex].count); -#endif - rawarc.raw_frompc = frompc; - rawarc.raw_selfpc = p->tos[toindex].selfpc; - rawarc.raw_count = p->tos[toindex].count; - Write(fd, &rawarc, sizeof(rawarc)); - } + close(fd); } - -#ifdef DEBUG - if (log) - fclose(log); -#endif - Close(fd); } void mongetpcs(uint32 *lowpc, uint32 *highpc) { - struct Library *__ElfBase = NULL; - struct ElfIFace *__IElf = NULL; + struct Library *ElfBase = NULL; + struct ElfIFace *IElf = NULL; struct Process *self; BPTR seglist; - Elf32_Handle elfHandle; uint32 i; Elf32_Shdr *shdr; uint32 numSections; @@ -209,41 +310,42 @@ void mongetpcs(uint32 *lowpc, uint32 *highpc) { *lowpc = 0; *highpc = 0; - __ElfBase = OpenLibrary("elf.library", MIN_OS_VERSION); - if (__ElfBase) { - __IElf = (struct ElfIFace *) GetInterface(__ElfBase, "main", 1, NULL); - if (__IElf) { + ElfBase = OpenLibrary("elf.library", MIN_OS_VERSION); + if (ElfBase) { + IElf = (struct ElfIFace *) GetInterface(ElfBase, "main", 1, NULL); + if (IElf) { self = (struct Process *) FindTask(0); seglist = GetProcSegList(self, GPSLF_CLI | GPSLF_SEG); - - if (GetSegListInfoTags(seglist, GSLI_ElfHandle, &elfHandle, TAG_DONE) == 1) { - elfHandle = OpenElfTags(OET_ElfHandle, elfHandle, TAG_DONE); - - if (elfHandle) { - GetElfAttrsTags(elfHandle, EAT_NumSections, &numSections, TAG_DONE); - for (i = 0; i < numSections; i++) { - shdr = GetSectionHeaderTags(elfHandle, GST_SectionIndex, i, TAG_DONE); - if (shdr && (shdr->sh_flags & SWF_EXECINSTR)) { - uint32 base = (uint32) GetSectionTags(elfHandle, GST_SectionIndex, i, TAG_DONE); - *lowpc = base; - *highpc = base + shdr->sh_size; - break; + if (seglist != BZERO) { + Elf32_Handle elfHandle = NULL; + + if (GetSegListInfoTags(seglist, GSLI_ElfHandle, &elfHandle, TAG_DONE) == 1) { + elfHandle = OpenElfTags(OET_ElfHandle, elfHandle, TAG_DONE); + if (elfHandle) { + GetElfAttrsTags(elfHandle, EAT_NumSections, &numSections, TAG_DONE); + for (i = 0; i < numSections; i++) { + shdr = GetSectionHeaderTags(elfHandle, GST_SectionIndex, i, TAG_DONE); + if (shdr && (shdr->sh_flags & SWF_EXECINSTR)) { + uint32 base = (uint32) GetSectionTags(elfHandle, GST_SectionIndex, i, TAG_DONE); + *lowpc = base; + *highpc = base + shdr->sh_size; + break; + } } + CloseElfTags(elfHandle, CET_ReClose, TRUE, TAG_DONE); } - - CloseElfTags(elfHandle, CET_ReClose, TRUE, TAG_DONE); } } } } - if (__IElf) { - DropInterface((struct Interface *) __IElf); - __IElf = NULL; + if (IElf) { + DropInterface((struct Interface *) IElf); + IElf = NULL; } - if (__ElfBase) { - CloseLibrary(__ElfBase); - __ElfBase = NULL; + if (ElfBase) { + CloseLibrary(ElfBase); + ElfBase = NULL; } } diff --git a/library/profile/profile_gmon.h b/library/profile/gmon.h old mode 100755 new mode 100644 similarity index 51% rename from library/profile/profile_gmon.h rename to library/profile/gmon.h index 3bda1d6a..453f38b4 --- a/library/profile/profile_gmon.h +++ b/library/profile/gmon.h @@ -11,6 +11,11 @@ #undef DebugPrintF #define dprintf(format, args...) ((struct ExecIFace *)((*(struct ExecBase **)4)->MainInterface))->DebugPrintF("[%s] " format, __PRETTY_FUNCTION__, ##args) +void moncontrol(int); +void monstartup(uint32, uint32); +void moncleanup(void); +void mongetpcs(uint32 *lowpc, uint32 *highpc); + struct gmonhdr { uint32 lpc; uint32 hpc; @@ -20,16 +25,53 @@ struct gmonhdr { int reserved[3]; }; -#define GMONVERSION 0x00051879 - #define HISTCOUNTER uint16 + // I am sure we can make these bigger #define HISTFRACTION 2 #define HASHFRACTION 4 -#define ARCDENSITY 2 +/* + * Percent of text space to allocate for tostructs. + * This is a heuristic; we will fail with a warning when profiling programs + * with a very large number of very small functions, but that's + * normally OK. + * 2 is probably still a good value for normal programs. + * Profiling a test case with 64000 small functions will work if + * you raise this value to 3 and link statically (which bloats the + * text size, thus raising the number of arcs expected by the heuristic). + */ +#define ARCDENSITY 3 + +/* + * Always allocate at least this many tostructs. This + * hides the inadequacy of the ARCDENSITY heuristic, at least + * for small programs. + */ #define MINARCS 50 -#define MAXARCS ((1 << (8 * sizeof(HISTCOUNTER)))-2) + + +#define MAXARCS ((1 << (8 * sizeof(HISTCOUNTER)))-2) + +/* + * The type used to represent indices into gmonparam.tos[]. + */ +#define ARCINDEX u_long + + +/* structure emitted by "gcc -a". This must match struct bb in + gcc/libgcc2.c. It is OK for gcc to declare a longer structure as + long as the members below are present. */ +struct __bb { + long zero_word; + const char *filename; + long *counts; + long ncounts; + struct __bb *next; + const unsigned long *addresses; +}; + +extern struct __bb *__bb_head; struct tostruct { uint32 selfpc; diff --git a/library/profile/gmon_out.h b/library/profile/gmon_out.h new file mode 100644 index 00000000..c4b98d7d --- /dev/null +++ b/library/profile/gmon_out.h @@ -0,0 +1,76 @@ +/* +* $Id: profile_gmon_out.h,v 1.0 2023-09-17 12:04:26 clib2devs Exp $ +*/ + +/* This file specifies the format of gmon.out files. It should have + as few external dependencies as possible as it is going to be included + in many different programs. That is, minimize the number of #include's. + + A gmon.out file consists of a header (defined by gmon_hdr) followed by + a sequence of records. Each record starts with a one-byte tag + identifying the type of records, followed by records specific data. */ + +#ifndef _SYS_GMON_OUT_H +#define _SYS_GMON_OUT_H 1 + +#include + +#define GMON_MAGIC "gmon" /* magic cookie */ +#define GMON_VERSION 1 /* version number */ +//#define GMON_VERSION 0x00051879 + +/* For profiling shared object we need a new format. */ +#define GMON_SHOBJ_VERSION 0x1ffff +#define NARCS_PER_WRITEV 32 + +__BEGIN_DECLS + +/* + * Raw header as it appears on file (without padding). This header + * always comes first in gmon.out and is then followed by a series + * records defined below. + */ +struct gmon_hdr { + char cookie[4]; + char version[4]; + char spare[3 * 4]; +}; + +/* types of records in this file: */ +typedef enum { + GMON_TAG_TIME_HIST = 0, + GMON_TAG_CG_ARC = 1, + GMON_TAG_BB_COUNT = 2 +} GMON_Record_Tag; + +struct gmon_hist_hdr { + char low_pc[sizeof(char *)]; /* base pc address of sample buffer */ + char high_pc[sizeof(char *)]; /* max pc address of sampled buffer */ + char hist_size[4]; /* size of sample buffer */ + char prof_rate[4]; /* profiling clock rate */ + char dimen[15]; /* phys. dim., usually "seconds" */ + char dimen_abbrev; /* usually 's' for "seconds" */ +}; + +struct gmon_cg_arc_record { + char from_pc[sizeof(char *)]; /* address within caller's body */ + char self_pc[sizeof(char *)]; /* address within callee's body */ + char count[4]; /* number of arc traversals */ +}; + +struct real_gmon_hist_hdr { + char *low_pc; + char *high_pc; + int32_t hist_size; + int32_t prof_rate; + char dimen[15]; + char dimen_abbrev; +} thdr; + +void write_hist(int fd); +void write_call_graph(int fd); +void write_bb_counts(int fd); + +__END_DECLS + +#endif /* sys/gmon_out.h */ diff --git a/library/profile/profil.c b/library/profile/profil.c index dc83e24f..15fb400c 100644 --- a/library/profile/profil.c +++ b/library/profile/profil.c @@ -8,7 +8,7 @@ #include #include -#include "profile_gmon.h" +#include "gmon.h" static struct Interrupt CounterInt; static struct PerformanceMonitorIFace *IPM;