Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

x86-64 PCID and PMU Support #446

Open
wants to merge 7 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 36 additions & 0 deletions composition_scripts/unit_pmu.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
[system]
description = "Simplest system with both capability manager and scheduler to test PMU counter support"
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

focus is now on PMU testing, right?


[[components]]
name = "booter"
img = "no_interface.llbooter"
implements = [{interface = "init"}, {interface = "addr"}]
deps = [{srv = "kernel", interface = "init", variant = "kernel"}]
constructor = "kernel"

[[components]]
name = "capmgr"
img = "capmgr.simple"
deps = [{srv = "booter", interface = "init"}, {srv = "booter", interface = "addr"}]
implements = [{interface = "capmgr"}, {interface = "init"}, {interface = "memmgr"}, {interface = "capmgr_create"}]
constructor = "booter"

[[components]]
name = "sched"
img = "sched.root_fprr"
deps = [{srv = "capmgr", interface = "init"}, {srv = "capmgr", interface = "capmgr"}, {srv = "capmgr", interface = "memmgr"}]
implements = [{interface = "sched"}, {interface = "init"}]
constructor = "booter"

[[components]]
name = "pong"
img = "pong.pingpong"
deps = [{srv = "sched", interface = "init"}, {srv = "capmgr", interface = "capmgr_create"}]
implements = [{interface = "pong"}]
constructor = "booter"

[[components]]
name = "unit_pmu"
img = "tests.unit_pmu"
deps = [{srv = "sched", interface = "init"}, {srv = "capmgr", interface = "capmgr_create"}, {srv = "capmgr", interface = "memmgr"}, {srv = "pong", interface = "pong"}]
constructor = "booter"
Original file line number Diff line number Diff line change
Expand Up @@ -514,6 +514,7 @@ cos_init(void)
{
booter_init();
comps_init();

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does your editor auto-insert blank lines at the end of functions? Very strange behavior.

}

void
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ cos_init(void)
struct cos_compinfo *child_ci = cos_compinfo_get(&child_defci[id]);

printc("\tCreating new %s component [%d]\n", is_sched ? "scheduler" : "simple", id);
child_utpt = cos_pgtbl_alloc(ci);
child_utpt = cos_pgtbl_alloc(ci, (asid_t)0);
assert(child_utpt);

cos_meminfo_init(&(child_ci->mi), BOOT_MEM_KM_BASE, CHILD_UNTYPED_SIZE, child_utpt);
Expand Down
12 changes: 12 additions & 0 deletions src/components/implementation/tests/unit_pingshmem/ping.c
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,16 @@ char *pong_test_strings[] = {

shm_bm_t shm;

static unsigned long
rdpmc (unsigned long cntr)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

no space between fn name and (.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think that the question is: where should we put this function?

We'll need to consider that it will need to have a generic version (that just returns 0) for architectures that don't support it.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, need to figure out what a higher level abstraction would look like

{
unsigned int low, high;

asm volatile("rdpmc" : "=a" (low), "=d" (high) : "c" (cntr));

return low | ((unsigned long)high) << 32;
}

void
ping_test_objread(void)
{
Expand Down Expand Up @@ -302,9 +312,11 @@ main(void)
ping_test_bigfree();
ping_test_refcnt();


ping_bench_syncinv();
ping_bench_msgpassing();



return 0;
}
18 changes: 18 additions & 0 deletions src/components/implementation/tests/unit_pmu/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Required variables used to drive the compilation process. It is OK
# for many of these to be empty.
#
# The set of interfaces that this component exports for use by other
# components. This is a list of the interface names.
INTERFACE_EXPORTS =
# The interfaces this component is dependent on for compilation (this
# is a list of directory names in interface/)
INTERFACE_DEPENDENCIES = init pong memmgr
# The library dependencies this component is reliant on for
# compilation/linking (this is a list of directory names in lib/)
LIBRARY_DEPENDENCIES = kernel ps
# Note: Both the interface and library dependencies should be
# *minimal*. That is to say that removing a dependency should cause
# the build to fail. The build system does not validate this
# minimality; that's on you!

include Makefile.subsubdir
63 changes: 63 additions & 0 deletions src/components/implementation/tests/unit_pmu/pmu_test.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
#include <cos_kernel_api.h>
#include <cos_types.h>
#include <pong.h>
#include <memmgr.h>

#define NUM_PAGES 1000

/*
 * Execute the x86 `rdpmc` instruction and return the counter it reads.
 *
 * `cntr` is handed to the instruction in the C register (ECX/RCX); the
 * result comes back split across the A (low half) and D (high half)
 * registers and is recombined here into a single value.
 */
static unsigned long
rdpmc(unsigned long cntr)
{
	unsigned int low, high;

	asm volatile("rdpmc" : "=a" (low), "=d" (high) : "c" (cntr));

	/*
	 * Widen through unsigned long long before shifting: shifting an
	 * unsigned long by 32 is undefined where that type is only 32 bits.
	 */
	return (unsigned long)((((unsigned long long)high) << 32) | low);
}

int
main(void)
{
/* cheaty way to test PMU counters; should figure out a better API */
cos_pmu_enable_fixed_counter(BOOT_CAPTBL_SELF_INITHW_BASE, 0);
cos_pmu_enable_fixed_counter(BOOT_CAPTBL_SELF_INITHW_BASE, 1);
/* enable architecture specific counter events (reference https://perfmon-events.intel.com/) */
cos_pmu_program_event_counter(BOOT_CAPTBL_SELF_INITHW_BASE, 0, 0x49, 0x0E);
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I like the power and flexibility of this -- the direct programming of the PMUs. For that reason, we likely want this implementation.

I don't like how this can't really be fit into a capability-based system where we can delegate out, in a controlled fashion, the ability to program a subset of the capabilities, including limiting the # of counters set (since the hardware has a limit). A problem for a future us.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's actually how KVM does it, from my understanding; they delegate a subset of the counters that the guest can use.

cos_pmu_program_event_counter(BOOT_CAPTBL_SELF_INITHW_BASE, 1, 0xC5, 0x11);

unsigned long hw_instructions, core_cycles, dtlb_misses, branch_mispredicts;
char *buf;
int i;

buf = (char *)memmgr_heap_page_allocn(NUM_PAGES);
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

check return value, at least with assert.


/* write to a bunch of memory */
for (i = 0; i < NUM_PAGES*PAGE_SIZE; i++) {
buf[i] = (char)(i % 128);
}

dtlb_misses = rdpmc(0);
branch_mispredicts = rdpmc(1);
/* super poorly documented way to read intel's fixed counters */
hw_instructions = rdpmc(1<<30);
core_cycles = rdpmc((1<<30)+1);

/* context switch */
pong_call();

/* write to a bunch of memory */
for (i = 0; i < NUM_PAGES*PAGE_SIZE; i++) {
buf[i] = -(char)(i % 128);
}

hw_instructions = rdpmc(1<<30) - hw_instructions;
core_cycles = rdpmc((1<<30)+1) - core_cycles;
dtlb_misses = rdpmc(0) - dtlb_misses;
branch_mispredicts = rdpmc(1) - branch_mispredicts;

printc("HW Instructions: %lu\n", hw_instructions);
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think that the massive walk through the buffers is going to make the hw instructions/branch mispredicts not very useful. You only need to touch a single character per page, which should take these counts down significantly.

Might as well also report cycle count to see a relation between all of these.

printc("Core Cycles: %lu\n", core_cycles);
printc("DTLB Misses: %lu\n", dtlb_misses);
printc("Branch Mispredicts: %lu\n", branch_mispredicts);

}
8 changes: 5 additions & 3 deletions src/components/lib/crt/crt.c
Original file line number Diff line number Diff line change
Expand Up @@ -266,7 +266,7 @@ crt_comp_create_from(struct crt_comp *c, char *name, compid_t id, struct crt_chk
assert(inv.server->id != chkpt->c->id);
}

ret = cos_compinfo_alloc(ci, c->ro_addr, BOOT_CAPTBL_FREE, c->entry_addr, root_ci);
ret = cos_compinfo_alloc(ci, c->ro_addr, BOOT_CAPTBL_FREE, c->entry_addr, (asid_t)0, root_ci);
assert(!ret);

mem = cos_page_bump_allocn(root_ci, chkpt->tot_sz_mem);
Expand All @@ -293,6 +293,7 @@ crt_comp_create_from(struct crt_comp *c, char *name, compid_t id, struct crt_chk
return 0;
}

/*
 * Next ASID to hand out when a component is created from an elf object.
 * File-local on purpose: nothing outside this translation unit should
 * touch it.
 * FIXME: This is to test ASID effectiveness. Replace with namespace implementation
 */
static int next_asid = 1;
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

static

/**
* Create the component from the elf object including all the resource
* tables, and memory.
Expand Down Expand Up @@ -333,7 +334,8 @@ crt_comp_create(struct crt_comp *c, char *name, compid_t id, void *elf_hdr, vadd
printc("\t\t elf obj: ro [0x%lx, 0x%lx), data [0x%lx, 0x%lx), bss [0x%lx, 0x%lx).\n",
c->ro_addr, c->ro_addr + ro_sz, c->rw_addr, c->rw_addr + data_sz, c->rw_addr + data_sz, c->rw_addr + data_sz + bss_sz);

ret = cos_compinfo_alloc(ci, c->ro_addr, BOOT_CAPTBL_FREE, c->entry_addr, root_ci);
/* FIXME: Replace next_asid with namespace implementation */
ret = cos_compinfo_alloc(ci, c->ro_addr, BOOT_CAPTBL_FREE, c->entry_addr, next_asid++, root_ci);
assert(!ret);

tot_sz = round_up_to_page(round_up_to_page(ro_sz) + data_sz + bss_sz);
Expand Down Expand Up @@ -1082,7 +1084,7 @@ crt_comp_exec(struct crt_comp *c, struct crt_comp_exec_context *ctxt)
if (crt_comp_alias_in(c, c, &compres, CRT_COMP_ALIAS_PGTBL | CRT_COMP_ALIAS_COMP)) BUG();

/* Set up the untyped memory in the new component */
utpt = cos_pgtbl_alloc(ci);
utpt = cos_pgtbl_alloc(ci, (asid_t)0);
assert(utpt);
cos_meminfo_init(&(target_ci->mi), BOOT_MEM_KM_BASE, ctxt->memsz, utpt);
cos_meminfo_alloc(target_ci, BOOT_MEM_KM_BASE, ctxt->memsz);
Expand Down
2 changes: 1 addition & 1 deletion src/components/lib/kernel/cos_defkernel_api.c
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,7 @@ cos_defcompinfo_child_alloc(struct cos_defcompinfo *child_defci, vaddr_t entry,
struct cos_aep_info *child_aep = cos_sched_aep_get(child_defci);

assert(curr_defci_init_status == INITIALIZED);
ret = cos_compinfo_alloc(child_ci, heap_ptr, cap_frontier, entry, ci);
ret = cos_compinfo_alloc(child_ci, heap_ptr, cap_frontier, entry, (asid_t)0, ci);
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are the casts really necessary (here and above)? I'd imagine asid_t is a simple integer variant.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not necessary. Wanted more self documenting code because just sticking another 0 in there can be a little confusing. I can remove this though

if (ret) return ret;
ret = cos_aep_alloc_intern(child_aep, child_defci, 0, is_sched ? sched_aep : NULL, NULL, NULL, 0);

Expand Down
20 changes: 16 additions & 4 deletions src/components/lib/kernel/cos_kernel_api.c
Original file line number Diff line number Diff line change
Expand Up @@ -802,7 +802,7 @@ cos_captbl_alloc(struct cos_compinfo *ci)
}

pgtblcap_t
cos_pgtbl_alloc(struct cos_compinfo *ci)
cos_pgtbl_alloc(struct cos_compinfo *ci, asid_t asid)
{
vaddr_t kmem;
capid_t cap;
Expand All @@ -812,7 +812,7 @@ cos_pgtbl_alloc(struct cos_compinfo *ci)
assert(ci);

if (__alloc_mem_cap(ci, CAP_PGTBL, &kmem, &cap)) return 0;
if (call_cap_op(ci->captbl_cap, CAPTBL_OP_PGTBLACTIVATE, cap, __compinfo_metacap(ci)->mi.pgtbl_cap, kmem, 0))
if (call_cap_op(ci->captbl_cap, CAPTBL_OP_PGTBLACTIVATE, cap, __compinfo_metacap(ci)->mi.pgtbl_cap, kmem, asid << 16))
BUG();

return cap;
Expand Down Expand Up @@ -845,7 +845,7 @@ cos_comp_alloc(struct cos_compinfo *ci, captblcap_t ctc, pgtblcap_t ptc, vaddr_t

int
cos_compinfo_alloc(struct cos_compinfo *ci, vaddr_t heap_ptr, capid_t cap_frontier, vaddr_t entry,
struct cos_compinfo *ci_resources)
asid_t asid, struct cos_compinfo *ci_resources)
{
pgtblcap_t ptc;
captblcap_t ctc;
Expand All @@ -854,7 +854,7 @@ cos_compinfo_alloc(struct cos_compinfo *ci, vaddr_t heap_ptr, capid_t cap_fronti

printd("cos_compinfo_alloc\n");

ptc = cos_pgtbl_alloc(ci_resources);
ptc = cos_pgtbl_alloc(ci_resources, asid);
assert(ptc);
ctc = cos_captbl_alloc(ci_resources);
assert(ctc);
Expand Down Expand Up @@ -1310,3 +1310,15 @@ cos_hw_map(struct cos_compinfo *ci, hwcap_t hwc, paddr_t pa, unsigned int len)

return (void *)va;
}

/*
 * Invoke CAPTBL_OP_HW_PMU_PROG_EVT_CNTR on the hardware capability `hwc`,
 * passing the counter index `cntr`, the event code `evnt`, and `umask`
 * as the first three operation arguments (the fourth is unused and 0).
 *
 * Returns the result of the capability operation.
 */
int
cos_pmu_program_event_counter(hwcap_t hwc, u8_t cntr, u8_t evnt, u8_t umask)
{
	int ret;

	ret = call_cap_op(hwc, CAPTBL_OP_HW_PMU_PROG_EVT_CNTR, cntr, evnt, umask, 0);

	return ret;
}

/*
 * Invoke CAPTBL_OP_HW_PMU_EN_FIXED_CNTR on the hardware capability `hwc`
 * for the fixed counter with index `cntr`; the remaining operation
 * arguments are unused and passed as 0.
 *
 * Returns the result of the capability operation.
 */
int
cos_pmu_enable_fixed_counter(hwcap_t hwc, u8_t cntr)
{
	int ret;

	ret = call_cap_op(hwc, CAPTBL_OP_HW_PMU_EN_FIXED_CNTR, cntr, 0, 0, 0);

	return ret;
}
7 changes: 5 additions & 2 deletions src/components/lib/kernel/cos_kernel_api.h
Original file line number Diff line number Diff line change
Expand Up @@ -107,10 +107,10 @@ int cos_pgtbl_intern_expandwith(struct cos_compinfo *ci, pgtblcap_t intern, vadd
* This uses the next three functions to allocate a new component and
* correctly populate ci (allocating all resources from ci_resources).
*/
int cos_compinfo_alloc(struct cos_compinfo *ci, vaddr_t heap_ptr, capid_t cap_frontier, vaddr_t entry,
int cos_compinfo_alloc(struct cos_compinfo *ci, vaddr_t heap_ptr, capid_t cap_frontier, vaddr_t entry, asid_t asid,
struct cos_compinfo *ci_resources);
captblcap_t cos_captbl_alloc(struct cos_compinfo *ci);
pgtblcap_t cos_pgtbl_alloc(struct cos_compinfo *ci);
pgtblcap_t cos_pgtbl_alloc(struct cos_compinfo *ci, asid_t asid);
compcap_t cos_comp_alloc(struct cos_compinfo *ci, captblcap_t ctc, pgtblcap_t ptc, vaddr_t entry);
void cos_comp_capfrontier_update(struct cos_compinfo *ci, capid_t cap_frontier);

Expand Down Expand Up @@ -201,6 +201,9 @@ int cos_hw_tlbstall(hwcap_t hwc);
int cos_hw_tlbstall_recount(hwcap_t hwc);
void cos_hw_shutdown(hwcap_t hwc);

int cos_pmu_enable_fixed_counter(hwcap_t hwc, u8_t cntr);
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't understand what "fixed" refers to here.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's hardware level. Most PMU implementations have two different counters: a set of fixed counters that count pre-programmed events (Intel has 3: hw instruction counter and two separate cycle counters), and programmable counters that can be set to count a different event. The hardware level interface for enabling/programming them is different so I separated them at the kernel level.

int cos_pmu_program_event_counter(hwcap_t hwc, u8_t cntr, u8_t evnt, u8_t umask);


capid_t cos_capid_bump_alloc(struct cos_compinfo *ci, cap_t cap);

Expand Down
25 changes: 22 additions & 3 deletions src/kernel/capinv.c
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
#include "include/tcap.h"
#include "include/chal/defs.h"
#include "include/hw.h"
#include "include/pmu.h"
#include "include/chal/chal_proto.h"

#define COS_DEFAULT_RET_CAP 0
Expand Down Expand Up @@ -1122,9 +1123,10 @@ static int __attribute__((noinline)) composite_syscall_slowpath(struct pt_regs *
capid_t pt_entry = __userregs_get1(regs);
capid_t pgtbl_cap = __userregs_get2(regs);
vaddr_t kmem_cap = __userregs_get3(regs);
capid_t pgtbl_lvl = __userregs_get4(regs);
/* FIXME: change lvl to order */
ret = chal_pgtbl_pgtblactivate(ct, cap, pt_entry, pgtbl_cap, kmem_cap, pgtbl_lvl);
capid_t pgtbl_lvl = __userregs_get4(regs) & 0xFFFF;
asid_t asid = __userregs_get4(regs) >> 16;

ret = chal_pgtbl_pgtblactivate(ct, cap, pt_entry, pgtbl_cap, kmem_cap, pgtbl_lvl, asid);

break;
}
Expand Down Expand Up @@ -1652,6 +1654,23 @@ static int __attribute__((noinline)) composite_syscall_slowpath(struct pt_regs *
ret = chal_tlbstall_recount(0);
break;
}
case CAPTBL_OP_HW_PMU_PROG_EVT_CNTR: {
	/*
	 * Enable, then program, a PMU event counter. The user-level
	 * wrapper passes (cntr, evnt, umask) as operation arguments
	 * 1, 2 and 3, so each must be read from its own register.
	 */
	u8_t cntr  = __userregs_get1(regs);
	u8_t evt   = __userregs_get2(regs);
	/* third argument; reading get2 here would alias the event code */
	u8_t umask = __userregs_get3(regs);

	if ((ret = pmu_event_cntr_enable(cntr))) {
		goto err;
	}
	ret = pmu_event_cntr_program(cntr, evt, umask);
	break;
}
case CAPTBL_OP_HW_PMU_EN_FIXED_CNTR: {
u8_t cntr = __userregs_get1(regs);

ret = pmu_fixed_cntr_enable(cntr);
break;
}
default:
goto err;
}
Expand Down
2 changes: 1 addition & 1 deletion src/kernel/include/component.h
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ comp_activate(struct captbl *t, capid_t cap, capid_t capin, capid_t captbl_cap,

compc->entry_addr = entry_addr;
compc->info.pgtblinfo.pgtbl = ptc->pgtbl;
compc->info.pgtblinfo.asid = chal_asid_alloc();
compc->info.pgtblinfo.asid = ptc->asid;
compc->info.captbl = ctc->captbl;
compc->pgd = ptc;
compc->ct_top = ctc;
Expand Down
6 changes: 3 additions & 3 deletions src/kernel/include/pgtbl.h
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ unsigned long *pgtbl_lkup_pgd(pgtbl_t pt, vaddr_t addr, word_t *flags);
int pgtbl_get_cosframe(pgtbl_t pt, vaddr_t frame_addr, paddr_t *cosframe, vaddr_t *order);
vaddr_t pgtbl_translate(pgtbl_t pt, vaddr_t addr, word_t *flags);
pgtbl_t pgtbl_create(void *page, void *curr_pgtbl);
int pgtbl_activate(struct captbl *t, unsigned long cap, unsigned long capin, pgtbl_t pgtbl, u32_t lvl);
int pgtbl_activate(struct captbl *t, unsigned long cap, unsigned long capin, pgtbl_t pgtbl, u32_t lvl, asid_t asid);
int pgtbl_deactivate(struct captbl *t, struct cap_captbl *dest_ct_cap, unsigned long capin, livenessid_t lid,
capid_t pgtbl_cap, capid_t cosframe_addr, const int root);
int pgtbl_mapping_scan(struct cap_pgtbl *pt);
Expand Down Expand Up @@ -105,7 +105,7 @@ unsigned long chal_pgtbl_flag(unsigned long input);
int chal_pgtbl_kmem_act(pgtbl_t pt, vaddr_t addr, unsigned long *kern_addr, unsigned long **pte_ret);
int chal_tlb_quiescence_check(u64_t timestamp);
int chal_cap_memactivate(struct captbl *ct, struct cap_pgtbl *pt, capid_t frame_cap, capid_t dest_pt, vaddr_t vaddr, vaddr_t order);
int chal_pgtbl_activate(struct captbl *t, unsigned long cap, unsigned long capin, pgtbl_t pgtbl, u32_t lvl);
int chal_pgtbl_activate(struct captbl *t, unsigned long cap, unsigned long capin, pgtbl_t pgtbl, u32_t lvl, asid_t asid);
int chal_pgtbl_deactivate(struct captbl *t, struct cap_captbl *dest_ct_cap, unsigned long capin,
livenessid_t lid, capid_t pgtbl_cap, capid_t cosframe_addr, const int root);

Expand All @@ -127,7 +127,7 @@ int chal_pgtbl_quie_check(u32_t orig_v);
void chal_pgtbl_init_pte(void *pte);

/* Creation of the table object - not to be confused with activation of cap */
int chal_pgtbl_pgtblactivate(struct captbl *ct, capid_t cap, capid_t pt_entry, capid_t pgtbl_cap, vaddr_t kmem_cap, capid_t pgtbl_lvl);
int chal_pgtbl_pgtblactivate(struct captbl *ct, capid_t cap, capid_t pt_entry, capid_t pgtbl_cap, vaddr_t kmem_cap, capid_t pgtbl_lvl, asid_t asid);
/* Deactivate */
int chal_pgtbl_deact_pre(struct cap_header *ch, u32_t pa);
/* Page mapping */
Expand Down
Loading