diff --git a/compiler/vm/vm.nim b/compiler/vm/vm.nim index d4a485916e2..38d1fd769b0 100644 --- a/compiler/vm/vm.nim +++ b/compiler/vm/vm.nim @@ -905,7 +905,7 @@ proc rawExecute(c: var TCtx, pc: var int): YieldReason = kind: vmTraceMin, pc: pc)) - c.profiler.enter(c, tos) + c.profiler.enter() case instr.opcode of opcEof: # XXX: eof shouldn't be used to return a register @@ -3034,7 +3034,7 @@ proc rawExecute(c: var TCtx, pc: var int): YieldReason = #createStr regs[ra] regs[ra].strVal = typ.typeToString(preferExported) - c.profiler.leave(c) + c.profiler.leave(c.sframes) inc pc diff --git a/compiler/vm/vmdef.nim b/compiler/vm/vmdef.nim index 7dccde82e6e..142d649d188 100644 --- a/compiler/vm/vmdef.nim +++ b/compiler/vm/vmdef.nim @@ -736,21 +736,17 @@ type ProfileInfo* = object ## Profiler data for a single procedure. - time*: float ## the time spent on executing instructions (exclusive) - count*: int ## the number of instruction executed (exclusive) - - SourceLinePosition* = tuple - ## Identifies a line in a source file. Only intended for use by - ## the profiler. - fileIndex: typeof(TLineInfo.fileIndex) - line: typeof(TLineInfo.line) + time*: float ## the time spent on executing instructions (inclusive) + count*: int ## the number of instructions executed (exclusive) Profiler* = object # XXX: move this type to the ``vmprofiler`` module once possible - tEnter*: float - sframe*: StackFrameIndex ## The current stack frame + enabled*: bool ## whether profiling is enabled + tEnter*: float ## the point-in-time when the active measurment started - data*: Table[SourceLinePosition, ProfileInfo] + data*: Table[PSym, ProfileInfo] + ## maps the symbol of a procedure to the associated data gathered by the + ## profiler func `<`*(a, b: FieldIndex): bool {.borrow.} func `<=`*(a, b: FieldIndex): bool {.borrow.} @@ -893,6 +889,8 @@ proc initCtx*(module: PSym; cache: IdentCache; g: ModuleGraph; result.typeInfoCache.init() result.memory.allocator.byteType = result.typeInfoCache.charType + result.profiler.enabled = optProfileVM in g.config.globalOptions + func refresh*(c: var TCtx, module: PSym; idgen: IdGenerator) = addInNimDebugUtils(c.config, "refresh") c.module = module diff --git a/compiler/vm/vmprofiler.nim b/compiler/vm/vmprofiler.nim index bb343e92e1c..8f51e2fe11e 100644 --- a/compiler/vm/vmprofiler.nim +++ b/compiler/vm/vmprofiler.nim @@ -1,12 +1,16 @@ +## Implements a simple time profiler for the VM. + import std/[ algorithm, + hashes, times, strutils, tables ], compiler/ast/[ - lineinfos, + ast_types, + lineinfos ], compiler/front/[ options, @@ -16,36 +20,44 @@ import vmdef ] +from compiler/ast/ast import id + +func hash(s: PSym): Hash {.inline.} = + hash(s.id) -proc enter*(prof: var Profiler, c: TCtx, sframe: StackFrameIndex) {.inline.} = - if optProfileVM in c.config.globalOptions: +proc enter*(prof: var Profiler) {.inline.} = + if prof.enabled: prof.tEnter = cpuTime() - prof.sframe = sframe -proc leaveImpl(prof: var Profiler, c: TCtx) {.noinline.} = - let tLeave = cpuTime() - var frameIdx = prof.sframe - while frameIdx >= 0: - let frame = c.sframes[frameIdx] - if frame.prc != nil: - let li = (frame.prc.info.fileIndex, frame.prc.info.line) - if li notin prof.data: - prof.data[li] = ProfileInfo() - prof.data[li].time += tLeave - prof.tEnter - if frameIdx == prof.sframe: - inc prof.data[li].count - dec frameIdx +proc leaveImpl(prof: var Profiler, frames: openArray[TStackFrame]) {.noinline.} = + # note: the implementation is kept in a separate noinline procedure in + # order to reduce the instruction-cache pressure when profiling is disabled + let diff = cpuTime() + + for i in 0.. 0: + profiler.enter() + # sleep for a bit. The exact amount doesn't matter + sleep(1) + profiler.leave(frames) + # "leave" the frame + discard frames.pop() + +# render the data and verify the output: +let output = dump(conf, profiler).splitLines() +# the output must contain a header + 5 entries + trailing newline +doAssert output.len == 7, $output +doAssert output[^1] == "", output[^1] + +# verify the entries. Each must have exactly one sample taken, and the +# procedure with the most time spent must be the first symbol, followed by +# second one, etc. +for i in 1..