Skip to content

Commit

Permalink
vm: restore the VM profiler (#912)
Browse files Browse the repository at this point in the history
## Summary

Fix the compiler crashing when enabling the VM profiler (`--profilevm`)
and fully restore the latter's original functionality. In addition, the
profiler's overhead is significantly reduced.

## Details

The VM profiler stopped working when the stack-frames moved from being
`ref` based to being `seq` based
(fb03691).

This is due to the profiler expecting the frame index specified at the
start of a measurement (`enter`) still being valid at the end of a
measurement (`leave`), but this is not true when executing, for example,
a `Ret` instruction, which led to an out-of-bounds error.

As a simple and low impact fix, instead of remembering the index,
`leave` now takes the list of current stack frames (i.e., the stack) as
input, treating the last item as the current frame. A minor side-effect
of this is that a `Ret` instruction now counts for the caller, while a
call instruction counts for the callee.

There was also a bug in the rendering logic: `min` was used instead of
`max`, meaning that the entry limiting didn't work.

In addition, the documentation of the `vmprofiler` module is improved
and the implementation is refactored:

* the `data` table now uses a `PSym` as the key. The intention of the
  table was always to associate data with a procedure, but `TLineInfo`
  and later `SourceLinePosition` only approximated this. Using a `PSym`
  also makes it possible to render more information (such as the
  procedure's name) in the future
* whether profiling is enabled is now a property of the `Profiler`
  instance, removing the dependency on a `ConfigRef`. This also slightly
  reduces the overhead of `enter`/`leave`
* the `leaveImpl` procedure is improved. The time difference is only
  computed once, and instead of temporarily assigning each frame to a
  local (which incurred a very costly full copy), only `prc` is assigned
  to a local

With access to the symbol, rendering now also renders the full file,
line, and column of the profiled procedures again (instead of only the
containing file).

Finally, an integration test (`tprofiler.nim`) and a basic unit test
(`tvmprofiler.nim`) are added in order to prevent future regressions.
  • Loading branch information
zerbina authored Sep 17, 2023
1 parent f032b96 commit 6f3ac59
Show file tree
Hide file tree
Showing 5 changed files with 165 additions and 41 deletions.
4 changes: 2 additions & 2 deletions compiler/vm/vm.nim
Original file line number Diff line number Diff line change
Expand Up @@ -905,7 +905,7 @@ proc rawExecute(c: var TCtx, pc: var int): YieldReason =
kind: vmTraceMin,
pc: pc))

c.profiler.enter(c, tos)
c.profiler.enter()
case instr.opcode
of opcEof:
# XXX: eof shouldn't be used to return a register
Expand Down Expand Up @@ -3034,7 +3034,7 @@ proc rawExecute(c: var TCtx, pc: var int): YieldReason =
#createStr regs[ra]
regs[ra].strVal = typ.typeToString(preferExported)

c.profiler.leave(c)
c.profiler.leave(c.sframes)

inc pc

Expand Down
20 changes: 9 additions & 11 deletions compiler/vm/vmdef.nim
Original file line number Diff line number Diff line change
Expand Up @@ -736,21 +736,17 @@ type

ProfileInfo* = object
## Profiler data for a single procedure.
time*: float ## the time spent on executing instructions (exclusive)
count*: int ## the number of instruction executed (exclusive)

SourceLinePosition* = tuple
## Identifies a line in a source file. Only intended for use by
## the profiler.
fileIndex: typeof(TLineInfo.fileIndex)
line: typeof(TLineInfo.line)
time*: float ## the time spent on executing instructions (inclusive)
count*: int ## the number of instructions executed (exclusive)

Profiler* = object
# XXX: move this type to the ``vmprofiler`` module once possible
tEnter*: float
sframe*: StackFrameIndex ## The current stack frame
enabled*: bool ## whether profiling is enabled
tEnter*: float ## the point-in-time when the active measurment started

data*: Table[SourceLinePosition, ProfileInfo]
data*: Table[PSym, ProfileInfo]
## maps the symbol of a procedure to the associated data gathered by the
## profiler

func `<`*(a, b: FieldIndex): bool {.borrow.}
func `<=`*(a, b: FieldIndex): bool {.borrow.}
Expand Down Expand Up @@ -893,6 +889,8 @@ proc initCtx*(module: PSym; cache: IdentCache; g: ModuleGraph;
result.typeInfoCache.init()
result.memory.allocator.byteType = result.typeInfoCache.charType

result.profiler.enabled = optProfileVM in g.config.globalOptions

func refresh*(c: var TCtx, module: PSym; idgen: IdGenerator) =
addInNimDebugUtils(c.config, "refresh")
c.module = module
Expand Down
68 changes: 40 additions & 28 deletions compiler/vm/vmprofiler.nim
Original file line number Diff line number Diff line change
@@ -1,12 +1,16 @@
## Implements a simple time profiler for the VM.

import
std/[
algorithm,
hashes,
times,
strutils,
tables
],
compiler/ast/[
lineinfos,
ast_types,
lineinfos
],
compiler/front/[
options,
Expand All @@ -16,36 +20,44 @@ import
vmdef
]

from compiler/ast/ast import id

func hash(s: PSym): Hash {.inline.} =
hash(s.id)

proc enter*(prof: var Profiler, c: TCtx, sframe: StackFrameIndex) {.inline.} =
if optProfileVM in c.config.globalOptions:
proc enter*(prof: var Profiler) {.inline.} =
if prof.enabled:
prof.tEnter = cpuTime()
prof.sframe = sframe

proc leaveImpl(prof: var Profiler, c: TCtx) {.noinline.} =
let tLeave = cpuTime()
var frameIdx = prof.sframe
while frameIdx >= 0:
let frame = c.sframes[frameIdx]
if frame.prc != nil:
let li = (frame.prc.info.fileIndex, frame.prc.info.line)
if li notin prof.data:
prof.data[li] = ProfileInfo()
prof.data[li].time += tLeave - prof.tEnter
if frameIdx == prof.sframe:
inc prof.data[li].count
dec frameIdx
proc leaveImpl(prof: var Profiler, frames: openArray[TStackFrame]) {.noinline.} =
# note: the implementation is kept in a separate noinline procedure in
# order to reduce the instruction-cache pressure when profiling is disabled
let diff = cpuTime()

for i in 0..<frames.len:
let prc = frames[i].prc
if prc != nil:
if prc notin prof.data:
prof.data[prc] = ProfileInfo()
prof.data[prc].time += diff
# for the active frame, increment the number of samples taken
if i == frames.high:
inc prof.data[prc].count

proc leave*(prof: var Profiler, c: TCtx) {.inline.} =
if optProfileVM in c.config.globalOptions:
leaveImpl(prof, c)
proc leave*(prof: var Profiler, frames: openArray[TStackFrame]) {.inline.} =
## If profiling is enabled, ends a measurement, updating the collected data.
## The data of the profiler entries associated with the `frames` is updated
## with the measured time, and the "number of samples" counter of the current
## active frame (last entry in the list) is incremented by one.
if prof.enabled:
leaveImpl(prof, frames)

proc dump*(conf: ConfigRef, prof: Profiler): string =
## Constructs a string containing a report of VM execution based on the
## data collected by `prof`. The report is formatted and ready to print to
## console or similar interface.
## Constructs a string containing a report of VM execution based on the given
## `prof`. The report is formatted and ready to print to console or
## similar interface.
const MaxEntries = 32
var entries: seq[(SourceLinePosition, ProfileInfo)]
var entries: seq[(TLineInfo, ProfileInfo)]

proc compare(a: auto, b: ProfileInfo): int =
let t1 = a[1].time
Expand All @@ -54,12 +66,12 @@ proc dump*(conf: ConfigRef, prof: Profiler): string =
else: 0

# collect the entries with the most time spent:
for line, info in prof.data.pairs:
for sym, info in prof.data.pairs:
let pos = lowerBound(entries, info, compare)
if pos < MaxEntries:
entries.insert((line, info), pos)
entries.insert((sym.info, info), pos)
# discard excess entries:
entries.setLen(max(entries.len, MaxEntries))
entries.setLen(min(entries.len, MaxEntries))

# render the entries to a string:
result = "prof: µs #instr location\n"
Expand All @@ -68,5 +80,5 @@ proc dump*(conf: ConfigRef, prof: Profiler): string =
result.add align($int(info.time * 1e6), 10)
result.add align($int(info.count), 10)
result.add " "
result.add toMsgFilename(conf, pos.fileIndex)
result.add toFileLineCol(conf, pos)
result.add "\n"
80 changes: 80 additions & 0 deletions tests/compiler/tvmprofiler.nim
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
discard """
description: '''
Basic unit test for making sure that the VM profiler works as expected
'''
targets: native
"""

import
std/[
os,
strscans,
strutils
],
compiler/ast/[
ast,
lineinfos
],
compiler/front/[
options
],
compiler/utils/[
pathutils
],
compiler/vm/[
vmdef,
vmprofiler
]

var
conf = newConfigRef(nil)
# get a ``FileInfoIdx``. The file's content doesn't matter, it only needs to
# exist
let self = conf.fileInfoIdx(currentSourcePath().AbsoluteFile)

conf.filenameOption = foName # render the filepath as only the name

# setup an enabled profiler:
var profiler = Profiler(enabled: true)

# setup some pseudo symbols to represent the procedures:
var syms: seq[PSym]
for i in 1..5:
syms.add PSym(itemId: ItemId(module: 0, item: int32(i-1)),
info: newLineInfo(self, i, i-1))

var frames: seq[TStackFrame]

# "enter" multiple procedures:
for it in syms.items:
frames.add TStackFrame(prc: it)

# take a sample on each frame:
while frames.len > 0:
profiler.enter()
# sleep for a bit. The exact amount doesn't matter
sleep(1)
profiler.leave(frames)
# "leave" the frame
discard frames.pop()

# render the data and verify the output:
let output = dump(conf, profiler).splitLines()
# the output must contain a header + 5 entries + trailing newline
doAssert output.len == 7, $output
doAssert output[^1] == "", output[^1]

# verify the entries. Each must have exactly one sample taken, and the
# procedure with the most time spent must be the first symbol, followed by
# second one, etc.
for i in 1..<output.len - 1:
var
time, num, line, col: int
path: string
doAssert scanf(output[i], "$s$i$s$i$s$w.nim($i,$s$i)$s$.",
time, num, path, line, col)
doAssert num == 1 # one sample is taken within each procedure
doAssert path == "tvmprofiler"
# check that the line + column are correct:
doAssert line == i
doAssert col == i
34 changes: 34 additions & 0 deletions tests/vm/tprofiler.nim
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
discard """
description: '''
Ensure that the VM profiler doesn't crash with exceptional control-flow
'''
matrix: "--profileVM"
"""

proc doSomething() =
let f = 0.5
# further call nesting:
discard $f

proc p3() =
# call some procedure
doSomething()

raise CatchableError.newException("")

proc p2() =
try:
p3()
finally:
# procedure call in finally block during unwinding
doSomething()

proc p1() =
try:
p2()
except:
# procedure call in exception handler
doSomething()

static:
p1()

0 comments on commit 6f3ac59

Please sign in to comment.