From a6e61867dbdeacce2fb29ff84e904fb4adc33b3a Mon Sep 17 00:00:00 2001 From: zerbina <100542850+zerbina@users.noreply.github.com> Date: Wed, 9 Aug 2023 19:35:50 +0200 Subject: [PATCH] internal: use dedicated context type for `vmbackend` (#835) ## Summary Add and use a dedicated type for the contextual state of the VM code-generation orchestrator. This removes the hard coupling between `vmbackend` and `vmdef.TCtx` and is another preparation for splitting up the latter. ## Details The new `GenCtx` type replaces `TCtx` as the contextual-state type for the orchestrator. For now, `GenCtx` still stores an instance of `TCtx`, as the latter currently contains the code generator state. In preparation for splitting up `TCtx`, usage of its `idgen` and `functions` fields is removed from `vmbackend.nim`. The ID generator is directly passed to procedures that need one, and the function table (`functions`) is now stored in `GenCtx` -- prior to serializing the VM environment, the function table stored in `GenCtx` is moved into the `TCtx` instance. With the new dedicated type for the orchestrator, the collected globals don't have to be stored in `TCtx` anymore. The `collectedGlobals` field is made part of `GenCtx` and is renamed to just `globals`. 
### Misc - add the `base` template for `OrdinalSeq` and use it to reduce the noise in the `OrdinalSeq` routines - fix the `add` template for `OrdinalSeq` --- compiler/utils/containers.nim | 27 ++++++-- compiler/vm/vmaux.nim | 12 ---- compiler/vm/vmbackend.nim | 122 +++++++++++++++++++--------------- compiler/vm/vmdef.nim | 3 - 4 files changed, 87 insertions(+), 77 deletions(-) diff --git a/compiler/utils/containers.nim b/compiler/utils/containers.nim index 3950d1281e0..610d85fda9e 100644 --- a/compiler/utils/containers.nim +++ b/compiler/utils/containers.nim @@ -143,18 +143,31 @@ func merge*[I; T](dst: var Store[I, T], src: sink Store[I, T]): Option[I] = # ---------- OrdinalSeq API ------------ +template base*[I; T](x: OrdinalSeq[I, T]): seq[T] = + ## Returns underlying seq of `x`. + seq[T](x) + template len*[I; T](x: OrdinalSeq[I, T]): int = - (seq[T])(x).len + base(x).len template `[]`*[I; T](x: OrdinalSeq[I, T], i: I): untyped = - (seq[T])(x)[ord i] + base(x)[ord i] template `[]=`*[I; T](x: OrdinalSeq[I, T], i: I, item: T): untyped = - (seq[T])(x)[ord i] = item + base(x)[ord i] = item -func add*[I; T](x: OrdinalSeq[I, T], item: sink T): I {.inline.} = - (seq[T])(x).add item - result = I(x.high) +func add*[I; T](x: var OrdinalSeq[I, T], item: sink T): I {.inline.} = + base(x).add item + result = I(base(x).high) func newSeq*[I; T](x: var OrdinalSeq[I, T], len: int) {.inline.} = - newSeq((seq[T])(x), len) \ No newline at end of file + newSeq(base(x), len) + +func setLen*[I; T](x: var OrdinalSeq[I, T], len: int) {.inline.} = + setLen(base(x), len) + +iterator pairs*[I; T](x: OrdinalSeq[I, T]): (I, lent T) = + var i = 0 + while i < x.len: + yield (I(i), x[I(i)]) + inc i diff --git a/compiler/vm/vmaux.nim b/compiler/vm/vmaux.nim index ba9f0c2886d..7c79e76ed82 100644 --- a/compiler/vm/vmaux.nim +++ b/compiler/vm/vmaux.nim @@ -142,18 +142,6 @@ func fillProcEntry*(e: var FuncTableEntry, info: CodeInfo) {.inline.} = e.start = info.start e.regCount = info.regCount.uint16 
-proc registerProc*(c: var TCtx, prc: PSym): FunctionIndex = - ## Registers the procedure in the function table if it wasn't already. In - ## both cases, an index into the function table is returned. Whether a - ## procedure will resolve to a callback is decided on it's addition to the - ## function table - let next = LinkIndex(c.functions.len) - - result = c.symToIndexTbl.mgetOrPut(prc.id, next).FunctionIndex - if result == next.FunctionIndex: - # a new entry: - c.functions.add(initProcEntry(c, prc)) - proc lookupProc*(c: var TCtx, prc: PSym): FunctionIndex {.inline.} = ## Returns the function-table index corresponding to the provided `prc` ## symbol. Behaviour is undefined if `prc` has no corresponding function- diff --git a/compiler/vm/vmbackend.nim b/compiler/vm/vmbackend.nim index 8c083ed6d2a..48397444d1b 100644 --- a/compiler/vm/vmbackend.nim +++ b/compiler/vm/vmbackend.nim @@ -67,6 +67,17 @@ type PartialTbl = Table[int, CodeFragment] ## Maps the symbol ID of a partial procedure to the in-progress fragment + GenCtx = object + ## State of the orchestrator. + graph: ModuleGraph + + # link tables: + globals: OrdinalSeq[LinkIndex, PVmType] + functions: OrdinalSeq[LinkIndex, FuncTableEntry] + # for constants, the discovery data is re-used + + gen: TCtx ## only used for the code generator state + func growBy[T](x: var seq[T], n: Natural) {.inline.} = x.setLen(x.len + n) @@ -91,22 +102,25 @@ proc registerCallbacks(c: var TCtx) = # Used by some tests cb "stdlib.system.getOccupiedMem" +proc registerProc(c: var GenCtx, prc: PSym): FunctionIndex = + ## Adds an empty function-table entry for `prc` and registers the latter + ## in the link table. 
+ assert prc.id notin c.gen.symToIndexTbl + let idx = c.functions.add(initProcEntry(c.gen, prc)) + c.gen.symToIndexTbl[prc.id] = idx + result = FunctionIndex(idx) + proc appendCode(c: var TCtx, f: CodeFragment) = ## Copies the code from the fragment to the end of the global code buffer c.code.add(f.code) c.debug.add(f.debug) -proc refresh(c: var TCtx, m: Module) = - ## Prepares the code-generator state of `c` for processing AST - ## belonging to the module `m`. - assert m.idgen != nil - c.refresh(m.sym, m.idgen) - -proc generateCodeForProc(c: var TCtx, s: PSym, body: sink MirFragment): CodeInfo = +proc generateCodeForProc(c: var TCtx, idgen: IdGenerator, s: PSym, + body: sink MirFragment): CodeInfo = ## Generates and the bytecode for the procedure `s` with body `body`. The ## resulting bytecode is emitted into the global bytecode section. let - body = generateIR(c.graph, c.idgen, s, body) + body = generateIR(c.graph, idgen, s, body) r = genProc(c, s, body) if r.isOk: @@ -130,35 +144,34 @@ proc genStmt(c: var TCtx, f: var CodeFragment, stmt: CgNode) = if unlikely(r.isErr): c.config.localReport(vmGenDiagToLegacyReport(r.takeErr)) -proc declareGlobal(c: var TCtx, sym: PSym) = +proc declareGlobal(c: var GenCtx, sym: PSym) = # we silently ignore imported globals here and let ``vmgen`` raise an # error when one is accessed if exfNoDecl notin sym.extFlags and sfImportc notin sym.flags: # make sure the type is generated and register the global in the # link table - discard getOrCreate(c, sym.typ) - c.symToIndexTbl[sym.id] = LinkIndex(c.collectedGlobals.len) - c.collectedGlobals.add sym + c.gen.symToIndexTbl[sym.id] = c.globals.add(getOrCreate(c.gen, sym.typ)) -proc prepare(c: var TCtx, data: var DiscoveryData) = +proc prepare(c: var GenCtx, data: var DiscoveryData) = ## Registers with the link table all procedures, constants, globals, ## and threadvars discovered as part of producing the currently ## processed event. 
c.functions.setLen(data.procedures.len) for i, it in peek(data.procedures): - c.functions[i] = c.initProcEntry(it) - c.symToIndexTbl[it.id] = LinkIndex(i) + let idx = LinkIndex(i) + c.functions[idx] = c.gen.initProcEntry(it) + c.gen.symToIndexTbl[it.id] = idx # if a procedure's implementation is overridden with a VM callback, we # don't want any processing to happen for it, which we signal to the # event producer via ``exfNoDecl`` - if c.functions[i].kind == ckCallback: + if c.functions[idx].kind == ckCallback: it.extFlags.incl exfNoDecl # register the constants with the link table: for i, s in visit(data.constants): - c.symToIndexTbl[s.id] = LinkIndex(i) + c.gen.symToIndexTbl[s.id] = LinkIndex(i) for _, s in visit(data.globals): declareGlobal(c, s) @@ -166,12 +179,13 @@ proc prepare(c: var TCtx, data: var DiscoveryData) = for _, s in visit(data.threadvars): declareGlobal(c, s) -proc processEvent(c: var TCtx, mlist: ModuleList, discovery: var DiscoveryData, +proc processEvent(c: var GenCtx, mlist: ModuleList, discovery: var DiscoveryData, partial: var PartialTbl, evt: sink BackendEvent) = ## The orchestrator's event processor. 
prepare(c, discovery) - c.refresh(mlist[evt.module]) + let idgen = mlist[evt.module].idgen + c.gen.module = mlist[evt.module].sym case evt.kind of bekModule: @@ -182,18 +196,18 @@ proc processEvent(c: var TCtx, mlist: ModuleList, discovery: var DiscoveryData, # it's a fragment that was just started p.prc = PProc(sym: evt.sym) - let stmt = generateIR(c.graph, c.idgen, evt.sym, evt.body) - genStmt(c, p[], stmt) + let stmt = generateIR(c.graph, idgen, evt.sym, evt.body) + genStmt(c.gen, p[], stmt) of bekProcedure: # a complete procedure became available - let r = generateCodeForProc(c, evt.sym, evt.body) - fillProcEntry(c.functions[c.symToIndexTbl[evt.sym.id]], r) + let r = generateCodeForProc(c.gen, idgen, evt.sym, evt.body) + fillProcEntry(c.functions[c.gen.symToIndexTbl[evt.sym.id]], r) of bekImported: # not supported at the moment; ``vmgen`` is going to raise an # error when generating a call to a dynlib procedure discard "ignore" -proc generateAliveProcs(c: var TCtx, config: BackendConfig, +proc generateAliveProcs(c: var GenCtx, config: BackendConfig, discovery: var DiscoveryData, mlist: var ModuleList) = ## Generates and emits the bytecode for all alive procedure (excluding the ## entry point). 
@@ -206,14 +220,14 @@ proc generateAliveProcs(c: var TCtx, config: BackendConfig, # finish the partial procedures: for s, frag in partial.mpairs: let - start = c.code.len + start = c.gen.code.len rc = frag.prc.regInfo.len - c.appendCode(frag) - c.gABC(unknownLineInfo, opcRet) + c.gen.appendCode(frag) + c.gen.gABC(unknownLineInfo, opcRet) let id = registerProc(c, frag.prc.sym) - fillProcEntry(c.functions[id.int]): (start: start, regCount: rc) + fillProcEntry(c.functions[id.LinkIndex]): (start: start, regCount: rc) frag.prc.sym.ast[bodyPos] = newNode(nkStmtList) @@ -221,17 +235,19 @@ proc generateAliveProcs(c: var TCtx, config: BackendConfig, # already reset(frag) -proc generateCodeForMain(c: var TCtx, config: BackendConfig, +proc generateCodeForMain(c: var GenCtx, config: BackendConfig, modules: var ModuleList): FunctionIndex = ## Generate, emits, and links in the main procedure (the entry point). - let prc = generateMainProcedure(c.graph, mainModule(modules).idgen, modules) - var p = preprocess(config, prc, c.graph, c.idgen) - process(p, c.graph, c.idgen) + let + idgen = mainModule(modules).idgen + prc = generateMainProcedure(c.graph, idgen, modules) + var p = preprocess(config, prc, c.graph, idgen) + process(p, c.graph, idgen) result = registerProc(c, prc) - let r = generateCodeForProc(c, prc, p.body) - fillProcEntry(c.functions[result.int], r) + let r = generateCodeForProc(c.gen, idgen, prc, p.body) + fillProcEntry(c.functions[result.LinkIndex], r) func storeExtra(enc: var PackedEncoder, dst: var PackedEnv, routineSymLookup: sink Table[int, LinkIndex], @@ -264,14 +280,16 @@ proc generateCode*(g: ModuleGraph, mlist: sink ModuleList) = conf = g.config bconf = BackendConfig(noImported: true, options: {goIsNimvm}) - var c = TCtx(config: g.config, cache: g.cache, graph: g, idgen: g.idgen, - mode: emStandalone) + var c = + GenCtx(graph: g, + gen: TCtx(config: g.config, cache: g.cache, graph: g, + mode: emStandalone)) - c.typeInfoCache.init() + 
c.gen.typeInfoCache.init() # register the extra ops so that code generation isn't performed for the # corresponding procs: - registerCallbacks(c) + registerCallbacks(c.gen) # generate code for all alive routines: var discovery: DiscoveryData @@ -279,35 +297,29 @@ proc generateCode*(g: ModuleGraph, mlist: sink ModuleList) = let entryPoint = generateCodeForMain(c, bconf, mlist) - c.gABC(unknownLineInfo, opcEof) + c.gen.gABC(unknownLineInfo, opcEof) # ----- code generation is finished - # collect globals and `const`s: - # XXX: these two steps could be combined with storing into `PackedEnv`. - # Pros: no need for the `globals` and `consts` seqs - # Cons: (probably) higher I-cache pressure, slightly more complex logic - - var globals = newSeq[PVmType](c.collectedGlobals.len) - for i, sym in c.collectedGlobals.pairs: - let typ = c.typeInfoCache.lookup(conf, sym.typ) - # the type was already created during vmgen - globals[i] = typ.unsafeGet - + # produce a list with the type of each constant: var consts = newSeq[(PVmType, PNode)](discovery.constants.len) for i, sym in all(discovery.constants): - let typ = c.typeInfoCache.lookup(conf, sym.typ) + let typ = c.gen.typeInfoCache.lookup(conf, sym.typ) consts[i] = (typ.unsafeGet, sym.ast) + # put the finished function table into the ``TCtx`` object for the encoder + # to pack it + c.gen.functions = move base(c.functions) + # pack the data and write it to the ouput file: var enc: PackedEncoder env: PackedEnv - enc.init(c.types) - storeEnv(enc, env, c) - storeExtra(enc, env, c.symToIndexTbl, consts, globals) - env.code = move c.code + enc.init(c.gen.types) + storeEnv(enc, env, c.gen) + storeExtra(enc, env, c.gen.symToIndexTbl, consts, base(c.globals)) + env.code = move c.gen.code env.entryPoint = entryPoint let err = writeToFile(env, prepareToWriteOutput(conf)) diff --git a/compiler/vm/vmdef.nim b/compiler/vm/vmdef.nim index 530d54a1ef6..4d629bf8e3e 100644 --- a/compiler/vm/vmdef.nim +++ b/compiler/vm/vmdef.nim @@ -735,9 +735,6 @@ 
type ## dependencies. Expanded during code-generation and used for looking ## up the link-index (e.g. `FunctionIndex`) of a symbol - collectedGlobals*: seq[PSym] - ## leaked implementation detail of ``vmbackend.nim`` -- don't use - flags*: set[CodeGenFlag] ## flags that alter the behaviour of the code ## generator. Initialized by the VM's callsite and queried by the JIT. # XXX: `flags` is code generator / JIT state, and needs to be moved out of