Skip to content

Commit

Permalink
internal: prepare for code generator improvements (#852)
Browse files Browse the repository at this point in the history
## Summary

Introduce the `Body` data type, meant for storing a self-contained
procedure body, and integrate it into each code generator. So far, only
the procedure's code is stored in `Body`; the idea is to have it store
all other body-related input data once the IR moves to a data-oriented
design.

## Details

#### Architectural changes

* the code generators now require a `Body` instead of a free-standing
  `CgNode` as input, of which they take ownership
* `generateIR` returns a `Body` instance instead of a free-standing
  `CgNode` 

Each code generator stores the `Body` in its internal procedure context
type.

#### Additional C and JS code generator changes

* merge each partial body into the procedure's total body. While not
  necessary at the moment, it will be, once `Body` starts storing more
  than just the `CgNode`

#### Additional VM code generator changes

* `PProc` is turned into a non-`ref` object type, made private, and
  renamed to `BProc` (the name also used by the C code generator)
* `bestEffort` is turned from a procedure into a field of `BProc`,
  allowing the register allocation procedures to take a `BProc` as
  input, instead of the broader `TCtx`
* the setup of `BProc` plus initial address-taken analysis is moved into
  a common procedure, making procedure and expression/statement code
  generation more uniform
* both `genExpr` and `genStmt` now return the required number of
  registers, instead of the call site having to manually query the
  value

Instead of incrementally generating the code for partial procedures,
`vmbackend` accumulates the `CgNode` code and only invokes the code
generator once the procedures are complete, which greatly simplifies
the partial-procedure-related logic.
  • Loading branch information
zerbina authored Aug 23, 2023
1 parent e83f27d commit d202855
Show file tree
Hide file tree
Showing 13 changed files with 154 additions and 148 deletions.
2 changes: 1 addition & 1 deletion compiler/backend/backends.nim
Original file line number Diff line number Diff line change
Expand Up @@ -375,7 +375,7 @@ proc process(body: var MirFragment, ctx: PSym, graph: ModuleGraph,
injectDestructorCalls(graph, idgen, ctx, body.tree, body.source)

proc generateIR*(graph: ModuleGraph, idgen: IdGenerator, owner: PSym,
code: sink MirFragment): CgNode =
code: sink MirFragment): Body =
## Translates the MIR code provided by `code` into ``CgNode`` IR and,
## if enabled, echoes the result.
result = generateIR(graph, idgen, owner, code.tree, code.source)
Expand Down
22 changes: 12 additions & 10 deletions compiler/backend/cbackend.nim
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ type
InlineProc = object
## Information about an inline procedure.
sym: PSym
body: CgNode
body: Body
## the fully processed body of the procedure

deps: PackedSet[uint32]
Expand Down Expand Up @@ -223,12 +223,12 @@ proc processEvent(g: BModuleList, inl: var InliningData, discovery: var Discover

var p = getOrDefault(partial, evt.sym)
if p == nil:
p = startProc(g.modules[evt.module.int], evt.sym)
p = startProc(g.modules[evt.module.int], evt.sym, Body())
partial[evt.sym] = p

let body = generateIR(g.graph, bmod.idgen, evt.sym, evt.body)
# emit into the procedure:
genStmts(p, body)
genStmts(p, merge(p.body, body))

processLate(bmod, discovery, inl, evt.module, inlineId)
of bekProcedure:
Expand All @@ -243,7 +243,7 @@ proc processEvent(g: BModuleList, inl: var InliningData, discovery: var Discover

# we can't generate with ``genProc`` because we still need to output
# the mangled names
genStmts(p, body)
genStmts(p, p.body.code)
writeMangledLocals(p)
let r = finishProc(p, evt.sym)

Expand Down Expand Up @@ -272,7 +272,7 @@ proc emit(m: BModule, inl: InliningData, prc: InlineProc, r: var Rope) =
for dep in prc.deps.items:
emit(m, inl, inl.inlineProcs[dep], r)

assert prc.body != nil, "missing body"
assert prc.body.code != nil, "missing body"
# conservatively emit a prototype for all procedures to make sure that
# recursive procedures work:
genProcPrototype(m, prc.sym)
Expand Down Expand Up @@ -317,10 +317,6 @@ proc generateHeader(g: BModuleList, inl: InliningData, data: DiscoveryData,
proc generateCodeForMain(m: BModule, modules: ModuleList) =
## Generates and emits the C code for the program's or library's entry
## point.
let p = newProc(nil, m)
# we don't want error or stack-trace code in the main procedure:
p.flags.incl nimErrorFlagDisabled
p.options = {}

# generate the body:
let body = newNode(nkStmtList)
Expand All @@ -332,7 +328,13 @@ proc generateCodeForMain(m: BModule, modules: ModuleList) =
generateTeardown(m.g.graph, modules, body)

# now generate the C code for the body:
genStmts(p, canonicalize(m.g.graph, m.idgen, m.module, body, {}))
let p = newProc(nil, m)
# we don't want error or stack-trace code in the main procedure:
p.flags.incl nimErrorFlagDisabled
p.options = {}
p.body = canonicalize(m.g.graph, m.idgen, m.module, body, {})

genStmts(p, p.body.code)
var code: string
code.add(p.s(cpsLocals))
code.add(p.s(cpsInit))
Expand Down
11 changes: 6 additions & 5 deletions compiler/backend/cgen.nim
Original file line number Diff line number Diff line change
Expand Up @@ -805,8 +805,9 @@ proc allPathsAsgnResult(n: CgNode): InitResultEnum =
proc isNoReturn(m: BModule; s: PSym): bool {.inline.} =
sfNoReturn in s.flags and m.config.exc != excGoto

proc startProc*(m: BModule, prc: PSym; procBody: CgNode = nil): BProc =
proc startProc*(m: BModule, prc: PSym; procBody: sink Body): BProc =
var p = newProc(prc, m)
p.body = procBody
assert(prc.ast != nil)
fillProcLoc(m, prc) # ensure that a loc exists
if m.procs[prc].params.len == 0:
Expand All @@ -833,8 +834,8 @@ proc startProc*(m: BModule, prc: PSym; procBody: CgNode = nil): BProc =
# global is either 'nil' or points to valid memory and so the RC operation
# succeeds without touching not-initialized memory.
if sfNoInit in prc.flags: discard
elif procBody != nil and
allPathsAsgnResult(procBody) == InitSkippable: discard
elif p.body.code != nil and
allPathsAsgnResult(p.body.code) == InitSkippable: discard
else:
resetLoc(p, p.params[0])
if skipTypes(res.typ, abstractInst).kind == tyArray:
Expand Down Expand Up @@ -919,11 +920,11 @@ proc finishProc*(p: BProc, prc: PSym): string =

result = generatedProc

proc genProc*(m: BModule, prc: PSym, procBody: CgNode): Rope =
proc genProc*(m: BModule, prc: PSym, procBody: sink Body): Rope =
## Generates the code for the procedure `prc`, where `procBody` is the code
## of the body with all applicable lowerings and transformations applied.
let p = startProc(m, prc, procBody)
genStmts(p, procBody)
genStmts(p, p.body.code)
result = finishProc(p, prc)

proc genProcPrototype(m: BModule, sym: PSym) =
Expand Down
1 change: 1 addition & 0 deletions compiler/backend/cgendata.nim
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,7 @@ type
withinBlockLeaveActions*: int ## complex to explain
sigConflicts*: CountTable[string]

body*: Body ## the procedure's full body
locals*: SymbolMap[TLoc] ## the locs for all locals of the procedure

TTypeSeq* = seq[PType]
Expand Down
26 changes: 25 additions & 1 deletion compiler/backend/cgir.nim
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,11 @@ type
# future direction: move to a single-sequence-based, data-oriented design
# for the code-generator IR

Body* = object
## A self-contained CG IR fragment. This is usually the full body of a
## procedure.
code*: CgNode

func len*(n: CgNode): int {.inline.} =
n.kids.len

Expand Down Expand Up @@ -211,4 +216,23 @@ proc newNode*(kind: CgNodeKind; info = unknownLineInfo;
proc newOp*(kind: CgNodeKind; info: TLineInfo, typ: PType,
opr: sink CgNode): CgNode =
result = CgNode(kind: kind, info: info, typ: typ)
result.operand = opr
result.operand = opr

proc merge*(dest: var Body, source: Body): CgNode =
  ## Merges `source` into `dest` by appending the code of the former to the
  ## code of the latter. Returns the node representing the code from
  ## `source` after it was merged, which is always `source.code`.
  ##
  ## * if `dest` is uninitialized (its `code` is nil), it takes over
  ##   `source.code` as-is
  ## * if `source` has no code (nil or a `cnkEmpty` node), an initialized
  ##   `dest` is left unchanged
  ## * if `dest` is a `cnkEmpty` node, it is replaced with `source.code`
  ## * if `dest` is a `cnkStmtList`, `source.code` is appended to it in-place
  ## * otherwise, both are wrapped in a new `cnkStmtList` that uses the
  ##   line information of the previous `dest.code`
  result = source.code

  if dest.code == nil:
    # make things easier by supporting `dest` being uninitialized
    dest.code = source.code
  elif source.code != nil and source.code.kind != cnkEmpty:
    # the nil check mirrors the leniency granted to `dest`: an uninitialized
    # `source` contributes nothing instead of being dereferenced
    case dest.code.kind
    of cnkEmpty:
      dest.code = source.code
    of cnkStmtList:
      # `dest` already is a statement list; append to it in-place
      dest.code.kids.add source.code
    else:
      # wrap the existing code and the appended code in a new statement list
      dest.code = newStmt(cnkStmtList, dest.code.info,
                          [dest.code, source.code])
4 changes: 2 additions & 2 deletions compiler/backend/cgirgen.nim
Original file line number Diff line number Diff line change
Expand Up @@ -1358,11 +1358,11 @@ proc tb(tree: TreeWithSource, cl: var TranslateCl, start: NodePosition): CgNode


proc generateIR*(graph: ModuleGraph, idgen: IdGenerator, owner: PSym,
tree: sink MirTree, sourceMap: sink SourceMap): CgNode =
tree: sink MirTree, sourceMap: sink SourceMap): Body =
## Generates the ``CgNode`` IR corresponding to the input MIR code (`tree`),
## using `idgen` to provide new IDs when creating symbols. `sourceMap`
## must be the ``SourceMap`` corresponding to `tree` and is used as the
## provider for source position information
var cl = TranslateCl(graph: graph, idgen: idgen, cache: graph.cache,
owner: owner)
tb(TreeWithSource(tree: tree, map: sourceMap), cl, NodePosition 0)
Body(code: tb(TreeWithSource(tree: tree, map: sourceMap), cl, NodePosition 0))
3 changes: 2 additions & 1 deletion compiler/backend/jsbackend.nim
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ import
],
compiler/backend/[
backends,
cgir,
jsgen
],
compiler/front/[
Expand Down Expand Up @@ -74,7 +75,7 @@ proc processEvent(g: PGlobals, graph: ModuleGraph, modules: BModuleList,
partial[evt.sym.id] = p

let body = generateIR(graph, bmod.idgen, evt.sym, evt.body)
genStmt(p, body)
genStmt(p, merge(p.fullBody, body))

processLate(g, discovery)
of bekProcedure:
Expand Down
14 changes: 9 additions & 5 deletions compiler/backend/jsgen.nim
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,8 @@ type
PProc* = ref TProc
TProc* = object
prc: PSym
fullBody*: Body
## the procedure's full body
locals, body: Rope
options: TOptions
module: BModule
Expand Down Expand Up @@ -2334,9 +2336,10 @@ proc finishProc*(p: PProc): string =
# echo "END generated code for: " & prc.name.s

proc genProc*(g: PGlobals, module: BModule, prc: PSym,
transformedBody: CgNode): Rope =
body: sink Body): Rope =
var p = startProc(g, module, prc)
p.nested: genStmt(p, transformedBody)
p.fullBody = body
p.nested: genStmt(p, p.fullBody.code)
result = finishProc(p)

proc genStmt(p: PProc, n: CgNode) =
Expand Down Expand Up @@ -2523,11 +2526,12 @@ proc genHeader*(): Rope =
var lastJSError = null;
""".unindent.format(VersionAsString))

proc genTopLevelStmt*(globals: PGlobals, m: BModule, n: CgNode) =
m.config.internalAssert(m.module != nil, n.info, "genTopLevelStmt")
proc genTopLevelStmt*(globals: PGlobals, m: BModule, body: sink Body) =
m.config.internalAssert(m.module != nil, body.code.info, "genTopLevelStmt")
var p = newInitProc(globals, m)
p.fullBody = body
p.unique = globals.unique
genStmt(p, n)
genStmt(p, p.fullBody.code)
p.g.code.add(p.locals)
p.g.code.add(p.body)

Expand Down
6 changes: 3 additions & 3 deletions compiler/mir/mirbridge.nim
Original file line number Diff line number Diff line change
Expand Up @@ -70,12 +70,12 @@ proc echoMir*(config: ConfigRef, owner: PSym, tree: MirTree) =
writeBody(config, "-- MIR: " & owner.name.s):
config.writeln(print(tree))

proc echoOutput*(config: ConfigRef, owner: PSym, body: CgNode) =
proc echoOutput*(config: ConfigRef, owner: PSym, body: Body) =
## If requested via the define, renders the output IR `body` and writes the
## result out through ``config.writeLine``.
if config.getStrDefine("nimShowMirOutput") == owner.name.s:
writeBody(config, "-- output AST: " & owner.name.s):
config.writeln(treeRepr(body))
config.writeln(treeRepr(body.code))

proc rewriteGlobalDefs*(body: var MirTree, sourceMap: var SourceMap,
outermost: bool) =
Expand Down Expand Up @@ -168,7 +168,7 @@ proc rewriteGlobalDefs*(body: var MirTree, sourceMap: var SourceMap,
apply(body, prepared)

proc canonicalize*(graph: ModuleGraph, idgen: IdGenerator, owner: PSym,
body: PNode, options: set[GenOption]): CgNode =
body: PNode, options: set[GenOption]): Body =
## Legacy routine. Translates the body `body` of the procedure `owner` to
## MIR code, and the MIR code to ``CgNode`` IR.
echoInput(graph.config, owner, body)
Expand Down
2 changes: 1 addition & 1 deletion compiler/sem/injectdestructors.nim
Original file line number Diff line number Diff line change
Expand Up @@ -1325,7 +1325,7 @@ proc injectDestructorCalls*(g: ModuleGraph; idgen: IdGenerator; owner: PSym;
# the MIR code wouldn't be very useful, so we turn it into backend IR
# first, which we then render to text
# XXX: this needs a deeper rethink
let n = generateIR(g, idgen, owner, tree, sourceMap)
let n = generateIR(g, idgen, owner, tree, sourceMap).code
g.config.msgWrite("--expandArc: " & owner.name.s & "\n")
g.config.msgWrite(render(n))
g.config.msgWrite("\n-- end of expandArc ------------------------\n")
67 changes: 19 additions & 48 deletions compiler/vm/vmbackend.nim
Original file line number Diff line number Diff line change
Expand Up @@ -57,16 +57,13 @@ from compiler/ast/report_enums import ReportKind
import std/options as stdoptions

type
CodeFragment = object
## The state required for generating code in multiple steps.
## `CodeFragment` helps when generating code for multiple procedures in
## an interleaved manner.
prc: PProc
code: seq[TInstr]
debug: seq[TLineInfo]
PartialProc = object
## The in-progress body of a procedure.
sym: PSym
body: Body

PartialTbl = Table[int, CodeFragment]
## Maps the symbol ID of a partial procedure to the in-progress fragment
PartialTbl = Table[int, PartialProc]
## Maps the symbol ID of a partial procedure to its in-progress body

GenCtx = object
## State of the orchestrator.
Expand Down Expand Up @@ -121,11 +118,6 @@ proc registerProc(c: var GenCtx, prc: PSym): FunctionIndex =
setLinkIndex(c, prc, idx)
result = FunctionIndex(idx)

proc appendCode(c: var CodeGenCtx, f: CodeFragment) =
## Copies the code from the fragment to the end of the global code buffer
c.code.add(f.code)
c.debug.add(f.debug)

proc generateCodeForProc(c: var CodeGenCtx, idgen: IdGenerator, s: PSym,
body: sink MirFragment): CodeInfo =
## Generates the bytecode for the procedure `s` with body `body`. The
Expand All @@ -139,22 +131,6 @@ proc generateCodeForProc(c: var CodeGenCtx, idgen: IdGenerator, s: PSym,
else:
c.config.localReport(vmGenDiagToLegacyReport(r.takeErr))

proc genStmt(c: var CodeGenCtx, f: var CodeFragment, stmt: CgNode) =
## Generates and emits the code for a statement into the fragment `f`.
template swapState() =
swap(c.code, f.code)
swap(c.debug, f.debug)
swap(c.prc, f.prc)

# in order to generate code into the fragment, the fragment's state is
# swapped with `c`'s
swapState()
let r = genStmt(c, stmt)
swapState() # swap back

if unlikely(r.isErr):
c.config.localReport(vmGenDiagToLegacyReport(r.takeErr))

proc declareGlobal(c: var GenCtx, sym: PSym) =
# we silently ignore imported globals here and let ``vmgen`` raise an
# error when one is accessed
Expand Down Expand Up @@ -202,13 +178,8 @@ proc processEvent(c: var GenCtx, mlist: ModuleList, discovery: var DiscoveryData
of bekModule:
discard "nothing to do"
of bekPartial:
let p = addr mgetOrPut(partial, evt.sym.id, CodeFragment())
if p.prc == nil:
# it's a fragment that was just started
p.prc = PProc(sym: evt.sym)

let stmt = generateIR(c.graph, idgen, evt.sym, evt.body)
genStmt(c.gen, p[], stmt)
let p = addr mgetOrPut(partial, evt.sym.id, PartialProc(sym: evt.sym))
discard merge(p.body): generateIR(c.graph, idgen, evt.sym, evt.body)
of bekProcedure:
# a complete procedure became available
let r = generateCodeForProc(c.gen, idgen, evt.sym, evt.body)
Expand All @@ -228,23 +199,23 @@ proc generateAliveProcs(c: var GenCtx, config: BackendConfig,
for evt in process(c.graph, mlist, discovery, MagicsToKeep, config):
processEvent(c, mlist, discovery, partial, evt)

# finish the partial procedures:
for s, frag in partial.mpairs:
# generate the bytecode for the partial procedures:
for _, p in partial.mpairs:
let
start = c.gen.code.len
rc = frag.prc.regInfo.len

c.gen.appendCode(frag)
c.gen.gABC(unknownLineInfo, opcRet)
id = registerProc(c, p.sym)
r = genProc(c.gen, p.sym, move p.body)

let id = registerProc(c, frag.prc.sym)
fillProcEntry(c.functions[id.LinkIndex]): (start: start, regCount: rc)
if r.isOk:
fillProcEntry(c.functions[id.LinkIndex]): r.unsafeGet
else:
c.gen.config.localReport(vmGenDiagToLegacyReport(r.takeErr))

frag.prc.sym.ast[bodyPos] = newNode(nkStmtList)
# mark as non-empty:
p.sym.ast[bodyPos] = newNode(nkStmtList)

# the fragment isn't used beyond this point anymore, so it can be freed
# already
reset(frag)
reset(p)

proc generateCodeForMain(c: var GenCtx, config: BackendConfig,
modules: var ModuleList): FunctionIndex =
Expand Down
Loading

0 comments on commit d202855

Please sign in to comment.