Skip to content

Commit

Permalink
use whole-program code-generation for all backends (#712)
Browse files Browse the repository at this point in the history
## Summary

Change the C and JavaScript backends to use whole-program code-
generation in the same way that the VM and IC backends do. In short,
this means that code generation is now only run after *all* modules that
are part of the program have been semantically analyzed.

This brings the architecture of the C and JS backends closer to that of
the VM backend, and is the first step towards unifying the backend
processing.

The final goal is to have all backends process code in the same way,
with as much as possible of the pre-processing currently performed by
the code generators being moved to a shared, backend-agnostic layer.

## Details

Generalize the `ModuleGraph` pass that the VM backend used for gathering
the AST of each alive module, and move it into its own module. Compared
to the original implementation, the generalized pass:
* doesn't drop declarative nodes (the VM backend doesn't care about
  them, but the C and JS backends do)
* remembers the `IdGenerator` associated with each module
* doesn't introduce its own module order (that's left to the backend)
* uses the more descriptive `SeqMap` instead of a raw `seq`

`vmbackend` is adjusted to work with the generalized collection pass:
the module list produced by the pass is translated into the structure
that the rest of the VM backend still expects, but a `Store` with a
dedicated ID type is now used instead of a raw `seq` (preparing for
future improvements).

Using the same naming scheme as `vmbackend`, the modules `cbackend`
and `jsbackend` are introduced -- they implement the code-generation
orchestrators for the C and JavaScript backends, respectively. The
`passes` integration is removed from `cgen` and `jsgen`, as invoking the
code generators is now the responsibility of the orchestrators.

The new orchestrators take the module list produced by the collector
pass and generate the code for it (with `jsbackend` also writing the
output to disk already). They're very basic at the moment, but in the
future will take on similar responsibilities as `vmbackend` does
for the VM backend (e.g., dead-code elimination, running `transf`,
etc.).

Since forwarded procedures don't reach the code generators anymore, the
special handling for them is removed.

### Known Issues

The ability of option changes made via `.push` to affect top-level code
relied on semantic analysis and code generation happening in a pipelined
manner.

Since this is no longer the case, disabling or enabling checks for top-
level code stops working for now, but the plan is to bring this feature
back in the future.
  • Loading branch information
zerbina authored May 20, 2023
1 parent 1609fa2 commit f6b9d84
Show file tree
Hide file tree
Showing 11 changed files with 397 additions and 239 deletions.
73 changes: 73 additions & 0 deletions compiler/backend/cbackend.nim
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
## The code-generation orchestrator for the C backend. It generates the C code
## for the semantically analysed AST of the whole program by invoking ``cgen``.
##
## The general direction is to move more logic out of the code generator (such
## as figuring out the set of alive procedures) and into the orchestrator,
## leaving only the core of code generation to ``cgen``.

import
compiler/ast/[
ast
],
compiler/backend/[
cgen,
cgendata,
collectors,
extccomp
],
compiler/front/[
options
],
compiler/modules/[
modulegraphs
],
compiler/utils/[
containers,
pathutils
]

from compiler/sem/passes import skipCodegen

proc generateCode*(graph: ModuleGraph, mlist: sink ModuleList) =
  ## Entry point for C code-generation. Runs ``cgen`` over the top-level
  ## statements of every module in `mlist` and stores the resulting
  ## ``BModuleList`` in `graph.backend`. Only the C code is generated here --
  ## nothing is written to disk yet.
  let config = graph.config
  var g = newModuleList(graph)

  # every input module needs its entry in the backend module list *before*
  # the code generator runs for any of them
  for fileIdx, fullMod in mlist.modules.pairs:
    let created = newModule(g, fullMod.sym, config)
    created.idgen = fullMod.idgen

  # if requested, set up the module that represents the generated header. It
  # is associated with the symbol of the project's main module
  if optGenIndex in config.globalOptions:
    let headerBase =
      if config.headerFile.len > 0:
        AbsoluteFile config.headerFile
      else:
        config.projectFull
    g.generatedHeader = rawNewModule(
      g,
      mlist.modules[config.projectMainIdx2].sym,
      changeFileExt(completeCfilePath(config, headerBase), hExt))
    incl g.generatedHeader.flags, isHeaderFile

  # the main part: feed the top-level code of each module to the code
  # generator, visiting the modules in the order they were closed
  for index in mlist.modulesClosed.items:
    let
      m {.cursor.} = mlist.modules[index]
      bmod = g.modules[index.int]

    for it in m.stmts.items:
      if skipCodegen(bmod.config, it):
        continue
      genTopLevelStmt(bmod, it)

    # wrap up the main part of code generation for the module. This doesn't
    # close the module for writing: generating code for other modules can
    # still add new code to this module's sections
    finalCodegenActions(graph, bmod, newNode(nkStmtList))

  # the callsite still expects `graph.backend` to point to the ``BModuleList``
  # so that ``cgenWriteModules`` can query it
  # XXX: this is the wrong approach -- the code generator must not be
  #      responsible for writing the generated C translation units to disk.
  graph.backend = g
105 changes: 22 additions & 83 deletions compiler/backend/cgen.nim
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,6 @@ import
idioms
],
compiler/sem/[
passes,
rodutils,
aliases,
lowerings,
Expand All @@ -76,9 +75,10 @@ from compiler/ast/reports_sem import SemReport,
reportTyp
from compiler/ast/report_enums import ReportKind

# XXX: the code-generator should not need to know about the existance of
# XXX: the code-generator should not need to know about the existence of
# destructor injections (or destructors, for that matter)
from compiler/sem/injectdestructors import deferGlobalDestructor
from compiler/sem/passes import moduleHasChanged # XXX: leftover dependency

import std/strutils except `%`, addf # collides with ropes.`%`

Expand Down Expand Up @@ -108,9 +108,6 @@ const NonMagics* = {mNone, mIsolate, mNewSeq, mSetLengthSeq, mAppendSeqElem}
## magics that are treated like normal procedures by the code generator.
## This set only applies when using the new runtime.

proc addForwardedProc(m: BModule, prc: PSym) =
m.g.forwardedProcs.add(prc)

proc findPendingModule(m: BModule, s: PSym): BModule =
let ms = s.itemId.module #getModule(s)
result = m.g.modules[ms]
Expand Down Expand Up @@ -1174,18 +1171,18 @@ proc requestConstImpl(p: BProc, sym: PSym) =
proc isActivated(prc: PSym): bool = prc.typ != nil

proc genProc(m: BModule, prc: PSym) =
if sfBorrow in prc.flags or not isActivated(prc): return
if sfForward in prc.flags:
addForwardedProc(m, prc)
fillProcLoc(m, prc.ast[namePos])
else:
genProcNoForward(m, prc)
if {sfExportc, sfCompilerProc} * prc.flags == {sfExportc} and
m.g.generatedHeader != nil and lfNoDecl notin prc.loc.flags:
genProcPrototype(m.g.generatedHeader, prc)
if prc.typ.callConv == ccInline:
if not containsOrIncl(m.g.generatedHeader.declaredThings, prc.id):
genProcAux(m.g.generatedHeader, prc)
# unresolved borrows or forward declarations must not reach here
assert {sfBorrow, sfForward} * prc.flags == {}
assert isActivated(prc)
genProcNoForward(m, prc)
if {sfExportc, sfCompilerProc} * prc.flags == {sfExportc} and
m.g.generatedHeader != nil and lfNoDecl notin prc.loc.flags:
# XXX: don't populate the generated header from inside the code
# generator -- make it a responsibility of the orchestrator
genProcPrototype(m.g.generatedHeader, prc)
if prc.typ.callConv == ccInline:
if not containsOrIncl(m.g.generatedHeader.declaredThings, prc.id):
genProcAux(m.g.generatedHeader, prc)

proc genVarPrototype(m: BModule, n: PNode) =
#assert(sfGlobal in sym.flags)
Expand Down Expand Up @@ -1586,7 +1583,7 @@ proc initProcOptions(m: BModule): TOptions =
let opts = m.config.options
if sfSystemModule in m.module.flags: opts-{optStackTrace} else: opts

proc rawNewModule(g: BModuleList; module: PSym, filename: AbsoluteFile): BModule =
proc rawNewModule*(g: BModuleList; module: PSym, filename: AbsoluteFile): BModule =
new(result)
result.g = g
result.tmpBase = rope("TM" & $hashOwner(module) & "_")
Expand Down Expand Up @@ -1629,25 +1626,6 @@ proc newModule*(g: BModuleList; module: PSym; conf: ConfigRef): BModule =
#growCache g.modules, module.position
g.modules[module.position] = result

template injectG() {.dirty.} =
if graph.backend == nil:
graph.backend = newModuleList(graph)
let g = BModuleList(graph.backend)

when not defined(nimHasSinkInference):
{.pragma: nosinks.}

proc myOpen(graph: ModuleGraph; module: PSym; idgen: IdGenerator): PPassContext {.nosinks.} =
injectG()
result = newModule(g, module, graph.config)
result.idgen = idgen
if optGenIndex in graph.config.globalOptions and g.generatedHeader == nil:
let f = if graph.config.headerFile.len > 0: AbsoluteFile graph.config.headerFile
else: graph.config.projectFull
g.generatedHeader = rawNewModule(g, module,
changeFileExt(completeCfilePath(graph.config, f), hExt))
incl g.generatedHeader.flags, isHeaderFile

proc writeHeader(m: BModule) =
var result = headerTop()
var guard = "__$1__" % [m.filename.splitFile.name.rope]
Expand All @@ -1670,16 +1648,8 @@ proc writeHeader(m: BModule) =
proc getCFile(m: BModule): AbsoluteFile =
result = changeFileExt(completeCfilePath(m.config, withPackageName(m.config, m.cfilename)), ".nim.c")

when false:
proc myOpenCached(graph: ModuleGraph; module: PSym, rd: PRodReader): PPassContext =
injectG()
var m = newModule(g, module, graph.config)
readMergeInfo(getCFile(m), m)
result = m

proc genTopLevelStmt*(m: BModule; n: PNode) =
## Also called from `ic/cbackend.nim`.
if passes.skipCodegen(m.config, n): return
## Called from `ic/cbackend.nim` and ``backend/cbackend.nim``.
m.initProc.options = initProcOptions(m)
#softRnl = if optLineDir in m.config.options: noRnl else: rnl
# XXX replicate this logic!
Expand All @@ -1689,12 +1659,6 @@ proc genTopLevelStmt*(m: BModule; n: PNode) =

genProcBody(m.initProc, transformedN)

proc myProcess(b: PPassContext, n: PNode): PNode =
result = n
if b != nil:
var m = BModule(b)
genTopLevelStmt(m, n)

proc shouldRecompile(m: BModule; code: Rope, cfile: Cfile): bool =
if optForceFullMake notin m.config.globalOptions:
if not moduleHasChanged(m.g.graph, m.module):
Expand Down Expand Up @@ -1760,7 +1724,7 @@ proc finalCodegenActions*(graph: ModuleGraph; m: BModule; n: PNode) =
# phase ordering problem here: We need to announce this
# dependency to 'nimTestErrorFlag' before system.c has been written to
# disk. We also have to announce the dependency *from* the system module, as
# only there it is certain that all the procedure's dependencies also exist
# only there it is certain that all the procedure's dependencies exist
# already
if sfSystemModule in m.module.flags:
discard cgsym(m, "nimTestErrorFlag")
Expand All @@ -1783,44 +1747,19 @@ proc finalCodegenActions*(graph: ModuleGraph; m: BModule; n: PNode) =
if emulatedThreadVars(m.config) and m.config.target.targetOS != osStandalone:
discard cgsym(m, "initThreadVarsEmulation")

if m.g.forwardedProcs.len == 0:
incl m.flags, objHasKidsValid
incl m.flags, objHasKidsValid
let disp = generateMethodDispatchers(graph)
for x in disp: genProcAux(m, x.sym)

let mm = m
m.g.modulesClosed.add mm


proc myClose(graph: ModuleGraph; b: PPassContext, n: PNode): PNode =
result = n
if b == nil: return
finalCodegenActions(graph, BModule(b), n)

proc genForwardedProcs(g: BModuleList) =
# Forward declared proc:s lack bodies when first encountered, so they're given
# a second pass here
# Note: ``genProcNoForward`` may add to ``forwardedProcs``
while g.forwardedProcs.len > 0:
let
prc = g.forwardedProcs.pop()
m = g.modules[prc.itemId.module]
m.config.internalAssert(sfForward notin prc.flags, prc.info, "still forwarded: " & prc.name.s)

genProcNoForward(m, prc)
# for compatibility, the code generator still manages its own "closed order"
# list, but this should be phased out eventually
m.g.modulesClosed.add m

proc cgenWriteModules*(backend: RootRef, config: ConfigRef) =
let g = BModuleList(backend)
g.config = config

# we need to process the transitive closure because recursive module
# deps are allowed (and the system module is processed in the wrong
# order anyway)
genForwardedProcs(g)

for m in cgenModules(g):
m.writeModule(pending=true)
writeMapping(config, g.mapping)
if g.generatedHeader != nil: writeHeader(g.generatedHeader)

const cgenPass* = makePass(myOpen, myProcess, myClose)
if g.generatedHeader != nil: writeHeader(g.generatedHeader)
4 changes: 2 additions & 2 deletions compiler/backend/cgendata.nim
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,6 @@ type
mapping*: Rope ## the generated mapping file (if requested)
modules*: seq[BModule] ## list of all compiled modules
modulesClosed*: seq[BModule] ## list of the same compiled modules, but in the order they were closed
forwardedProcs*: seq[PSym] ## proc:s that did not yet have a body
generatedHeader*: BModule
typeInfoMarker*: TypeCacheWithOwner
typeInfoMarkerV2*: TypeCacheWithOwner
Expand All @@ -151,7 +150,8 @@ type
## nimtvDeps is VERY hard to cache because it's
## not a list of IDs nor can it be made to be one.

TCGen = object of PPassContext ## represents a C source file
TCGen = object ## represents a C source file
idgen*: IdGenerator
s*: TCFileSections ## sections of the C file
flags*: set[CodegenFlag]
module*: PSym
Expand Down
82 changes: 82 additions & 0 deletions compiler/backend/collectors.nim
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
## Implements the "collect" pass. This pass gathers the full AST for each
## module into a single structure, which is then meant to be consumed by the
## code-generation orchestrators (``cbackend``, ``jsbackend``, etc.).
##
## This is somewhat similar to the rodfile-based IC backend, but instead of
## reading the modules' content from the rodfiles, it's collected via the pass
## interface.

import
compiler/ast/[
ast,
ast_idgen,
ast_types,
lineinfos
],
compiler/modules/[
modulegraphs
],
compiler/sem/[
passes
],
compiler/utils/[
containers
]

type
  FullModule* = object
    ## Everything the collector pass gathered for a single module.
    stmts*: seq[PNode]
      ## the top-level statements, in the order they were passed to the
      ## collector
    sym*: PSym
      ## the module's symbol
    idgen*: IdGenerator
      ## the ID generator that was handed to the pass when the module was
      ## opened

  ModuleListRef* = ref ModuleList
  ModuleList* = object of RootObj
    modules*: SeqMap[FileIndex, FullModule]
      ## the collected modules, addressed by their ``FileIndex``
    modulesClosed*: seq[FileIndex]
      ## the modules in the order they were closed: the module closed first
      ## comes first, then the next one, and so on

  ModuleRef = ref object of TPassContext
    ## The pass context used by the collector pass. Identifies one module
    ## within the shared module list.
    list: ModuleListRef
    index: FileIndex

func isFilled*(m: FullModule): bool =
  ## Reports whether `m` holds a module, that is, whether its symbol is set.
  ## Needed for ``FullModule`` to be usable as the item type of a ``SeqMap``.
  result = not m.sym.isNil

proc takeModuleList*(graph: ModuleGraph): ModuleList =
  ## Takes the ``ModuleList`` built up by the collector pass out of
  ## `graph.backend`, resets the field to nil, and returns the list.
  ##
  ## `graph.backend` must currently hold a ``ModuleListRef`` (that is, the
  ## collector pass must have opened at least one module).
  let listRef = ModuleListRef(graph.backend)
  result = move listRef[]
  graph.backend = nil

# Below is the `passes` interface implementation

proc myOpen(graph: ModuleGraph, module: PSym, idgen: IdGenerator): PPassContext =
  ## Pass callback for opening a module. Lazily creates the module list stored
  ## in `graph.backend`, registers a statement-less entry for `module`, and
  ## returns the context that refers to the entry.
  if graph.backend.isNil:
    graph.backend = ModuleListRef()

  let
    collected = ModuleListRef(graph.backend)
    slot = FileIndex(module.position)

  # the entry starts out without statements; they're added by ``myProcess``
  collected.modules[slot] = FullModule(sym: module, idgen: idgen)

  result = ModuleRef(list: collected, index: slot)

proc myProcess(b: PPassContext, n: PNode): PNode =
  ## Pass callback for processing a top-level statement. Appends `n` to the
  ## statement list of the module that `b` refers to and returns `n` as is.
  let modRef = ModuleRef(b)
  modRef.list.modules[modRef.index].stmts.add(n)
  result = n

proc myClose(graph: ModuleGraph; b: PPassContext, n: PNode): PNode =
  ## Pass callback for closing a module. The final node `n` is recorded like
  ## any other statement, and the module is then appended to the "closed"
  ## order of the module list.
  result = myProcess(b, n)

  let closed = ModuleRef(b)
  closed.list.modulesClosed.add(closed.index)

# the collector pass, built from the open/process/close callbacks defined
# in this module
const collectPass* = makePass(myOpen, myProcess, myClose)
Loading

0 comments on commit f6b9d84

Please sign in to comment.