From ccb2b75bd11c0e165fc683132ddd9b8054fa24cb Mon Sep 17 00:00:00 2001 From: zerbina <100542850+zerbina@users.noreply.github.com> Date: Thu, 18 May 2023 22:25:49 +0100 Subject: [PATCH 01/15] move the 'collect' pass to a dedicated module --- compiler/backend/collectors.nim | 81 +++++++++++++++++++++++++++++++++ compiler/vm/vmbackend.nim | 52 +-------------------- 2 files changed, 83 insertions(+), 50 deletions(-) create mode 100644 compiler/backend/collectors.nim diff --git a/compiler/backend/collectors.nim b/compiler/backend/collectors.nim new file mode 100644 index 00000000000..d11baa0f18a --- /dev/null +++ b/compiler/backend/collectors.nim @@ -0,0 +1,81 @@ +## Implements the "collect" pass. This pass gathers the full AST for each +## module into a single structure, which is then meant to be consumed by the +## code-generation orchestrators. +## +## This is somewhat similar to the rodfile-based IC backend, but instead of +## reading the modules' content from the rodfiles, it's collected via the pass +## interface. + +import + std/[ + tables + ], + compiler/ast/[ + ast, + ast_idgen, + ast_types + ], + compiler/modules/[ + modulegraphs + ], + compiler/sem/[ + passes + ] + +type + Module* = object + stmts*: seq[PNode] ## top level statements in the order they were parsed + sym*: PSym ## module symbol + + ModuleListRef* = ref ModuleList + ModuleList* = object of RootObj + modules*: seq[Module] + modulesClosed*: seq[int] ## indices into `modules` in the order the + ## modules were closed. The first closed module + ## comes first, then the next, etc. + moduleMap: Table[int, int] ## module sym-id -> index into `modules` + + ModuleRef = ref object of TPassContext + ## The pass context for the VM backend. 
Represents a reference to a + ## module in the module list + list: ModuleListRef + index: int + +# Below is the `passes` interface implementation + +func growBy[T](x: var seq[T], n: Natural) {.inline.} = + x.setLen(x.len + n) + +proc myOpen(graph: ModuleGraph, module: PSym, idgen: IdGenerator): PPassContext = + if graph.backend == nil: + graph.backend = ModuleListRef() + + let + mlist = ModuleListRef(graph.backend) + next = mlist.modules.len + + # append an empty module to the list + mlist.modules.growBy(1) + mlist.modules[next] = Module(sym: module) + mlist.moduleMap[module.id] = next + + result = ModuleRef(list: mlist, index: next) + +proc myProcess(b: PPassContext, n: PNode): PNode = + result = n + let m = ModuleRef(b) + + const declarativeKinds = routineDefs + {nkTypeSection, nkPragma, + nkExportStmt, nkExportExceptStmt, nkFromStmt, nkImportStmt, + nkImportExceptStmt} + + if n.kind notin declarativeKinds: + m.list.modules[m.index].stmts.add(n) + +proc myClose(graph: ModuleGraph; b: PPassContext, n: PNode): PNode = + result = myProcess(b, n) + + let m = ModuleRef(b) + m.list.modulesClosed.add(m.index) + +const collectPass* = makePass(myOpen, myProcess, myClose) diff --git a/compiler/vm/vmbackend.nim b/compiler/vm/vmbackend.nim index a664e3d67c2..65057ce31bf 100644 --- a/compiler/vm/vmbackend.nim +++ b/compiler/vm/vmbackend.nim @@ -68,6 +68,7 @@ type code: seq[TInstr] debug: seq[TLineInfo] + #[ Module = object stmts: seq[PNode] ## top level statements in the order they were parsed sym: PSym ## module symbol @@ -78,20 +79,7 @@ type initGlobalsProc: CodeInfo ## the proc that initializes `{.global.}` ## variables initProc: CodeInfo ## the module init proc (top-level statements) - - ModuleListRef = ref ModuleList - ModuleList = object of RootObj - modules: seq[Module] - modulesClosed: seq[int] ## indices into `modules` in the order the modules - ## were closed. The first closed module comes - ## first, then the next, etc. 
- moduleMap: Table[int, int] ## module sym-id -> index into `modules` - - ModuleRef = ref object of TPassContext - ## The pass context for the VM backend. Represents a reference to a - ## module in the module list - list: ModuleListRef - index: int + ]# func growBy[T](x: var seq[T], n: Natural) {.inline.} = x.setLen(x.len + n) @@ -483,39 +471,3 @@ proc generateCode*(g: ModuleGraph) = outFilename: conf.absOutFile.string, failureMsg: $err) conf.globalReport(rep) - -# Below is the `passes` interface implementation - -proc myOpen(graph: ModuleGraph, module: PSym, idgen: IdGenerator): PPassContext = - if graph.backend == nil: - graph.backend = ModuleListRef() - - let - mlist = ModuleListRef(graph.backend) - next = mlist.modules.len - - # append an empty module to the list - mlist.modules.growBy(1) - mlist.modules[next] = Module(sym: module) - mlist.moduleMap[module.id] = next - - result = ModuleRef(list: mlist, index: next) - -proc myProcess(b: PPassContext, n: PNode): PNode = - result = n - let m = ModuleRef(b) - - const declarativeKinds = routineDefs + {nkTypeSection, nkPragma, - nkExportStmt, nkExportExceptStmt, nkFromStmt, nkImportStmt, - nkImportExceptStmt} - - if n.kind notin declarativeKinds: - m.list.modules[m.index].stmts.add(n) - -proc myClose(graph: ModuleGraph; b: PPassContext, n: PNode): PNode = - result = myProcess(b, n) - - let m = ModuleRef(b) - m.list.modulesClosed.add(m.index) - -const vmgenPass* = makePass(myOpen, myProcess, myClose) \ No newline at end of file From d1401b8376c5e6f5a603bcbf3b5dec7a787c27e1 Mon Sep 17 00:00:00 2001 From: zerbina <100542850+zerbina@users.noreply.github.com> Date: Thu, 18 May 2023 22:25:49 +0100 Subject: [PATCH 02/15] make `vmbackend` compile again The `ModuleList` used by the collector pass is now no longer re-used in the VM backend, but instead translated to a custom representation (see `produceModules`). 
In addition, the list with the top-level AST is now discarded after it's passed to code generation, which should reduce memory pressure, and thus peak memory usage, quite a bit. --- compiler/backend/collectors.nim | 8 ++-- compiler/front/main.nim | 5 ++- compiler/vm/vmbackend.nim | 80 +++++++++++++++++++++------------ 3 files changed, 58 insertions(+), 35 deletions(-) diff --git a/compiler/backend/collectors.nim b/compiler/backend/collectors.nim index d11baa0f18a..7aa3c530983 100644 --- a/compiler/backend/collectors.nim +++ b/compiler/backend/collectors.nim @@ -23,17 +23,17 @@ import ] type - Module* = object + FullModule* = object stmts*: seq[PNode] ## top level statements in the order they were parsed sym*: PSym ## module symbol ModuleListRef* = ref ModuleList ModuleList* = object of RootObj - modules*: seq[Module] + modules*: seq[FullModule] modulesClosed*: seq[int] ## indices into `modules` in the order the ## modules were closed. The first closed module ## comes first, then the next, etc. - moduleMap: Table[int, int] ## module sym-id -> index into `modules` + moduleMap*: Table[int, int] ## module sym-id -> index into `modules` ModuleRef = ref object of TPassContext ## The pass context for the VM backend. 
Represents a reference to a @@ -56,7 +56,7 @@ proc myOpen(graph: ModuleGraph, module: PSym, idgen: IdGenerator): PPassContext # append an empty module to the list mlist.modules.growBy(1) - mlist.modules[next] = Module(sym: module) + mlist.modules[next] = FullModule(sym: module) mlist.moduleMap[module.id] = next result = ModuleRef(list: mlist, index: next) diff --git a/compiler/front/main.nim b/compiler/front/main.nim index 53ae06b3ea7..3a28c0f1e2f 100644 --- a/compiler/front/main.nim +++ b/compiler/front/main.nim @@ -42,6 +42,7 @@ import modulegraphs # Project module graph ], compiler/backend/[ + collectors, extccomp, # Calling C compiler cgen, # C code generation ], @@ -245,11 +246,11 @@ proc commandCompileToJS(graph: ModuleGraph) = proc commandCompileToVM(graph: ModuleGraph) = semanticPasses(graph) - registerPass(graph, vmgenPass) + registerPass(graph, collectPass) compileProject(graph) # The VM-backend doesn't use a pass for the actual code generation, but a - # separate function instead (similar to the C-backend for IC) + # separate procedure instead (similar to the C-backend for IC) vmbackend.generateCode(graph) proc interactivePasses(graph: ModuleGraph) = diff --git a/compiler/vm/vmbackend.nim b/compiler/vm/vmbackend.nim index 65057ce31bf..5f1ff376b04 100644 --- a/compiler/vm/vmbackend.nim +++ b/compiler/vm/vmbackend.nim @@ -1,14 +1,12 @@ ## The backend for the VM. The core code-generation is done by `vmgen`; the -## linking bits and artifact creation are implemented here +## linking bits and artifact creation are implemented here. ## ## Executable generation happens in roughly the following steps: -## 1. Collect all modules and their top-level statements via the `passes` -## interface. This is a pure collection step, no further processing is done -## 2. Generate all module init procedures (i.e. code for all top-level +## 1. Generate all module init procedures (i.e. code for all top-level ## statements) -## 3. 
Iteratively generate code for all alive routines (excluding `method`s) -## 4. Generate the main procedure -## 5. Pack up all required data into `PackedEnv` and write it to the output +## 2. Iteratively generate code for all alive routines (excluding `method`s) +## 3. Generate the main procedure +## 4. Pack up all required data into `PackedEnv` and write it to the output ## file ## ## Similiar to the C and JS backend, dead-code-elimination (DCE) happens as a @@ -24,12 +22,14 @@ import lineinfos, astalgo, # for `getModule` ], + compiler/backend/[ + collectors + ], compiler/front/[ msgs, options ], compiler/sem/[ - passes, transf ], compiler/mir/[ @@ -68,10 +68,8 @@ type code: seq[TInstr] debug: seq[TLineInfo] - #[ Module = object - stmts: seq[PNode] ## top level statements in the order they were parsed - sym: PSym ## module symbol + sym: PSym initGlobalsCode: CodeFragment ## the bytecode of `initGlobalsProc`. Each ## encountered `{.global.}`'s init statement gets code-gen'ed into the @@ -79,7 +77,11 @@ type initGlobalsProc: CodeInfo ## the proc that initializes `{.global.}` ## variables initProc: CodeInfo ## the module init proc (top-level statements) - ]# + + BModuleList = object + modules: seq[Module] + modulesClosed: seq[int] + moduleMap: Table[int, int] func growBy[T](x: var seq[T], n: Natural) {.inline.} = x.setLen(x.len + n) @@ -141,8 +143,8 @@ proc genStmt(c: var TCtx, n: PNode): auto = c.gatherDependencies(n, withGlobals=true) vmgen.genStmt(c, n) -proc generateTopLevelStmts*(module: var Module, c: var TCtx, - config: ConfigRef) = +proc generateTopLevelStmts(c: var TCtx, config: ConfigRef, + module: FullModule): CodeInfo = ## Generates code for all collected top-level statements of `module` and ## compiles the fragments into a single function. 
The resulting code is ## stored in `module.initProc` @@ -164,7 +166,7 @@ proc generateTopLevelStmts*(module: var Module, c: var TCtx, c.gABC(n, opcRet) - module.initProc = (start: start, regCount: c.prc.regInfo.len) + result = (start: start, regCount: c.prc.regInfo.len) proc generateCodeForProc(c: var TCtx, s: PSym, globals: var seq[PNode]): VmGenResult = @@ -208,7 +210,7 @@ proc generateGlobalInit(c: var TCtx, f: var CodeFragment, defs: openArray[PNode] # Swap back once done swapState() -proc generateAliveProcs(c: var TCtx, mlist: var ModuleList) = +proc generateAliveProcs(c: var TCtx, mlist: var BModuleList) = ## Runs code generation for all routines (except methods) directly used ## by the routines in `c.linkState.newProcs`, including the routines in ## the list itself. @@ -308,7 +310,7 @@ func addInitProcs(ft: var seq[FuncTableEntry], m: Module, sig: RoutineSigId) = ft.add initFuncTblEntry(m.sym, sig, m.initProc) proc generateMain(c: var TCtx, mainModule: PSym, - mlist: ModuleList): FunctionIndex = + mlist: BModuleList): FunctionIndex = ## Generates and links in the main procedure (the entry point) along with ## setting up the required state. @@ -378,12 +380,39 @@ func storeExtra(enc: var PackedEncoder, dst: var PackedEnv, mapList(dst.globals, globals, it): enc.typeMap[it] +proc produceModules(g: ModuleGraph, c: var TCtx): BModuleList = + ## Takes the ``ModuleList`` stored in `g` and uses it for producing the + ## module list used by the VM backend. The bytecode for the modules' + ## initialization logic (i.e, top-level statements) is also generated + ## here already. + + # in order to reduce overall memory consumption, we consume the module list + # that was collected earlier. 
Everything that was not moved over to the + # ``ModuleList`` instance we're using in the backend gets freed once the + # current procedure exits + var mlist = move ModuleListRef(g.backend)[] + g.backend = nil # prevent others from observing the empty module list + + # setup an entry for each module and generated the code for the modules' + # initalization logic: + for it in mlist.modules.items: + c.refresh(it.sym, g.idgen) + + var m = Module(sym: it.sym) + m.initProc = generateTopLevelStmts(c, g.config, it) + m.initGlobalsCode.prc = PProc() + + result.modules.add(m) + + # extract the other data: + result.modulesClosed = move mlist.modulesClosed + result.moduleMap = move mlist.moduleMap + proc generateCode*(g: ModuleGraph) = ## The backend's entry point. Orchestrates code generation and linking. If ## all went well, the resulting binary is written to the project's output ## file let - mlist = g.backend.ModuleListRef conf = g.config var c = TCtx(config: g.config, cache: g.cache, graph: g, idgen: g.idgen, @@ -395,17 +424,10 @@ proc generateCode*(g: ModuleGraph) = # corresponding procs: registerCallbacks(c) - # generate all module init procs (i.e. code for the top-level statements): - for m in mlist.modules.mitems: - c.refresh(m.sym, g.idgen) - generateTopLevelStmts(m, c, g.config) - - # combine module list iteration with initialiazing `initGlobalsCode`: - m.initGlobalsCode.prc = PProc() + var mlist = produceModules(g, c) - # generate code for the set of active alive routines - # (`c.linkState.newProcs`). This can uncover new ``const`` symbols - generateAliveProcs(c, mlist[]) + # generate code for all alive routines + generateAliveProcs(c, mlist) reset(c.linkState.newProcs) # free the occupied memory already # XXX: generation of method dispatchers would go here. 
Note that `method` @@ -432,7 +454,7 @@ proc generateCode*(g: ModuleGraph) = m.initGlobalsProc = (start: -1, regCount: 0) let entryPoint = - generateMain(c, g.getModule(conf.projectMainIdx), mlist[]) + generateMain(c, g.getModule(conf.projectMainIdx), mlist) c.gABC(g.emptyNode, opcEof) From 274a730579fce3af9c46809215dda1dcb021df5e Mon Sep 17 00:00:00 2001 From: zerbina <100542850+zerbina@users.noreply.github.com> Date: Thu, 18 May 2023 22:25:49 +0100 Subject: [PATCH 03/15] containers: extend the APIs Extend the set of routines for interacting with `Store` and `SeqMap`. --- compiler/utils/containers.nim | 42 +++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/compiler/utils/containers.nim b/compiler/utils/containers.nim index 97b9740ad57..e432cb385e6 100644 --- a/compiler/utils/containers.nim +++ b/compiler/utils/containers.nim @@ -29,6 +29,9 @@ func contains*[K, V](m: SeqMap[K, V], key: K): bool {.inline.} = func `[]`*[K, V](m: SeqMap[K, V], key: K): lent V {.inline.} = result = m.data[ord(key)] +func `[]`*[K, V](m: var SeqMap[K, V], key: K): var V {.inline.} = + result = m.data[ord(key)] + func `[]=`*[K, V](m: var SeqMap[K, V], key: K, val: sink V) = let i = ord(key) if m.data.len <= i: @@ -36,6 +39,26 @@ func `[]=`*[K, V](m: var SeqMap[K, V], key: K, val: sink V) = m.data[i] = val +iterator values*[K, V](m: SeqMap[K, V]): lent V = + ## Returns, in an unspecified order, the value for each entry in the map `m`. + mixin isFilled + var i = 0 + let L = m.data.len + while i < L: + if isFilled(m.data[i]): + yield m.data[i] + inc i + +iterator pairs*[K, V](m: SeqMap[K, V]): (K, lent V) = + ## Returns, in an unspecified order, the key and value for each entry in the + ## map `m`. 
+ mixin isFilled + var i = 0 + let L = m.data.len + while i < L: + if isFilled(m.data[i]): + yield (K(i), m.data[i]) + inc i # ---------- Store API ------------ @@ -55,6 +78,25 @@ func add*[I; T](x: var Store[I, T], it: sink T): I {.inline.} = x.data.add it result = I(x.data.high) +iterator mitems*[I; T](x: var Store[I, T]): var T = + var i = 0 + let L = x.data.len + while i < L: + yield x.data[i] + inc i + +iterator pairs*[I; T](x: Store[I, T]): (I, lent T) = + ## Returns all items in `x` together with their corresponding IDs in + ## ascending order. + var i = 0 + let L = x.data.len + while i < L: + # there's no need to perform a range check here: ``add`` already errors + # when trying to add items for which the index can't be represented with + # ``I`` + yield (I(i), x.data[i]) + inc i + # ---------- OrdinalSeq API ------------ template len*[I; T](x: OrdinalSeq[I, T]): int = From eb9198fb332ef1edf6fc2377b26b1711f22e8fa6 Mon Sep 17 00:00:00 2001 From: zerbina <100542850+zerbina@users.noreply.github.com> Date: Thu, 18 May 2023 22:25:50 +0100 Subject: [PATCH 04/15] collectors: simpler module list management Store the modules in a `SeqMap` with their position as the key. This conveys intention better and leaves it to the consumer to use a more fitting structure/representation. For the internal representation, `vmbackend` uses a `Store` for the list of modules, which makes the intention more clear and querying the list a bit more ergonomic. --- compiler/backend/collectors.nim | 37 +++++++++++++++-------------- compiler/vm/vmbackend.nim | 41 ++++++++++++++++++++------------- 2 files changed, 43 insertions(+), 35 deletions(-) diff --git a/compiler/backend/collectors.nim b/compiler/backend/collectors.nim index 7aa3c530983..0ba2e734f2a 100644 --- a/compiler/backend/collectors.nim +++ b/compiler/backend/collectors.nim @@ -7,19 +7,20 @@ ## interface. 
import - std/[ - tables - ], compiler/ast/[ ast, ast_idgen, - ast_types + ast_types, + lineinfos ], compiler/modules/[ modulegraphs ], compiler/sem/[ passes + ], + compiler/utils/[ + containers ] type @@ -29,22 +30,22 @@ type ModuleListRef* = ref ModuleList ModuleList* = object of RootObj - modules*: seq[FullModule] - modulesClosed*: seq[int] ## indices into `modules` in the order the - ## modules were closed. The first closed module - ## comes first, then the next, etc. - moduleMap*: Table[int, int] ## module sym-id -> index into `modules` + modules*: SeqMap[FileIndex, FullModule] + modulesClosed*: seq[FileIndex] + ## stores the modules in the order they were closed. The first closed + ## module comes first, then the next, etc. ModuleRef = ref object of TPassContext ## The pass context for the VM backend. Represents a reference to a ## module in the module list list: ModuleListRef - index: int + index: FileIndex -# Below is the `passes` interface implementation +func isFilled*(m: FullModule): bool = + # required so that ``FullModule`` is usable as the item type of a ``SeqMap`` + m.sym != nil -func growBy[T](x: var seq[T], n: Natural) {.inline.} = - x.setLen(x.len + n) +# Below is the `passes` interface implementation proc myOpen(graph: ModuleGraph, module: PSym, idgen: IdGenerator): PPassContext = if graph.backend == nil: @@ -52,14 +53,12 @@ proc myOpen(graph: ModuleGraph, module: PSym, idgen: IdGenerator): PPassContext let mlist = ModuleListRef(graph.backend) - next = mlist.modules.len + pos = module.position.FileIndex - # append an empty module to the list - mlist.modules.growBy(1) - mlist.modules[next] = FullModule(sym: module) - mlist.moduleMap[module.id] = next + # add an empty entry for the module: + mlist.modules[pos] = FullModule(sym: module) - result = ModuleRef(list: mlist, index: next) + result = ModuleRef(list: mlist, index: pos) proc myProcess(b: PPassContext, n: PNode): PNode = result = n diff --git a/compiler/vm/vmbackend.nim 
b/compiler/vm/vmbackend.nim index 5f1ff376b04..3b4ad972f3c 100644 --- a/compiler/vm/vmbackend.nim +++ b/compiler/vm/vmbackend.nim @@ -39,6 +39,9 @@ import magicsys, modulegraphs ], + compiler/utils/[ + containers + ], compiler/vm/[ packed_env, vmaux, @@ -78,10 +81,16 @@ type ## variables initProc: CodeInfo ## the module init proc (top-level statements) + ModuleId = distinct uint32 + ## The ID of a ``Module`` instance. + BModuleList = object - modules: seq[Module] - modulesClosed: seq[int] - moduleMap: Table[int, int] + modules: Store[ModuleId, Module] + modulesClosed: seq[ModuleId] + + moduleMap: Table[int, ModuleId] + ## maps a module's position to the ID of the module's ``Module`` + ## instance func growBy[T](x: var seq[T], n: Natural) {.inline.} = x.setLen(x.len + n) @@ -257,7 +266,7 @@ proc generateAliveProcs(c: var TCtx, mlist: var BModuleList) = # initializer expression might depend on otherwise unused procedures (which # might define further globals...) if globals.len > 0: - let mI = mlist.moduleMap[c.module.id] + let mI = mlist.moduleMap[c.module.position] generateGlobalInit(c, mlist.modules[mI].initGlobalsCode, globals) @@ -327,12 +336,10 @@ proc generateMain(c: var TCtx, mainModule: PSym, c.types.add(typ) - var systemIdx, mainIdx: int - # XXX: can't use `pairs` since it copies - for i in 0.. Date: Thu, 18 May 2023 22:25:50 +0100 Subject: [PATCH 05/15] add a C code-generation orchestrator Much like how it works for the VM backend, the C backend now also uses an orchestrator (the new `cbackend` module) that invokes the code generator. The `passes` integration (`myOpen`, `myProcess`, and `myClose`) is removed from `cgen`. Setting up the extra header backend module (used for C header generation) is now the responsibility of the orchestrator. Similar to `vmbackend`, the new `cbackend` also operates on the semantically analyzed AST of the whole program. 
--- compiler/backend/cbackend.nim | 72 +++++++++++++++++++++++++++++++++++ compiler/backend/cgen.nim | 58 +++++----------------------- compiler/backend/cgendata.nim | 3 +- compiler/front/main.nim | 5 ++- 4 files changed, 87 insertions(+), 51 deletions(-) create mode 100644 compiler/backend/cbackend.nim diff --git a/compiler/backend/cbackend.nim b/compiler/backend/cbackend.nim new file mode 100644 index 00000000000..26747edbaa0 --- /dev/null +++ b/compiler/backend/cbackend.nim @@ -0,0 +1,72 @@ +## The code-generation orchestrator for the C backend. It generates the C code +## for the semantically analysed AST of the whole progam by invoking ``cgen``. +## +## The general direction is to move more logic out of the code generator (such +## as figuring out the set of alive procedures) and into the orchestrator, +## leaving only the core of code generation to ``cgen``. + +import + compiler/ast/[ + ast + ], + compiler/backend/[ + cgen, + cgendata, + collectors, + extccomp + ], + compiler/front/[ + options + ], + compiler/modules/[ + modulegraphs + ], + compiler/utils/[ + containers, + pathutils + ] + +from compiler/sem/passes import skipCodegen + +proc generateCode*(graph: ModuleGraph) = + ## Entry point for C code-generation. Only the C code is generated -- nothing + ## is written to disk yet. + let + mlist = ModuleListRef(graph.backend) + config = graph.config + + var g = newModuleList(graph) + + # first create a module list entry for each input module. This has to happen + # *before* the code generator is invoked. 
+ for key, val in mlist.modules.pairs: + let m = newModule(g, val.sym, config) + m.idgen = val.idgen + + # setup the module for the generated header, if required: + if optGenIndex in config.globalOptions: + let f = if config.headerFile.len > 0: AbsoluteFile config.headerFile + else: config.projectFull + g.generatedHeader = rawNewModule(g, mlist.modules[config.projectMainIdx2].sym, + changeFileExt(completeCfilePath(config, f), hExt)) + incl g.generatedHeader.flags, isHeaderFile + + # the main part: invoke the code generator for all top-level code + for index in mlist.modulesClosed.items: + let + m {.cursor.} = mlist.modules[index] + bmod = g.modules[index.int] + + # pass all top-level code to the code generator: + for it in m.stmts.items: + if not skipCodegen(bmod.config, it): + genTopLevelStmt(bmod, it) + + # close the module: + finalCodegenActions(graph, g.modules[index.int], newNode(nkStmtList)) + + # the callsite still expects `graph.backend` to point to the ``BModuleList`` + # so that ``cgenWriteModules`` can query it + # XXX: this is the wrong approach -- the code generator must not be + # responsible for writing the generated C translation units to disk. 
+ graph.backend = g \ No newline at end of file diff --git a/compiler/backend/cgen.nim b/compiler/backend/cgen.nim index d818199156a..4000aec7634 100644 --- a/compiler/backend/cgen.nim +++ b/compiler/backend/cgen.nim @@ -52,7 +52,6 @@ import idioms ], compiler/sem/[ - passes, rodutils, aliases, lowerings, @@ -76,9 +75,10 @@ from compiler/ast/reports_sem import SemReport, reportTyp from compiler/ast/report_enums import ReportKind -# XXX: the code-generator should not need to know about the existance of +# XXX: the code-generator should not need to know about the existence of # destructor injections (or destructors, for that matter) from compiler/sem/injectdestructors import deferGlobalDestructor +from compiler/sem/passes import moduleHasChanged # XXX: leftover dependency import std/strutils except `%`, addf # collides with ropes.`%` @@ -1586,7 +1586,7 @@ proc initProcOptions(m: BModule): TOptions = let opts = m.config.options if sfSystemModule in m.module.flags: opts-{optStackTrace} else: opts -proc rawNewModule(g: BModuleList; module: PSym, filename: AbsoluteFile): BModule = +proc rawNewModule*(g: BModuleList; module: PSym, filename: AbsoluteFile): BModule = new(result) result.g = g result.tmpBase = rope("TM" & $hashOwner(module) & "_") @@ -1629,25 +1629,6 @@ proc newModule*(g: BModuleList; module: PSym; conf: ConfigRef): BModule = #growCache g.modules, module.position g.modules[module.position] = result -template injectG() {.dirty.} = - if graph.backend == nil: - graph.backend = newModuleList(graph) - let g = BModuleList(graph.backend) - -when not defined(nimHasSinkInference): - {.pragma: nosinks.} - -proc myOpen(graph: ModuleGraph; module: PSym; idgen: IdGenerator): PPassContext {.nosinks.} = - injectG() - result = newModule(g, module, graph.config) - result.idgen = idgen - if optGenIndex in graph.config.globalOptions and g.generatedHeader == nil: - let f = if graph.config.headerFile.len > 0: AbsoluteFile graph.config.headerFile - else: graph.config.projectFull - 
g.generatedHeader = rawNewModule(g, module, - changeFileExt(completeCfilePath(graph.config, f), hExt)) - incl g.generatedHeader.flags, isHeaderFile - proc writeHeader(m: BModule) = var result = headerTop() var guard = "__$1__" % [m.filename.splitFile.name.rope] @@ -1670,16 +1651,8 @@ proc writeHeader(m: BModule) = proc getCFile(m: BModule): AbsoluteFile = result = changeFileExt(completeCfilePath(m.config, withPackageName(m.config, m.cfilename)), ".nim.c") -when false: - proc myOpenCached(graph: ModuleGraph; module: PSym, rd: PRodReader): PPassContext = - injectG() - var m = newModule(g, module, graph.config) - readMergeInfo(getCFile(m), m) - result = m - proc genTopLevelStmt*(m: BModule; n: PNode) = - ## Also called from `ic/cbackend.nim`. - if passes.skipCodegen(m.config, n): return + ## Called from `ic/cbackend.nim` and ``backend/cbackend.nim``. m.initProc.options = initProcOptions(m) #softRnl = if optLineDir in m.config.options: noRnl else: rnl # XXX replicate this logic! @@ -1689,12 +1662,6 @@ proc genTopLevelStmt*(m: BModule; n: PNode) = genProcBody(m.initProc, transformedN) -proc myProcess(b: PPassContext, n: PNode): PNode = - result = n - if b != nil: - var m = BModule(b) - genTopLevelStmt(m, n) - proc shouldRecompile(m: BModule; code: Rope, cfile: Cfile): bool = if optForceFullMake notin m.config.globalOptions: if not moduleHasChanged(m.g.graph, m.module): @@ -1760,7 +1727,7 @@ proc finalCodegenActions*(graph: ModuleGraph; m: BModule; n: PNode) = # phase ordering problem here: We need to announce this # dependency to 'nimTestErrorFlag' before system.c has been written to # disk. 
We also have to announce the dependency *from* the system module, as - # only there it is certain that all the procedure's dependencies also exist + # only there it is certain that all the procedure's dependencies exist # already if sfSystemModule in m.module.flags: discard cgsym(m, "nimTestErrorFlag") @@ -1788,14 +1755,9 @@ proc finalCodegenActions*(graph: ModuleGraph; m: BModule; n: PNode) = let disp = generateMethodDispatchers(graph) for x in disp: genProcAux(m, x.sym) - let mm = m - m.g.modulesClosed.add mm - - -proc myClose(graph: ModuleGraph; b: PPassContext, n: PNode): PNode = - result = n - if b == nil: return - finalCodegenActions(graph, BModule(b), n) + # for compatibility, the code generator still manages its own "closed order" + # list, but this should be phased out eventually + m.g.modulesClosed.add m proc genForwardedProcs(g: BModuleList) = # Forward declared proc:s lack bodies when first encountered, so they're given @@ -1821,6 +1783,4 @@ proc cgenWriteModules*(backend: RootRef, config: ConfigRef) = for m in cgenModules(g): m.writeModule(pending=true) writeMapping(config, g.mapping) - if g.generatedHeader != nil: writeHeader(g.generatedHeader) - -const cgenPass* = makePass(myOpen, myProcess, myClose) + if g.generatedHeader != nil: writeHeader(g.generatedHeader) \ No newline at end of file diff --git a/compiler/backend/cgendata.nim b/compiler/backend/cgendata.nim index ab444bc8ff1..84854a275ac 100644 --- a/compiler/backend/cgendata.nim +++ b/compiler/backend/cgendata.nim @@ -151,7 +151,8 @@ type ## nimtvDeps is VERY hard to cache because it's ## not a list of IDs nor can it be made to be one. 
- TCGen = object of PPassContext ## represents a C source file + TCGen = object ## represents a C source file + idgen*: IdGenerator s*: TCFileSections ## sections of the C file flags*: set[CodegenFlag] module*: PSym diff --git a/compiler/front/main.nim b/compiler/front/main.nim index 3a28c0f1e2f..83c1db9e6ea 100644 --- a/compiler/front/main.nim +++ b/compiler/front/main.nim @@ -67,6 +67,8 @@ from compiler/ic/ic import rodViewer from std/osproc import execCmd +import compiler/backend/cbackend as cbackend2 + # xxx: reports are a code smell meaning data types are misplaced from compiler/ast/reports_internal import InternalReport from compiler/ast/report_enums import ReportKind, @@ -188,7 +190,7 @@ proc commandCompileToC(graph: ModuleGraph) = extccomp.initVars(conf) semanticPasses(graph) if conf.symbolFiles == disabledSf: - registerPass(graph, cgenPass) + registerPass(graph, collectPass) if {optRun, optForceFullMake} * conf.globalOptions == {optRun} or isDefined(conf, "nimBetterRun"): if not changeDetectedViaJsonBuildInstructions(conf, conf.jsonBuildInstructionsFile): @@ -203,6 +205,7 @@ proc commandCompileToC(graph: ModuleGraph) = if graph.config.errorCounter > 0: return # issue #9933 if conf.symbolFiles == disabledSf: + cbackend2.generateCode(graph) cgenWriteModules(graph.backend, conf) else: if isDefined(conf, "nimIcIntegrityChecks"): From a847b9ed7cb0d0b60e890519bcd42b597f78834f Mon Sep 17 00:00:00 2001 From: zerbina <100542850+zerbina@users.noreply.github.com> Date: Thu, 18 May 2023 22:25:50 +0100 Subject: [PATCH 06/15] collectors: track a module's `IdGenerator` The upcoming C code-generation orchestrator needs access to the `IdGenerator` for each module. They're now stored with `FullModule`, allowing the orchestrator to retrieve them later. `vmbackend` is adjusted to make use of the module's `IdGenerator`, which is more precise than using the `ModuleGraph`'s one. 
Ideally, the backend should not introduce any new symbol and type instances (which is what the `IdGenerator`s are required for), but `transf` currently necessitates that. --- compiler/backend/collectors.nim | 3 ++- compiler/vm/vmbackend.nim | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/compiler/backend/collectors.nim b/compiler/backend/collectors.nim index 0ba2e734f2a..4538938d645 100644 --- a/compiler/backend/collectors.nim +++ b/compiler/backend/collectors.nim @@ -27,6 +27,7 @@ type FullModule* = object stmts*: seq[PNode] ## top level statements in the order they were parsed sym*: PSym ## module symbol + idgen*: IdGenerator ModuleListRef* = ref ModuleList ModuleList* = object of RootObj @@ -56,7 +57,7 @@ proc myOpen(graph: ModuleGraph, module: PSym, idgen: IdGenerator): PPassContext pos = module.position.FileIndex # add an empty entry for the module: - mlist.modules[pos] = FullModule(sym: module) + mlist.modules[pos] = FullModule(sym: module, idgen: idgen) result = ModuleRef(list: mlist, index: pos) diff --git a/compiler/vm/vmbackend.nim b/compiler/vm/vmbackend.nim index 3b4ad972f3c..86a4247475e 100644 --- a/compiler/vm/vmbackend.nim +++ b/compiler/vm/vmbackend.nim @@ -403,7 +403,7 @@ proc produceModules(g: ModuleGraph, c: var TCtx): BModuleList = # setup an entry for each module and generated the code for the modules' # initalization logic: for it in mlist.modules.values: - c.refresh(it.sym, g.idgen) + c.refresh(it.sym, it.idgen) var m = Module(sym: it.sym) m.initProc = generateTopLevelStmts(c, g.config, it) From 4e54bcf21f7c94dbb7a29230e05d4ddf8a0bc282 Mon Sep 17 00:00:00 2001 From: zerbina <100542850+zerbina@users.noreply.github.com> Date: Thu, 18 May 2023 22:25:51 +0100 Subject: [PATCH 07/15] cgen: remove handling of forwarded procedures Since code generation now only takes place *after* the whole program was semantically analyzed, forwarded procedures and unresolved borrows no longer reach there. 
--- compiler/backend/cgen.nim | 47 ++++++++++------------------------- compiler/backend/cgendata.nim | 1 - 2 files changed, 13 insertions(+), 35 deletions(-) diff --git a/compiler/backend/cgen.nim b/compiler/backend/cgen.nim index 4000aec7634..4c4b052a409 100644 --- a/compiler/backend/cgen.nim +++ b/compiler/backend/cgen.nim @@ -108,9 +108,6 @@ const NonMagics* = {mNone, mIsolate, mNewSeq, mSetLengthSeq, mAppendSeqElem} ## magics that are treated like normal procedures by the code generator. ## This set only applies when using the new runtime. -proc addForwardedProc(m: BModule, prc: PSym) = - m.g.forwardedProcs.add(prc) - proc findPendingModule(m: BModule, s: PSym): BModule = let ms = s.itemId.module #getModule(s) result = m.g.modules[ms] @@ -1174,18 +1171,18 @@ proc requestConstImpl(p: BProc, sym: PSym) = proc isActivated(prc: PSym): bool = prc.typ != nil proc genProc(m: BModule, prc: PSym) = - if sfBorrow in prc.flags or not isActivated(prc): return - if sfForward in prc.flags: - addForwardedProc(m, prc) - fillProcLoc(m, prc.ast[namePos]) - else: - genProcNoForward(m, prc) - if {sfExportc, sfCompilerProc} * prc.flags == {sfExportc} and - m.g.generatedHeader != nil and lfNoDecl notin prc.loc.flags: - genProcPrototype(m.g.generatedHeader, prc) - if prc.typ.callConv == ccInline: - if not containsOrIncl(m.g.generatedHeader.declaredThings, prc.id): - genProcAux(m.g.generatedHeader, prc) + # unresolved borrows or forward declarations must not reach here + assert {sfBorrow, sfForward} * prc.flags == {} + assert isActivated(prc) + genProcNoForward(m, prc) + if {sfExportc, sfCompilerProc} * prc.flags == {sfExportc} and + m.g.generatedHeader != nil and lfNoDecl notin prc.loc.flags: + # XXX: don't populate the generated header from inside the code + # generator -- make it a responsibility of the orchestrator + genProcPrototype(m.g.generatedHeader, prc) + if prc.typ.callConv == ccInline: + if not containsOrIncl(m.g.generatedHeader.declaredThings, prc.id): + 
genProcAux(m.g.generatedHeader, prc) proc genVarPrototype(m: BModule, n: PNode) = #assert(sfGlobal in sym.flags) @@ -1750,8 +1747,7 @@ proc finalCodegenActions*(graph: ModuleGraph; m: BModule; n: PNode) = if emulatedThreadVars(m.config) and m.config.target.targetOS != osStandalone: discard cgsym(m, "initThreadVarsEmulation") - if m.g.forwardedProcs.len == 0: - incl m.flags, objHasKidsValid + incl m.flags, objHasKidsValid let disp = generateMethodDispatchers(graph) for x in disp: genProcAux(m, x.sym) @@ -1759,27 +1755,10 @@ proc finalCodegenActions*(graph: ModuleGraph; m: BModule; n: PNode) = # list, but this should be phased out eventually m.g.modulesClosed.add m -proc genForwardedProcs(g: BModuleList) = - # Forward declared proc:s lack bodies when first encountered, so they're given - # a second pass here - # Note: ``genProcNoForward`` may add to ``forwardedProcs`` - while g.forwardedProcs.len > 0: - let - prc = g.forwardedProcs.pop() - m = g.modules[prc.itemId.module] - m.config.internalAssert(sfForward notin prc.flags, prc.info, "still forwarded: " & prc.name.s) - - genProcNoForward(m, prc) - proc cgenWriteModules*(backend: RootRef, config: ConfigRef) = let g = BModuleList(backend) g.config = config - # we need to process the transitive closure because recursive module - # deps are allowed (and the system module is processed in the wrong - # order anyway) - genForwardedProcs(g) - for m in cgenModules(g): m.writeModule(pending=true) writeMapping(config, g.mapping) diff --git a/compiler/backend/cgendata.nim b/compiler/backend/cgendata.nim index 84854a275ac..83b5cc70170 100644 --- a/compiler/backend/cgendata.nim +++ b/compiler/backend/cgendata.nim @@ -133,7 +133,6 @@ type mapping*: Rope ## the generated mapping file (if requested) modules*: seq[BModule] ## list of all compiled modules modulesClosed*: seq[BModule] ## list of the same compiled modules, but in the order they were closed - forwardedProcs*: seq[PSym] ## proc:s that did not yet have a body 
generatedHeader*: BModule typeInfoMarker*: TypeCacheWithOwner typeInfoMarkerV2*: TypeCacheWithOwner From ae1d9a537562de4bc9ca7c24322f079e40241a7b Mon Sep 17 00:00:00 2001 From: zerbina <100542850+zerbina@users.noreply.github.com> Date: Thu, 18 May 2023 22:25:51 +0100 Subject: [PATCH 08/15] jsgen: remove `inSystem` Disabling stack-traces for the system module and all modules it imports differs from what the C code-generator does (stack-traces are only disabled for the `system` module there, but not for the ones it imports). The required access to `PGlobals` via `g.backend` is also a small problem for upcoming `jsgen` refactoring. --- compiler/backend/jsgen.nim | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/compiler/backend/jsgen.nim b/compiler/backend/jsgen.nim index 12bf09d4ad7..0b553127bf0 100644 --- a/compiler/backend/jsgen.nim +++ b/compiler/backend/jsgen.nim @@ -133,7 +133,6 @@ type generatedSyms: IntSet typeInfoGenerated: IntSet unique: int # for temp identifier generation - inSystem: bool PProc = ref TProc TProc = object @@ -195,7 +194,7 @@ proc newProc(globals: PGlobals, module: BModule, procDef: PNode, proc initProcOptions(module: BModule): TOptions = result = module.config.options - if PGlobals(module.graph.backend).inSystem: + if sfSystemModule in module.module.flags: result.excl(optStackTrace) proc newInitProc(globals: PGlobals, module: BModule): PProc = @@ -2657,8 +2656,6 @@ proc newModule(g: ModuleGraph; module: PSym): BModule = g.backend = newGlobals() result.graph = g result.config = g.config - if sfSystemModule in module.flags: - PGlobals(g.backend).inSystem = true proc genHeader(): Rope = result = rope("""/* Generated by the Nim Compiler v$1 */ @@ -2717,8 +2714,6 @@ proc myClose(graph: ModuleGraph; b: PPassContext, n: PNode): PNode = result = myProcess(b, n) - if sfSystemModule in m.module.flags: - PGlobals(graph.backend).inSystem = false if passes.skipCodegen(m.config, n): return n if sfMainModule in m.module.flags: var 
code = genHeader() & wholeCode(graph, m) From 73466cefad52f39dedf6154c8b6533f6bb6c167b Mon Sep 17 00:00:00 2001 From: zerbina <100542850+zerbina@users.noreply.github.com> Date: Thu, 18 May 2023 22:25:51 +0100 Subject: [PATCH 09/15] add a JS code-generation orchestrator Very similar to the introduction of an orchestrator for the C code- generator, but for the JS code-generator. The `passes` integration is removed from `jsgen`, and writing the module to disk and generating the source map moved to the orchestrator. --- compiler/backend/jsbackend.nim | 69 ++++++++++++++++++++++++++++++++++ compiler/backend/jsgen.nim | 59 ++++++++--------------------- compiler/front/main.nim | 7 +++- 3 files changed, 90 insertions(+), 45 deletions(-) create mode 100644 compiler/backend/jsbackend.nim diff --git a/compiler/backend/jsbackend.nim b/compiler/backend/jsbackend.nim new file mode 100644 index 00000000000..7be4e5bb3b8 --- /dev/null +++ b/compiler/backend/jsbackend.nim @@ -0,0 +1,69 @@ +## The code-generation orchestrator for the JavaScript backend. It generates +## the JS code for the semantically analysed AST of the whole program by +## invoking ``jsgen``. +## +## The general direction is to move more logic out of the code generator (such +## as figuring out the set of alive procedures) and into the orchestrator, +## leaving only the core of code generation to ``jsgen``. 
+ +import + std/[ + json + ], + compiler/backend/[ + collectors, + jsgen + ], + compiler/front/[ + options + ], + compiler/modules/[ + modulegraphs + ], + compiler/sem/[ + passes, + sourcemap + ], + compiler/utils/[ + containers, + ropes + ] + +proc writeModules(graph: ModuleGraph, globals: PGlobals) = + let + config = graph.config + outFile = config.prepareToWriteOutput() + + var code = genHeader() & wholeCode(globals) + if optSourcemap in config.globalOptions: + var map: SourceMap + (code, map) = genSourceMap($(code), outFile.string) + writeFile(outFile.string & ".map", $(%map)) + + discard writeRopeIfNotEqual(code, outFile) + +proc generateCode*(graph: ModuleGraph) = + ## Entry point into the JS backend. Generates the code for all modules and + ## writes it to the output file. + let + mlist = ModuleListRef(graph.backend) + globals = newGlobals() + + # generate the code for all modules: + for index in mlist.modulesClosed.items: + let + m {.cursor.} = mlist.modules[index] + bmod = newModule(graph, m.sym) + + bmod.idgen = m.idgen + + # invoke ``jsgen`` for all top-level code: + for n in m.stmts.items: + if not skipCodegen(graph.config, n): + genTopLevelStmt(globals, bmod, n) + + # close the module: + finalCodegenActions(graph, globals, bmod) + + # write the generated code to disk: + writeModules(graph, globals) \ No newline at end of file diff --git a/compiler/backend/jsgen.nim b/compiler/backend/jsgen.nim index 0b553127bf0..0b9ac343568 100644 --- a/compiler/backend/jsgen.nim +++ b/compiler/backend/jsgen.nim @@ -29,7 +29,6 @@ implements the required case distinction. 
import std/[ - json, sets, math, tables, @@ -61,19 +60,13 @@ import ropes ], compiler/sem/[ - passes, lowerings, rodutils, transf, - sourcemap ], compiler/backend/[ ccgutils, cgmeth, - ], - compiler/plugins/[ - ], - compiler/vm/[ ] # xxx: reports are a code smell meaning data types are misplaced @@ -86,7 +79,8 @@ from compiler/ast/report_enums import ReportKind import std/strutils except addf # clashes with ropes.addf type - TJSGen = object of PPassContext + TJSGen = object + idgen*: IdGenerator module: PSym graph: ModuleGraph config: ConfigRef @@ -127,7 +121,7 @@ type # has been used (i.e. the label should be emitted) isLoop: bool # whether it's a 'block' or 'while' - PGlobals = ref object of RootObj + PGlobals* = ref object typeInfo, constants, code: Rope forwarded: seq[PSym] generatedSyms: IntSet @@ -169,7 +163,7 @@ template nested(p, body) = body dec p.extraIndent -proc newGlobals(): PGlobals = +proc newGlobals*(): PGlobals = new(result) result.forwarded = @[] result.generatedSyms = initIntSet() @@ -2648,16 +2642,14 @@ proc gen(p: PProc, n: PNode, r: var TCompRes) = of nkPragmaBlock: gen(p, n.lastSon, r) else: internalError(p.config, n.info, "gen: unknown node type: " & $n.kind) -proc newModule(g: ModuleGraph; module: PSym): BModule = +proc newModule*(g: ModuleGraph; module: PSym): BModule = new(result) result.module = module result.sigConflicts = initCountTable[SigHash]() - if g.backend == nil: - g.backend = newGlobals() result.graph = g result.config = g.config -proc genHeader(): Rope = +proc genHeader*(): Rope = result = rope("""/* Generated by the Nim Compiler v$1 */ var framePtr = null; var excHandler = 0; @@ -2678,20 +2670,15 @@ proc genModule(p: PProc, n: PNode) = if optStackTrace in p.options: p.body.add(frameDestroy(p)) -proc myProcess(b: PPassContext, n: PNode): PNode = - result = n - let m = BModule(b) - if passes.skipCodegen(m.config, n): return n - m.config.internalAssert(m.module != nil, n.info, "myProcess") - let globals = 
PGlobals(m.graph.backend) +proc genTopLevelStmt*(globals: PGlobals, m: BModule, n: PNode) = + m.config.internalAssert(m.module != nil, n.info, "genTopLevelStmt") var p = newInitProc(globals, m) p.unique = globals.unique genModule(p, n) p.g.code.add(p.locals) p.g.code.add(p.body) -proc wholeCode(graph: ModuleGraph; m: BModule): Rope = - let globals = PGlobals(graph.backend) +proc finishMainModule(graph: ModuleGraph, globals: PGlobals, m: BModule) = for prc in globals.forwarded: if not globals.generatedSyms.containsOrIncl(prc.id): var p = newInitProc(globals, m) @@ -2704,30 +2691,16 @@ proc wholeCode(graph: ModuleGraph; m: BModule): Rope = var p = newInitProc(globals, m) attachProc(p, prc) - result = globals.typeInfo & globals.constants & globals.code - -proc myClose(graph: ModuleGraph; b: PPassContext, n: PNode): PNode = - var m = BModule(b) - if sfMainModule in m.module.flags: +proc finalCodegenActions*(graph: ModuleGraph; globals: PGlobals, m: BModule) = + if sfMainModule in m.module.flags and graph.globalDestructors.len > 0: + let n = newNode(nkStmtList) for destructorCall in graph.globalDestructors: n.add destructorCall - result = myProcess(b, n) + genTopLevelStmt(globals, m, n) - if passes.skipCodegen(m.config, n): return n if sfMainModule in m.module.flags: - var code = genHeader() & wholeCode(graph, m) - let outFile = m.config.prepareToWriteOutput() - - if optSourcemap in m.config.globalOptions: - var map: SourceMap - (code, map) = genSourceMap($(code), outFile.string) - writeFile(outFile.string & ".map", $(%map)) - discard writeRopeIfNotEqual(code, outFile) - - -proc myOpen(graph: ModuleGraph; s: PSym; idgen: IdGenerator): PPassContext = - result = newModule(graph, s) - result.idgen = idgen + finishMainModule(graph, globals, m) -const JSgenPass* = makePass(myOpen, myProcess, myClose) +proc wholeCode*(globals: PGlobals): Rope = + result = globals.typeInfo & globals.constants & globals.code \ No newline at end of file diff --git a/compiler/front/main.nim 
b/compiler/front/main.nim index 83c1db9e6ea..c90363c5740 100644 --- a/compiler/front/main.nim +++ b/compiler/front/main.nim @@ -82,7 +82,7 @@ from compiler/front/scripting import runNimScript when not defined(leanCompiler): import - compiler/backend/jsgen, + compiler/backend/jsbackend, compiler/tools/[docgen, docgen2] when defined(nimDebugUnreportedErrors): @@ -240,8 +240,11 @@ proc commandCompileToJS(graph: ModuleGraph) = defineSymbol(conf, "ecmascript") # For backward compatibility semanticPasses(graph) - registerPass(graph, JSgenPass) + registerPass(graph, collectPass) compileProject(graph) + + jsbackend.generateCode(graph) + if conf.depfile.string.len != 0: writeGccDepfile(conf) if optGenScript in conf.globalOptions: From a967e366f5a621fd5cf9a5bc2540550f15524c21 Mon Sep 17 00:00:00 2001 From: zerbina <100542850+zerbina@users.noreply.github.com> Date: Thu, 18 May 2023 22:25:52 +0100 Subject: [PATCH 10/15] jsgen: remove handling of forwarded procedures --- compiler/backend/jsgen.nim | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/compiler/backend/jsgen.nim b/compiler/backend/jsgen.nim index 0b9ac343568..13019af566b 100644 --- a/compiler/backend/jsgen.nim +++ b/compiler/backend/jsgen.nim @@ -123,7 +123,6 @@ type PGlobals* = ref object typeInfo, constants, code: Rope - forwarded: seq[PSym] generatedSyms: IntSet typeInfoGenerated: IntSet unique: int # for temp identifier generation @@ -165,7 +164,6 @@ template nested(p, body) = proc newGlobals*(): PGlobals = new(result) - result.forwarded = @[] result.generatedSyms = initIntSet() result.typeInfoGenerated = initIntSet() @@ -1441,9 +1439,9 @@ proc genSym(p: PProc, n: PNode, r: var TCompRes) = elif s.kind == skMethod and getBody(p.module.graph, s).kind == nkEmpty: # we cannot produce code for the dispatcher yet: discard - elif sfForward in s.flags: - p.g.forwarded.add(s) else: + # unresolved borrow or forward declarations must not reach here + assert {sfForward, sfBorrow} * s.flags == {} 
genProcForSymIfNeeded(p, s) else: p.config.internalAssert(s.loc.r != "", n.info, "symbol has no generated name: " & s.name.s) @@ -2679,11 +2677,6 @@ proc genTopLevelStmt*(globals: PGlobals, m: BModule, n: PNode) = p.g.code.add(p.body) proc finishMainModule(graph: ModuleGraph, globals: PGlobals, m: BModule) = - for prc in globals.forwarded: - if not globals.generatedSyms.containsOrIncl(prc.id): - var p = newInitProc(globals, m) - attachProc(p, prc) - var disp = generateMethodDispatchers(graph) for i in 0.. Date: Thu, 18 May 2023 22:25:52 +0100 Subject: [PATCH 11/15] collectors: don't drop declarative nodes The pass should only collect the statements into a list, and not introduce its own decision making. --- compiler/backend/collectors.nim | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/compiler/backend/collectors.nim b/compiler/backend/collectors.nim index 4538938d645..e3eb639d8d4 100644 --- a/compiler/backend/collectors.nim +++ b/compiler/backend/collectors.nim @@ -65,12 +65,7 @@ proc myProcess(b: PPassContext, n: PNode): PNode = result = n let m = ModuleRef(b) - const declarativeKinds = routineDefs + {nkTypeSection, nkPragma, - nkExportStmt, nkExportExceptStmt, nkFromStmt, nkImportStmt, - nkImportExceptStmt} - - if n.kind notin declarativeKinds: - m.list.modules[m.index].stmts.add(n) + m.list.modules[m.index].stmts.add(n) proc myClose(graph: ModuleGraph; b: PPassContext, n: PNode): PNode = result = myProcess(b, n) From 0cdfe7b5ccda9777ba00d2fbc350bef0e09f6c87 Mon Sep 17 00:00:00 2001 From: zerbina <100542850+zerbina@users.noreply.github.com> Date: Thu, 18 May 2023 22:25:52 +0100 Subject: [PATCH 12/15] pass the `ModuleList` to the `generateCode` routines Instead of extracting the `ModuleList` as part of `generateCode`, it is now passed in as a `sink` parameter. 
This: * makes it easier to move away from storing the `ModuleList` as part of the `ModuleGraph` * moves the mutation to the callsite * allows for implementing the memory consumption optimization employed by `vmbackend` (which is eventually going to be used for the other backends too) in a much cleaner way --- compiler/backend/cbackend.nim | 3 +-- compiler/backend/collectors.nim | 6 ++++++ compiler/backend/jsbackend.nim | 3 +-- compiler/front/main.nim | 6 +++--- compiler/modules/modulegraphs.nim | 4 +++- compiler/vm/vmbackend.nim | 22 ++++++++-------------- 6 files changed, 22 insertions(+), 22 deletions(-) diff --git a/compiler/backend/cbackend.nim b/compiler/backend/cbackend.nim index 26747edbaa0..ed393e342a3 100644 --- a/compiler/backend/cbackend.nim +++ b/compiler/backend/cbackend.nim @@ -28,11 +28,10 @@ import from compiler/sem/passes import skipCodegen -proc generateCode*(graph: ModuleGraph) = +proc generateCode*(graph: ModuleGraph, mlist: sink ModuleList) = ## Entry point for C code-generation. Only the C code is generated -- nothing ## is written to disk yet. let - mlist = ModuleListRef(graph.backend) config = graph.config var g = newModuleList(graph) diff --git a/compiler/backend/collectors.nim b/compiler/backend/collectors.nim index e3eb639d8d4..80e8fe286e4 100644 --- a/compiler/backend/collectors.nim +++ b/compiler/backend/collectors.nim @@ -46,6 +46,12 @@ func isFilled*(m: FullModule): bool = # required so that ``FullModule`` is usable as the item type of a ``SeqMap`` m.sym != nil +proc takeModuleList*(graph: ModuleGraph): ModuleList = + ## Moves the ``ModuleList`` set up by the collector pass out of the + ## `graph.backend` field and returns it. 
+ result = move ModuleListRef(graph.backend)[] + graph.backend = nil + # Below is the `passes` interface implementation proc myOpen(graph: ModuleGraph, module: PSym, idgen: IdGenerator): PPassContext = diff --git a/compiler/backend/jsbackend.nim b/compiler/backend/jsbackend.nim index 7be4e5bb3b8..cd01b425101 100644 --- a/compiler/backend/jsbackend.nim +++ b/compiler/backend/jsbackend.nim @@ -42,11 +42,10 @@ proc writeModules(graph: ModuleGraph, globals: PGlobals) = discard writeRopeIfNotEqual(code, outFile) -proc generateCode*(graph: ModuleGraph) = +proc generateCode*(graph: ModuleGraph, mlist: sink ModuleList) = ## Entry point into the JS backend. Generates the code for all modules and ## writes it to the output file. let - mlist = ModuleListRef(graph.backend) globals = newGlobals() # generate the code for all modules: diff --git a/compiler/front/main.nim b/compiler/front/main.nim index c90363c5740..8a2119730b3 100644 --- a/compiler/front/main.nim +++ b/compiler/front/main.nim @@ -205,7 +205,7 @@ proc commandCompileToC(graph: ModuleGraph) = if graph.config.errorCounter > 0: return # issue #9933 if conf.symbolFiles == disabledSf: - cbackend2.generateCode(graph) + cbackend2.generateCode(graph, graph.takeModuleList()) cgenWriteModules(graph.backend, conf) else: if isDefined(conf, "nimIcIntegrityChecks"): @@ -243,7 +243,7 @@ proc commandCompileToJS(graph: ModuleGraph) = registerPass(graph, collectPass) compileProject(graph) - jsbackend.generateCode(graph) + jsbackend.generateCode(graph, graph.takeModuleList()) if conf.depfile.string.len != 0: writeGccDepfile(conf) @@ -257,7 +257,7 @@ proc commandCompileToVM(graph: ModuleGraph) = # The VM-backend doesn't use a pass for the actual code generation, but a # separate procedure instead (similar to the C-backend for IC) - vmbackend.generateCode(graph) + vmbackend.generateCode(graph, graph.takeModuleList()) proc interactivePasses(graph: ModuleGraph) = initDefines(graph.config.symbols) diff --git 
a/compiler/modules/modulegraphs.nim b/compiler/modules/modulegraphs.nim index c5ad44ea9d3..a6e081e3b1e 100644 --- a/compiler/modules/modulegraphs.nim +++ b/compiler/modules/modulegraphs.nim @@ -108,7 +108,9 @@ type # first module that included it importStack*: seq[FileIndex] # The current import stack. Used for detecting recursive # module dependencies. - backend*: RootRef # minor hack so that a backend can extend this easily + backend*: RootRef # XXX: having this field is a hack, but it's still + # required by the current ``passes`` design. Remove + # the ``passes`` design is phased out config*: ConfigRef cache*: IdentCache vm*: RootRef # unfortunately the 'vm' state is shared project-wise, this will diff --git a/compiler/vm/vmbackend.nim b/compiler/vm/vmbackend.nim index 86a4247475e..e5aef4816f2 100644 --- a/compiler/vm/vmbackend.nim +++ b/compiler/vm/vmbackend.nim @@ -387,18 +387,12 @@ func storeExtra(enc: var PackedEncoder, dst: var PackedEnv, mapList(dst.globals, globals, it): enc.typeMap[it] -proc produceModules(g: ModuleGraph, c: var TCtx): BModuleList = - ## Takes the ``ModuleList`` stored in `g` and uses it for producing the - ## module list used by the VM backend. The bytecode for the modules' - ## initialization logic (i.e, top-level statements) is also generated - ## here already. - - # in order to reduce overall memory consumption, we consume the module list - # that was collected earlier. Everything that was not moved over to the - # ``ModuleList`` instance we're using in the backend gets freed once the - # current procedure exits - var mlist = move ModuleListRef(g.backend)[] - g.backend = nil # prevent others from observing the empty module list +proc produceModules(g: ModuleGraph, c: var TCtx, + mlist: sink ModuleList): BModuleList = + ## Translates the input `mlist` into a more packed representation for use by + ## the rest of the orchestrator. 
The bytecode for the modules' initialization + ## logic (i.e, top-level statements) is also generated here, so that + ## the collected top-level AST can be disposed already. # setup an entry for each module and generated the code for the modules' # initalization logic: @@ -417,7 +411,7 @@ proc produceModules(g: ModuleGraph, c: var TCtx): BModuleList = for i, it in mlist.modulesClosed.pairs: result.modulesClosed[i] = result.moduleMap[it.int] -proc generateCode*(g: ModuleGraph) = +proc generateCode*(g: ModuleGraph, mlist: sink ModuleList) = ## The backend's entry point. Orchestrates code generation and linking. If ## all went well, the resulting binary is written to the project's output ## file @@ -433,7 +427,7 @@ proc generateCode*(g: ModuleGraph) = # corresponding procs: registerCallbacks(c) - var mlist = produceModules(g, c) + var mlist = produceModules(g, c, mlist) # generate code for all alive routines generateAliveProcs(c, mlist) From 4c317cf82c8ac2d65a90e997995d8f682f55cfa2 Mon Sep 17 00:00:00 2001 From: zerbina <100542850+zerbina@users.noreply.github.com> Date: Thu, 18 May 2023 22:25:52 +0100 Subject: [PATCH 13/15] tests: disable parts of the `toverflw.nim` test With code generation now always happening after all semantic analysis is done, option changes applied via the `push` and `pop` pragmas no longer apply to top-level statements. As a temporary solution, the feature could be made to work by processing `nkPragma` nodes in the orchestrators, but the planned upcoming changes would render this approach unusable again. 
--- tests/overflw/toverflw.nim | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/tests/overflw/toverflw.nim b/tests/overflw/toverflw.nim index 164e16e5ce9..24377d3fd2d 100644 --- a/tests/overflw/toverflw.nim +++ b/tests/overflw/toverflw.nim @@ -1,24 +1,31 @@ discard """ output: "ok" - cmd: "nim $target --overflowChecks:off $options $file" + targets: "c js !vm" + matrix: "--overflowchecks:off" + description: "Test the ability to detect overflows" """ -# Tests nim's ability to detect overflows + +# knownIssue: when using the VM backend, bound checks are currently always +# performed {.push overflowChecks: on.} var a = high(int) b = -2 + r: int overflowDetected = false try: - writeLine(stdout, b - a) + r = (b - a) except OverflowDefect: overflowDetected = true {.pop.} # overflow check -doAssert(overflowDetected) +# XXX: overflow checks (and other checks) cannot be disabled for module-level +# code at the moment +doAssert(overflowDetected == false) block: # Overflow checks in a proc var From 2d8e3500aec209fdaa60695aa06fb3383e29a2ff Mon Sep 17 00:00:00 2001 From: zerbina <100542850+zerbina@users.noreply.github.com> Date: Sat, 20 May 2023 19:36:59 +0100 Subject: [PATCH 14/15] address review comments Thank you for the review, @saem . --- compiler/backend/collectors.nim | 2 +- compiler/modules/modulegraphs.nim | 2 +- tests/overflw/toverflw.nim | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/compiler/backend/collectors.nim b/compiler/backend/collectors.nim index 80e8fe286e4..71557f87a34 100644 --- a/compiler/backend/collectors.nim +++ b/compiler/backend/collectors.nim @@ -1,6 +1,6 @@ ## Implements the "collect" pass. This pass gathers the full AST for each ## module into a single structure, which is then meant to be consumed by the -## code-generation orchestrators. +## code-generation orchestrators (``cbackend``, ``jsbackend``, etc.). 
## ## This is somewhat similar to the rodfile-based IC backend, but instead of ## reading the modules' content from the rodfiles, it's collected via the pass diff --git a/compiler/modules/modulegraphs.nim b/compiler/modules/modulegraphs.nim index a6e081e3b1e..c1be295df6e 100644 --- a/compiler/modules/modulegraphs.nim +++ b/compiler/modules/modulegraphs.nim @@ -110,7 +110,7 @@ type # module dependencies. backend*: RootRef # XXX: having this field is a hack, but it's still # required by the current ``passes`` design. Remove - # the ``passes`` design is phased out + # once the ``passes`` design is phased out config*: ConfigRef cache*: IdentCache vm*: RootRef # unfortunately the 'vm' state is shared project-wise, this will diff --git a/tests/overflw/toverflw.nim b/tests/overflw/toverflw.nim index 24377d3fd2d..0dae7a225ee 100644 --- a/tests/overflw/toverflw.nim +++ b/tests/overflw/toverflw.nim @@ -25,7 +25,7 @@ except OverflowDefect: # XXX: overflow checks (and other checks) cannot be disabled for module-level # code at the moment -doAssert(overflowDetected == false) +doAssert overflowDetected == false, "re-enable this test, module-level code overflow checking now works" block: # Overflow checks in a proc var From 6757a317af095dafc1ec088d25028a29efb6fe71 Mon Sep 17 00:00:00 2001 From: zerbina <100542850+zerbina@users.noreply.github.com> Date: Sat, 20 May 2023 19:39:41 +0100 Subject: [PATCH 15/15] cbackend: clarify the "close module" comment The comment was misleading, as a backend module is not closed for writing after the call to `finalCodegenActions`. 
--- compiler/backend/cbackend.nim | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/compiler/backend/cbackend.nim b/compiler/backend/cbackend.nim index ed393e342a3..88072744764 100644 --- a/compiler/backend/cbackend.nim +++ b/compiler/backend/cbackend.nim @@ -61,7 +61,9 @@ proc generateCode*(graph: ModuleGraph, mlist: sink ModuleList) = if not skipCodegen(bmod.config, it): genTopLevelStmt(bmod, it) - # close the module: + # wrap up the main part of code generation for the module. Note that this + # doesn't mean that they're closed for writing; invoking the code generator + # for other modules' code can still add new code to this module's sections finalCodegenActions(graph, g.modules[index.int], newNode(nkStmtList)) # the callsite still expects `graph.backend` to point to the ``BModuleList``