From faa65aa551924bef7f8f5b991d57438320101fdd Mon Sep 17 00:00:00 2001 From: zerbina <100542850+zerbina@users.noreply.github.com> Date: Fri, 14 Jul 2023 19:45:54 +0000 Subject: [PATCH 1/3] internal: data-oriented `TLib` storage Summary ======= Replace storing `TLib` objects as `ref`s in `TSym` with storing all `TLib` instances belonging to a module in a sequence that a `TSym` then stores an index into. This slightly reduces the size of `TSym` and, more importantly, makes serializing the symbols and associated `TLib` instances easier: instead of storing a `PackedLib` for *each* packed symbol, the packed symbols now only store an index into the attached-to module's packed lib list. Details ======= For simplicity, the list with the libs for a module is stored in a separate `seq` in `ModuleGraph`. The list cannot be stored in `PContext`, as it needs to be accessible after semantic analysis, at which point `PContext` is already gone. Storing the `TLib` objects for a module happens when the module is closed. While not strictly necessary at the moment, this is a preparation for storing a `PSym` in `TLib.name` that is only set up once the module is closed. Aside from reducing the size of rodfiles, not storing a `PackedLib` for each `PackedSym` also fixes another inefficiency, namely that each deserialized got a unique, possibly duplicated, `TLib` instance. --- compiler/ast/ast.nim | 5 +++- compiler/ast/ast_types.nim | 9 ++++++-- compiler/backend/backends.nim | 10 +++++--- compiler/backend/cgen.nim | 22 +++++++++--------- compiler/ic/cbackend.nim | 1 + compiler/ic/ic.nim | 38 ++++++++++++++++++++----------- compiler/ic/packed_ast.nim | 3 ++- compiler/ic/replayer.nim | 8 +++++++ compiler/ic/rodfiles.nim | 1 + compiler/modules/modulegraphs.nim | 7 ++++++ compiler/modules/modules.nim | 1 + compiler/sem/pragmas.nim | 32 +++++++++++++++----------- compiler/sem/sem.nim | 8 +++++++ compiler/sem/semdata.nim | 31 +++++++++++-------------- 14 files changed, 114 insertions(+), 62 deletions(-) diff --git a/compiler/ast/ast.nim b/compiler/ast/ast.nim index 4e2ea6d23ae..93002e9294d 100644 --- a/compiler/ast/ast.nim +++ b/compiler/ast/ast.nim @@ -29,6 +29,9 @@ import std/[ strutils, tables # For symbol table mapping + ], + experimental/[ + dod_helpers ] export ast_types, ast_idgen, ast_query, int128 @@ -324,7 +327,7 @@ proc assignType*(dest, src: PType) = if src.sym != nil: if dest.sym != nil: dest.sym.flags.incl src.sym.flags-{sfExported} - if dest.sym.annex == nil: dest.sym.annex = src.sym.annex + if dest.sym.annex.isNone: dest.sym.annex = src.sym.annex dest.sym.extFlags.incl src.sym.extFlags if dest.sym.extname.len == 0: dest.sym.extname = src.sym.extname diff --git a/compiler/ast/ast_types.nim b/compiler/ast/ast_types.nim index 63fe2f6b8fe..9e7aed856d5 100644 --- a/compiler/ast/ast_types.nim +++ b/compiler/ast/ast_types.nim @@ -1,6 +1,7 @@ import compiler/ast/lineinfos import compiler/utils/ropes import std/[hashes] +import experimental/dod_helpers from compiler/ast/idents import PIdent, TIdent @@ -44,11 +45,16 @@ type type NodeId* = distinct int32 + LibId* = distinct uint32 proc `==`*(a, b: NodeId): bool {.borrow.} proc hash*(a: NodeId): Hash {.borrow.} proc `$`*(a: NodeId): string {.borrow.} +template indexLike*(_: typedesc[LibId]) = + # makes ``LibId`` available to use with ``opt`` + discard + type TNodeKind* = enum ## order is important, because ranges are used to check whether a node @@ -1633,7 +1639,6 @@ type PScope* = ref TScope - PLib* = ref TLib TSym* {.acyclic.} = object of TIdObj # Keep in sync with PackedSym ## proc and type instantiations are cached in the generic symbol case kind*: TSymKind @@ -1690,7 +1695,7 @@ type ## generation locId*: uint32 ## associates the symbol with a loc in the C code ## generator. 0 means unset. - annex*: PLib ## additional fields (seldom used, so we use a + annex*: opt(LibId) ## additional fields (seldom used, so we use a ## reference to another object to save space) constraint*: PNode ## additional constraints like 'lit|result'; also ## misused for the codegenDecl pragma in the hope diff --git a/compiler/backend/backends.nim b/compiler/backend/backends.nim index 62cba8ebf16..79c20ad46fe 100644 --- a/compiler/backend/backends.nim +++ b/compiler/backend/backends.nim @@ -36,6 +36,9 @@ import compiler/utils/[ containers, idioms + ], + experimental/[ + dod_helpers ] type @@ -565,7 +568,8 @@ proc preprocessDynlib(graph: ModuleGraph, idgen: IdGenerator, # be removed once handling of dynlib procedures and globals is fully # implemented in the ``process`` iterator if exfDynamicLib in sym.extFlags: - if sym.annex.path.kind in nkStrKinds: + let lib = addr graph.libs[moduleId(sym)][sym.annex[]] + if lib.path.kind in nkStrKinds: # it's a string, no need to transform nor scan it discard else: @@ -575,12 +579,12 @@ proc preprocessDynlib(graph: ModuleGraph, idgen: IdGenerator, t: MirTree m: SourceMap - generateCode(graph, {}, sym.annex.path, t, m) + generateCode(graph, {}, lib.path, t, m) for dep in deps(t, {}): # just ignore magics here if dep.kind in routineKinds + {skConst}: deps.add dep - sym.annex.path = generateAST(graph, idgen, sym.owner, t, m) + lib.path = generateAST(graph, idgen, sym.owner, t, m) proc preprocessDynlib(graph: ModuleGraph, mlist: ModuleList, data: var DiscoveryData) = diff --git a/compiler/backend/cgen.nim b/compiler/backend/cgen.nim index 2938b7e8ccd..1c781a60de9 100644 --- a/compiler/backend/cgen.nim +++ b/compiler/backend/cgen.nim @@ -41,6 +41,7 @@ import msgs ], compiler/utils/[ + containers, platform, nversion, bitsets, @@ -58,7 +59,8 @@ import ccgutils, cgendata ], - compiler/plugins/[ + experimental/[ + dod_helpers ] # xxx: reports are a code smell meaning data types are misplaced... @@ -145,8 +147,7 @@ proc isSimpleConst(typ: PType): bool = proc useHeader(m: BModule, sym: PSym) = if exfHeader in sym.extFlags: - assert(sym.annex != nil) - let str = getStr(sym.annex.path) + let str = getStr(m.g.graph.libs[sym.itemId.module][sym.annex[]].path) m.includeHeader(str) proc cgsym(m: BModule, name: string): Rope @@ -628,13 +629,12 @@ include ccgexprs # ----------------------------- dynamic library handling ----------------- # We don't finalize dynamic libs as the OS does this for us. -proc isGetProcAddr(lib: PLib): bool = +proc isGetProcAddr(lib: TLib): bool = let n = lib.path result = n.kind in nkCallKinds and n.typ != nil and n.typ.kind in {tyPointer, tyProc} -proc loadDynamicLib(m: BModule, lib: PLib) = - assert(lib != nil) +proc loadDynamicLib(m: BModule, lib: var TLib) = if not lib.generated: lib.generated = true var tmp = getTempName(m) @@ -686,10 +686,10 @@ proc mangleDynLibProc(sym: PSym): Rope = result = rope(strutils.`%`("Dl_$1_", $sym.id)) proc symInDynamicLib*(m: BModule, sym: PSym) = - var lib = sym.annex - let isCall = isGetProcAddr(lib) + var lib = addr m.g.graph.libs[sym.itemId.module][sym.annex[]] + let isCall = isGetProcAddr(lib[]) let extname = sym.extname - if not isCall: loadDynamicLib(m, lib) + if not isCall: loadDynamicLib(m, lib[]) let tmp = mangleDynLibProc(sym) # XXX: dynlib procedures should be treated as globals here (because that's # what they are, really) @@ -725,9 +725,9 @@ proc symInDynamicLib*(m: BModule, sym: PSym) = m.s[cfsVars].addf("$2 $1;$n", [tmp, getTypeDesc(m, sym.typ, skVar)]) proc varInDynamicLib(m: BModule, sym: PSym) = - var lib = sym.annex + var lib = addr m.g.graph.libs[sym.itemId.module][sym.annex[]] let extname = sym.extname - loadDynamicLib(m, lib) + loadDynamicLib(m, lib[]) let tmp = mangleDynLibProc(sym) incl(m.globals[sym].flags, lfIndirect) m.globals[sym].r = tmp # from now on we only need the internal name diff --git a/compiler/ic/cbackend.nim b/compiler/ic/cbackend.nim index 850079b3c82..e8d282c0460 100644 --- a/compiler/ic/cbackend.nim +++ b/compiler/ic/cbackend.nim @@ -131,6 +131,7 @@ proc generateCode*(g: ModuleGraph) = # XXX: these state changes were already applied during semantic analysis, # but ``resetForBackend`` (unnecessarily) throws them away again for i in 0..high(g.packed): + replayLibs(g, i) replayBackendRoutines(g, i) var alive = computeAliveSyms(g.packed, g, g.config) diff --git a/compiler/ic/ic.nim b/compiler/ic/ic.nim index ce0596658c5..acbc15fb804 100644 --- a/compiler/ic/ic.nim +++ b/compiler/ic/ic.nim @@ -25,6 +25,7 @@ import options ], compiler/utils/[ + containers, ropes, pathutils ] @@ -69,6 +70,7 @@ type attachedOps*: seq[(TTypeAttachedOp, PackedItemId, PackedItemId)] methodsPerType*: seq[(PackedItemId, int, PackedItemId)] enumToStringProcs*: seq[(PackedItemId, PackedItemId)] + libs*: seq[PackedLib] emittedTypeInfo*: seq[string] backendFlags*: set[ModuleBackendFlag] @@ -383,9 +385,7 @@ proc storeType(t: PType; c: var PackedEncoder; m: var PackedModule): PackedItemI # fill the reserved slot, nothing else: m.types[t.uniqueId.item] = p -proc toPackedLib(l: PLib; c: var PackedEncoder; m: var PackedModule): PackedLib = - ## the plib hangs off the psym via the .annex field - if l.isNil: return +proc toPackedLib(l: TLib; c: var PackedEncoder; m: var PackedModule): PackedLib = result.kind = l.kind result.generated = l.generated result.isOverriden = l.isOverriden @@ -424,7 +424,7 @@ proc storeSym*(s: PSym; c: var PackedEncoder; m: var PackedModule): PackedItemId p.typ = s.typ.storeType(c, m) c.addMissing s.owner p.owner = s.owner.safeItemId(c, m) - p.annex = toPackedLib(s.annex, c, m) + p.annex = s.annex # fill the reserved slot, nothing else: m.syms[s.itemId.item] = p @@ -558,6 +558,10 @@ proc storeAttachedOp*(c: var PackedEncoder; m: var PackedModule, kind: TTypeAtta ## Records a type-bound operator attachment action to module `m`. m.attachedOps.add((kind, storeTypeLater(t, c, m), storeSymLater(s, c, m))) +proc storeLib*(c: var PackedEncoder, m: var PackedModule, lib: TLib) = + m.libs.add toPackedLib(lib, c, m) + flush c, m + proc loadError(err: RodFileError; filename: AbsoluteFile; config: ConfigRef;) = case err of cannotOpen: @@ -626,6 +630,7 @@ proc loadRodFile*(filename: AbsoluteFile; m: var PackedModule; config: ConfigRef loadSeqSection attachedOpsSection, m.attachedOps loadSeqSection methodsPerTypeSection, m.methodsPerType loadSeqSection enumToStringProcsSection, m.enumToStringProcs + loadSeqSection libsSection, m.libs loadSeqSection typeInfoSection, m.emittedTypeInfo f.loadSection backendFlagsSection @@ -692,6 +697,7 @@ proc saveRodFile*(filename: AbsoluteFile; encoder: var PackedEncoder; m: var Pac storeSeqSection attachedOpsSection, m.attachedOps storeSeqSection methodsPerTypeSection, m.methodsPerType storeSeqSection enumToStringProcsSection, m.enumToStringProcs + storeSeqSection libsSection, m.libs storeSeqSection typeInfoSection, m.emittedTypeInfo f.storeSection backendFlagsSection @@ -852,14 +858,10 @@ template loadAstBodyLazy(p, field) = result.field = loadProcHeader(c, g, si, g[si].fromDisk.bodies, NodePos p.field) proc loadLib(c: var PackedDecoder; g: var PackedModuleGraph; - si, item: int32; l: PackedLib): PLib = - # XXX: hack; assume a zero LitId means the PackedLib is all zero (empty) - if l.name.int == 0: - result = nil - else: - result = PLib(generated: l.generated, isOverriden: l.isOverriden, - kind: l.kind, name: rope g[si].fromDisk.strings[l.name]) - loadAstBody(l, path) + si: int; l: PackedLib): TLib = + result = TLib(generated: l.generated, isOverriden: l.isOverriden, + kind: l.kind, name: rope g[si].fromDisk.strings[l.name]) + loadAstBody(l, path) proc symBodyFromPacked(c: var PackedDecoder; g: var PackedModuleGraph; s: PackedSym; si, item: int32; result: PSym) = @@ -869,7 +871,7 @@ proc symBodyFromPacked(c: var PackedDecoder; g: var PackedModuleGraph; loadAstBodyLazy(s, ast) else: loadAstBody(s, ast) - result.annex = loadLib(c, g, si, item, s.annex) + result.annex = s.annex if s.kind in {skLet, skVar, skField, skForVar}: result.guard = loadSym(c, g, si, s.guard) @@ -1094,6 +1096,12 @@ proc loadSymFromId*(config: ConfigRef, cache: IdentCache; setupDecoder() result = loadSym(decoder, g, module, id) +proc loadLibs*(config: ConfigRef, cache: IdentCache, + g: var PackedModuleGraph, module: int): Store[LibId, TLib] = + setupDecoder() + for it in g[module].fromDisk.libs.items: + discard result.add(loadLib(decoder, g, module, it)) + proc translateId*(id: PackedItemId; g: PackedModuleGraph; thisModule: int; config: ConfigRef): ItemId = if id.module == LitId(0): ItemId(module: thisModule.int32, item: id.item) @@ -1235,6 +1243,10 @@ proc rodViewer*(rodfile: AbsoluteFile; config: ConfigRef, cache: IdentCache) = else: echo " local ID: ", i, " kind ", m.syms[i].kind + echo "all lib objects" + for it in m.libs.items: + echo " kind: ", it.kind, " name: ", it.name, " path: ", it.path + echo "symbols: ", m.syms.len, " types: ", m.types.len, " top level nodes: ", m.topLevel.nodes.len, " other nodes: ", m.bodies.nodes.len, " strings: ", m.strings.len, " numbers: ", m.numbers.len diff --git a/compiler/ic/packed_ast.nim b/compiler/ic/packed_ast.nim index e718ad57164..112e69b9b85 100644 --- a/compiler/ic/packed_ast.nim +++ b/compiler/ic/packed_ast.nim @@ -13,6 +13,7 @@ ## it is superior. import std/[hashes, tables, strtabs] +import experimental/[dod_helpers] import compiler/ic/bitabs import compiler/ast/ast, compiler/front/options @@ -61,7 +62,7 @@ type offset*: int externalName*: LitId # instead of TLoc extFlags*: ExternalFlags - annex*: PackedLib + annex*: opt(LibId) constraint*: NodeId PackedType* = object diff --git a/compiler/ic/replayer.nim b/compiler/ic/replayer.nim index 44c6679792e..6c8f1a1cd16 100644 --- a/compiler/ic/replayer.nim +++ b/compiler/ic/replayer.nim @@ -186,3 +186,11 @@ proc replayGenericCacheInformation*(g: ModuleGraph; module: int) = for it in mitems(g.packed[module].fromDisk.pureEnums): let symId = FullId(module: module, packed: PackedItemId(module: LitId(0), item: it)) g.ifaces[module].pureEnums.add LazySym(id: symId, sym: nil) + +proc replayLibs*(g: ModuleGraph, module: int) = + ## Loads the packed library information for `module` into `g`. + # XXX: libs are not really replayed state changes... + if module >= g.libs.len: + g.libs.setLen(module + 1) + + g.libs[module] = loadLibs(g.config, g.cache, g.packed, module) \ No newline at end of file diff --git a/compiler/ic/rodfiles.nim b/compiler/ic/rodfiles.nim index a52bd18b33a..8da2518fb46 100644 --- a/compiler/ic/rodfiles.nim +++ b/compiler/ic/rodfiles.nim @@ -93,6 +93,7 @@ type attachedOpsSection methodsPerTypeSection enumToStringProcsSection + libsSection typeInfoSection # required by the backend backendFlagsSection aliveSymsSection # beware, this is stored in a `.alivesyms` file. diff --git a/compiler/modules/modulegraphs.nim b/compiler/modules/modulegraphs.nim index b615efe4231..6c2823781e3 100644 --- a/compiler/modules/modulegraphs.nim +++ b/compiler/modules/modulegraphs.nim @@ -29,6 +29,7 @@ import idents, ], compiler/utils/[ + containers, pathutils, btrees, ropes, @@ -98,6 +99,8 @@ type enumToStringProcs*: Table[ItemId, LazySym] emittedTypeInfo*: Table[string, FileIndex] + libs*: seq[Store[LibId, TLib]] ## indexed by module position + startupPackedConfig*: PackedConfig packageSyms*: TStrTable deps*: IntSet # the dependency graph or potentially its transitive closure. @@ -377,6 +380,10 @@ proc getToStringProc*(g: ModuleGraph; t: PType): PSym = proc setToStringProc*(g: ModuleGraph; t: PType; value: PSym) = g.enumToStringProcs[t.itemId] = LazySym(sym: value) +proc storeLib*(g: ModuleGraph, module: int, lib: TLib) = + if g.config.symbolFiles != disabledSf: + storeLib(g.encoders[module], g.packed[module].fromDisk, lib) + iterator methodsForGeneric*(g: ModuleGraph; t: PType): (int, PSym) = if g.methodsPerType.contains(t.itemId): for it in mitems g.methodsPerType[t.itemId]: diff --git a/compiler/modules/modules.nim b/compiler/modules/modules.nim index 85df1cfba8f..3286cff44fb 100644 --- a/compiler/modules/modules.nim +++ b/compiler/modules/modules.nim @@ -144,6 +144,7 @@ proc compileModule*(graph: ModuleGraph; fileIdx: FileIndex; flags: TSymFlags, fr registerModuleById(graph, m) replayStateChanges(graph.packed[m.int].module, graph) replayGenericCacheInformation(graph, m.int) + replayLibs(graph, m.int) elif graph.isDirty(result): result.flags.excl sfDirty # reset module fields: diff --git a/compiler/sem/pragmas.nim b/compiler/sem/pragmas.nim index 86e1f848232..d6f919ae5fa 100644 --- a/compiler/sem/pragmas.nim +++ b/compiler/sem/pragmas.nim @@ -27,13 +27,15 @@ import lineinfos ], compiler/modules/[ - magicsys + magicsys, + modulegraphs ], compiler/front/[ msgs, options ], compiler/utils/[ + containers, pathutils, debugutils, idioms @@ -44,6 +46,9 @@ import ], compiler/backend/[ extccomp + ], + experimental/[ + dod_helpers ] # xxx: reports are a code smell meaning data types are misplaced @@ -353,16 +358,17 @@ proc processCallConv(c: PContext, n: PNode): PNode = else: result = c.config.newError(n, PAstDiag(kind: adSemCallconvExpected)) -proc getLib(c: PContext, kind: TLibKind, path: PNode): PLib = - for it in c.libs: +proc getLib(c: PContext, kind: TLibKind, path: PNode): LibId = + for id, it in c.libs.pairs: if it.kind == kind and trees.exprStructuralEquivalent(it.path, path): - return it + return id - result = newLib(kind) - result.path = path - c.libs.add result + var lib = initLib(kind) + lib.path = path if path.kind in {nkStrLit..nkTripleStrLit}: - result.isOverriden = options.isDynlibOverride(c.config, path.strVal) + lib.isOverriden = options.isDynlibOverride(c.config, path.strVal) + + result = c.libs.add(lib) proc expectDynlibNode(c: PContext, n: PNode): PNode = ## `n` must be a callable, this will produce the ast for the callable or @@ -389,8 +395,8 @@ proc processDynLib(c: PContext, n: PNode, sym: PSym): PNode = result = libNode else: let lib = getLib(c, libDynamic, libNode) - if not lib.isOverriden: - c.optionStack[^1].dynlib = lib + if not c.libs[lib].isOverriden: + c.optionStack[^1].dynlib = someOpt(lib) else: if n.kind in nkPragmaCallKinds: let libNode = expectDynlibNode(c, n) @@ -399,7 +405,7 @@ proc processDynLib(c: PContext, n: PNode, sym: PSym): PNode = result = libNode else: var lib = getLib(c, libDynamic, libNode) - if not lib.isOverriden: + if not c.libs[lib].isOverriden: addToLib(lib, sym) incl(sym.extFlags, exfDynamicLib) else: @@ -1851,10 +1857,10 @@ proc inheritDynlib*(c: PContext, sym: PSym) = ## applicable. The dynlib pragma can be applied if the symbol is marked as ## imported, but no header nor dynlib are specified. let lib = c.optionStack[^1].dynlib - if lib != nil and sfImportc in sym.flags and + if lib.isSome and sfImportc in sym.flags and {exfDynamicLib, exfHeader} * sym.extFlags == {}: incl(sym.extFlags, exfDynamicLib) - addToLib(lib, sym) + addToLib(lib[], sym) if sym.extname == "": # XXX: this looks like a unnecessary defensive check. If the symbol is # marked as imported, it already has an external name set diff --git a/compiler/sem/sem.nim b/compiler/sem/sem.nim index bfdd1f8438b..da9cf163cf7 100644 --- a/compiler/sem/sem.nim +++ b/compiler/sem/sem.nim @@ -46,6 +46,7 @@ import msgs ], compiler/utils/[ + containers, ropes, platform, nversion, @@ -829,6 +830,9 @@ proc myOpen(graph: ModuleGraph; module: PSym; graph.config.internalAssert(c.p == nil, module.info, "sem.myOpen") + if module.position >= graph.libs.len: + graph.libs.setLen(module.position + 1) + c.semConstExpr = semConstExpr c.semExpr = semExpr c.semTryExpr = tryExpr @@ -935,6 +939,10 @@ proc myClose(graph: ModuleGraph; context: PPassContext, n: PNode): PNode = var c = PContext(context) if c.config.cmd == cmdIdeTools and not c.suggestionsMade: suggestSentinel(c) + + for it in c.libs.items: + storeLib(graph, c.module.position, it) + closeScope(c) # close module's scope rawCloseScope(c) # imported symbols; don't check for unused ones! reportUnusedModules(c) diff --git a/compiler/sem/semdata.nim b/compiler/sem/semdata.nim index 3d1270f9a8a..0538f7ea495 100644 --- a/compiler/sem/semdata.nim +++ b/compiler/sem/semdata.nim @@ -38,8 +38,10 @@ import ic ], compiler/utils/[ - pathutils, - astrepr, + containers + ], + experimental/[ + dod_helpers ] from compiler/ast/reports_sem import reportAst, @@ -55,7 +57,7 @@ type TOptionEntry* = object ## entries to put on a stack for pragma parsing options*: TOptions defaultCC*: TCallingConvention - dynlib*: PLib + dynlib*: opt(LibId) notes*: ReportKinds features*: set[Feature] otherPragmas*: PNode ## every pragma can be pushed @@ -563,14 +565,6 @@ type ## - pragmas: query user pragmas to see if we need to process it now ## - semstmts: query user pragmas for proc/conv/etc annotation lookup ## - semtypes: query user pragmas for type section pragmas - libs*: seq[PLib] - ## all libs used by this module, mostly setup pragmas. - ## - ## written: - ## - semdata: init - ## - pragmas: add to dynamic libs - ## read: - ## - pragmas: query dynamic libs # hacks used for lookups isAmbiguous*: bool # little hack <-- it never is "little" @@ -805,7 +799,7 @@ proc newOptionEntry*(conf: ConfigRef): POptionEntry = new(result) result.options = conf.options result.defaultCC = ccNimCall - result.dynlib = nil + result.dynlib = noneOpt(LibId) result.notes = conf.notes result.warningAsErrors = conf.warningAsErrors @@ -830,7 +824,6 @@ proc popOptionEntry*(c: PContext) = proc newContext*(graph: ModuleGraph; module: PSym): PContext = new(result) result.optionStack = @[newOptionEntry(graph.config)] - result.libs = @[] result.module = module result.friendModules = @[module] result.converters = @[] @@ -904,14 +897,16 @@ proc reexportSym*(c: PContext; s: PSym) = if c.config.symbolFiles != disabledSf: addReexport(c.encoder, c.packedRepr, s) -proc newLib*(kind: TLibKind): PLib = - new(result) - result.kind = kind #initObjectSet(result.syms) +template libs*(c: PContext): Store[LibId, TLib] = + c.graph.libs[c.module.position] + +proc initLib*(kind: TLibKind): TLib = + result = TLib(kind: kind) -proc addToLib*(lib: PLib, sym: PSym) = +proc addToLib*(lib: LibId, sym: PSym) = #if sym.annex != nil and not isGenericRoutine(sym): # LocalError(sym.info, errInvalidPragma) - sym.annex = lib + sym.annex = someOpt(lib) proc newTypeS*(kind: TTypeKind, c: PContext): PType = result = newType(kind, nextTypeId(c.idgen), getCurrOwner(c)) From 6f3988a15c41cfc24a394bd9daab6d8bc843339b Mon Sep 17 00:00:00 2001 From: zerbina <100542850+zerbina@users.noreply.github.com> Date: Sat, 15 Jul 2023 14:36:48 +0000 Subject: [PATCH 2/3] support foreign `TLib` references A symbol can now reference the `TLib` belonging to a module that is not the one the symbol is attached to. This works around an issue with aliased structural types. Also, use the ID provided by the module's ID generator instead of the `skModule`s position. While both represent the same thing (a ``FileIndex``), querying the ID generator is more consistent with how it works elsewhere. --- compiler/ast/ast.nim | 5 +---- compiler/ast/ast_types.nim | 24 +++++++++++++++++------- compiler/backend/backends.nim | 5 +---- compiler/backend/ccgtypes.nim | 2 +- compiler/backend/cgen.nim | 11 +++++------ compiler/ic/ic.nim | 5 ++--- compiler/ic/packed_ast.nim | 3 +-- compiler/modules/modulegraphs.nim | 20 +++++++++++++++++--- compiler/sem/pragmas.nim | 18 +++++++----------- compiler/sem/sem.nim | 6 +----- compiler/sem/semdata.nim | 28 ++++++++++++++++------------ 11 files changed, 69 insertions(+), 58 deletions(-) diff --git a/compiler/ast/ast.nim b/compiler/ast/ast.nim index 93002e9294d..54bbe3187b2 100644 --- a/compiler/ast/ast.nim +++ b/compiler/ast/ast.nim @@ -29,9 +29,6 @@ import std/[ strutils, tables # For symbol table mapping - ], - experimental/[ - dod_helpers ] export ast_types, ast_idgen, ast_query, int128 @@ -327,7 +324,7 @@ proc assignType*(dest, src: PType) = if src.sym != nil: if dest.sym != nil: dest.sym.flags.incl src.sym.flags-{sfExported} - if dest.sym.annex.isNone: dest.sym.annex = src.sym.annex + if dest.sym.annex.isNil: dest.sym.annex = src.sym.annex dest.sym.extFlags.incl src.sym.extFlags if dest.sym.extname.len == 0: dest.sym.extname = src.sym.extname diff --git a/compiler/ast/ast_types.nim b/compiler/ast/ast_types.nim index 9e7aed856d5..a7e5619e7d7 100644 --- a/compiler/ast/ast_types.nim +++ b/compiler/ast/ast_types.nim @@ -1,7 +1,6 @@ import compiler/ast/lineinfos import compiler/utils/ropes import std/[hashes] -import experimental/dod_helpers from compiler/ast/idents import PIdent, TIdent @@ -45,16 +44,11 @@ type type NodeId* = distinct int32 - LibId* = distinct uint32 proc `==`*(a, b: NodeId): bool {.borrow.} proc hash*(a: NodeId): Hash {.borrow.} proc `$`*(a: NodeId): string {.borrow.} -template indexLike*(_: typedesc[LibId]) = - # makes ``LibId`` available to use with ``opt`` - discard - type TNodeKind* = enum ## order is important, because ranges are used to check whether a node @@ -1621,6 +1615,20 @@ type name*: Rope path*: PNode ## can be a string literal! + LibId* = object + ## Identifies a ``TLib`` instance. The default value means 'none'. + # XXX: ideally, a ``LibId`` would be a single 32-bit index into the + # surrounding module, but this is not possible at the moment, because + # of how aliased structural types work. + # + # type A {.header: ... .} = int # declared in module 'A' + # type B = A # declared in module 'B' + # + # Here, 'B' is not a ``tyAlias`` type, but rather a ``tyInt``, with + # the symbol information from 'A' (including the ``LibId``) copied + # over. + module*: int32 ## the ID of the module the lib object is part + index*: uint32 ## 1-based index. Zero means 'none' CompilesId* = int ## id that is used for the caching logic within ## ``system.compiles``. See the seminst module. @@ -1695,7 +1703,7 @@ type ## generation locId*: uint32 ## associates the symbol with a loc in the C code ## generator. 0 means unset. - annex*: opt(LibId) ## additional fields (seldom used, so we use a + annex*: LibId ## additional fields (seldom used, so we use a ## reference to another object to save space) constraint*: PNode ## additional constraints like 'lit|result'; also ## misused for the codegenDecl pragma in the hope @@ -1897,3 +1905,5 @@ proc `comment=`*(n: PNode, a: string) = gconfig.comments.del(n.id) proc setUseIc*(useIc: bool) = gconfig.useIc = useIc + +func isNil*(id: LibId): bool = id.index == 0 \ No newline at end of file diff --git a/compiler/backend/backends.nim b/compiler/backend/backends.nim index 79c20ad46fe..c2914090c52 100644 --- a/compiler/backend/backends.nim +++ b/compiler/backend/backends.nim @@ -36,9 +36,6 @@ import compiler/utils/[ containers, idioms - ], - experimental/[ - dod_helpers ] type @@ -568,7 +565,7 @@ proc preprocessDynlib(graph: ModuleGraph, idgen: IdGenerator, # be removed once handling of dynlib procedures and globals is fully # implemented in the ``process`` iterator if exfDynamicLib in sym.extFlags: - let lib = addr graph.libs[moduleId(sym)][sym.annex[]] + let lib = addr graph.getLib(sym.annex) if lib.path.kind in nkStrKinds: # it's a string, no need to transform nor scan it discard diff --git a/compiler/backend/ccgtypes.nim b/compiler/backend/ccgtypes.nim index 8e32947ed4f..a93f9e4944d 100644 --- a/compiler/backend/ccgtypes.nim +++ b/compiler/backend/ccgtypes.nim @@ -11,7 +11,7 @@ # ------------------------- Name Mangling -------------------------------- -import compiler/sem/sighashes, compiler/modules/modulegraphs +import compiler/sem/sighashes proc isKeyword(w: PIdent): bool = # Nim and C++ share some keywords diff --git a/compiler/backend/cgen.nim b/compiler/backend/cgen.nim index 1c781a60de9..ee620b1e6c3 100644 --- a/compiler/backend/cgen.nim +++ b/compiler/backend/cgen.nim @@ -35,13 +35,13 @@ import ], compiler/modules/[ magicsys, + modulegraphs ], compiler/front/[ options, msgs ], compiler/utils/[ - containers, platform, nversion, bitsets, @@ -59,8 +59,7 @@ import ccgutils, cgendata ], - experimental/[ - dod_helpers + compiler/plugins/[ ] # xxx: reports are a code smell meaning data types are misplaced... @@ -147,7 +146,7 @@ proc isSimpleConst(typ: PType): bool = proc useHeader(m: BModule, sym: PSym) = if exfHeader in sym.extFlags: - let str = getStr(m.g.graph.libs[sym.itemId.module][sym.annex[]].path) + let str = getStr(m.g.graph.getLib(sym.annex).path) m.includeHeader(str) proc cgsym(m: BModule, name: string): Rope @@ -686,7 +685,7 @@ proc mangleDynLibProc(sym: PSym): Rope = result = rope(strutils.`%`("Dl_$1_", $sym.id)) proc symInDynamicLib*(m: BModule, sym: PSym) = - var lib = addr m.g.graph.libs[sym.itemId.module][sym.annex[]] + var lib = addr m.g.graph.getLib(sym.annex) let isCall = isGetProcAddr(lib[]) let extname = sym.extname if not isCall: loadDynamicLib(m, lib[]) @@ -725,7 +724,7 @@ proc symInDynamicLib*(m: BModule, sym: PSym) = m.s[cfsVars].addf("$2 $1;$n", [tmp, getTypeDesc(m, sym.typ, skVar)]) proc varInDynamicLib(m: BModule, sym: PSym) = - var lib = addr m.g.graph.libs[sym.itemId.module][sym.annex[]] + var lib = addr m.g.graph.getLib(sym.annex) let extname = sym.extname loadDynamicLib(m, lib[]) let tmp = mangleDynLibProc(sym) diff --git a/compiler/ic/ic.nim b/compiler/ic/ic.nim index acbc15fb804..faa553de988 100644 --- a/compiler/ic/ic.nim +++ b/compiler/ic/ic.nim @@ -25,7 +25,6 @@ import options ], compiler/utils/[ - containers, ropes, pathutils ] @@ -1097,10 +1096,10 @@ proc loadSymFromId*(config: ConfigRef, cache: IdentCache; result = loadSym(decoder, g, module, id) proc loadLibs*(config: ConfigRef, cache: IdentCache, - g: var PackedModuleGraph, module: int): Store[LibId, TLib] = + g: var PackedModuleGraph, module: int): seq[TLib] = setupDecoder() for it in g[module].fromDisk.libs.items: - discard result.add(loadLib(decoder, g, module, it)) + result.add(loadLib(decoder, g, module, it)) proc translateId*(id: PackedItemId; g: PackedModuleGraph; thisModule: int; config: ConfigRef): ItemId = if id.module == LitId(0): diff --git a/compiler/ic/packed_ast.nim b/compiler/ic/packed_ast.nim index 112e69b9b85..3a1beca8676 100644 --- a/compiler/ic/packed_ast.nim +++ b/compiler/ic/packed_ast.nim @@ -13,7 +13,6 @@ ## it is superior. import std/[hashes, tables, strtabs] -import experimental/[dod_helpers] import compiler/ic/bitabs import compiler/ast/ast, compiler/front/options @@ -62,7 +61,7 @@ type offset*: int externalName*: LitId # instead of TLoc extFlags*: ExternalFlags - annex*: opt(LibId) + annex*: LibId constraint*: NodeId PackedType* = object diff --git a/compiler/modules/modulegraphs.nim b/compiler/modules/modulegraphs.nim index 6c2823781e3..050b28fd4e9 100644 --- a/compiler/modules/modulegraphs.nim +++ b/compiler/modules/modulegraphs.nim @@ -99,7 +99,7 @@ type enumToStringProcs*: Table[ItemId, LazySym] emittedTypeInfo*: Table[string, FileIndex] - libs*: seq[Store[LibId, TLib]] ## indexed by module position + libs*: seq[seq[TLib]] ## indexed by ``LibId`` startupPackedConfig*: PackedConfig packageSyms*: TStrTable @@ -380,9 +380,23 @@ proc getToStringProc*(g: ModuleGraph; t: PType): PSym = proc setToStringProc*(g: ModuleGraph; t: PType; value: PSym) = g.enumToStringProcs[t.itemId] = LazySym(sym: value) -proc storeLib*(g: ModuleGraph, module: int, lib: TLib) = +func getLib*(g: ModuleGraph, id: LibId): var TLib = + assert not isNil(id), "id is 'none'" + result = g.libs[id.module][id.index - 1] + +proc addLib*(g: ModuleGraph, module: int, lib: sink TLib): LibId = + ## Registers (adds) `lib` with the given module and returns the ID through + ## which the instance can be accessed from now on. + g.libs[module].add lib + # the index is 1-based, so we can directly use the length + result = LibId(module: module.int32, index: g.libs[module].len.uint32) + +proc storeLibs*(g: ModuleGraph, module: int) = + ## Writes the ``TLib`` instances associated with `module` to the module's + ## packed representation. Only relevant for IC. if g.config.symbolFiles != disabledSf: - storeLib(g.encoders[module], g.packed[module].fromDisk, lib) + for lib in g.libs[module].items: + storeLib(g.encoders[module], g.packed[module].fromDisk, lib) iterator methodsForGeneric*(g: ModuleGraph; t: PType): (int, PSym) = if g.methodsPerType.contains(t.itemId): diff --git a/compiler/sem/pragmas.nim b/compiler/sem/pragmas.nim index d6f919ae5fa..1daf651b77c 100644 --- a/compiler/sem/pragmas.nim +++ b/compiler/sem/pragmas.nim @@ -35,7 +35,6 @@ import options ], compiler/utils/[ - containers, pathutils, debugutils, idioms @@ -46,9 +45,6 @@ import ], compiler/backend/[ extccomp - ], - experimental/[ - dod_helpers ] # xxx: reports are a code smell meaning data types are misplaced @@ -359,7 +355,7 @@ proc processCallConv(c: PContext, n: PNode): PNode = result = c.config.newError(n, PAstDiag(kind: adSemCallconvExpected)) proc getLib(c: PContext, kind: TLibKind, path: PNode): LibId = - for id, it in c.libs.pairs: + for id, it in c.libs: if it.kind == kind and trees.exprStructuralEquivalent(it.path, path): return id @@ -368,7 +364,7 @@ proc getLib(c: PContext, kind: TLibKind, path: PNode): LibId = if path.kind in {nkStrLit..nkTripleStrLit}: lib.isOverriden = options.isDynlibOverride(c.config, path.strVal) - result = c.libs.add(lib) + result = c.addLib(lib) proc expectDynlibNode(c: PContext, n: PNode): PNode = ## `n` must be a callable, this will produce the ast for the callable or @@ -395,8 +391,8 @@ proc processDynLib(c: PContext, n: PNode, sym: PSym): PNode = result = libNode else: let lib = getLib(c, libDynamic, libNode) - if not c.libs[lib].isOverriden: - c.optionStack[^1].dynlib = someOpt(lib) + if not c[lib].isOverriden: + c.optionStack[^1].dynlib = lib else: if n.kind in nkPragmaCallKinds: let libNode = expectDynlibNode(c, n) @@ -405,7 +401,7 @@ proc processDynLib(c: PContext, n: PNode, sym: PSym): PNode = result = libNode else: var lib = getLib(c, libDynamic, libNode) - if not c.libs[lib].isOverriden: + if not c[lib].isOverriden: addToLib(lib, sym) incl(sym.extFlags, exfDynamicLib) else: @@ -1857,10 +1853,10 @@ proc inheritDynlib*(c: PContext, sym: PSym) = ## applicable. The dynlib pragma can be applied if the symbol is marked as ## imported, but no header nor dynlib are specified. let lib = c.optionStack[^1].dynlib - if lib.isSome and sfImportc in sym.flags and + if not lib.isNil and sfImportc in sym.flags and {exfDynamicLib, exfHeader} * sym.extFlags == {}: incl(sym.extFlags, exfDynamicLib) - addToLib(lib[], sym) + addToLib(lib, sym) if sym.extname == "": # XXX: this looks like a unnecessary defensive check. If the symbol is # marked as imported, it already has an external name set diff --git a/compiler/sem/sem.nim b/compiler/sem/sem.nim index da9cf163cf7..3b9c001fe26 100644 --- a/compiler/sem/sem.nim +++ b/compiler/sem/sem.nim @@ -46,7 +46,6 @@ import msgs ], compiler/utils/[ - containers, ropes, platform, nversion, @@ -939,10 +938,7 @@ proc myClose(graph: ModuleGraph; context: PPassContext, n: PNode): PNode = var c = PContext(context) if c.config.cmd == cmdIdeTools and not c.suggestionsMade: suggestSentinel(c) - - for it in c.libs.items: - storeLib(graph, c.module.position, it) - + storeLibs(graph, c.idgen.module) closeScope(c) # close module's scope rawCloseScope(c) # imported symbols; don't check for unused ones! reportUnusedModules(c) diff --git a/compiler/sem/semdata.nim b/compiler/sem/semdata.nim index 0538f7ea495..85ee7324019 100644 --- a/compiler/sem/semdata.nim +++ b/compiler/sem/semdata.nim @@ -36,12 +36,6 @@ import ], compiler/ic/[ ic - ], - compiler/utils/[ - containers - ], - experimental/[ - dod_helpers ] from compiler/ast/reports_sem import reportAst, @@ -57,7 +51,7 @@ type TOptionEntry* = object ## entries to put on a stack for pragma parsing options*: TOptions defaultCC*: TCallingConvention - dynlib*: opt(LibId) + dynlib*: LibId notes*: ReportKinds features*: set[Feature] otherPragmas*: PNode ## every pragma can be pushed @@ -799,7 +793,7 @@ proc newOptionEntry*(conf: ConfigRef): POptionEntry = new(result) result.options = conf.options result.defaultCC = ccNimCall - result.dynlib = noneOpt(LibId) + result.dynlib = LibId() result.notes = conf.notes result.warningAsErrors = conf.warningAsErrors @@ -897,16 +891,26 @@ proc reexportSym*(c: PContext; s: PSym) = if c.config.symbolFiles != disabledSf: addReexport(c.encoder, c.packedRepr, s) -template libs*(c: PContext): Store[LibId, TLib] = - c.graph.libs[c.module.position] - proc initLib*(kind: TLibKind): TLib = result = TLib(kind: kind) proc addToLib*(lib: LibId, sym: PSym) = #if sym.annex != nil and not isGenericRoutine(sym): # LocalError(sym.info, errInvalidPragma) - sym.annex = someOpt(lib) + assert not isNil(lib) + sym.annex = lib + +proc addLib*(c: PContext, lib: sink TLib): LibId = + c.graph.addLib(c.idgen.module, lib) + +func `[]`*(c: PContext, id: LibId): var TLib = + c.graph.getLib(id) + +iterator libs*(c: PContext): (LibId, var TLib) = + ## Returns all ``TLib`` instances associated with `c`. + let pos = c.idgen.module + for i in 0.. Date: Sat, 15 Jul 2023 14:36:48 +0000 Subject: [PATCH 3/3] ic: properly handle module references A module ID isn't necessarily the same before and after loading/storing a rodfile. Loading and storing of `LibId`s now considers this. --- compiler/ic/ic.nim | 17 +++++++++++++---- compiler/ic/packed_ast.nim | 2 +- 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/compiler/ic/ic.nim b/compiler/ic/ic.nim index faa553de988..1cb200c0f2c 100644 --- a/compiler/ic/ic.nim +++ b/compiler/ic/ic.nim @@ -423,7 +423,8 @@ proc storeSym*(s: PSym; c: var PackedEncoder; m: var PackedModule): PackedItemId p.typ = s.typ.storeType(c, m) c.addMissing s.owner p.owner = s.owner.safeItemId(c, m) - p.annex = s.annex + if s.annex.index != 0: + p.annex = (toLitId(s.annex.module.FileIndex, c, m), s.annex.index) # fill the reserved slot, nothing else: m.syms[s.itemId.item] = p @@ -832,10 +833,15 @@ proc loadProcBody(c: var PackedDecoder; g: var PackedModuleGraph; thisModule: in result = loadNodes(c, g, thisModule, tree, n0) inc i +proc moduleIndex(c: var PackedDecoder; g: PackedModuleGraph; thisModule: int; + module: LitId): int32 {.inline.} = + ## Returns the module ID for the stored ``FileIndex`` identified by `module`. + result = if module == LitId(0): thisModule.int32 + else: toFileIndexCached(c, g, thisModule, module).int32 + proc moduleIndex*(c: var PackedDecoder; g: PackedModuleGraph; thisModule: int; s: PackedItemId): int32 {.inline.} = - result = if s.module == LitId(0): thisModule.int32 - else: toFileIndexCached(c, g, thisModule, s.module).int32 + result = moduleIndex(c, g, thisModule, s.module) proc symHeaderFromPacked(c: var PackedDecoder; g: var PackedModuleGraph; s: PackedSym; si, item: int32): PSym = @@ -870,7 +876,10 @@ proc symBodyFromPacked(c: var PackedDecoder; g: var PackedModuleGraph; loadAstBodyLazy(s, ast) else: loadAstBody(s, ast) - result.annex = s.annex + + if s.annex.index != 0: + result.annex = LibId(module: moduleIndex(c, g, si, s.annex.module), + index: s.annex.index) if s.kind in {skLet, skVar, skField, skForVar}: result.guard = loadSym(c, g, si, s.guard) diff --git a/compiler/ic/packed_ast.nim b/compiler/ic/packed_ast.nim index 3a1beca8676..fd81319ad56 100644 --- a/compiler/ic/packed_ast.nim +++ b/compiler/ic/packed_ast.nim @@ -61,7 +61,7 @@ type offset*: int externalName*: LitId # instead of TLoc extFlags*: ExternalFlags - annex*: LibId + annex*: tuple[module: LitId, index: uint32] constraint*: NodeId PackedType* = object