From c5e1aa5402b50e549b7116d452a3da8bebbadd49 Mon Sep 17 00:00:00 2001 From: zerbina <100542850+zerbina@users.noreply.github.com> Date: Tue, 12 Sep 2023 22:32:46 +0200 Subject: [PATCH] cgen: fix redundant initialization of temporaries (#896) ## Summary Fix multiple places in the C code generator where temporaries inserted by the C code generator (and not earlier) were redundantly initialized twice. While this is unlikely to have an impact on the performance of the generated binaries, it nonetheless reduces the pressure on the C compiler's optimizer, reduces the amount of work for the NimSkull compiler, and also leads to the C artifacts becoming a bit smaller. ## Details The C code generator creates new temporary variables itself, usually for the purpose of upholding evaluation order expectations. It allocates them through `cgen.getTemp`, which allocates a name, emits the declaration, and optionally (indicated by the `needsInit` parameter) default-initializes the location. ### Redundant initializations * for calls that use the return-value optimization and where the destination location is empty (`fixupCall`/`genClosureCall`; this happens, for example, for `f(rvoCall())`), the created temporary was initialized. This is redundant, however, as the called procedure is responsible for resetting the result location (if needed) * for potentially-raising calls that require a temporary to behave as expected (`fixupCall`), the temporary was initialized, but this is unnecessary, as its immediately assigned to after * for object constructions where the destination locations is empty (`genObjConst`; happens, for example, for `f(Obj(...)))`, the type header fields were initialized twice. This is because `getTemp` calls `constructLoc`, which by default always initializes type headers In addition, `constructLoc` (which was unconditionally used by `getTemp`) always assigned the zero value to non-complex locations (ints, floats, pointers, seqs, etc.), even if `hasTemp` is false, causing many redundant assignments. ### Improved handling Apart from the aforementioned places where setting `needsInit` to 'true' was unnecessary, there was only a single place where the `needsInit` parameter was used (`cgen.resetLoc`). Therefore, the parameter plus the `constructLoc` call are removed from `getTemp`; its usage sites are now responsible for initializing the temporary. The `isTemp` parameter for `constructLoc` is also not needed anymore and is thus removed. All usage sites of `getTemp` are audited for whether they rely on the initialization of scalars or type fields previously performed by `getTemp`, but none does. --- compiler/backend/ccgcalls.nim | 19 +++++++++---------- compiler/backend/ccgexprs.nim | 8 ++++---- compiler/backend/cgen.nim | 18 +++++++----------- 3 files changed, 20 insertions(+), 25 deletions(-) diff --git a/compiler/backend/ccgcalls.nim b/compiler/backend/ccgcalls.nim index a0df77c57a2..1e66e031414 100644 --- a/compiler/backend/ccgcalls.nim +++ b/compiler/backend/ccgcalls.nim @@ -100,10 +100,10 @@ proc fixupCall(p: BProc, le, ri: CgNode, d: var TLoc, if d.k notin {locTemp, locNone}: reportObservableStore(p, le, ri) - if d.k == locNone: getTemp(p, typ[0], d, needsInit=true) - elif d.k notin {locTemp} and not hasNoInit(ri): - # reset before pass as 'result' var: - discard "resetLoc(p, d)" + # resetting the result location is the responsibility of the called + # procedure + if d.k == locNone: + getTemp(p, typ[0], d) pl.add(addrLoc(p.config, d)) pl.add(~");$n") line(p, cpsStmts, pl) @@ -120,7 +120,7 @@ proc fixupCall(p: BProc, le, ri: CgNode, d: var TLoc, exitCall(p, ri[0], canRaise) else: var tmp: TLoc - getTemp(p, typ[0], tmp, needsInit=true) + getTemp(p, typ[0], tmp) var list: TLoc initLoc(list, locCall, d.lode, OnUnknown) list.r = pl @@ -237,7 +237,7 @@ proc openArrayLoc(p: BProc, formalType: PType, n: CgNode): Rope = else: internalError(p.config, "openArrayLoc: " & typeToString(a.t)) proc literalsNeedsTmp(p: BProc, a: TLoc): TLoc = - getTemp(p, a.lode.typ, result, needsInit=false) + getTemp(p, a.lode.typ, result) genAssignment(p, result, a) proc genArgStringToCString(p: BProc, n: CgNode): Rope {.inline.} = @@ -333,11 +333,10 @@ proc genClosureCall(p: BProc, le, ri: CgNode, d: var TLoc) = if d.k notin {locTemp, locNone}: reportObservableStore(p, le, ri) + # resetting the result location is the responsibility of the called + # procedure if d.k == locNone: - getTemp(p, typ[0], d, needsInit=true) - elif d.k notin {locTemp} and not hasNoInit(ri): - # reset before pass as 'result' var: - discard "resetLoc(p, d)" + getTemp(p, typ[0], d) pl.add(addrLoc(p.config, d)) genCallPattern() exitCall(p, ri[0], canRaise) diff --git a/compiler/backend/ccgexprs.nim b/compiler/backend/ccgexprs.nim index afdca36efe8..8039b9eb55b 100644 --- a/compiler/backend/ccgexprs.nim +++ b/compiler/backend/ccgexprs.nim @@ -135,7 +135,6 @@ proc genSetNode(p: BProc, n: CgNode): Rope = proc genOpenArrayConv(p: BProc; d: TLoc; a: TLoc) = assert d.k != locNone - # getTemp(p, d.t, d) case a.t.skipTypes(abstractVar).kind of tyOpenArray, tyVarargs: @@ -909,8 +908,9 @@ proc genStrAppend(p: BProc, e: CgNode, d: var TLoc) = p.s(cpsStmts).add appends proc genDefault(p: BProc; n: CgNode; d: var TLoc) = - if d.k == locNone: getTemp(p, n.typ, d, needsInit=true) - else: resetLoc(p, d) + if d.k == locNone: + getTemp(p, n.typ, d) + resetLoc(p, d) proc rawGenNew(p: BProc, a: var TLoc, sizeExpr: Rope; needsInit: bool; doInitObj = true) = var sizeExpr = sizeExpr @@ -952,7 +952,7 @@ proc genNewSeqOfCap(p: BProc; e: CgNode; d: var TLoc) = var a: TLoc initLocExpr(p, e[1], a) block: - if d.k == locNone: getTemp(p, e.typ, d, needsInit=false) + if d.k == locNone: getTemp(p, e.typ, d) linefmt(p, cpsStmts, "$1.len = 0; $1.p = ($4*) #newSeqPayload($2, sizeof($3), NIM_ALIGNOF($3));$n", [d.rdLoc, a.rdLoc, getTypeDesc(p.module, seqtype.lastSon), getSeqPayloadType(p.module, seqtype), diff --git a/compiler/backend/cgen.nim b/compiler/backend/cgen.nim index 2607898a403..737521e3359 100644 --- a/compiler/backend/cgen.nim +++ b/compiler/backend/cgen.nim @@ -410,7 +410,7 @@ proc genObjectInit(p: BProc, section: TCProcSection, t: PType, a: TLoc, let tmp = defaultValueExpr(p, t, a.lode.info) genAssignment(p, a, tmp) -proc constructLoc(p: BProc, loc: var TLoc, isTemp = false; doInitObj = true) = +proc constructLoc(p: BProc, loc: var TLoc; doInitObj = true) = let kind = mapTypeChooser(loc) case mapType(p.config, loc.t, kind) of ctChar, ctBool, ctInt, ctInt8, ctInt16, ctInt32, ctInt64, @@ -424,11 +424,8 @@ proc constructLoc(p: BProc, loc: var TLoc, isTemp = false; doInitObj = true) = of ctNimStr, ctNimSeq: linefmt(p, cpsStmts, "$1.len = 0; $1.p = NIM_NIL;$n", [rdLoc(loc)]) of ctArray, ctStruct, ctNimOpenArray: - if not isTemp: - # don't use nimZeroMem for temporary values for performance if we can - # avoid it - linefmt(p, cpsStmts, "#nimZeroMem((void*)$1, sizeof($2));$n", - [addrLoc(p.config, loc), getTypeDesc(p.module, loc.t, kind)]) + linefmt(p, cpsStmts, "#nimZeroMem((void*)$1, sizeof($2));$n", + [addrLoc(p.config, loc), getTypeDesc(p.module, loc.t, kind)]) if doInitObj: genObjectInit(p, cpsStmts, loc.t, loc, constructObj) @@ -436,9 +433,9 @@ proc constructLoc(p: BProc, loc: var TLoc, isTemp = false; doInitObj = true) = unreachable() proc resetLoc(p: BProc, loc: var TLoc; doInitObj = true) = - # we always want to clear out the destination, so pass `false` for - # ``isTemp`` - constructLoc(p, loc, false, doInitObj) + # resetting the loc is achieved by constructing a new empty value inside + # it + constructLoc(p, loc, doInitObj) proc initLocalVar(p: BProc, v: PSym, immediateAsgn: bool) = if sfNoInit notin v.flags: @@ -452,7 +449,7 @@ proc initLocalVar(p: BProc, v: PSym, immediateAsgn: bool) = if not immediateAsgn: constructLoc(p, p.locals[v]) -proc getTemp(p: BProc, t: PType, result: var TLoc; needsInit=false) = +proc getTemp(p: BProc, t: PType, result: var TLoc) = inc(p.labels) result.r = "T" & rope(p.labels) & "_" linefmt(p, cpsLocals, "$1 $2;$n", [getTypeDesc(p.module, t, skVar), result.r]) @@ -460,7 +457,6 @@ proc getTemp(p: BProc, t: PType, result: var TLoc; needsInit=false) = result.lode = lodeTyp t result.storage = OnStack result.flags = {} - constructLoc(p, result, not needsInit) when false: # XXX Introduce a compiler switch in order to detect these easily. if getSize(p.config, t) > 1024 * 1024: