Skip to content

Commit

Permalink
cgen: fix redundant initialization of temporaries (#896)
Browse files Browse the repository at this point in the history
## Summary

Fix multiple places in the C code generator where temporaries inserted
by the C code generator (and not earlier) were redundantly initialized
twice.

While this is unlikely to have an impact on the performance of the
generated binaries, it nonetheless reduces the pressure on the C
compiler's optimizer, reduces the amount of work for the NimSkull
compiler, and also leads to the C artifacts becoming a bit smaller. 

## Details

The C code generator creates new temporary variables itself, usually for
the purpose of upholding evaluation order expectations. It allocates
them through `cgen.getTemp`, which allocates a name, emits the
declaration, and optionally (indicated by the `needsInit` parameter)
default-initializes the location.

### Redundant initializations

* for calls that use the return-value optimization and where the
  destination location is empty (`fixupCall`/`genClosureCall`; this
  happens, for example, for `f(rvoCall())`), the created temporary was
  initialized. This is redundant, however, as the called procedure is
  responsible for resetting the result location (if needed)
* for potentially-raising calls that require a temporary to behave as
  expected (`fixupCall`), the temporary was initialized, but this is
  unnecessary, as its immediately assigned to after
* for object constructions where the destination locations is empty
  (`genObjConst`; happens, for example, for `f(Obj(...)))`, the type
  header fields were initialized twice. This is because `getTemp` calls
  `constructLoc`, which by default always initializes type headers

In addition, `constructLoc` (which was unconditionally used by
`getTemp`) always assigned the zero value to non-complex locations
(ints, floats, pointers, seqs, etc.), even if `hasTemp` is false,
causing many redundant assignments.

### Improved handling

Apart from the aforementioned places where setting `needsInit` to 'true'
was unnecessary, there was only a single place where the `needsInit`
parameter was used (`cgen.resetLoc`). Therefore, the parameter plus
the `constructLoc` call are removed from `getTemp`; its usage sites are
now responsible for initializing the temporary. The `isTemp` parameter
for `constructLoc` is also not needed anymore and is thus removed.

All usage sites of `getTemp` are audited for whether they rely on the
initialization of scalars or type fields previously performed by
`getTemp`, but none does.
  • Loading branch information
zerbina authored Sep 12, 2023
1 parent f6e6de0 commit c5e1aa5
Show file tree
Hide file tree
Showing 3 changed files with 20 additions and 25 deletions.
19 changes: 9 additions & 10 deletions compiler/backend/ccgcalls.nim
Original file line number Diff line number Diff line change
Expand Up @@ -100,10 +100,10 @@ proc fixupCall(p: BProc, le, ri: CgNode, d: var TLoc,
if d.k notin {locTemp, locNone}:
reportObservableStore(p, le, ri)

if d.k == locNone: getTemp(p, typ[0], d, needsInit=true)
elif d.k notin {locTemp} and not hasNoInit(ri):
# reset before pass as 'result' var:
discard "resetLoc(p, d)"
# resetting the result location is the responsibility of the called
# procedure
if d.k == locNone:
getTemp(p, typ[0], d)
pl.add(addrLoc(p.config, d))
pl.add(~");$n")
line(p, cpsStmts, pl)
Expand All @@ -120,7 +120,7 @@ proc fixupCall(p: BProc, le, ri: CgNode, d: var TLoc,
exitCall(p, ri[0], canRaise)
else:
var tmp: TLoc
getTemp(p, typ[0], tmp, needsInit=true)
getTemp(p, typ[0], tmp)
var list: TLoc
initLoc(list, locCall, d.lode, OnUnknown)
list.r = pl
Expand Down Expand Up @@ -237,7 +237,7 @@ proc openArrayLoc(p: BProc, formalType: PType, n: CgNode): Rope =
else: internalError(p.config, "openArrayLoc: " & typeToString(a.t))

proc literalsNeedsTmp(p: BProc, a: TLoc): TLoc =
getTemp(p, a.lode.typ, result, needsInit=false)
getTemp(p, a.lode.typ, result)
genAssignment(p, result, a)

proc genArgStringToCString(p: BProc, n: CgNode): Rope {.inline.} =
Expand Down Expand Up @@ -333,11 +333,10 @@ proc genClosureCall(p: BProc, le, ri: CgNode, d: var TLoc) =
if d.k notin {locTemp, locNone}:
reportObservableStore(p, le, ri)

# resetting the result location is the responsibility of the called
# procedure
if d.k == locNone:
getTemp(p, typ[0], d, needsInit=true)
elif d.k notin {locTemp} and not hasNoInit(ri):
# reset before pass as 'result' var:
discard "resetLoc(p, d)"
getTemp(p, typ[0], d)
pl.add(addrLoc(p.config, d))
genCallPattern()
exitCall(p, ri[0], canRaise)
Expand Down
8 changes: 4 additions & 4 deletions compiler/backend/ccgexprs.nim
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,6 @@ proc genSetNode(p: BProc, n: CgNode): Rope =

proc genOpenArrayConv(p: BProc; d: TLoc; a: TLoc) =
assert d.k != locNone
# getTemp(p, d.t, d)

case a.t.skipTypes(abstractVar).kind
of tyOpenArray, tyVarargs:
Expand Down Expand Up @@ -909,8 +908,9 @@ proc genStrAppend(p: BProc, e: CgNode, d: var TLoc) =
p.s(cpsStmts).add appends

proc genDefault(p: BProc; n: CgNode; d: var TLoc) =
if d.k == locNone: getTemp(p, n.typ, d, needsInit=true)
else: resetLoc(p, d)
if d.k == locNone:
getTemp(p, n.typ, d)
resetLoc(p, d)

proc rawGenNew(p: BProc, a: var TLoc, sizeExpr: Rope; needsInit: bool; doInitObj = true) =
var sizeExpr = sizeExpr
Expand Down Expand Up @@ -952,7 +952,7 @@ proc genNewSeqOfCap(p: BProc; e: CgNode; d: var TLoc) =
var a: TLoc
initLocExpr(p, e[1], a)
block:
if d.k == locNone: getTemp(p, e.typ, d, needsInit=false)
if d.k == locNone: getTemp(p, e.typ, d)
linefmt(p, cpsStmts, "$1.len = 0; $1.p = ($4*) #newSeqPayload($2, sizeof($3), NIM_ALIGNOF($3));$n",
[d.rdLoc, a.rdLoc, getTypeDesc(p.module, seqtype.lastSon),
getSeqPayloadType(p.module, seqtype),
Expand Down
18 changes: 7 additions & 11 deletions compiler/backend/cgen.nim
Original file line number Diff line number Diff line change
Expand Up @@ -410,7 +410,7 @@ proc genObjectInit(p: BProc, section: TCProcSection, t: PType, a: TLoc,
let tmp = defaultValueExpr(p, t, a.lode.info)
genAssignment(p, a, tmp)

proc constructLoc(p: BProc, loc: var TLoc, isTemp = false; doInitObj = true) =
proc constructLoc(p: BProc, loc: var TLoc; doInitObj = true) =
let kind = mapTypeChooser(loc)
case mapType(p.config, loc.t, kind)
of ctChar, ctBool, ctInt, ctInt8, ctInt16, ctInt32, ctInt64,
Expand All @@ -424,21 +424,18 @@ proc constructLoc(p: BProc, loc: var TLoc, isTemp = false; doInitObj = true) =
of ctNimStr, ctNimSeq:
linefmt(p, cpsStmts, "$1.len = 0; $1.p = NIM_NIL;$n", [rdLoc(loc)])
of ctArray, ctStruct, ctNimOpenArray:
if not isTemp:
# don't use nimZeroMem for temporary values for performance if we can
# avoid it
linefmt(p, cpsStmts, "#nimZeroMem((void*)$1, sizeof($2));$n",
[addrLoc(p.config, loc), getTypeDesc(p.module, loc.t, kind)])
linefmt(p, cpsStmts, "#nimZeroMem((void*)$1, sizeof($2));$n",
[addrLoc(p.config, loc), getTypeDesc(p.module, loc.t, kind)])

if doInitObj:
genObjectInit(p, cpsStmts, loc.t, loc, constructObj)
of ctVoid:
unreachable()

proc resetLoc(p: BProc, loc: var TLoc; doInitObj = true) =
# we always want to clear out the destination, so pass `false` for
# ``isTemp``
constructLoc(p, loc, false, doInitObj)
# resetting the loc is achieved by constructing a new empty value inside
# it
constructLoc(p, loc, doInitObj)

proc initLocalVar(p: BProc, v: PSym, immediateAsgn: bool) =
if sfNoInit notin v.flags:
Expand All @@ -452,15 +449,14 @@ proc initLocalVar(p: BProc, v: PSym, immediateAsgn: bool) =
if not immediateAsgn:
constructLoc(p, p.locals[v])

proc getTemp(p: BProc, t: PType, result: var TLoc; needsInit=false) =
proc getTemp(p: BProc, t: PType, result: var TLoc) =
inc(p.labels)
result.r = "T" & rope(p.labels) & "_"
linefmt(p, cpsLocals, "$1 $2;$n", [getTypeDesc(p.module, t, skVar), result.r])
result.k = locTemp
result.lode = lodeTyp t
result.storage = OnStack
result.flags = {}
constructLoc(p, result, not needsInit)
when false:
# XXX Introduce a compiler switch in order to detect these easily.
if getSize(p.config, t) > 1024 * 1024:
Expand Down

0 comments on commit c5e1aa5

Please sign in to comment.