Skip to content

Commit

Permalink
mir: separate index/bound checks from array access (#1076)
Browse files Browse the repository at this point in the history
## Summary

* code generators don't decide index and bound check placement -- the
  AST -> MIR translation does
* data-flow analysis considers index and bound errors, fixing
  destructors not being run when the only possibly raised exception is
  an `IndexDefect`
* bound checks are performed for `toOpenArray` calls when using the JS
  backend
* MIR semantics: allow mutations of projection from aliases created
  with `Bind`

## Details

### Translation changes

A statement like `arr[i] = (inc i; 1)` was previously translated to:
```
def _1 = i
bind_mut _1 = arr[_1]
inc(name i)
_2 = 1
```
It is now translated to:
```
def _1 = i
chckIndex(arg arr, arg _1)
inc(name i)
arr[_1] = 1 
```

Similarily, `call(toOpenArray(a, lo, hi))` is now translated to:
```
chckBounds(arg a, arg lo, arg hi)
def _1 = toOpenArray(arg a, arg lo, arg hi)
call(arg _1)
```

### Consequences

* `mnkPathArray` never has side-effects now
* `mnkToSlice` never has side-effects now
* exceptional control-flow arising from index and bound checks encoded
  in the MIR, making it accessible for data-flow analysis 
* decision making regarding the initial placement of index and bound
  checks happens in a single place instead of in each code generator
* code generators no longer need to query the local `optBoundChecks`
  option

This is progress towards:
* an optimization pass that eliminates redundant index and bound checks
* making all lvalue expressions free of side-effects 
* all possible exceptional control-flow being encoded in the MIR
* restoring local `.push`/`.pop` support for all checks

### Implementation details

* index and bound checks are represented in the MIR via the new
  internal `mChckIndex` and `mChckBounds` magics
* `isPure` and `isStable` analysis is simplified, now that
  `mnkPathArray` is free of side-effects
* the operand in the array slot of an `nkBracketExpr` may be either an
  rvalue, lvalue, or literal. The new `mirgen.capture` procedure
  implements the logic for correctly capturing the operand  
* `jsgen` now implements code generation for bound checks

**Changes to the VM:**
* the `opcIndexCheck` opcode is introduced; it implements the index
  check (`mChckIndex`)
* the index check logic is removed from all handle-computation-only
  operation; the access checks of memory load/store operations ensures
  that memory safety is not compromised. Operations that write directly
  to a memory location (`opcWrStrIdx` and `opcWrArr`) still need to
  perform the index check 

### MIR semantics

For for both efficiency and implementation simplicity, aliases created
through the `Bind` statement support indirect mutations (e.g.,
`x.a = 1`), but not direct assignments or being passed to a mutable
by-name parameter.

Without this adjustment, `mirgen.capture` would need to know whether a
projection (e.g., `x[1]` or `x.a`) of the alias is used for mutation.

---------

Co-authored-by: Saem Ghani <saemghani+github@gmail.com>
  • Loading branch information
zerbina and saem authored Jan 9, 2024
1 parent bcedd54 commit 6ebe027
Show file tree
Hide file tree
Showing 17 changed files with 374 additions and 187 deletions.
6 changes: 6 additions & 0 deletions compiler/ast/ast_types.nim
Original file line number Diff line number Diff line change
Expand Up @@ -820,6 +820,12 @@ type
mChckRange
## chckRange(v, lower, upper); conversion + range check -- returns
## either the type-converted value or raises a defect
mChckIndex
## chckIndex(arr, idx); raise an error when `idx` is not within `arr`'s
## bounds
mChckBounds
## chckBounds(arr, lo, hi); raise an error when `lo` and/or `hi` is not
## within `arr`'s bounds
mSamePayload
## returns whether both seq/string operands share the same payload
mCopyInternal
Expand Down
5 changes: 0 additions & 5 deletions compiler/backend/ccgcalls.nim
Original file line number Diff line number Diff line change
Expand Up @@ -111,8 +111,6 @@ proc fixupCall(p: BProc, le, ri: CgNode, d: var TLoc,
line(p, cpsStmts, pl)
exitCall(p, ri[0], canRaise)

proc genBoundsCheck(p: BProc; arr, a, b: TLoc)

proc reifiedOpenArray(p: BProc, n: CgNode): bool {.inline.} =
# all non-parameter openArrays are reified
not(n.kind == cnkLocal and p.locals[n.local].k == locParam)
Expand All @@ -122,9 +120,6 @@ proc genOpenArraySlice(p: BProc; q: CgNode; formalType, destType: PType): (Rope,
initLocExpr(p, q[0], a)
initLocExpr(p, q[1], b)
initLocExpr(p, q[2], c)
# but first produce the required index checks:
if optBoundsCheck in p.options:
genBoundsCheck(p, a, b, c)
let ty = skipTypes(a.t, abstractVar+{tyPtr, tyRef, tyLent})
let dest = getTypeDesc(p.module, destType)
let lengthExpr = "($1)-($2)+1" % [rdLoc(c), rdLoc(b)]
Expand Down
81 changes: 46 additions & 35 deletions compiler/backend/ccgexprs.nim
Original file line number Diff line number Diff line change
Expand Up @@ -692,29 +692,6 @@ proc genArrayElem(p: BProc, n, x, y: CgNode, d: var TLoc) =
initLocExpr(p, y, b)
var ty = skipTypes(a.t, abstractVarRange + abstractPtrs + tyUserTypeClasses)
var first = intLiteral(firstOrd(p.config, ty))
# emit range check:
if optBoundsCheck in p.options and ty.kind != tyUncheckedArray:
if not isDeepConstExpr(y):
# semantic pass has already checked for const index expressions
if firstOrd(p.config, ty) == 0 and lastOrd(p.config, ty) >= 0:
if (firstOrd(p.config, b.t) < firstOrd(p.config, ty)) or
(lastOrd(p.config, b.t) > lastOrd(p.config, ty)):

linefmt(p, cpsStmts, "if ((NU)($1) > (NU)($2)){ #raiseIndexError2($1, $2); $3}$n",
[rdCharLoc(b), intLiteral(lastOrd(p.config, ty)), raiseInstr(p)])
else:
linefmt(p, cpsStmts, "if ($1 < $2 || $1 > $3){ #raiseIndexError3($1, $2, $3); $4}$n",
[rdCharLoc(b), first, intLiteral(lastOrd(p.config, ty)), raiseInstr(p)])
else:
let idx = getOrdValue(y)
if idx < firstOrd(p.config, ty) or lastOrd(p.config, ty) < idx:
localReport(
p.config, x.info, SemReport(
kind: rsemStaticOutOfBounds,
indexSpec: (
usedIdx: idx,
minIdx: firstOrd(p.config, ty),
maxIdx: lastOrd(p.config, ty))))

d.inheritLocation(a)
putIntoDest(p, d, n,
Expand Down Expand Up @@ -760,22 +737,49 @@ proc genBoundsCheck(p: BProc; arr, a, b: TLoc) =
else:
unreachable(ty.kind)

proc genIndexCheck(p: BProc; x: CgNode, arr, idx: TLoc) =
## Emits the index check logic + subsequent raise operation. `x` is
## the array expression the `arr` loc resulted from from.
let ty = arr.t.skipTypes(abstractVar + tyUserTypeClasses +
{tyPtr, tyRef, tyLent, tyVar})
case ty.kind
of tyArray:
var first = intLiteral(firstOrd(p.config, ty))
if firstOrd(p.config, ty) == 0 and lastOrd(p.config, ty) >= 0:
if firstOrd(p.config, idx.t) < firstOrd(p.config, ty) or
lastOrd(p.config, idx.t) > lastOrd(p.config, ty):
linefmt(p, cpsStmts, "if ((NU)($1) > (NU)($2)){ #raiseIndexError2($1, $2); $3}$n",
[rdCharLoc(idx), intLiteral(lastOrd(p.config, ty)),
raiseInstr(p)])
else:
linefmt(p, cpsStmts, "if ($1 < $2 || $1 > $3){ #raiseIndexError3($1, $2, $3); $4}$n",
[rdCharLoc(idx), first, intLiteral(lastOrd(p.config, ty)),
raiseInstr(p)])
of tySequence, tyString:
linefmt(p, cpsStmts,
"if ((NU)($1) >= (NU)$2){ #raiseIndexError2($1,$2-1); $3}$n",
[rdCharLoc(idx), lenExpr(p, arr), raiseInstr(p)])
of tyOpenArray, tyVarargs:
if reifiedOpenArray(p, x):
linefmt(p, cpsStmts, "if ((NU)($1) >= (NU)($2.Field1)){ #raiseIndexError2($1,$2.Field1-1); $3}$n",
[rdCharLoc(idx), rdLoc(arr), raiseInstr(p)])
else:
linefmt(p, cpsStmts, "if ((NU)($1) >= (NU)($2Len_0)){ #raiseIndexError2($1,$2Len_0-1); $3}$n",
[rdCharLoc(idx), rdLoc(arr), raiseInstr(p)])
of tyCstring:
discard "no bound checks"
else:
unreachable()

proc genOpenArrayElem(p: BProc, n, x, y: CgNode, d: var TLoc) =
var a, b: TLoc
initLocExpr(p, x, a)
initLocExpr(p, y, b)
if not reifiedOpenArray(p, x):
# emit range check:
if optBoundsCheck in p.options:
linefmt(p, cpsStmts, "if ((NU)($1) >= (NU)($2Len_0)){ #raiseIndexError2($1,$2Len_0-1); $3}$n",
[rdCharLoc(b), rdLoc(a), raiseInstr(p)]) # BUGFIX: ``>=`` and not ``>``!
inheritLocation(d, a)
putIntoDest(p, d, n,
ropecg(p.module, "$1[$2]", [rdLoc(a), rdCharLoc(b)]), a.storage)
else:
if optBoundsCheck in p.options:
linefmt(p, cpsStmts, "if ((NU)($1) >= (NU)($2.Field1)){ #raiseIndexError2($1,$2.Field1-1); $3}$n",
[rdCharLoc(b), rdLoc(a), raiseInstr(p)]) # BUGFIX: ``>=`` and not ``>``!
inheritLocation(d, a)
putIntoDest(p, d, n,
ropecg(p.module, "$1.Field0[$2]", [rdLoc(a), rdCharLoc(b)]), a.storage)
Expand All @@ -786,11 +790,7 @@ proc genSeqElem(p: BProc, n, x, y: CgNode, d: var TLoc) =
initLocExpr(p, y, b)
var ty = skipTypes(a.t, abstractVarRange)
if ty.kind in {tyRef, tyPtr}:
ty = skipTypes(ty.lastSon, abstractVarRange) # emit range check:
if optBoundsCheck in p.options:
linefmt(p, cpsStmts,
"if ((NU)($1) >= (NU)$2){ #raiseIndexError2($1,$2-1); $3}$n",
[rdCharLoc(b), lenExpr(p, a), raiseInstr(p)])
ty = skipTypes(ty.lastSon, abstractVarRange)
if d.k == locNone: d.storage = OnHeap
if skipTypes(a.t, abstractVar).kind in {tyRef, tyPtr}:
a.r = ropecg(p.module, "(*$1)", [a.r])
Expand Down Expand Up @@ -1873,6 +1873,17 @@ proc genMagicExpr(p: BProc, e: CgNode, d: var TLoc, op: TMagic) =
linefmt(p, cpsStmts, "$1 = ($2)($3);$n", [a.r, typ, rdLoc(b)])
of mChckRange:
genRangeChck(p, e, d)
of mChckIndex:
var arr, a: TLoc
initLocExpr(p, e[1], arr)
initLocExpr(p, e[2], a)
genIndexCheck(p, e[1], arr, a)
of mChckBounds:
var arr, a, b: TLoc
initLocExpr(p, e[1], arr)
initLocExpr(p, e[2], a)
initLocExpr(p, e[3], b)
genBoundsCheck(p, arr, a, b)
of mSamePayload:
var a, b: TLoc
initLocExpr(p, e[1], a)
Expand Down
46 changes: 37 additions & 9 deletions compiler/backend/jsgen.nim
Original file line number Diff line number Diff line change
Expand Up @@ -384,6 +384,11 @@ include jstypes
proc gen(p: PProc, n: CgNode, r: var TCompRes)
proc genStmt(p: PProc, n: CgNode)

proc gen(p: PProc, n: CgNode): TCompRes {.inline.} =
## Convenience procedure that returns instead of requiring a ``var``
## parameter.
gen(p, n, result)

proc useMagic(p: PProc, name: string) =
if name.len == 0: return
var s = magicsys.getCompilerProc(p.module.graph, name)
Expand Down Expand Up @@ -1092,19 +1097,12 @@ proc genArrayAddr(p: PProc, n: CgNode, r: var TCompRes) =
gen(p, m[0], a)
gen(p, m[1], b)
#internalAssert p.config, a.typ != etyBaseIndex and b.typ != etyBaseIndex
let x, tmp = a.rdLoc
let x = a.rdLoc
r.address = x
var typ = skipTypes(m[0].typ, abstractPtrs)
if typ.kind == tyArray:
first = firstOrd(p.config, typ[0])
if optBoundsCheck in p.options:
useMagic(p, "chckIndx")
if first == 0: # save a couple chars
r.res = "chckIndx($1, 0, ($2).length - 1)" % [b.res, tmp]
else:
r.res = "chckIndx($1, $2, ($3).length + ($2) - 1) - ($2)" % [
b.res, rope(first), tmp]
elif first != 0:
if first != 0:
r.res = "($1) - ($2)" % [b.res, rope(first)]
else:
r.res = b.res
Expand Down Expand Up @@ -1943,6 +1941,36 @@ proc genMagic(p: PProc, n: CgNode, r: var TCompRes) =
r.kind = resExpr
of mChckRange:
genRangeChck(p, n, r)
of mChckIndex:
let
first = firstOrd(p.config, n[1].typ)
arr = gen(p, n[1])
idx = gen(p, n[2])

useMagic(p, "chckIndx")
if first == 0:
lineF(p, "(chckIndx($2, 0, ($1).length - 1));$n",
[rdLoc(arr), rdLoc(idx)])
else:
# can only be a statically-sized array
lineF(p, "(chckIndx($1, $2, $3));$n",
[rdLoc(idx), rope(first), rope(lastOrd(p.config, n[1].typ))])
of mChckBounds:
let
first = firstOrd(p.config, n[1].typ)
arr = gen(p, n[1])
lo = gen(p, n[2])
hi = gen(p, n[3])

useMagic(p, "chckBounds")
if first == 0:
lineF(p, "chckBounds($2, $3, 0, ($1).length - 1);$n",
[rdLoc(arr), rdLoc(lo), rdLoc(hi)])
else:
# can only be a statically-sized array
lineF(p, "(chckBounds($1, $2, $3, $4));$n",
[rdLoc(lo), rdLoc(hi), rope(first),
rope(lastOrd(p.config, n[1].typ))])
else:
genCall(p, n, r)
#else internalError(p.config, e.info, 'genMagic: ' + magicToStr[op]);
Expand Down
103 changes: 77 additions & 26 deletions compiler/mir/mirgen.nim
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,9 @@ import
compiler/front/[
options
],
compiler/sem/[
ast_analysis
],
compiler/utils/[
containers,
idioms
Expand Down Expand Up @@ -266,15 +269,10 @@ func isPure(tree: MirTree, n: NodePosition): bool =
of mnkPathArray:
let arr = NodePosition tree.operand(n, 0)
case tree[arr].typ.skipTypes(abstractVar).kind
of tyArray:
# static array; pure when the array expression is pure and no index
# errors are possible
tree[n, 1].kind == mnkLiteral and isPure(tree, arr)
of tyUncheckedArray:
# no index errors are possible
isPure(tree, arr)
of tyString, tySequence, tyOpenArray, tyVarargs, tyCstring:
false # dynamically-sized arrays; index errors are possible
of tyArray, tyUncheckedArray, tyString, tySequence, tyOpenArray, tyVarargs,
tyCstring:
# pure when the both the array and index operands are pure
isPure(tree, tree.child(n, 1)) and isPure(tree, arr)
else:
unreachable(tree[arr].kind)
else:
Expand All @@ -289,14 +287,13 @@ func isStable(tree: MirTree, n: NodePosition): bool =
of mnkPathArray:
let arr = NodePosition tree.operand(n, 0)
case tree[arr].typ.skipTypes(abstractVar).kind
of tyArray:
# static arrays; stable when no index errors are possible
tree[n, 1].kind == mnkLiteral and isStable(tree, arr)
of tyUncheckedArray:
# cannot raise
isStable(tree, arr)
of tyArray, tyUncheckedArray:
# static arrays
isPure(tree, tree.child(n, 1)) and isStable(tree, arr)
of tyString, tySequence, tyOpenArray, tyVarargs, tyCstring:
false # run-time arrays; could always raise
# run-time arrays; stable when neither the array nor index operand
# depends on mutable state
isPure(tree, tree.child(n, 1)) and isPure(tree, arr)
else:
unreachable()
of mnkPathNamed, mnkPathPos:
Expand Down Expand Up @@ -528,6 +525,29 @@ proc genRd(c: var TCtx, n: PNode; consume=false): Value =
# either an rvalue or some non-pure lvalue -> capture
result = captureInTemp(c, f, consume)

proc capture(c: var TCtx, n: PNode): Value =
## If not a stable lvalue expression, captures the result of the
## expression `n` in a temporary.
## * rvalue expression are captured in temporaries
## * lvalue expressions are captured as non-assignable aliases
## * literals are not captured
const Skip = abstractInstTypeClass + {tyVar}
let f = c.builder.push: genx(c, n)
case detectKind(c.builder.staging, f.pos, sink=false)
of Rvalue, OwnedRvalue:
captureInTemp(c, f, sink=false)
of Literal:
c.builder.popSingle(f)
of Lvalue:
if c.builder.staging[f.pos].kind in Atoms:
# atoms are always stable and can thus be used directly
c.builder.popSingle(f)
elif n.typ.skipTypes(Skip).kind in {tyOpenArray, tyVarargs}:
# don't create an alias for openArrays, a copy of the view suffices
captureInTemp(c, f, sink=false)
else:
captureName(c, f, mutable=false)

proc genOperand(c: var TCtx, n: PNode; sink = false) =
## Translates expression AST `n` to a MIR operand expression. `sink` signals
## whether the result is used in a sink context -- the flag is propagated
Expand Down Expand Up @@ -622,9 +642,23 @@ proc genBracketExpr(c: var TCtx, n: PNode) =
genOperand(c, n[0])
of tyArray, tySequence, tyOpenArray, tyVarargs, tyUncheckedArray, tyString,
tyCstring:
c.buildOp mnkPathArray, elemType(typ):
genOperand(c, n[0])
c.use genRd(c, n[1])
if optBoundsCheck in c.userOptions and needsIndexCheck(n[0], n[1]):
let
arr = capture(c, n[0])
idx = genRd(c, n[1])

c.buildStmt mnkVoid:
c.buildDefectMagicCall mChckIndex, typeOrVoid(c, nil):
c.emitByVal arr
c.emitByVal idx

c.buildOp mnkPathArray, elemType(typ):
c.use arr
c.use idx
else:
c.buildOp mnkPathArray, elemType(typ):
genOperand(c, n[0])
c.use genRd(c, n[1])
else: unreachable()

proc genVariantAccess(c: var TCtx, n: PNode) =
Expand Down Expand Up @@ -1017,13 +1051,30 @@ proc genMagic(c: var TCtx, n: PNode; m: TMagic) =
genx(c, n[1])
c.userOptions = orig
of mSlice:
c.buildTree mnkToSlice, n.typ:
# XXX: a HiddenStdConv erroneously ends up in the array position
# sometimes, which would, if kept, lead to index errors when the
# array doesn't start at index 0
genOperand(c, skipConv(n[1]))
genArgExpression(c, n[2], sink=false)
genArgExpression(c, n[3], sink=false)
# XXX: a HiddenStdConv erroneously ends up in the array position
# sometimes, which would, if kept, lead to index errors when the
# array doesn't start at index 0
let arr = skipConv(n[1])
if optBoundsCheck in c.userOptions and needsBoundCheck(arr, n[2], n[3]):
let
arr = capture(c, arr)
lo = genRd(c, n[2])
hi = genRd(c, n[3])
c.buildStmt mnkVoid:
c.buildDefectMagicCall mChckBounds, typeOrVoid(c, nil):
c.emitByVal arr
c.emitByVal lo
c.emitByVal hi

c.buildTree mnkToSlice, n.typ:
c.use arr
c.use lo
c.use hi
else:
c.buildTree mnkToSlice, n.typ:
genOperand(c, arr)
genArgExpression(c, n[2], sink=false)
genArgExpression(c, n[3], sink=false)

# magics that use incomplete symbols (most of them are generated by
# ``liftdestructors``):
Expand Down
9 changes: 5 additions & 4 deletions compiler/mir/mirtrees.nim
Original file line number Diff line number Diff line change
Expand Up @@ -92,10 +92,11 @@ type
# ownership in ``mirgen``. There's only going to be the ``Def`` kind
mnkDefUnpack ## intermediate hack required by destructor injection. Don't
## use
mnkBind ## introduces an alias that may only be used for read access.
## The source expression must not be empty
mnkBindMut ## introduces an alias that may be used for write access.
## The source expression must not be empty
mnkBind ## introduces an alias that may be used for read/write
## access, but not for direct assignments. The source
## expression must not be empty
mnkBindMut ## introduces an alias that may be used for read/write access
## and assignments. The source expression must not be empty

mnkFastAsgn ## assignment that cannot be rewritten into copy, move, or
## hook call
Expand Down
Loading

0 comments on commit 6ebe027

Please sign in to comment.