From 6ebe02736bdc7fbf59e6a9cf85b6788d70581023 Mon Sep 17 00:00:00 2001 From: zerbina <100542850+zerbina@users.noreply.github.com> Date: Wed, 10 Jan 2024 00:20:37 +0100 Subject: [PATCH] mir: separate index/bound checks from array access (#1076) ## Summary * code generators don't decide index and bound check placement -- the AST -> MIR translation does * data-flow analysis considers index and bound errors, fixing destructors not being run when the only possibly raised exception is an `IndexDefect` * bound checks are performed for `toOpenArray` calls when using the JS backend * MIR semantics: allow mutations of projection from aliases created with `Bind` ## Details ### Translation changes A statement like `arr[i] = (inc i; 1)` was previously translated to: ``` def _1 = i bind_mut _1 = arr[_1] inc(name i) _2 = 1 ``` It is now translated to: ``` def _1 = i chckIndex(arg arr, arg _1) inc(name i) arr[_1] = 1 ``` Similarily, `call(toOpenArray(a, lo, hi))` is now translated to: ``` chckBounds(arg a, arg lo, arg hi) def _1 = toOpenArray(arg a, arg lo, arg hi) call(arg _1) ``` ### Consequences * `mnkPathArray` never has side-effects now * `mnkToSlice` never has side-effects now * exceptional control-flow arising from index and bound checks encoded in the MIR, making it accessible for data-flow analysis * decision making regarding the initial placement of index and bound checks happens in a single place instead of in each code generator * code generators no longer need to query the local `optBoundChecks` option This is progress towards: * an optimization pass that eliminates redundant index and bound checks * making all lvalue expressions free of side-effects * all possible exceptional control-flow being encoded in the MIR * restoring local `.push`/`.pop` support for all checks ### Implementation details * index and bound checks are represented in the MIR via the new internal `mChckIndex` and `mChckBounds` magics * `isPure` and `isStable` analysis is simplified, now that `mnkPathArray` is free of side-effects * the operand in the array slot of an `nkBracketExpr` may be either an rvalue, lvalue, or literal. The new `mirgen.capture` procedure implements the logic for correctly capturing the operand * `jsgen` now implements code generation for bound checks **Changes to the VM:** * the `opcIndexCheck` opcode is introduced; it implements the index check (`mChckIndex`) * the index check logic is removed from all handle-computation-only operation; the access checks of memory load/store operations ensures that memory safety is not compromised. Operations that write directly to a memory location (`opcWrStrIdx` and `opcWrArr`) still need to perform the index check ### MIR semantics For for both efficiency and implementation simplicity, aliases created through the `Bind` statement support indirect mutations (e.g., `x.a = 1`), but not direct assignments or being passed to a mutable by-name parameter. Without this adjustment, `mirgen.capture` would need to know whether a projection (e.g., `x[1]` or `x.a`) of the alias is used for mutation. --------- Co-authored-by: Saem Ghani --- compiler/ast/ast_types.nim | 6 + compiler/backend/ccgcalls.nim | 5 - compiler/backend/ccgexprs.nim | 81 ++++++++------ compiler/backend/jsgen.nim | 46 ++++++-- compiler/mir/mirgen.nim | 103 +++++++++++++----- compiler/mir/mirtrees.nim | 9 +- compiler/sem/ast_analysis.nim | 40 ++++++- compiler/vm/vm.nim | 18 +-- compiler/vm/vm_enums.nim | 1 + compiler/vm/vmgen.nim | 7 ++ doc/mir.rst | 10 +- lib/system/jssys.nim | 7 ++ .../tdestruction_when_checks_failed.nim | 35 ++++-- tests/lang_objects/destructor/tv2_cast.nim | 1 + tests/magics/ttoopenarray.nim | 102 +++++++++++++++++ .../misc/ttoopenarray_array_bound_checks.nim | 1 - tests/system/tsystem_misc.nim | 89 +-------------- 17 files changed, 374 insertions(+), 187 deletions(-) create mode 100644 tests/magics/ttoopenarray.nim diff --git a/compiler/ast/ast_types.nim b/compiler/ast/ast_types.nim index 17d2a63b410..6db9670c146 100644 --- a/compiler/ast/ast_types.nim +++ b/compiler/ast/ast_types.nim @@ -820,6 +820,12 @@ type mChckRange ## chckRange(v, lower, upper); conversion + range check -- returns ## either the type-converted value or raises a defect + mChckIndex + ## chckIndex(arr, idx); raise an error when `idx` is not within `arr`'s + ## bounds + mChckBounds + ## chckBounds(arr, lo, hi); raise an error when `lo` and/or `hi` is not + ## within `arr`'s bounds mSamePayload ## returns whether both seq/string operands share the same payload mCopyInternal diff --git a/compiler/backend/ccgcalls.nim b/compiler/backend/ccgcalls.nim index ac4d806f5c7..81a77db3ad6 100644 --- a/compiler/backend/ccgcalls.nim +++ b/compiler/backend/ccgcalls.nim @@ -111,8 +111,6 @@ proc fixupCall(p: BProc, le, ri: CgNode, d: var TLoc, line(p, cpsStmts, pl) exitCall(p, ri[0], canRaise) -proc genBoundsCheck(p: BProc; arr, a, b: TLoc) - proc reifiedOpenArray(p: BProc, n: CgNode): bool {.inline.} = # all non-parameter openArrays are reified not(n.kind == cnkLocal and p.locals[n.local].k == locParam) @@ -122,9 +120,6 @@ proc genOpenArraySlice(p: BProc; q: CgNode; formalType, destType: PType): (Rope, initLocExpr(p, q[0], a) initLocExpr(p, q[1], b) initLocExpr(p, q[2], c) - # but first produce the required index checks: - if optBoundsCheck in p.options: - genBoundsCheck(p, a, b, c) let ty = skipTypes(a.t, abstractVar+{tyPtr, tyRef, tyLent}) let dest = getTypeDesc(p.module, destType) let lengthExpr = "($1)-($2)+1" % [rdLoc(c), rdLoc(b)] diff --git a/compiler/backend/ccgexprs.nim b/compiler/backend/ccgexprs.nim index c80a03e7c06..3aae65a893f 100644 --- a/compiler/backend/ccgexprs.nim +++ b/compiler/backend/ccgexprs.nim @@ -692,29 +692,6 @@ proc genArrayElem(p: BProc, n, x, y: CgNode, d: var TLoc) = initLocExpr(p, y, b) var ty = skipTypes(a.t, abstractVarRange + abstractPtrs + tyUserTypeClasses) var first = intLiteral(firstOrd(p.config, ty)) - # emit range check: - if optBoundsCheck in p.options and ty.kind != tyUncheckedArray: - if not isDeepConstExpr(y): - # semantic pass has already checked for const index expressions - if firstOrd(p.config, ty) == 0 and lastOrd(p.config, ty) >= 0: - if (firstOrd(p.config, b.t) < firstOrd(p.config, ty)) or - (lastOrd(p.config, b.t) > lastOrd(p.config, ty)): - - linefmt(p, cpsStmts, "if ((NU)($1) > (NU)($2)){ #raiseIndexError2($1, $2); $3}$n", - [rdCharLoc(b), intLiteral(lastOrd(p.config, ty)), raiseInstr(p)]) - else: - linefmt(p, cpsStmts, "if ($1 < $2 || $1 > $3){ #raiseIndexError3($1, $2, $3); $4}$n", - [rdCharLoc(b), first, intLiteral(lastOrd(p.config, ty)), raiseInstr(p)]) - else: - let idx = getOrdValue(y) - if idx < firstOrd(p.config, ty) or lastOrd(p.config, ty) < idx: - localReport( - p.config, x.info, SemReport( - kind: rsemStaticOutOfBounds, - indexSpec: ( - usedIdx: idx, - minIdx: firstOrd(p.config, ty), - maxIdx: lastOrd(p.config, ty)))) d.inheritLocation(a) putIntoDest(p, d, n, @@ -760,22 +737,49 @@ proc genBoundsCheck(p: BProc; arr, a, b: TLoc) = else: unreachable(ty.kind) +proc genIndexCheck(p: BProc; x: CgNode, arr, idx: TLoc) = + ## Emits the index check logic + subsequent raise operation. `x` is + ## the array expression the `arr` loc resulted from from. + let ty = arr.t.skipTypes(abstractVar + tyUserTypeClasses + + {tyPtr, tyRef, tyLent, tyVar}) + case ty.kind + of tyArray: + var first = intLiteral(firstOrd(p.config, ty)) + if firstOrd(p.config, ty) == 0 and lastOrd(p.config, ty) >= 0: + if firstOrd(p.config, idx.t) < firstOrd(p.config, ty) or + lastOrd(p.config, idx.t) > lastOrd(p.config, ty): + linefmt(p, cpsStmts, "if ((NU)($1) > (NU)($2)){ #raiseIndexError2($1, $2); $3}$n", + [rdCharLoc(idx), intLiteral(lastOrd(p.config, ty)), + raiseInstr(p)]) + else: + linefmt(p, cpsStmts, "if ($1 < $2 || $1 > $3){ #raiseIndexError3($1, $2, $3); $4}$n", + [rdCharLoc(idx), first, intLiteral(lastOrd(p.config, ty)), + raiseInstr(p)]) + of tySequence, tyString: + linefmt(p, cpsStmts, + "if ((NU)($1) >= (NU)$2){ #raiseIndexError2($1,$2-1); $3}$n", + [rdCharLoc(idx), lenExpr(p, arr), raiseInstr(p)]) + of tyOpenArray, tyVarargs: + if reifiedOpenArray(p, x): + linefmt(p, cpsStmts, "if ((NU)($1) >= (NU)($2.Field1)){ #raiseIndexError2($1,$2.Field1-1); $3}$n", + [rdCharLoc(idx), rdLoc(arr), raiseInstr(p)]) + else: + linefmt(p, cpsStmts, "if ((NU)($1) >= (NU)($2Len_0)){ #raiseIndexError2($1,$2Len_0-1); $3}$n", + [rdCharLoc(idx), rdLoc(arr), raiseInstr(p)]) + of tyCstring: + discard "no bound checks" + else: + unreachable() + proc genOpenArrayElem(p: BProc, n, x, y: CgNode, d: var TLoc) = var a, b: TLoc initLocExpr(p, x, a) initLocExpr(p, y, b) if not reifiedOpenArray(p, x): - # emit range check: - if optBoundsCheck in p.options: - linefmt(p, cpsStmts, "if ((NU)($1) >= (NU)($2Len_0)){ #raiseIndexError2($1,$2Len_0-1); $3}$n", - [rdCharLoc(b), rdLoc(a), raiseInstr(p)]) # BUGFIX: ``>=`` and not ``>``! inheritLocation(d, a) putIntoDest(p, d, n, ropecg(p.module, "$1[$2]", [rdLoc(a), rdCharLoc(b)]), a.storage) else: - if optBoundsCheck in p.options: - linefmt(p, cpsStmts, "if ((NU)($1) >= (NU)($2.Field1)){ #raiseIndexError2($1,$2.Field1-1); $3}$n", - [rdCharLoc(b), rdLoc(a), raiseInstr(p)]) # BUGFIX: ``>=`` and not ``>``! inheritLocation(d, a) putIntoDest(p, d, n, ropecg(p.module, "$1.Field0[$2]", [rdLoc(a), rdCharLoc(b)]), a.storage) @@ -786,11 +790,7 @@ proc genSeqElem(p: BProc, n, x, y: CgNode, d: var TLoc) = initLocExpr(p, y, b) var ty = skipTypes(a.t, abstractVarRange) if ty.kind in {tyRef, tyPtr}: - ty = skipTypes(ty.lastSon, abstractVarRange) # emit range check: - if optBoundsCheck in p.options: - linefmt(p, cpsStmts, - "if ((NU)($1) >= (NU)$2){ #raiseIndexError2($1,$2-1); $3}$n", - [rdCharLoc(b), lenExpr(p, a), raiseInstr(p)]) + ty = skipTypes(ty.lastSon, abstractVarRange) if d.k == locNone: d.storage = OnHeap if skipTypes(a.t, abstractVar).kind in {tyRef, tyPtr}: a.r = ropecg(p.module, "(*$1)", [a.r]) @@ -1873,6 +1873,17 @@ proc genMagicExpr(p: BProc, e: CgNode, d: var TLoc, op: TMagic) = linefmt(p, cpsStmts, "$1 = ($2)($3);$n", [a.r, typ, rdLoc(b)]) of mChckRange: genRangeChck(p, e, d) + of mChckIndex: + var arr, a: TLoc + initLocExpr(p, e[1], arr) + initLocExpr(p, e[2], a) + genIndexCheck(p, e[1], arr, a) + of mChckBounds: + var arr, a, b: TLoc + initLocExpr(p, e[1], arr) + initLocExpr(p, e[2], a) + initLocExpr(p, e[3], b) + genBoundsCheck(p, arr, a, b) of mSamePayload: var a, b: TLoc initLocExpr(p, e[1], a) diff --git a/compiler/backend/jsgen.nim b/compiler/backend/jsgen.nim index ad6c01e40df..d5441d21645 100644 --- a/compiler/backend/jsgen.nim +++ b/compiler/backend/jsgen.nim @@ -384,6 +384,11 @@ include jstypes proc gen(p: PProc, n: CgNode, r: var TCompRes) proc genStmt(p: PProc, n: CgNode) +proc gen(p: PProc, n: CgNode): TCompRes {.inline.} = + ## Convenience procedure that returns instead of requiring a ``var`` + ## parameter. + gen(p, n, result) + proc useMagic(p: PProc, name: string) = if name.len == 0: return var s = magicsys.getCompilerProc(p.module.graph, name) @@ -1092,19 +1097,12 @@ proc genArrayAddr(p: PProc, n: CgNode, r: var TCompRes) = gen(p, m[0], a) gen(p, m[1], b) #internalAssert p.config, a.typ != etyBaseIndex and b.typ != etyBaseIndex - let x, tmp = a.rdLoc + let x = a.rdLoc r.address = x var typ = skipTypes(m[0].typ, abstractPtrs) if typ.kind == tyArray: first = firstOrd(p.config, typ[0]) - if optBoundsCheck in p.options: - useMagic(p, "chckIndx") - if first == 0: # save a couple chars - r.res = "chckIndx($1, 0, ($2).length - 1)" % [b.res, tmp] - else: - r.res = "chckIndx($1, $2, ($3).length + ($2) - 1) - ($2)" % [ - b.res, rope(first), tmp] - elif first != 0: + if first != 0: r.res = "($1) - ($2)" % [b.res, rope(first)] else: r.res = b.res @@ -1943,6 +1941,36 @@ proc genMagic(p: PProc, n: CgNode, r: var TCompRes) = r.kind = resExpr of mChckRange: genRangeChck(p, n, r) + of mChckIndex: + let + first = firstOrd(p.config, n[1].typ) + arr = gen(p, n[1]) + idx = gen(p, n[2]) + + useMagic(p, "chckIndx") + if first == 0: + lineF(p, "(chckIndx($2, 0, ($1).length - 1));$n", + [rdLoc(arr), rdLoc(idx)]) + else: + # can only be a statically-sized array + lineF(p, "(chckIndx($1, $2, $3));$n", + [rdLoc(idx), rope(first), rope(lastOrd(p.config, n[1].typ))]) + of mChckBounds: + let + first = firstOrd(p.config, n[1].typ) + arr = gen(p, n[1]) + lo = gen(p, n[2]) + hi = gen(p, n[3]) + + useMagic(p, "chckBounds") + if first == 0: + lineF(p, "chckBounds($2, $3, 0, ($1).length - 1);$n", + [rdLoc(arr), rdLoc(lo), rdLoc(hi)]) + else: + # can only be a statically-sized array + lineF(p, "(chckBounds($1, $2, $3, $4));$n", + [rdLoc(lo), rdLoc(hi), rope(first), + rope(lastOrd(p.config, n[1].typ))]) else: genCall(p, n, r) #else internalError(p.config, e.info, 'genMagic: ' + magicToStr[op]); diff --git a/compiler/mir/mirgen.nim b/compiler/mir/mirgen.nim index 7da0324d4b0..aa42c7687af 100644 --- a/compiler/mir/mirgen.nim +++ b/compiler/mir/mirgen.nim @@ -79,6 +79,9 @@ import compiler/front/[ options ], + compiler/sem/[ + ast_analysis + ], compiler/utils/[ containers, idioms @@ -266,15 +269,10 @@ func isPure(tree: MirTree, n: NodePosition): bool = of mnkPathArray: let arr = NodePosition tree.operand(n, 0) case tree[arr].typ.skipTypes(abstractVar).kind - of tyArray: - # static array; pure when the array expression is pure and no index - # errors are possible - tree[n, 1].kind == mnkLiteral and isPure(tree, arr) - of tyUncheckedArray: - # no index errors are possible - isPure(tree, arr) - of tyString, tySequence, tyOpenArray, tyVarargs, tyCstring: - false # dynamically-sized arrays; index errors are possible + of tyArray, tyUncheckedArray, tyString, tySequence, tyOpenArray, tyVarargs, + tyCstring: + # pure when the both the array and index operands are pure + isPure(tree, tree.child(n, 1)) and isPure(tree, arr) else: unreachable(tree[arr].kind) else: @@ -289,14 +287,13 @@ func isStable(tree: MirTree, n: NodePosition): bool = of mnkPathArray: let arr = NodePosition tree.operand(n, 0) case tree[arr].typ.skipTypes(abstractVar).kind - of tyArray: - # static arrays; stable when no index errors are possible - tree[n, 1].kind == mnkLiteral and isStable(tree, arr) - of tyUncheckedArray: - # cannot raise - isStable(tree, arr) + of tyArray, tyUncheckedArray: + # static arrays + isPure(tree, tree.child(n, 1)) and isStable(tree, arr) of tyString, tySequence, tyOpenArray, tyVarargs, tyCstring: - false # run-time arrays; could always raise + # run-time arrays; stable when neither the array nor index operand + # depends on mutable state + isPure(tree, tree.child(n, 1)) and isPure(tree, arr) else: unreachable() of mnkPathNamed, mnkPathPos: @@ -528,6 +525,29 @@ proc genRd(c: var TCtx, n: PNode; consume=false): Value = # either an rvalue or some non-pure lvalue -> capture result = captureInTemp(c, f, consume) +proc capture(c: var TCtx, n: PNode): Value = + ## If not a stable lvalue expression, captures the result of the + ## expression `n` in a temporary. + ## * rvalue expression are captured in temporaries + ## * lvalue expressions are captured as non-assignable aliases + ## * literals are not captured + const Skip = abstractInstTypeClass + {tyVar} + let f = c.builder.push: genx(c, n) + case detectKind(c.builder.staging, f.pos, sink=false) + of Rvalue, OwnedRvalue: + captureInTemp(c, f, sink=false) + of Literal: + c.builder.popSingle(f) + of Lvalue: + if c.builder.staging[f.pos].kind in Atoms: + # atoms are always stable and can thus be used directly + c.builder.popSingle(f) + elif n.typ.skipTypes(Skip).kind in {tyOpenArray, tyVarargs}: + # don't create an alias for openArrays, a copy of the view suffices + captureInTemp(c, f, sink=false) + else: + captureName(c, f, mutable=false) + proc genOperand(c: var TCtx, n: PNode; sink = false) = ## Translates expression AST `n` to a MIR operand expression. `sink` signals ## whether the result is used in a sink context -- the flag is propagated @@ -622,9 +642,23 @@ proc genBracketExpr(c: var TCtx, n: PNode) = genOperand(c, n[0]) of tyArray, tySequence, tyOpenArray, tyVarargs, tyUncheckedArray, tyString, tyCstring: - c.buildOp mnkPathArray, elemType(typ): - genOperand(c, n[0]) - c.use genRd(c, n[1]) + if optBoundsCheck in c.userOptions and needsIndexCheck(n[0], n[1]): + let + arr = capture(c, n[0]) + idx = genRd(c, n[1]) + + c.buildStmt mnkVoid: + c.buildDefectMagicCall mChckIndex, typeOrVoid(c, nil): + c.emitByVal arr + c.emitByVal idx + + c.buildOp mnkPathArray, elemType(typ): + c.use arr + c.use idx + else: + c.buildOp mnkPathArray, elemType(typ): + genOperand(c, n[0]) + c.use genRd(c, n[1]) else: unreachable() proc genVariantAccess(c: var TCtx, n: PNode) = @@ -1017,13 +1051,30 @@ proc genMagic(c: var TCtx, n: PNode; m: TMagic) = genx(c, n[1]) c.userOptions = orig of mSlice: - c.buildTree mnkToSlice, n.typ: - # XXX: a HiddenStdConv erroneously ends up in the array position - # sometimes, which would, if kept, lead to index errors when the - # array doesn't start at index 0 - genOperand(c, skipConv(n[1])) - genArgExpression(c, n[2], sink=false) - genArgExpression(c, n[3], sink=false) + # XXX: a HiddenStdConv erroneously ends up in the array position + # sometimes, which would, if kept, lead to index errors when the + # array doesn't start at index 0 + let arr = skipConv(n[1]) + if optBoundsCheck in c.userOptions and needsBoundCheck(arr, n[2], n[3]): + let + arr = capture(c, arr) + lo = genRd(c, n[2]) + hi = genRd(c, n[3]) + c.buildStmt mnkVoid: + c.buildDefectMagicCall mChckBounds, typeOrVoid(c, nil): + c.emitByVal arr + c.emitByVal lo + c.emitByVal hi + + c.buildTree mnkToSlice, n.typ: + c.use arr + c.use lo + c.use hi + else: + c.buildTree mnkToSlice, n.typ: + genOperand(c, arr) + genArgExpression(c, n[2], sink=false) + genArgExpression(c, n[3], sink=false) # magics that use incomplete symbols (most of them are generated by # ``liftdestructors``): diff --git a/compiler/mir/mirtrees.nim b/compiler/mir/mirtrees.nim index 0fcc2a06ca0..f3c8428bddc 100644 --- a/compiler/mir/mirtrees.nim +++ b/compiler/mir/mirtrees.nim @@ -92,10 +92,11 @@ type # ownership in ``mirgen``. There's only going to be the ``Def`` kind mnkDefUnpack ## intermediate hack required by destructor injection. Don't ## use - mnkBind ## introduces an alias that may only be used for read access. - ## The source expression must not be empty - mnkBindMut ## introduces an alias that may be used for write access. - ## The source expression must not be empty + mnkBind ## introduces an alias that may be used for read/write + ## access, but not for direct assignments. The source + ## expression must not be empty + mnkBindMut ## introduces an alias that may be used for read/write access + ## and assignments. The source expression must not be empty mnkFastAsgn ## assignment that cannot be rewritten into copy, move, or ## hook call diff --git a/compiler/sem/ast_analysis.nim b/compiler/sem/ast_analysis.nim index c98758c495d..dcb8015ef56 100644 --- a/compiler/sem/ast_analysis.nim +++ b/compiler/sem/ast_analysis.nim @@ -5,8 +5,13 @@ import ast_query, ast_types, types + ], + compiler/utils/[ + idioms ] +const instTypes = {tyGenericInst, tyAlias, tySink} + tyUserTypeClasses + proc canUseView*(n: PNode): bool = ## Computes whether the expression `n` evaluates to something of which a ## view can be created. @@ -34,4 +39,37 @@ proc canUseView*(n: PNode): bool = # do not return classifyBackendView(n.typ) != bvcNone else: - return false \ No newline at end of file + return false + +proc needsIndexCheck*(arr, idx: PNode): bool = + ## Uses the expressions' type and shape to infer whether index checks are + ## required for an ``arr[idx]`` access. + case arr.typ.skipTypes(instTypes + {tyDistinct, tyVar}).kind + of tyArray: + # statement-list expressions are, at present, not checked at compile- + # time, so they're not skipped here + idx.kind notin nkIntLiterals + of tyUncheckedArray: + false + of tyString, tySequence, tyOpenArray, tyVarargs: + true + of tyCstring: + # XXX: depends on the targeted backend (i.e., index checks are used with + # the JS and VM backend, but not with the C one) + true + else: + unreachable() + +proc needsBoundCheck*(arr, lo, hi: PNode): bool = + ## Uses the expression's type and shape to infer whether bound checks are + ## required for a ``toOpenArray(arr, lo, hi)`` call. + case arr.typ.skipTypes(instTypes + {tyDistinct, tyVar}).kind + of tyPtr, tyUncheckedArray: + false + of tyArray, tyString, tySequence, tyOpenArray, tyVarargs: + true + of tyCstring: + # XXX: depends on the targeted backend + true + else: + unreachable() \ No newline at end of file diff --git a/compiler/vm/vm.nim b/compiler/vm/vm.nim index a059c7eee83..1f1be883fe6 100644 --- a/compiler/vm/vm.nim +++ b/compiler/vm/vm.nim @@ -1003,10 +1003,6 @@ proc rawExecute(c: var TCtx, t: var VmThread, pc: var int): YieldReason = let idx = regs[rc].intVal.int srcTyp = regs[rb].handle.typ - L = arrayLen(regs[rb].handle) - - if unlikely(idx >=% L): - raiseVmError(reportVmIdx(idx, L-1)) case srcTyp.kind of akString: @@ -1029,9 +1025,6 @@ proc rawExecute(c: var TCtx, t: var VmThread, pc: var int): YieldReason = checkHandle(regs[rb]) let src = regs[rb].handle - if unlikely(idx >=% arrayLen(src)): - raiseVmError(reportVmIdx(idx, arrayLen(src) - 1)) - case src.typ.kind of akString: # XXX: see todo and comment in `opcLdArr` for the reasons why @@ -1083,7 +1076,7 @@ proc rawExecute(c: var TCtx, t: var VmThread, pc: var int): YieldReason = akDiscriminator: unreachable(dTyp.kind) - if idx <% slice.len: + if idx >= 0 and idx <% slice.len: checkHandle(regs[rc]) writeLoc(slice[idx], regs[rc], c.memory) else: @@ -1839,7 +1832,16 @@ proc rawExecute(c: var TCtx, t: var VmThread, pc: var int): YieldReason = regs[ra].toStr, "[" & regs[rb].toStr & ".." & regs[rc].toStr & "]" ])) + of opcIndexChck: + # raise an error if c is not within b's bounds + let + rb = instr.regB + idx = regs[instr.regC].intVal + checkHandle(regs[rb]) + let len = arrayLen(regs[rb].handle) + if idx < 0 or idx >=% len: + raiseVmError(reportVmIdx(idx, len-1)) of opcArrCopy: let rb = instr.regB let rc = instr.regC diff --git a/compiler/vm/vm_enums.nim b/compiler/vm/vm_enums.nim index 571d2a8c9cd..5a1e71118f7 100644 --- a/compiler/vm/vm_enums.nim +++ b/compiler/vm/vm_enums.nim @@ -82,6 +82,7 @@ type opcAddStrStr, opcAddSeqElem, opcRangeChck, + opcIndexChck, ## abort execution if the index is not in bounds opcArrCopy, diff --git a/compiler/vm/vmgen.nim b/compiler/vm/vmgen.nim index 88b37b75f18..6c2fa2e8736 100644 --- a/compiler/vm/vmgen.nim +++ b/compiler/vm/vmgen.nim @@ -2058,6 +2058,13 @@ proc genMagic(c: var TCtx; n: CgNode; dest: var TDest; m: TMagic) = c.freeTemp(tmp0) else: dest = tmp0 + of mChckIndex: + let + arr = c.genx(n[1]) + idx = c.genIndex(n[2], n[1].typ) + c.gABC(n, opcIndexChck, 0, arr, idx) + c.freeTemp(idx) + c.freeTemp(arr) else: # mGCref, mGCunref, mFinished, etc. fail(n.info, vmGenDiagCodeGenUnhandledMagic, m) diff --git a/doc/mir.rst b/doc/mir.rst index 1e96cac2c5c..707ed6dd6d7 100644 --- a/doc/mir.rst +++ b/doc/mir.rst @@ -110,10 +110,14 @@ Semantics | DefCursor NAME # definition of non-owning location | DefCursor NAME FULL_VALUE # same as above, but with initial # assignment - | Bind LVALUE # bind the lvalue to the given alias + | Bind LVALUE # bind the lvalue to the given alias. + # May be used for mutation, but must + # not be used as an assignment's + # destination or `Tag`'s operand | BindMut LVALUE # bind the lvalue to the given alias. - # The alias may be used for mutations - # (e.g., on the left of assignments) + # The alias may be used as an + # assignment's destination or as a + # `Tag`'s operand | Void LVALUE # evaluates the lvalue for side-effects # and acts as a usage of the lvalue # during data-flow analysis diff --git a/lib/system/jssys.nim b/lib/system/jssys.nim index 890386504c0..869c8b833f5 100644 --- a/lib/system/jssys.nim +++ b/lib/system/jssys.nim @@ -172,6 +172,9 @@ proc raiseRangeError() {.compilerproc, noreturn.} = proc raiseIndexError(i, a, b: int) {.compilerproc, noreturn.} = raise newException(IndexDefect, formatErrorIndexBound(int(i), int(a), int(b))) +proc raiseIndexError1() {.compilerproc, noreturn.} = + raise newException(IndexDefect, "index out of bounds") + proc raiseFieldError2(f: string, discVal: string) {.compilerproc, noreturn.} = raise newException(FieldDefect, formatFieldDefect(f, discVal)) @@ -678,6 +681,10 @@ proc chckIndx(i, a, b: int): int {.compilerproc.} = if i >= a and i <= b: return i else: raiseIndexError(i, a, b) +proc chckBounds(lo, hi, a, b: int) {.compilerproc.} = + if hi-lo != -1 and (hi-lo < -1 or lo < a or lo > b or hi > b or hi < a): + raiseIndexError1() + proc chckRange(i, a, b: int): int {.compilerproc.} = if i >= a and i <= b: return i else: raiseRangeError() diff --git a/tests/lang_objects/destructor/tdestruction_when_checks_failed.nim b/tests/lang_objects/destructor/tdestruction_when_checks_failed.nim index 7e70cabb9a8..0ade72d73d1 100644 --- a/tests/lang_objects/destructor/tdestruction_when_checks_failed.nim +++ b/tests/lang_objects/destructor/tdestruction_when_checks_failed.nim @@ -14,6 +14,18 @@ var numDestroy = 0 proc `=destroy`(x: var Object) = inc numDestroy +template runTest(body: untyped) = + var raised = false + numDestroy = 0 + try: + body + except: + raised = true + + doAssert raised, "no defect was raised?" + doAssert numDestroy == 1, "the destructor wasn't called" + + # with how destructor injection works at the time of writing, when a scope is # only left through structured control-flow, no hidden ``try`` statement is # injected. The tests here make sure that the unstructured control-flow arising @@ -24,12 +36,21 @@ block range_checks: var obj = Object() # obj stores an alive value that needs to be destroyed discard range[0..1](x) - var raised = false - numDestroy = 0 - try: + runTest: test(2) # provoke a range-check failure - except: - raised = true - doAssert raised, "no defect was raised?" - doAssert numDestroy == 1, "the destructor wasn't called" +block index_checks: + proc test(a: seq[int], x: int) = + var obj = Object() # obj stores an value that needs to be destroyed + discard a[x] + + runTest: + test(@[], 1) # provoke an index-check failure + +block bound_checks: + proc test(a: seq[int]) = + var obj = Object() # obj stores an value that needs to be destroyed + discard toOpenArray(a, 1, 2) + + runTest: + test(@[]) # provoke a bound-check failure \ No newline at end of file diff --git a/tests/lang_objects/destructor/tv2_cast.nim b/tests/lang_objects/destructor/tv2_cast.nim index b22e7b0c06e..4b879140791 100644 --- a/tests/lang_objects/destructor/tv2_cast.nim +++ b/tests/lang_objects/destructor/tv2_cast.nim @@ -26,6 +26,7 @@ scope: def_cursor _0: string = s def_cursor _1: int = len(arg _0) def_cursor _2: int = -(arg _1, arg 1) + chckBounds(arg s, arg 0, arg _2) (raises) def_cursor _3: openArray[byte] = toOpenArray s, 0, _2 def _4: seq[byte] = encode(arg _3) (raises) def data: string diff --git a/tests/magics/ttoopenarray.nim b/tests/magics/ttoopenarray.nim new file mode 100644 index 00000000000..493842429cc --- /dev/null +++ b/tests/magics/ttoopenarray.nim @@ -0,0 +1,102 @@ +discard """ + description: ''' + Specify the behaviour of the ``toOpenArray`` built-in procedure + ''' + targets: "c js vm" + knownIssue.vm: "The magic is not yet supported by the VM backend" +""" + +template test(a, b: untyped) = + ## Temporary helper template for a comparison that is expected to fail with + ## the JS backend. + {.line.}: + when defined(js): + doAssert a != b + else: + doAssert a == b + +proc toSeq(a: openArray[int]): seq[int] {.noinline.} = + ## Intended to prevent comparisons being folded away by the compiler. + for x in a.items: + result.add x + +proc toSeqByte(a: openArray[byte]): seq[byte] {.noinline.} = + for x in a.items: + result.add x + +block from_array_construction: + # test with an array construction as the array operand + doAssert toSeq(toOpenArray([1, 2, 3], 0, 0)) == [1] + doAssert toSeq(toOpenArray([1, 2, 3], 0, 2)) == [1, 2, 3] + +block from_postive_range_based_array: + var arr: array[8..12, int] = [11, 12, 13, 14, 15] + + # knownIssue: the bounds are not properly offset + test toSeq(toOpenArray(arr, 8, 12)), [11, 12, 13, 14, 15] + + doAssertRaises(IndexDefect): + discard toOpenArray(arr, 10, 8) + +block from_negative_range_based_array: + var arr: array[-3 .. -1, int] = [1, 2, 3] + # knownIssue: the bounds are not properly offset + test toSeq(toOpenArray(arr, -3, -1)), [1, 2, 3] + doAssert toSeq(toOpenArray(arr, 0, -1)) == [] + doAssert toSeq(toOpenArray(arr, -3, -4)) == [] + doAssertRaises(IndexDefect): + discard toOpenArray(arr, -4, -1) + doAssertRaises(IndexDefect): + discard toOpenArray(arr, -1, 0) + doAssertRaises(IndexDefect): + discard toOpenArray(arr, -1, -3) + +block from_seq: + var s = @[1, 2, 3, 4, 5] + doAssert toSeq(toOpenArray(s, 1, 3)) == [2, 3, 4] + + # test creating an empty openArray + # https://github.com/nim-lang/nim/issues/7904 + doAssert toSeq(toOpenArray(s, 0, -1)) == [] + doAssert toSeq(toOpenArray(s, 1, 0)) == [] + doAssertRaises(IndexDefect): + discard toOpenArray(s, 0, -2) + + doAssert toSeq(toOpenArray(s, 9, 8)) == [] + doAssert toSeq(toOpenArray(s, 0, -1)) == [] + doAssert toSeq(toOpenArray(s, 1, 0)) == [] + +proc first(a: openArray[int]): int = + toSeq(toOpenArray(a, 0, 0))[0] + +# test openArray of openArray +block from_openArray: + var s = @[1, 2, 3] + + proc testEmpty(a: openArray[int]) = + doAssert toSeq(toOpenArray(a, 0, -1)) == [] + + # create empty openArray from empty openArray + testEmpty(toOpenArray(s, 0, -1)) + testEmpty(toOpenArray(s, 1, 0)) + # create empty openArray from non-empty openArray + testEmpty(toOpenArray(s, 1, 2)) + doAssert first(toOpenArray(s, 1, s.len-1)) == 2 + +block from_unchecked_array: + let + s = @[1, 2, 3, 4, 5] + p = cast[ptr UncheckedArray[int]](addr s[0]) + + when defined(js): + # knownIssue: the ``UncheckedArray`` overload is not yet + # supported by the JS backend + doAssert not compiles(toOpenArray(p, 1, 3)), "it works now" + else: + doAssert first(toOpenArray(p, 1, 3)) == 2 + +block from_string: + # test the byte-casting overload + let str = "0123456789" + doAssert toSeqByte(toOpenArrayByte(str, 0, str.high)) == + [0x30'u8, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39] \ No newline at end of file diff --git a/tests/misc/ttoopenarray_array_bound_checks.nim b/tests/misc/ttoopenarray_array_bound_checks.nim index ec369495846..dd121db578d 100644 --- a/tests/misc/ttoopenarray_array_bound_checks.nim +++ b/tests/misc/ttoopenarray_array_bound_checks.nim @@ -4,7 +4,6 @@ discard """ Regression test for ``toOpenArray`` calls not bound-checking dereferenced pointer-to-array values ''' - knownIssue.js: "No bound checks are emitted for `toOpenArray`" knownIssue.vm: "`toOpenArray` is not yet supported" """ diff --git a/tests/system/tsystem_misc.nim b/tests/system/tsystem_misc.nim index 13b8c18a952..50cd64f80e2 100644 --- a/tests/system/tsystem_misc.nim +++ b/tests/system/tsystem_misc.nim @@ -1,33 +1,5 @@ discard """ - output:'''1 -1 -2 -3 -11 -12 -13 -14 -15 -2 -3 -4 -2 -1 -2 -3 -2 -48 -49 -50 -51 -52 -53 -54 -55 -56 -57 -2 -''' + output: "2" """ @@ -59,54 +31,6 @@ doAssert high(float) > low(float) doAssert high(float32) > low(float32) doAssert high(float64) > low(float64) -proc foo(a: openArray[int]) = - for x in a: echo x - -foo(toOpenArray([1, 2, 3], 0, 0)) - -foo(toOpenArray([1, 2, 3], 0, 2)) - -var arr: array[8..12, int] = [11, 12, 13, 14, 15] - -foo(toOpenArray(arr, 8, 12)) - -var seqq = @[1, 2, 3, 4, 5] -foo(toOpenArray(seqq, 1, 3)) - -# empty openArray issue #7904 -foo(toOpenArray(seqq, 0, -1)) -foo(toOpenArray(seqq, 1, 0)) -doAssertRaises(IndexDefect): - foo(toOpenArray(seqq, 0, -2)) - -foo(toOpenArray(arr, 9, 8)) -foo(toOpenArray(arr, 0, -1)) -foo(toOpenArray(arr, 1, 0)) -doAssertRaises(IndexDefect): - foo(toOpenArray(arr, 10, 8)) - -# test openArray of openArray -proc oaEmpty(a: openArray[int]) = - foo(toOpenArray(a, 0, -1)) - -proc oaFirstElm(a: openArray[int]) = - foo(toOpenArray(a, 0, 0)) - -oaEmpty(toOpenArray(seqq, 0, -1)) -oaEmpty(toOpenArray(seqq, 1, 0)) -oaEmpty(toOpenArray(seqq, 1, 2)) -oaFirstElm(toOpenArray(seqq, 1, seqq.len-1)) - -var arrNeg: array[-3 .. -1, int] = [1, 2, 3] -foo(toOpenArray(arrNeg, -3, -1)) -foo(toOpenArray(arrNeg, 0, -1)) -foo(toOpenArray(arrNeg, -3, -4)) -doAssertRaises(IndexDefect): - foo(toOpenArray(arrNeg, -4, -1)) -doAssertRaises(IndexDefect): - foo(toOpenArray(arrNeg, -1, 0)) -doAssertRaises(IndexDefect): - foo(toOpenArray(arrNeg, -1, -3)) doAssertRaises(Exception): raise newException(Exception, "foo") @@ -120,17 +44,6 @@ block: didThrow = true doAssert didThrow -type seqqType = ptr UncheckedArray[int] -let qData = cast[seqqType](addr seqq[0]) -oaFirstElm(toOpenArray(qData, 1, 3)) - -proc foo(a: openArray[byte]) = - for x in a: echo x - -let str = "0123456789" -foo(toOpenArrayByte(str, 0, str.high)) - - template boundedOpenArray[T](x: seq[T], first, last: int): openarray[T] = toOpenarray(x, max(0, first), min(x.high, last))