vmgen: fix two overflow bugs with sets (#801)

## Summary Fix either the compiler crashing or the VM failing at run-time with an over- or underflow defect for `incl`, `contains`, and construction operations for `set`s where the element range crossed the `int64` upper boundary. The fixed bugs only affected VM bytecode generation and code running in the VM. ## Details There were two related problems: - the relevant `vmgen` logic performed all math with signed integer values - preparing (i.e., offsetting) a run-time value for a `set` operation always happened via `SubInt` (subtract signed integer), ignoring the signed-ness of the operands When the lower bound of the elements' range was beyond the `int64` upper bound, this didn't cause problems: reinterpreting the integer bits always yields negative values there. For example, for a `set[Low..High]` where `Low == high(uint64)-3` and `High = high(uint)`, subtracting the lower inclusive-bound (-4) from the upper inclusive-bound (-1) both didn't cause and overflow and also resulted in the correct value (3). However, when `Low == uint64 high(int64)` and `High == Low + 4`, this doesn't work. When the operand to an operation involving such sets was a literal integer outside the `int64` range, the compiler crashed - when it was a run-time value, the VM erroneously reported an overflow. The logic for generating the code for loading set elements now uses `Int128` to get around over- and underflow problems, and the `Subu` operation is emitted for offsetting unsigned run-time values. In addition, generating the bytecode for loading literal unsigned integers now happens via `loadInt`, meaning that the values are now loaded via `LdImmInt` if they're less than 2^23. Previously, literal unsigned integers were always loaded via the slightly less efficient `LdConst` operation.
nim-works · Jul 16, 2023 · c0ba526 · c0ba526
1 parent aabe9eb
commit c0ba526
Show file tree

Hide file tree

Showing 2 changed files with 91 additions and 24 deletions.
diff --git a/compiler/vm/vmgen.nim b/compiler/vm/vmgen.nim
@@ -751,6 +751,10 @@ makeCnstFunc(toFloatCnst, BiggestFloat, cnstFloat, floatVal, cmpFloatRep)
 
 makeCnstFunc(toStringCnst, string, cnstString, strVal, `==`)
 
+proc toIntCnst(c: var TCtx, val: Int128): int =
+  # integer constants are stored as their raw bit representation
+  toIntCnst(c, BiggestInt(toInt64(val)))
+
 proc genLiteral(c: var TCtx, n: PNode): int =
   ## Create a constant, add it to the `c.constants` list and return
   ## the index of where it's located there
@@ -1229,12 +1233,11 @@ proc genVarargsABC(c: var TCtx; n: PNode; dest: TRegister; opc: TOpcode) =
   c.freeTempRange(x, n.len-1)
 
 proc isInt8Lit(n: PNode): bool =
-  if n.kind in {nkCharLit..nkUInt64Lit}:
-    result = n.intVal >= low(int8) and n.intVal <= high(int8)
-
-proc isInt16Lit(n: PNode): bool =
-  if n.kind in {nkCharLit..nkUInt64Lit}:
-    result = n.intVal >= low(int16) and n.intVal <= high(int16)
+  ## Returns whether `n` represents an integer value (signed or
+  ## unsigned) that fits into the range of an 8-bit signed integer.
+  if n.kind in nkIntKinds:
+    let val = getInt(n)
+    result = val >= low(int8) and val <= high(int8)
 
 proc genAddSubInt(c: var TCtx; n: PNode; dest: var TDest; opc: TOpcode) =
   if n[2].isInt8Lit:
@@ -1506,35 +1509,42 @@ proc genVoidBC(c: var TCtx, n: PNode, dest: TDest, opcode: TOpcode) =
   c.freeTemp(tmp1)
   c.freeTemp(tmp2)
 
-proc loadInt(c: var TCtx, n: PNode, dest: TRegister, val: BiggestInt) =
+proc loadInt(c: var TCtx, n: PNode, dest: TRegister, val: Int128) =
   ## Loads the integer `val` into `dest`, choosing the most efficient way to
   ## do so.
   if val in regBxMin-1..regBxMax:
     # can be loaded as an immediate
-    c.gABx(n, opcLdImmInt, dest, val.int)
+    c.gABx(n, opcLdImmInt, dest, toInt(val))
   else:
     # requires a constant
     c.gABx(n, opcLdConst, dest, c.toIntCnst(val))
 
-proc genSetElem(c: var TCtx, n: PNode, first: int): TRegister =
+proc genSetElem(c: var TCtx, n: PNode, first: Int128): TRegister =
   result = c.getTemp(n.typ)
 
   if first != 0:
     if n.kind in nkIntKinds:
-      # a literal value
-      c.gABx(n, opcLdImmInt, result, int(n.intVal - first))
+      # a literal value. Since sem makes sure sets cannot store elements
+      # with an adjusted value of >= 2^16, we know that the result of the
+      # subtraction fits into the encodable range for ABX
+      c.gABx(n, opcLdImmInt, result, toInt(getInt(n) - first))
     else:
       gen(c, n, result)
       if first notin -127..127:
         # too large for the ABI encoding; we need to load a constant
-        let tmp = c.getTemp(n.typ)
+        let
+          typ = skipTypes(n.typ, IrrelevantTypes + {tyRange})
+          tmp = c.getTemp(typ)
+          opc =
+            if isUnsigned(typ): opcSubu
+            else:               opcSubInt
         c.loadInt(n, tmp, first)
-        c.gABC(n, opcSubInt, result, result, tmp)
+        c.gABC(n, opc, result, result, tmp)
         c.freeTemp(tmp)
       elif first > 0:
-        c.gABI(n, opcSubImmInt, result, result, first)
+        c.gABI(n, opcSubImmInt, result, result, toInt(first))
       else:
-        c.gABI(n, opcAddImmInt, result, result, -first)
+        c.gABI(n, opcAddImmInt, result, result, toInt(-first))
 
   else:
     gen(c, n, result)
@@ -1547,7 +1557,7 @@ proc genSetElem(c: var TCtx, n: PNode, typ: PType): TRegister {.inline.} =
   # `first` can't be reliably derived from `n.typ` since the type may not
   # match the set element type. This happens with the set in a
   # `nkCheckedFieldExpr` for example
-  let first = toInt(c.config.firstOrd(t))
+  let first = c.config.firstOrd(t)
   genSetElem(c, n, first)
 
 func fitsRegister(t: PType): bool =
@@ -2885,7 +2895,7 @@ proc genSetConstr(c: var TCtx, n: PNode, dest: var TDest) =
   c.gABx(n, opcLdNull, dest, c.genType(n.typ))
   # XXX: since `first` stays the same across the loop, we could invert
   #      the loop around `genSetElem`'s logic...
-  let first = firstOrd(c.config, n.typ.skipTypes(abstractInst)).toInt()
+  let first = firstOrd(c.config, n.typ.skipTypes(abstractInst))
   for x in n:
     if x.kind == nkRange:
       let a = c.genSetElem(x[0], first)
@@ -3026,13 +3036,10 @@ proc gen(c: var TCtx; n: PNode; dest: var TDest) =
     else:
       genCall(c, n, dest)
       clearDest(c, n, dest)
-  of nkCharLit..nkInt64Lit:
-    if isInt16Lit(n):
-      if dest.isUnset: dest = c.getTemp(n.typ)
-      c.gABx(n, opcLdImmInt, dest, n.intVal.int)
-    else:
-      genLit(c, n, dest)
-  of nkUIntLit..pred(nkNilLit): genLit(c, n, dest)
+  of nkIntKinds:
+    prepare(c, dest, n.typ)
+    c.loadInt(n, dest, getInt(n))
+  of nkFloatKinds, nkStrKinds: genLit(c, n, dest)
   of nkNilLit:
     if not n.typ.isEmptyType:
       let t = n.typ.skipTypes(abstractInst)

diff --git a/tests/stdlib/types/sets/trange_crosses_int64_bounds.nim b/tests/stdlib/types/sets/trange_crosses_int64_bounds.nim
@@ -0,0 +1,60 @@
+discard """
+  targets: "c vm !js"
+  description: '''
+    Test the `incl`, `contains`, and set construction operation for sets where
+    the element range crosses the signed 64-bit integer upper bound
+  '''
+"""
+
+# knownIssue: sets with elements beyond 2^53-1 don't work the JavaScript
+#             backend
+
+const
+  Low  = uint64(high(int64))
+  High = Low + 3
+
+type
+  Range   = range[Low .. High]
+  SetType = set[Range]
+
+const
+  a = Range(Low + 0)
+  b = Range(Low + 1)
+  c = Range(Low + 2)
+  d = Range(Low + 3)
+
+var
+  s: SetType
+  val: Range
+
+# test `incl` and `contains` and set construction with a constant value that
+# is still in the int64 range
+s.incl a
+doAssert s.contains(a)
+doAssert s == SetType({ a })
+doAssert card(s) == 1
+
+# now test with a constant value that's outside of the int64 range
+s.incl d
+doAssert d in s
+doAssert s == SetType({ a, d })
+doAssert card(s) == 2
+
+s = {}
+
+# test with a run-time value that's in the int64 range
+val = a
+s.incl val
+doAssert s.contains(a)
+doAssert s.contains(val)
+doAssert s == SetType({ a }) # check that the constant set agrees
+doAssert s == SetType({ val })
+doAssert card(s) == 1
+
+# now test with a run-time value that's outside int64 range
+val = d
+s.incl val
+doAssert s.contains(d)
+doAssert s.contains(val)
+doAssert s == SetType({ a, d })
+doAssert s == SetType({ a, val })