Skip to content

Commit

Permalink
vmgen: fix two overflow bugs with sets (#801)
Browse files Browse the repository at this point in the history
## Summary

Fix either the compiler crashing or the VM failing at run-time with an
over- or underflow defect for `incl`, `contains`, and construction
operations for `set`s where the element range crossed the `int64` upper
boundary.

The fixed bugs only affected VM bytecode generation and code running in
the VM.

## Details

There were two related problems:
- the relevant `vmgen` logic performed all math with signed integer
  values
- preparing (i.e., offsetting) a run-time value for a `set` operation
  always happened via `SubInt` (subtract signed integer), ignoring
  the signed-ness of the operands

When the lower bound of the elements' range was beyond the `int64` upper
bound, this didn't cause problems: reinterpreting the integer bits
always yields negative values there. For example, for a `set[Low..High]`
where `Low == high(uint64)-3` and `High = high(uint)`, subtracting the
lower inclusive-bound (-4) from the upper inclusive-bound (-1) both
didn't cause and overflow and also resulted in the correct value (3).

However, when `Low == uint64 high(int64)` and `High == Low + 4`, this
doesn't work. When the operand to an operation involving such sets was a
literal integer outside the `int64` range, the compiler crashed - when
it was a run-time value, the VM erroneously reported an overflow.

The logic for generating the code for loading set elements now uses
`Int128` to get around over- and underflow problems, and the `Subu`
operation is emitted for offsetting unsigned run-time values.

In addition, generating the bytecode for loading literal unsigned
integers now happens via `loadInt`, meaning that the values are now
loaded via `LdImmInt` if they're less than 2^23. Previously, literal
unsigned integers were always loaded via the slightly less efficient
`LdConst` operation.
  • Loading branch information
zerbina authored Jul 16, 2023
1 parent aabe9eb commit c0ba526
Show file tree
Hide file tree
Showing 2 changed files with 91 additions and 24 deletions.
55 changes: 31 additions & 24 deletions compiler/vm/vmgen.nim
Original file line number Diff line number Diff line change
Expand Up @@ -751,6 +751,10 @@ makeCnstFunc(toFloatCnst, BiggestFloat, cnstFloat, floatVal, cmpFloatRep)

makeCnstFunc(toStringCnst, string, cnstString, strVal, `==`)

proc toIntCnst(c: var TCtx, val: Int128): int =
# integer constants are stored as their raw bit representation
toIntCnst(c, BiggestInt(toInt64(val)))

proc genLiteral(c: var TCtx, n: PNode): int =
## Create a constant, add it to the `c.constants` list and return
## the index of where it's located there
Expand Down Expand Up @@ -1229,12 +1233,11 @@ proc genVarargsABC(c: var TCtx; n: PNode; dest: TRegister; opc: TOpcode) =
c.freeTempRange(x, n.len-1)

proc isInt8Lit(n: PNode): bool =
if n.kind in {nkCharLit..nkUInt64Lit}:
result = n.intVal >= low(int8) and n.intVal <= high(int8)

proc isInt16Lit(n: PNode): bool =
if n.kind in {nkCharLit..nkUInt64Lit}:
result = n.intVal >= low(int16) and n.intVal <= high(int16)
## Returns whether `n` represents an integer value (signed or
## unsigned) that fits into the range of an 8-bit signed integer.
if n.kind in nkIntKinds:
let val = getInt(n)
result = val >= low(int8) and val <= high(int8)

proc genAddSubInt(c: var TCtx; n: PNode; dest: var TDest; opc: TOpcode) =
if n[2].isInt8Lit:
Expand Down Expand Up @@ -1506,35 +1509,42 @@ proc genVoidBC(c: var TCtx, n: PNode, dest: TDest, opcode: TOpcode) =
c.freeTemp(tmp1)
c.freeTemp(tmp2)

proc loadInt(c: var TCtx, n: PNode, dest: TRegister, val: BiggestInt) =
proc loadInt(c: var TCtx, n: PNode, dest: TRegister, val: Int128) =
## Loads the integer `val` into `dest`, choosing the most efficient way to
## do so.
if val in regBxMin-1..regBxMax:
# can be loaded as an immediate
c.gABx(n, opcLdImmInt, dest, val.int)
c.gABx(n, opcLdImmInt, dest, toInt(val))
else:
# requires a constant
c.gABx(n, opcLdConst, dest, c.toIntCnst(val))

proc genSetElem(c: var TCtx, n: PNode, first: int): TRegister =
proc genSetElem(c: var TCtx, n: PNode, first: Int128): TRegister =
result = c.getTemp(n.typ)

if first != 0:
if n.kind in nkIntKinds:
# a literal value
c.gABx(n, opcLdImmInt, result, int(n.intVal - first))
# a literal value. Since sem makes sure sets cannot store elements
# with an adjusted value of >= 2^16, we know that the result of the
# subtraction fits into the encodable range for ABX
c.gABx(n, opcLdImmInt, result, toInt(getInt(n) - first))
else:
gen(c, n, result)
if first notin -127..127:
# too large for the ABI encoding; we need to load a constant
let tmp = c.getTemp(n.typ)
let
typ = skipTypes(n.typ, IrrelevantTypes + {tyRange})
tmp = c.getTemp(typ)
opc =
if isUnsigned(typ): opcSubu
else: opcSubInt
c.loadInt(n, tmp, first)
c.gABC(n, opcSubInt, result, result, tmp)
c.gABC(n, opc, result, result, tmp)
c.freeTemp(tmp)
elif first > 0:
c.gABI(n, opcSubImmInt, result, result, first)
c.gABI(n, opcSubImmInt, result, result, toInt(first))
else:
c.gABI(n, opcAddImmInt, result, result, -first)
c.gABI(n, opcAddImmInt, result, result, toInt(-first))

else:
gen(c, n, result)
Expand All @@ -1547,7 +1557,7 @@ proc genSetElem(c: var TCtx, n: PNode, typ: PType): TRegister {.inline.} =
# `first` can't be reliably derived from `n.typ` since the type may not
# match the set element type. This happens with the set in a
# `nkCheckedFieldExpr` for example
let first = toInt(c.config.firstOrd(t))
let first = c.config.firstOrd(t)
genSetElem(c, n, first)

func fitsRegister(t: PType): bool =
Expand Down Expand Up @@ -2885,7 +2895,7 @@ proc genSetConstr(c: var TCtx, n: PNode, dest: var TDest) =
c.gABx(n, opcLdNull, dest, c.genType(n.typ))
# XXX: since `first` stays the same across the loop, we could invert
# the loop around `genSetElem`'s logic...
let first = firstOrd(c.config, n.typ.skipTypes(abstractInst)).toInt()
let first = firstOrd(c.config, n.typ.skipTypes(abstractInst))
for x in n:
if x.kind == nkRange:
let a = c.genSetElem(x[0], first)
Expand Down Expand Up @@ -3026,13 +3036,10 @@ proc gen(c: var TCtx; n: PNode; dest: var TDest) =
else:
genCall(c, n, dest)
clearDest(c, n, dest)
of nkCharLit..nkInt64Lit:
if isInt16Lit(n):
if dest.isUnset: dest = c.getTemp(n.typ)
c.gABx(n, opcLdImmInt, dest, n.intVal.int)
else:
genLit(c, n, dest)
of nkUIntLit..pred(nkNilLit): genLit(c, n, dest)
of nkIntKinds:
prepare(c, dest, n.typ)
c.loadInt(n, dest, getInt(n))
of nkFloatKinds, nkStrKinds: genLit(c, n, dest)
of nkNilLit:
if not n.typ.isEmptyType:
let t = n.typ.skipTypes(abstractInst)
Expand Down
60 changes: 60 additions & 0 deletions tests/stdlib/types/sets/trange_crosses_int64_bounds.nim
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
discard """
targets: "c vm !js"
description: '''
Test the `incl`, `contains`, and set construction operation for sets where
the element range crosses the signed 64-bit integer upper bound
'''
"""

# knownIssue: sets with elements beyond 2^53-1 don't work the JavaScript
# backend

const
Low = uint64(high(int64))
High = Low + 3

type
Range = range[Low .. High]
SetType = set[Range]

const
a = Range(Low + 0)
b = Range(Low + 1)
c = Range(Low + 2)
d = Range(Low + 3)

var
s: SetType
val: Range

# test `incl` and `contains` and set construction with a constant value that
# is still in the int64 range
s.incl a
doAssert s.contains(a)
doAssert s == SetType({ a })
doAssert card(s) == 1

# now test with a constant value that's outside of the int64 range
s.incl d
doAssert d in s
doAssert s == SetType({ a, d })
doAssert card(s) == 2

s = {}

# test with a run-time value that's in the int64 range
val = a
s.incl val
doAssert s.contains(a)
doAssert s.contains(val)
doAssert s == SetType({ a }) # check that the constant set agrees
doAssert s == SetType({ val })
doAssert card(s) == 1

# now test with a run-time value that's outside int64 range
val = d
s.incl val
doAssert s.contains(d)
doAssert s.contains(val)
doAssert s == SetType({ a, d })
doAssert s == SetType({ a, val })

0 comments on commit c0ba526

Please sign in to comment.