Skip to content

Commit

Permalink
sem: decide storage type for enums on definition (#839)
Browse files Browse the repository at this point in the history
## Summary

Centralize the decision about the underlying storage types for enum
values in semantic analysis. Since the decision now happens in a single
place instead of being scattered across the compiler, this makes it
significantly easier to change the enum storage behaviour later on.

In addition, this fixes two bugs with 1- and 2-byte enum values (which
use unsigned integer storage) where the most significant bit was set
that affected code running in the VM:
- loading the values from memory resulted in non-valid values
- (only affected the VM and JS backend) storing them inside aggregate
  `const`s resulted in them having invalid values at run-time

## Details

The key changes are:
- `tyEnum` now stores the storage type in the second type slot, meaning
  that `skipTypes(tyEnum)` yields the storage type
- size and alignment computation redirects to the storage type
- `isUnsigned` considers a `tyEnum`'s storage type
- the code generators use the `tyEnum`'s provided storage type and don't
  decide it themselves

Using the second type slot of `tyEnum` is done for both forward- and
backward-compatibility: it's possible that some of the current code
still depends on an enum type's first slot always being `nil`, and in
the future the slot could possibly be used to store the enum's base
type.

The storage type is directly set on enum definition (`semEnum`). Before
analyzing the enum fields, a preliminary storage type (`tyInt`) is
already added for the enum: this allows for the enum's values to be used
*before* the enum type is fully produced. Once the full value range is
known, the correct storage type is selected.

Choosing the storage type happens in the same way as it previously did
in the C code generator (`ccgtypes`). Apart from the information now
being available during semantic analysis already, this only affects the
VM target, where non-negative enum values requiring 32 or 64 bit of
storage were previously stored as unsigned integer (they're now stored
as *signed* integers, in line with the C target).

For the VM code generator, `tyEnum` is added to the `IrrelevantTypes`
set, meaning that the code generator now transparently treats all enum
types as their storage type, with an exception being made for the to-
string operation.
  • Loading branch information
zerbina authored Aug 13, 2023
1 parent 071c823 commit f5fd262
Show file tree
Hide file tree
Showing 12 changed files with 142 additions and 74 deletions.
7 changes: 6 additions & 1 deletion compiler/ast/ast.nim
Original file line number Diff line number Diff line change
Expand Up @@ -255,7 +255,12 @@ proc newIntTypeNode*(intVal: BiggestInt, typ: PType): PNode =
of tyUInt16: result = newNode(nkUInt16Lit)
of tyUInt32: result = newNode(nkUInt32Lit)
of tyUInt64: result = newNode(nkUInt64Lit)
of tyBool, tyEnum:
of tyEnum:
# XXX: the kind for the underlying type should be used here, but too much
# code still relies on literal enum values being represented as
# exactly ``nkIntLit``
result = newNode(nkIntLit)
of tyBool:
# XXX: does this really need to be the kind nkIntLit?
result = newNode(nkIntLit)
of tyStatic: # that's a pre-existing bug, will fix in another PR
Expand Down
3 changes: 3 additions & 0 deletions compiler/ast/ast_types.nim
Original file line number Diff line number Diff line change
Expand Up @@ -475,6 +475,9 @@ type
tyGenericParam, ## ``a`` in the above patterns
tyDistinct,
tyEnum,
## enum type. `base` stores the base type (currently always nil) and
## `lastSon` the storage type (also referred to as "underlying type"),
## which is the type used for the in-memory representation
tyOrdinal, ## integer types (including enums and boolean)
tyArray,
tyObject,
Expand Down
2 changes: 1 addition & 1 deletion compiler/ast/types.nim
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ proc isPureObject*(typ: PType): bool =
result = t.sym != nil and sfPure in t.sym.flags

func isUnsigned*(t: PType): bool {.inline.} =
t.skipTypes(abstractInst).kind in {tyChar, tyUInt..tyUInt64}
t.skipTypes(abstractInst + {tyEnum}).kind in {tyChar, tyUInt..tyUInt64}

proc getOrdValue*(n: PNode; onError = high(Int128)): Int128 =
let k =
Expand Down
26 changes: 3 additions & 23 deletions compiler/backend/ccgtypes.nim
Original file line number Diff line number Diff line change
Expand Up @@ -129,18 +129,8 @@ proc mapType(conf: ConfigRef; typ: PType; kind: TSymKind): TCTypeKind =
doAssert typ.isResolvedUserTypeClass
return mapType(conf, typ.lastSon, kind)
of tyGenericBody, tyGenericInst, tyGenericParam, tyDistinct, tyOrdinal,
tyTypeDesc, tyAlias, tySink, tyInferred:
tyTypeDesc, tyAlias, tySink, tyInferred, tyEnum:
result = mapType(conf, lastSon(typ), kind)
of tyEnum:
if firstOrd(conf, typ) < 0:
result = ctInt32
else:
case int(getSize(conf, typ))
of 1: result = ctUInt8
of 2: result = ctUInt16
of 4: result = ctInt32
of 8: result = ctInt64
else: result = ctInt32
of tyRange: result = mapType(conf, typ[0], kind)
of tyPtr, tyVar, tyLent, tyRef:
var base = skipTypes(typ.lastSon, typedescInst)
Expand Down Expand Up @@ -661,18 +651,8 @@ proc getTypeDescAux(m: BModule, origTyp: PType, check: var IntSet; kind: TSymKin
result = getTypeName(m, origTyp, sig)
if not (sfImportc in t.sym.flags and t.sym.magic == mNone):
m.typeCache[sig] = result
var size: int
if firstOrd(m.config, t) < 0:
m.s[cfsTypes].addf("typedef NI32 $1;$n", [result])
size = 4
else:
size = int(getSize(m.config, t))
case size
of 1: m.s[cfsTypes].addf("typedef NU8 $1;$n", [result])
of 2: m.s[cfsTypes].addf("typedef NU16 $1;$n", [result])
of 4: m.s[cfsTypes].addf("typedef NI32 $1;$n", [result])
of 8: m.s[cfsTypes].addf("typedef NI64 $1;$n", [result])
else: internalError(m.config, t.sym.info, "getTypeDescAux: enum")
m.s[cfsTypes].addf("typedef $1 $2;$n",
[getTypeDescAux(m, t.lastSon, check, skVar), result])
when false:
let owner = hashOwner(t.sym)
if not gDebugInfo.hasEnum(t.sym.name.s, t.sym.info.line, owner):
Expand Down
8 changes: 4 additions & 4 deletions compiler/backend/cgirgen.nim
Original file line number Diff line number Diff line change
Expand Up @@ -262,8 +262,8 @@ proc translateLit*(val: PNode): CgNode =
case val.kind
of nkIntLiterals:
# use the type for deciding what whether it's a signed or unsigned value
case val.typ.skipTypes(abstractRange).kind
of tyInt..tyInt64, tyEnum, tyBool:
case val.typ.skipTypes(abstractRange + {tyEnum}).kind
of tyInt..tyInt64, tyBool:
node(cnkIntLit, intVal, val.intVal)
of tyUInt..tyUInt64, tyChar:
node(cnkUIntLit, intVal, val.intVal)
Expand Down Expand Up @@ -713,11 +713,11 @@ proc tbRegion(tree: TreeWithSource, cl: var TranslateCl, prev: sink Values,
cr: var TreeCursor): CgNode

proc newIntLit(val: Int128, t: PType): CgNode =
case t.skipTypes(abstractVarRange).kind
case t.skipTypes(abstractVarRange + {tyEnum}).kind
of tyUInt..tyUInt64, tyChar:
CgNode(kind: cnkUIntLit, info: unknownLineInfo, typ: t,
intVal: cast[BiggestInt](toUInt(val)))
of tyInt..tyInt64, tyBool, tyEnum:
of tyInt..tyInt64, tyBool:
CgNode(kind: cnkIntLit, info: unknownLineInfo, typ: t,
intVal: toInt(val))
else:
Expand Down
4 changes: 2 additions & 2 deletions compiler/backend/jsgen.nim
Original file line number Diff line number Diff line change
Expand Up @@ -210,7 +210,7 @@ proc mapType(typ: PType; indirect = false): TJSTypeKind =
of tyRange, tyDistinct, tyOrdinal, tyProxy, tyLent:
# tyLent is no-op as JS has pass-by-reference semantics
result = mapType(t[0], indirect)
of tyInt..tyInt64, tyUInt..tyUInt64, tyEnum, tyChar: result = etyInt
of tyInt..tyInt64, tyUInt..tyUInt64, tyChar: result = etyInt
of tyBool: result = etyBool
of tyFloat..tyFloat128: result = etyFloat
of tySet: result = etyObject # map a set to a table
Expand All @@ -224,7 +224,7 @@ proc mapType(typ: PType; indirect = false): TJSTypeKind =
tyAnd, tyOr, tyNot, tyAnything, tyVoid:
result = etyNone
of tyGenericInst, tyInferred, tyAlias, tyUserTypeClass, tyUserTypeClassInst,
tySink:
tySink, tyEnum:
result = mapType(typ.lastSon, indirect)
of tyStatic:
if t.n != nil: result = mapType(t.lastSon, indirect)
Expand Down
31 changes: 31 additions & 0 deletions compiler/sem/semtypes.nim
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,10 @@ proc semEnum(c: PContext, n: PNode, prev: PType): PType =
localReport(c.config, n[0], reportAst(rsemIllformedAst, n[0]))

rawAddSon(result, nil) # base type; always nil
# add a preliminary storage type such that the enum can already be
# used in its own definition
rawAddSon(result, getSysType(c.graph, n.info, tyInt))

let isPure = result.sym != nil and sfPure in result.sym.flags
var symbols: TStrTable
if isPure: initStrTable(symbols)
Expand Down Expand Up @@ -143,6 +147,33 @@ proc semEnum(c: PContext, n: PNode, prev: PType): PType =
wrongRedefinition(c, e.info, e, conflict)

inc(counter)

# now that we know the full value range, select the correct storage type
# of the enum:
let tk =
if firstOrd(c.config, result) < Zero:
tyInt32 # use signed int32
elif result.size != szUncomputedSize:
# use the manually specified size for selecting the storage type
let s = result.size
if s <= 1: tyUInt8
elif s <= 2: tyUInt16
elif s <= 4: tyInt32
elif s <= 8: tyInt64
else: unreachable()
else:
let lastOrd = lastOrd(c.config, result)
if lastOrd < (1 shl 8):
tyUInt8
elif lastOrd < (1 shl 16):
tyUInt16
elif lastOrd < (BiggestInt(1) shl 32):
tyInt32
else:
tyInt64

result[1] = getSysType(c.graph, n.info, tk)

if isPure and sfExported in result.sym.flags:
addPureEnum(c, LazySym(sym: result.sym))

Expand Down
22 changes: 5 additions & 17 deletions compiler/sem/sizealignoffsetimpl.nim
Original file line number Diff line number Diff line change
Expand Up @@ -277,23 +277,11 @@ proc computeSizeAlign(conf: ConfigRef; typ: PType) =
typ.align = base.align

of tyEnum:
if firstOrd(conf, typ) < Zero:
typ.size = 4 # use signed int32
typ.align = 4
else:
let lastOrd = toInt64(lastOrd(conf, typ)) # BUGFIX: use lastOrd!
if lastOrd < `shl`(1, 8):
typ.size = 1
typ.align = 1
elif lastOrd < `shl`(1, 16):
typ.size = 2
typ.align = 2
elif lastOrd < `shl`(BiggestInt(1), 32):
typ.size = 4
typ.align = 4
else:
typ.size = 8
typ.align = int16(conf.floatInt64Align)
let base = typ.lastSon
computeSizeAlign(conf, base)
# use the size and alignment of the underlying type:
typ.size = base.size
typ.align = base.align
of tySet:
if typ[0].kind == tyGenericParam:
typ.size = szUncomputedSize
Expand Down
11 changes: 8 additions & 3 deletions compiler/vm/vmcompilerserdes.nim
Original file line number Diff line number Diff line change
Expand Up @@ -272,14 +272,19 @@ proc deserialize(c: TCtx, m: VmMemoryRegion, vt: PVmType, formal, t: PType, info
of akInt: readUInt(m)
of akDiscriminator: readDiscriminant(m, vt.numBits)
else: unreachable()
setResult(nkUIntLit, intVal, i)
of tyBool, tyEnum, tyInt..tyInt64:
result = newIntTypeNode(i, formal)
result.info = info
of tyBool, tyInt..tyInt64:
let i =
case vt.kind
of akInt: signExtended(readIntBits(m), BiggestInt(s))
of akDiscriminator: readDiscriminant(m, vt.numBits)
else: unreachable()
setResult(nkIntLit, intVal, i)
result = newIntTypeNode(i, formal)
result.info = info
of tyEnum:
# the value is stored as the enum's underlying type
result = deserialize(c, m, vt, formal, t.lastSon, info)
of tyFloat32:
assert vt.kind == akFloat
setResult(nkFloat32Lit, floatVal, readFloat32(m))
Expand Down
22 changes: 12 additions & 10 deletions compiler/vm/vmgen.nim
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ type
val: TRegister ## the register holding the loaded value

const
IrrelevantTypes = abstractInst + {tyStatic} - {tyTypeDesc}
IrrelevantTypes = abstractInst + {tyStatic, tyEnum} - {tyTypeDesc}
## the set of types that are not relevant to the VM. ``tyTypeDesc``, while
## not relevant right now, is likely going to be in the future.

Expand Down Expand Up @@ -333,8 +333,8 @@ proc patch(c: var TCtx, p: TPosition) =
TInstrType(diff+wordExcess) shl regBxShift).TInstr

proc getSlotKind(t: PType): TSlotKind =
case t.skipTypes(abstractRange-{tyTypeDesc}).kind
of tyBool, tyChar, tyEnum, tyOrdinal, tyInt..tyInt64, tyUInt..tyUInt64:
case t.skipTypes(IrrelevantTypes+{tyRange}).kind
of tyBool, tyChar, tyInt..tyInt64, tyUInt..tyUInt64:
slotTempInt
of tyString, tyCstring:
slotTempStr
Expand Down Expand Up @@ -1207,7 +1207,7 @@ proc genNumberConv(c: var TCtx, info: CgNode, dest, src: TRegister,
## numeric types.
const
Floats = {tyFloat..tyFloat64}
Signed = {tyInt..tyInt64, tyEnum}
Signed = {tyInt..tyInt64}
Unsigned = {tyUInt..tyUInt64, tyChar, tyBool}

template payload(op: NumericConvKind): uint16 =
Expand Down Expand Up @@ -1291,10 +1291,12 @@ proc genConv(c: var TCtx; n, arg: CgNode; dest: var TDest) =

proc genToStr(c: var TCtx, n, arg: CgNode, dest: var TDest) =
# TODO: don't use ``opcConv`` for to-string conversions
# don't skip enum types
const Skip = IrrelevantTypes - {tyEnum}
prepare(c, dest, n, n.typ)
let tmp = c.genx(arg)
c.gABx(n, opcConv, dest, c.genTypeInfo(n.typ.skipTypes(IrrelevantTypes)))
c.gABx(n, opcConv, tmp, c.genTypeInfo(arg.typ.skipTypes(IrrelevantTypes)))
c.gABx(n, opcConv, dest, c.genTypeInfo(n.typ.skipTypes(Skip)))
c.gABx(n, opcConv, tmp, c.genTypeInfo(arg.typ.skipTypes(Skip)))
c.freeTemp(tmp)

proc genObjConv(c: var TCtx, n: CgNode, dest: var TDest) =
Expand Down Expand Up @@ -1513,17 +1515,17 @@ proc genSetElem(c: var TCtx, n: CgNode, typ: PType): TRegister {.inline.} =
genSetElem(c, n, first)

func fitsRegister(t: PType): bool =
let st = t.skipTypes(abstractInst + {tyStatic} - {tyTypeDesc})
st.kind in { tyRange, tyEnum, tyBool, tyInt..tyUInt64, tyChar, tyPtr, tyPointer} or
let st = t.skipTypes(IrrelevantTypes + {tyRange})
st.kind in { tyBool, tyInt..tyUInt64, tyChar, tyPtr, tyPointer} or
(st.sym != nil and st.sym.magic == mPNimrodNode) # NimNode goes into register too

proc ldNullOpcode(t: PType): TOpcode =
assert t != nil
if fitsRegister(t): opcLdNullReg else: opcLdNull

proc whichAsgnOpc(n: CgNode; requiresCopy = true): TOpcode =
case n.typ.skipTypes(abstractRange-{tyTypeDesc}).kind
of tyBool, tyChar, tyEnum, tyOrdinal, tyInt..tyInt64, tyUInt..tyUInt64:
case n.typ.skipTypes(IrrelevantTypes + {tyRange}).kind
of tyBool, tyChar, tyInt..tyInt64, tyUInt..tyUInt64:
opcAsgnInt
of tyFloat..tyFloat128:
opcAsgnFloat
Expand Down
15 changes: 2 additions & 13 deletions compiler/vm/vmtypegen.nim
Original file line number Diff line number Diff line change
Expand Up @@ -130,19 +130,8 @@ func getAtomicType(cache: TypeInfoCache, conf: ConfigRef, t: PType): Option[PVmT
if t.sym != nil and t.sym.magic == mPNimrodNode: cache.nodeType
else: nil
of tyEnum:
# This mirrors the logic for `tyEnum` in `types.getSize`
{.cast(noSideEffect).}: # cast away the reporting related side effects
# of `firstOrd`/`lastOrd`
if firstOrd(conf, t) < Zero:
cache.intTypes[tyInt32]
else:
let lastOrd = toInt64(lastOrd(conf, t)) # BUGFIX: use lastOrd!
let tk =
if lastOrd < (1 shl 8): tyUInt8
elif lastOrd < (1 shl 16): tyUInt16
elif lastOrd < (BiggestInt(1) shl 32): tyUInt32
else: tyUInt64
cache.uintTypes[tk]
# use the underlying type:
getAtomicType(cache, conf, t.lastSon).get()
else:
nil

Expand Down
65 changes: 65 additions & 0 deletions tests/lang_defs/enum/tstored_as_unsigned.nim
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
discard """
targets: "c js vm"
description: "
Ensure that enum values stored as usigned integers are properly read as
such
"
"""

type
EnumA = enum
enuA_a = 0
enuA_b = high(uint8)

EnumB = enum
enuB_a = 0
enuB_b = high(uint16)

doAssert sizeof(EnumA) == sizeof(uint8)
doAssert sizeof(EnumB) == sizeof(uint16)

proc test() =
# globals could interfere with the test, so use a procedure in order to
# ensure that locals are used. In addition, assign the enum values to
# variables so that the ``ord`` calls are not folded away
var val1 = enuA_b
doAssert ord(val1) == int high(uint8)

var val2 = enuB_b
doAssert ord(val2) == int high(uint16)

# the VM has to narrow the value after loading it from a memory
# location, so also test that case by accessing an array

var arr1 = [enuA_b]
doAssert ord(arr1[0]) == int high(uint8)

var arr2 = [enuB_b]
doAssert ord(arr2[0]) == int high(uint16)

test()

block compile_time_run_time_boundary:
# make sure that enum values stored as unsigned integers properly cross the
# compile-time/run-time boundary
proc get[T](): T = high(T)

block uint8_value:
const
folded = (enuA_b,) # the VM is not used
computed = (get[EnumA](),) # ``vmcompilerserdes`` is used

var val = folded
doAssert val[0] == enuA_b
val = computed
doAssert val[0] == enuA_b

block uint16_value:
const
folded = (enuB_b,) # the VM is not used
computed = (get[EnumB](),) # ``vmcompilerserdes`` is used

var val = folded
doAssert val[0] == enuB_b
val = computed
doAssert val[0] == enuB_b

0 comments on commit f5fd262

Please sign in to comment.