From f5fd262cd3be554b97ee6019051e8b58f08947e8 Mon Sep 17 00:00:00 2001
From: zerbina <100542850+zerbina@users.noreply.github.com>
Date: Sun, 13 Aug 2023 15:40:59 +0200
Subject: [PATCH] sem: decide storage type for enums on definition (#839)

## Summary

Centralize the decision about the underlying storage type for enum
values in semantic analysis. Since the decision now happens in a single
place instead of being scattered across the compiler, it becomes
significantly easier to change the enum storage behaviour later on.

In addition, this fixes two bugs with 1- and 2-byte enum values (which
use unsigned integer storage) whose most significant bit is set and
that affected code running in the VM:
- loading the values from memory resulted in invalid values
- storing them inside aggregate `const`s resulted in them having
  invalid values at run-time (this one affected only the VM and JS
  backends)

## Details

The key changes are:
- `tyEnum` now stores the storage type in its second type slot, meaning
  that `skipTypes(tyEnum)` yields the storage type
- size and alignment computation redirects to the storage type
- `isUnsigned` considers a `tyEnum`'s storage type
- the code generators use the storage type provided by `tyEnum` instead
  of deciding it themselves

Using the second type slot of `tyEnum` keeps both forward and backward
compatibility: it's possible that some of the current code still
depends on an enum type's first slot always being `nil`, and in the
future the first slot could be used to store the enum's base type.

The storage type is set directly on enum definition (`semEnum`). Before
the enum fields are analyzed, a preliminary storage type (`tyInt`) is
added to the enum; this allows the enum's values to be used *before*
the enum type is fully produced. Once the full value range is known,
the correct storage type is selected.

Choosing the storage type happens in the same way as it previously did
in the C code generator (`ccgtypes`). Apart from the information now
already being available during semantic analysis, this only affects the
VM target, where non-negative enum values requiring 32 or 64 bits of
storage were previously stored as unsigned integers (they're now stored
as *signed* integers, in line with the C target).

For the VM code generator, `tyEnum` is added to the `IrrelevantTypes`
set, meaning that the code generator now transparently treats all enum
types as their storage type, with an exception being made for the
to-string operation.
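To make the selection rule easier to follow, here is a small
illustration (not part of the patch) of which storage type `semEnum`
now picks for various value ranges. The type names are invented for the
example; the `sizeof` values follow from the storage types listed in
the `semtypes.nim` hunk below:

```nim
type
  Small = enum      # last value fits in 8 bits  -> unsigned 8-bit storage
    smallA, smallB
  Medium = enum     # last value needs 16 bits   -> unsigned 16-bit storage
    medA = 0, medB = 60_000
  Wide = enum       # last value needs 32 bits   -> signed 32-bit storage
    wideA = 0, wideB = 100_000
  Negative = enum   # negative first value       -> signed 32-bit storage
    negA = -1, negB = 0

# size and alignment now redirect to the selected storage type
doAssert sizeof(Small)    == 1
doAssert sizeof(Medium)   == 2
doAssert sizeof(Wide)     == 4
doAssert sizeof(Negative) == 4
```

With this change, the VM also stores `Wide`-like values as signed
integers, matching the C target.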
---
 compiler/ast/ast.nim                         |  7 ++-
 compiler/ast/ast_types.nim                   |  3 +
 compiler/ast/types.nim                       |  2 +-
 compiler/backend/ccgtypes.nim                | 26 +-------
 compiler/backend/cgirgen.nim                 |  8 +--
 compiler/backend/jsgen.nim                   |  4 +-
 compiler/sem/semtypes.nim                    | 31 ++++++++++
 compiler/sem/sizealignoffsetimpl.nim         | 22 ++-----
 compiler/vm/vmcompilerserdes.nim             | 11 +++-
 compiler/vm/vmgen.nim                        | 22 ++++---
 compiler/vm/vmtypegen.nim                    | 15 +----
 tests/lang_defs/enum/tstored_as_unsigned.nim | 65 ++++++++++++++++++++
 12 files changed, 142 insertions(+), 74 deletions(-)
 create mode 100644 tests/lang_defs/enum/tstored_as_unsigned.nim

diff --git a/compiler/ast/ast.nim b/compiler/ast/ast.nim
index 54bbe3187b2..d2f9c26d74a 100644
--- a/compiler/ast/ast.nim
+++ b/compiler/ast/ast.nim
@@ -255,7 +255,12 @@ proc newIntTypeNode*(intVal: BiggestInt, typ: PType): PNode =
   of tyUInt16: result = newNode(nkUInt16Lit)
   of tyUInt32: result = newNode(nkUInt32Lit)
   of tyUInt64: result = newNode(nkUInt64Lit)
-  of tyBool, tyEnum:
+  of tyEnum:
+    # XXX: the kind for the underlying type should be used here, but too much
+    #      code still relies on literal enum values being represented as
+    #      exactly ``nkIntLit``
+    result = newNode(nkIntLit)
+  of tyBool:
     # XXX: does this really need to be the kind nkIntLit?
     result = newNode(nkIntLit)
   of tyStatic: # that's a pre-existing bug, will fix in another PR
diff --git a/compiler/ast/ast_types.nim b/compiler/ast/ast_types.nim
index eec5f035de1..7f91375e1df 100644
--- a/compiler/ast/ast_types.nim
+++ b/compiler/ast/ast_types.nim
@@ -475,6 +475,9 @@ type
     tyGenericParam, ## ``a`` in the above patterns
     tyDistinct,
     tyEnum,
+      ## enum type. `base` stores the base type (currently always nil) and
+      ## `lastSon` the storage type (also referred to as "underlying type"),
+      ## which is the type used for the in-memory representation
     tyOrdinal, ## integer types (including enums and boolean)
     tyArray,
     tyObject,
diff --git a/compiler/ast/types.nim b/compiler/ast/types.nim
index 6b61c18681e..e79cd653662 100644
--- a/compiler/ast/types.nim
+++ b/compiler/ast/types.nim
@@ -100,7 +100,7 @@ proc isPureObject*(typ: PType): bool =
   result = t.sym != nil and sfPure in t.sym.flags
 
 func isUnsigned*(t: PType): bool {.inline.} =
-  t.skipTypes(abstractInst).kind in {tyChar, tyUInt..tyUInt64}
+  t.skipTypes(abstractInst + {tyEnum}).kind in {tyChar, tyUInt..tyUInt64}
 
 proc getOrdValue*(n: PNode; onError = high(Int128)): Int128 =
   let k =
diff --git a/compiler/backend/ccgtypes.nim b/compiler/backend/ccgtypes.nim
index 49d123e4c82..fe31f776f46 100644
--- a/compiler/backend/ccgtypes.nim
+++ b/compiler/backend/ccgtypes.nim
@@ -129,18 +129,8 @@ proc mapType(conf: ConfigRef; typ: PType; kind: TSymKind): TCTypeKind =
     doAssert typ.isResolvedUserTypeClass
     return mapType(conf, typ.lastSon, kind)
   of tyGenericBody, tyGenericInst, tyGenericParam, tyDistinct, tyOrdinal,
-     tyTypeDesc, tyAlias, tySink, tyInferred:
+     tyTypeDesc, tyAlias, tySink, tyInferred, tyEnum:
     result = mapType(conf, lastSon(typ), kind)
-  of tyEnum:
-    if firstOrd(conf, typ) < 0:
-      result = ctInt32
-    else:
-      case int(getSize(conf, typ))
-      of 1: result = ctUInt8
-      of 2: result = ctUInt16
-      of 4: result = ctInt32
-      of 8: result = ctInt64
-      else: result = ctInt32
   of tyRange: result = mapType(conf, typ[0], kind)
   of tyPtr, tyVar, tyLent, tyRef:
     var base = skipTypes(typ.lastSon, typedescInst)
@@ -661,18 +651,8 @@ proc getTypeDescAux(m: BModule, origTyp: PType, check: var IntSet; kind: TSymKin
       result = getTypeName(m, origTyp, sig)
       if not (sfImportc in t.sym.flags and t.sym.magic == mNone):
        m.typeCache[sig] = result
-        var size: int
-        if firstOrd(m.config, t) < 0:
-          m.s[cfsTypes].addf("typedef NI32 $1;$n", [result])
-          size = 4
-        else:
-          size = int(getSize(m.config, t))
-          case size
-          of 1: m.s[cfsTypes].addf("typedef NU8 $1;$n", [result])
-          of 2: m.s[cfsTypes].addf("typedef NU16 $1;$n", [result])
-          of 4: m.s[cfsTypes].addf("typedef NI32 $1;$n", [result])
-          of 8: m.s[cfsTypes].addf("typedef NI64 $1;$n", [result])
-          else: internalError(m.config, t.sym.info, "getTypeDescAux: enum")
+        m.s[cfsTypes].addf("typedef $1 $2;$n",
+                           [getTypeDescAux(m, t.lastSon, check, skVar), result])
       when false:
         let owner = hashOwner(t.sym)
         if not gDebugInfo.hasEnum(t.sym.name.s, t.sym.info.line, owner):
diff --git a/compiler/backend/cgirgen.nim b/compiler/backend/cgirgen.nim
index 139c54b3607..902e8c0981a 100644
--- a/compiler/backend/cgirgen.nim
+++ b/compiler/backend/cgirgen.nim
@@ -262,8 +262,8 @@ proc translateLit*(val: PNode): CgNode =
   case val.kind
   of nkIntLiterals:
     # use the type for deciding what whether it's a signed or unsigned value
-    case val.typ.skipTypes(abstractRange).kind
-    of tyInt..tyInt64, tyEnum, tyBool:
+    case val.typ.skipTypes(abstractRange + {tyEnum}).kind
+    of tyInt..tyInt64, tyBool:
       node(cnkIntLit, intVal, val.intVal)
     of tyUInt..tyUInt64, tyChar:
       node(cnkUIntLit, intVal, val.intVal)
@@ -713,11 +713,11 @@ proc tbRegion(tree: TreeWithSource, cl: var TranslateCl, prev: sink Values,
               cr: var TreeCursor): CgNode
 
 proc newIntLit(val: Int128, t: PType): CgNode =
-  case t.skipTypes(abstractVarRange).kind
+  case t.skipTypes(abstractVarRange + {tyEnum}).kind
   of tyUInt..tyUInt64, tyChar:
     CgNode(kind: cnkUIntLit, info: unknownLineInfo, typ: t,
            intVal: cast[BiggestInt](toUInt(val)))
-  of tyInt..tyInt64, tyBool, tyEnum:
+  of tyInt..tyInt64, tyBool:
     CgNode(kind: cnkIntLit, info: unknownLineInfo, typ: t,
            intVal: toInt(val))
   else:
diff --git a/compiler/backend/jsgen.nim b/compiler/backend/jsgen.nim
index 43b871a119d..bcd609dcd70 100644
--- a/compiler/backend/jsgen.nim
+++ b/compiler/backend/jsgen.nim
@@ -210,7 +210,7 @@ proc mapType(typ: PType; indirect = false): TJSTypeKind =
   of tyRange, tyDistinct, tyOrdinal, tyProxy, tyLent:
     # tyLent is no-op as JS has pass-by-reference semantics
     result = mapType(t[0], indirect)
-  of tyInt..tyInt64, tyUInt..tyUInt64, tyEnum, tyChar: result = etyInt
+  of tyInt..tyInt64, tyUInt..tyUInt64, tyChar: result = etyInt
   of tyBool: result = etyBool
   of tyFloat..tyFloat128: result = etyFloat
   of tySet: result = etyObject # map a set to a table
@@ -224,7 +224,7 @@ proc mapType(typ: PType; indirect = false): TJSTypeKind =
     tyAnd, tyOr, tyNot, tyAnything, tyVoid:
     result = etyNone
   of tyGenericInst, tyInferred, tyAlias, tyUserTypeClass, tyUserTypeClassInst,
-     tySink:
+     tySink, tyEnum:
     result = mapType(typ.lastSon, indirect)
   of tyStatic:
     if t.n != nil: result = mapType(t.lastSon, indirect)
diff --git a/compiler/sem/semtypes.nim b/compiler/sem/semtypes.nim
index f33674fc229..35fe2761099 100644
--- a/compiler/sem/semtypes.nim
+++ b/compiler/sem/semtypes.nim
@@ -41,6 +41,10 @@ proc semEnum(c: PContext, n: PNode, prev: PType): PType =
       localReport(c.config, n[0], reportAst(rsemIllformedAst, n[0]))
 
   rawAddSon(result, nil) # base type; always nil
+  # add a preliminary storage type such that the enum can already be
+  # used in its own definition
+  rawAddSon(result, getSysType(c.graph, n.info, tyInt))
+
   let isPure = result.sym != nil and sfPure in result.sym.flags
   var symbols: TStrTable
  if isPure: initStrTable(symbols)
@@ -143,6 +147,33 @@ proc semEnum(c: PContext, n: PNode, prev: PType): PType =
        wrongRedefinition(c, e.info, e, conflict)
     inc(counter)
+
+  # now that we know the full value range, select the correct storage type
+  # of the enum:
+  let tk =
+    if firstOrd(c.config, result) < Zero:
+      tyInt32 # use signed int32
+    elif result.size != szUncomputedSize:
+      # use the manually specified size for selecting the storage type
+      let s = result.size
+      if s <= 1: tyUInt8
+      elif s <= 2: tyUInt16
+      elif s <= 4: tyInt32
+      elif s <= 8: tyInt64
+      else: unreachable()
+    else:
+      let lastOrd = lastOrd(c.config, result)
+      if lastOrd < (1 shl 8):
+        tyUInt8
+      elif lastOrd < (1 shl 16):
+        tyUInt16
+      elif lastOrd < (BiggestInt(1) shl 32):
+        tyInt32
+      else:
+        tyInt64
+
+  result[1] = getSysType(c.graph, n.info, tk)
+
   if isPure and sfExported in result.sym.flags:
     addPureEnum(c, LazySym(sym: result.sym))
 
diff --git a/compiler/sem/sizealignoffsetimpl.nim b/compiler/sem/sizealignoffsetimpl.nim
index 7ca42bbe531..2795be25539 100644
--- a/compiler/sem/sizealignoffsetimpl.nim
+++ b/compiler/sem/sizealignoffsetimpl.nim
@@ -277,23 +277,11 @@ proc computeSizeAlign(conf: ConfigRef; typ: PType) =
     typ.align = base.align
   of tyEnum:
-    if firstOrd(conf, typ) < Zero:
-      typ.size = 4 # use signed int32
-      typ.align = 4
-    else:
-      let lastOrd = toInt64(lastOrd(conf, typ)) # BUGFIX: use lastOrd!
-      if lastOrd < `shl`(1, 8):
-        typ.size = 1
-        typ.align = 1
-      elif lastOrd < `shl`(1, 16):
-        typ.size = 2
-        typ.align = 2
-      elif lastOrd < `shl`(BiggestInt(1), 32):
-        typ.size = 4
-        typ.align = 4
-      else:
-        typ.size = 8
-        typ.align = int16(conf.floatInt64Align)
+    let base = typ.lastSon
+    computeSizeAlign(conf, base)
+    # use the size and alignment of the underlying type:
+    typ.size = base.size
+    typ.align = base.align
   of tySet:
     if typ[0].kind == tyGenericParam:
       typ.size = szUncomputedSize
diff --git a/compiler/vm/vmcompilerserdes.nim b/compiler/vm/vmcompilerserdes.nim
index 3c6f0015911..531c1f5fa92 100644
--- a/compiler/vm/vmcompilerserdes.nim
+++ b/compiler/vm/vmcompilerserdes.nim
@@ -272,14 +272,19 @@ proc deserialize(c: TCtx, m: VmMemoryRegion, vt: PVmType, formal, t: PType, info
       of akInt: readUInt(m)
       of akDiscriminator: readDiscriminant(m, vt.numBits)
       else: unreachable()
-    setResult(nkUIntLit, intVal, i)
-  of tyBool, tyEnum, tyInt..tyInt64:
+    result = newIntTypeNode(i, formal)
+    result.info = info
+  of tyBool, tyInt..tyInt64:
     let i =
       case vt.kind
       of akInt: signExtended(readIntBits(m), BiggestInt(s))
      of akDiscriminator: readDiscriminant(m, vt.numBits)
       else: unreachable()
-    setResult(nkIntLit, intVal, i)
+    result = newIntTypeNode(i, formal)
+    result.info = info
+  of tyEnum:
+    # the value is stored as the enum's underlying type
+    result = deserialize(c, m, vt, formal, t.lastSon, info)
   of tyFloat32:
     assert vt.kind == akFloat
     setResult(nkFloat32Lit, floatVal, readFloat32(m))
diff --git a/compiler/vm/vmgen.nim b/compiler/vm/vmgen.nim
index d7bda647821..79d54c715e5 100644
--- a/compiler/vm/vmgen.nim
+++ b/compiler/vm/vmgen.nim
@@ -98,7 +98,7 @@ type
     val: TRegister ## the register holding the loaded value
 
 const
-  IrrelevantTypes = abstractInst + {tyStatic} - {tyTypeDesc}
+  IrrelevantTypes = abstractInst + {tyStatic, tyEnum} - {tyTypeDesc}
     ## the set of types that are not relevant to the VM. ``tyTypeDesc``, while
     ## not relevant right now, is likely going to be in the future.
@@ -333,8 +333,8 @@ proc patch(c: var TCtx, p: TPosition) =
       TInstrType(diff+wordExcess) shl regBxShift).TInstr
 
 proc getSlotKind(t: PType): TSlotKind =
-  case t.skipTypes(abstractRange-{tyTypeDesc}).kind
-  of tyBool, tyChar, tyEnum, tyOrdinal, tyInt..tyInt64, tyUInt..tyUInt64:
+  case t.skipTypes(IrrelevantTypes+{tyRange}).kind
+  of tyBool, tyChar, tyInt..tyInt64, tyUInt..tyUInt64:
     slotTempInt
   of tyString, tyCstring:
     slotTempStr
@@ -1207,7 +1207,7 @@ proc genNumberConv(c: var TCtx, info: CgNode, dest, src: TRegister,
   ## numeric types.
   const
     Floats = {tyFloat..tyFloat64}
-    Signed = {tyInt..tyInt64, tyEnum}
+    Signed = {tyInt..tyInt64}
     Unsigned = {tyUInt..tyUInt64, tyChar, tyBool}
 
   template payload(op: NumericConvKind): uint16 =
@@ -1291,10 +1291,12 @@ proc genConv(c: var TCtx; n, arg: CgNode; dest: var TDest) =
 
 proc genToStr(c: var TCtx, n, arg: CgNode, dest: var TDest) =
   # TODO: don't use ``opcConv`` for to-string conversions
+  # don't skip enum types
+  const Skip = IrrelevantTypes - {tyEnum}
   prepare(c, dest, n, n.typ)
   let tmp = c.genx(arg)
-  c.gABx(n, opcConv, dest, c.genTypeInfo(n.typ.skipTypes(IrrelevantTypes)))
-  c.gABx(n, opcConv, tmp, c.genTypeInfo(arg.typ.skipTypes(IrrelevantTypes)))
+  c.gABx(n, opcConv, dest, c.genTypeInfo(n.typ.skipTypes(Skip)))
+  c.gABx(n, opcConv, tmp, c.genTypeInfo(arg.typ.skipTypes(Skip)))
   c.freeTemp(tmp)
 
 proc genObjConv(c: var TCtx, n: CgNode, dest: var TDest) =
@@ -1513,8 +1515,8 @@ proc genSetElem(c: var TCtx, n: CgNode, typ: PType): TRegister {.inline.} =
   genSetElem(c, n, first)
 
 func fitsRegister(t: PType): bool =
-  let st = t.skipTypes(abstractInst + {tyStatic} - {tyTypeDesc})
-  st.kind in { tyRange, tyEnum, tyBool, tyInt..tyUInt64, tyChar, tyPtr, tyPointer} or
+  let st = t.skipTypes(IrrelevantTypes + {tyRange})
+  st.kind in { tyBool, tyInt..tyUInt64, tyChar, tyPtr, tyPointer} or
     (st.sym != nil and st.sym.magic == mPNimrodNode) # NimNode goes into register too
 
 proc ldNullOpcode(t: PType): TOpcode =
@@ -1522,8 +1524,8 @@ proc ldNullOpcode(t: PType): TOpcode =
   if fitsRegister(t): opcLdNullReg else: opcLdNull
 
 proc whichAsgnOpc(n: CgNode; requiresCopy = true): TOpcode =
-  case n.typ.skipTypes(abstractRange-{tyTypeDesc}).kind
-  of tyBool, tyChar, tyEnum, tyOrdinal, tyInt..tyInt64, tyUInt..tyUInt64:
+  case n.typ.skipTypes(IrrelevantTypes + {tyRange}).kind
+  of tyBool, tyChar, tyInt..tyInt64, tyUInt..tyUInt64:
     opcAsgnInt
   of tyFloat..tyFloat128:
     opcAsgnFloat
diff --git a/compiler/vm/vmtypegen.nim b/compiler/vm/vmtypegen.nim
index db5d71bac85..9c9543519a1 100644
--- a/compiler/vm/vmtypegen.nim
+++ b/compiler/vm/vmtypegen.nim
@@ -130,19 +130,8 @@ func getAtomicType(cache: TypeInfoCache, conf: ConfigRef, t: PType): Option[PVmT
     if t.sym != nil and t.sym.magic == mPNimrodNode: cache.nodeType
     else: nil
   of tyEnum:
-    # This mirrors the logic for `tyEnum` in `types.getSize`
-    {.cast(noSideEffect).}: # cast away the reporting related side effects
-                            # of `firstOrd`/`lastOrd`
-      if firstOrd(conf, t) < Zero:
-        cache.intTypes[tyInt32]
-      else:
-        let lastOrd = toInt64(lastOrd(conf, t)) # BUGFIX: use lastOrd!
-        let tk =
-          if lastOrd < (1 shl 8): tyUInt8
-          elif lastOrd < (1 shl 16): tyUInt16
-          elif lastOrd < (BiggestInt(1) shl 32): tyUInt32
-          else: tyUInt64
-        cache.uintTypes[tk]
+    # use the underlying type:
+    getAtomicType(cache, conf, t.lastSon).get()
   else:
     nil
 
diff --git a/tests/lang_defs/enum/tstored_as_unsigned.nim b/tests/lang_defs/enum/tstored_as_unsigned.nim
new file mode 100644
index 00000000000..ed10463e36d
--- /dev/null
+++ b/tests/lang_defs/enum/tstored_as_unsigned.nim
@@ -0,0 +1,65 @@
+discard """
+  targets: "c js vm"
+  description: "
+    Ensure that enum values stored as unsigned integers are properly read as
+    such
+  "
+"""
+
+type
+  EnumA = enum
+    enuA_a = 0
+    enuA_b = high(uint8)
+
+  EnumB = enum
+    enuB_a = 0
+    enuB_b = high(uint16)
+
+doAssert sizeof(EnumA) == sizeof(uint8)
+doAssert sizeof(EnumB) == sizeof(uint16)
+
+proc test() =
+  # globals could interfere with the test, so use a procedure in order to
+  # ensure that locals are used. In addition, assign the enum values to
+  # variables so that the ``ord`` calls are not folded away
+  var val1 = enuA_b
+  doAssert ord(val1) == int high(uint8)
+
+  var val2 = enuB_b
+  doAssert ord(val2) == int high(uint16)
+
+  # the VM has to narrow the value after loading it from a memory
+  # location, so also test that case by accessing an array
+
+  var arr1 = [enuA_b]
+  doAssert ord(arr1[0]) == int high(uint8)
+
+  var arr2 = [enuB_b]
+  doAssert ord(arr2[0]) == int high(uint16)
+
+test()
+
+block compile_time_run_time_boundary:
+  # make sure that enum values stored as unsigned integers properly cross the
+  # compile-time/run-time boundary
+  proc get[T](): T = high(T)
+
+  block uint8_value:
+    const
+      folded   = (enuA_b,)       # the VM is not used
+      computed = (get[EnumA](),) # ``vmcompilerserdes`` is used
+
+    var val = folded
+    doAssert val[0] == enuA_b
+    val = computed
+    doAssert val[0] == enuA_b
+
+  block uint16_value:
+    const
+      folded   = (enuB_b,)       # the VM is not used
+      computed = (get[EnumB](),) # ``vmcompilerserdes`` is used
+
+    var val = folded
+    doAssert val[0] == enuB_b
+    val = computed
+    doAssert val[0] == enuB_b
\ No newline at end of file