From 7db09b1002bb6da98661a19706d7bc13506ed11c Mon Sep 17 00:00:00 2001
From: zerbina <100542850+zerbina@users.noreply.github.com>
Date: Tue, 17 Oct 2023 21:13:10 +0200
Subject: [PATCH] internal: remove the double-meaning of `mMove` (#968)

## Summary

Don't use a special `move` magic form for the `seq`/`string` sink
implementation, but instead emit the required code directly. This moves
decision making out the code generators, fixes a MIR-level issue, and
is a preparation for MIR improvements.

## Details

For the `=sink` implementation of `seq`s and `string`s,
`liftdestructors` emitted a special `mMove` call that the C code
generator then expanded into the C equivalent of:

```nim
if not samePayload(a, b):
  =destroy(a)
a = b
```

The problem is with how the generated `move` call looked:
`move(a, b, =destroy(a))`. The `CgNode` IR for the call has to have the
same shape, but this required special-casing in `mirgen` and the
resulting MIR code also doesn't properly represent the actual
behaviour (the `=destroy(a)` call is not unconditionally evaluated),
which could interfere with MIR analysis.

In order to correctly represent the behaviour at the MIR level, the
`liftdestructors` lifting emits the above code directly, without
overloading the `mMove` magic. Since how the payload comparison works
is currently backend dependent, the new internal-only `mSamePayload`
magic is introduced, which the C generator then lowers into a
comparison of the payload pointers.
---
 compiler/ast/ast_types.nim       |  2 ++
 compiler/backend/ccgexprs.nim    | 17 ++++++++--------
 compiler/mir/mirgen.nim          | 35 ++------------------------------
 compiler/sem/liftdestructors.nim | 27 ++++++++++++------------
 4 files changed, 25 insertions(+), 56 deletions(-)

diff --git a/compiler/ast/ast_types.nim b/compiler/ast/ast_types.nim
index 2296bef5b97..e0f970b6bb6 100644
--- a/compiler/ast/ast_types.nim
+++ b/compiler/ast/ast_types.nim
@@ -820,6 +820,8 @@ type
     mChckRange
       ## chckRange(v, lower, upper); conversion + range check -- returns
       ## either the type-converted value or raises a defect
+    mSamePayload
+      ## returns whether both seq/string operands share the same payload
 
 # things that we can evaluate safely at compile time, even if not asked for it:
 const
diff --git a/compiler/backend/ccgexprs.nim b/compiler/backend/ccgexprs.nim
index 788c60179cc..37f85203140 100644
--- a/compiler/backend/ccgexprs.nim
+++ b/compiler/backend/ccgexprs.nim
@@ -1689,15 +1689,8 @@ proc genWasMoved(p: BProc; n: CgNode) =
 
 proc genMove(p: BProc; n: CgNode; d: var TLoc) =
   var a: TLoc
-  initLocExpr(p, n[1].skipAddr, a)
-  if n.len == 4:
-    # generated by liftdestructors:
-    var src: TLoc
-    initLocExpr(p, n[2], src)
-    linefmt(p, cpsStmts, "if ($1.p != $2.p) {", [rdLoc(a), rdLoc(src)])
-    genStmts(p, n[3])
-    linefmt(p, cpsStmts, "}$n$1.len = $2.len; $1.p = $2.p;$n", [rdLoc(a), rdLoc(src)])
-  else:
+  initLocExpr(p, n[1], a)
+  if true:
     if d.k == locNone: getTemp(p, n.typ, d)
     genAssignment(p, d, a)
     resetLoc(p, a)
@@ -1867,6 +1860,12 @@ proc genMagicExpr(p: BProc, e: CgNode, d: var TLoc, op: TMagic) =
     linefmt(p, cpsStmts, "$1 = ($2)($3);$n", [a.r, typ, rdLoc(b)])
   of mChckRange:
     genRangeChck(p, e, d)
+  of mSamePayload:
+    var a, b: TLoc
+    initLocExpr(p, e[1], a)
+    initLocExpr(p, e[2], b)
+    # compare the payloads:
+    putIntoDest(p, d, e, "($1.p == $2.p)" % [rdLoc(a), rdLoc(b)])
   else:
     when defined(debugMagics):
       echo p.prc.name.s, " ", p.prc.id, " ", p.prc.flags, " ", p.prc.ast[genericParamsPos].kind
diff --git a/compiler/mir/mirgen.nim b/compiler/mir/mirgen.nim
index 4392707c703..5fd52aab205 100644
--- a/compiler/mir/mirgen.nim
+++ b/compiler/mir/mirgen.nim
@@ -796,38 +796,6 @@ proc genMagic(c: var TCtx, n: PNode; m: TMagic): EValue =
     argBlock(c.stmts):
       chain(c): argExpr(c, n[1]) => tag(ekMutate) => name()
     magicCall(c, m, typeOrVoid(c, n.typ))
-  of mMove:
-    # there exist two different types of ``mMove`` magic calls:
-    # 1. normal move calls: ``move(x)``
-    # 2. special move calls inserted by ``liftdestructors``, used for ``seq``s
-    #    and ``string``s: ``move(dst, src, stmt)``
-    case n.len
-    of 2:
-      # the first version
-      genCall(c, n)
-    of 4:
-      # HACK: the ``stmt`` statement is not always evaluated, so treating it as
-      #       a ``void`` argument is wrong. We also can't lower the call here
-      #       already, since we don't have access to the ``seq``s
-      #       implementation details and the lowering is also only required for
-      #       the C target.
-      #       Treating the statement as a ``void`` argument only works because,
-      #       at the time of writing this comment, there are no MIR passes that
-      #       inspect code in which a special ``move`` occurs.
-      #       A more proper solution is to add a new magic (something like
-      #       ``mMoveSeq``) that then gets lowered into the expected
-      #       comparision + destructor call by a MIR pass that is only enabled
-      #       for the C target
-
-      argBlock(c.stmts):
-        chain(c): argExpr(c, n[1]) => tag(ekReassign) => name()
-        chain(c): argExpr(c, n[2]) => arg()
-
-        gen(c, n[3])
-        chain(c): genEmpty(c, n[3]) => arg()
-      magicCall(c, m, typeOrVoid(c, n.typ))
-    else:
-      unreachable()
   of mNewSeq:
     # XXX: the first parameter is actually an ``out`` parameter -- the
     #      ``ekReassign`` effect could be used
@@ -846,7 +814,8 @@ proc genMagic(c: var TCtx, n: PNode; m: TMagic): EValue =
       magicCall(c, m, typeOrVoid(c, n.typ))
     else:
       genCall(c, n)
-  of mLtI, mSubI, mLengthSeq, mLengthStr, mAccessEnv, mAccessTypeField:
+  of mNot, mLtI, mSubI, mLengthSeq, mLengthStr, mAccessEnv, mAccessTypeField,
+     mSamePayload:
     if n[0].typ == nil:
       # simple translation. None of the arguments need to be passed by lvalue
       argBlock(c.stmts):
diff --git a/compiler/sem/liftdestructors.nim b/compiler/sem/liftdestructors.nim
index b2f5d2a0ade..f8e2e54dcf7 100644
--- a/compiler/sem/liftdestructors.nim
+++ b/compiler/sem/liftdestructors.nim
@@ -515,11 +515,18 @@ proc fillSeqOp(c: var TLiftCtx; t: PType; body, x, y: PNode) =
     body.add setLenSeqCall(c, t, x, y)
     forallElements(c, t, body, x, y)
   of attachedSink:
-    let moveCall = genBuiltin(c, mMove, "move", x)
-    moveCall.add y
     doAssert t.destructor != nil
-    moveCall.add destructorCall(c, t.destructor, x)
-    body.add moveCall
+    # generate:
+    #   if not samePayload(x, y):
+    #     =destroy(x)
+    #   x = y
+    let eq = genBuiltin(c, mSamePayload, "samePayload", x)
+    eq.add y
+    eq.typ = getSysType(c.g, x.info, tyBool)
+    let nt = genBuiltin(c, mNot, "not", eq)
+    nt.typ = eq.typ
+    body.add genIf(c, nt, destructorCall(c, t.destructor, x))
+    body.add newAsgnStmt(x, y)
   of attachedDestructor:
     # destroy all elements:
     forallElements(c, t, body, x, y)
@@ -550,11 +557,7 @@ proc useSeqOrStrOp(c: var TLiftCtx; t: PType; body, x, y: PNode) =
     body.add newHookCall(c, t.assignment, x, y)
   of attachedSink:
     # we always inline the move for better performance:
-    let moveCall = genBuiltin(c, mMove, "move", x)
-    moveCall.add y
-    doAssert t.destructor != nil
-    moveCall.add destructorCall(c, t.destructor, x)
-    body.add moveCall
+    fillSeqOp(c, t, body, x, y)
     # alternatively we could do this:
     when false:
       doAssert t.asink != nil
@@ -574,11 +577,7 @@ proc fillStrOp(c: var TLiftCtx; t: PType; body, x, y: PNode) =
   of attachedAsgn, attachedDeepCopy:
     body.add callCodegenProc(c.g, "nimAsgnStrV2", c.info, genAddr(c, x), y)
   of attachedSink:
-    let moveCall = genBuiltin(c, mMove, "move", x)
-    moveCall.add y
-    doAssert t.destructor != nil
-    moveCall.add destructorCall(c, t.destructor, x)
-    body.add moveCall
+    fillSeqOp(c, t, body, x, y)
   of attachedDestructor:
     body.add genBuiltin(c, mDestroy, "destroy", x)
   of attachedTrace: