From 98359f2735a054cfaeff9d30a57632096e8006c6 Mon Sep 17 00:00:00 2001
From: Nikita Popov <npopov@redhat.com>
Date: Tue, 13 Feb 2024 10:05:22 +0100
Subject: [PATCH] [DAGCombine] Fix multi-use miscompile in load combine

The load combine replaces a number of original loads with one new
loads and also replaces the output chains of the original loads
with the output chain of the new load. This is incorrect if the
original load is retained (due to multi-use), as it may get
incorrectly reordered.

Fix this by using makeEquivalentMemoryOrdering() instead, which
will create a TokenFactor with both chains.
---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 2 +-
 llvm/test/CodeGen/X86/load-combine.ll         | 3 +--
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index d3cd9b1671e1b9..52011e593f2e0a 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -9252,7 +9252,7 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
 
   // Transfer chain users from old loads to the new load.
   for (LoadSDNode *L : Loads)
-    DAG.ReplaceAllUsesOfValueWith(SDValue(L, 1), SDValue(NewLoad.getNode(), 1));
+    DAG.makeEquivalentMemoryOrdering(L, NewLoad);
 
   if (!NeedsBswap)
     return NewLoad;
diff --git a/llvm/test/CodeGen/X86/load-combine.ll b/llvm/test/CodeGen/X86/load-combine.ll
index 7e4e11fcc75c20..b5f3e789918813 100644
--- a/llvm/test/CodeGen/X86/load-combine.ll
+++ b/llvm/test/CodeGen/X86/load-combine.ll
@@ -1283,7 +1283,6 @@ define i32 @zext_load_i32_by_i8_bswap_shl_16(ptr %arg) {
   ret i32 %tmp8
 }
 
-; FIXME: This is a miscompile.
 define i32 @pr80911_vector_load_multiuse(ptr %ptr, ptr %clobber) nounwind {
 ; CHECK-LABEL: pr80911_vector_load_multiuse:
 ; CHECK:       # %bb.0:
@@ -1299,9 +1298,9 @@ define i32 @pr80911_vector_load_multiuse(ptr %ptr, ptr %clobber) nounwind {
 ;
 ; CHECK64-LABEL: pr80911_vector_load_multiuse:
 ; CHECK64:       # %bb.0:
+; CHECK64-NEXT:    movl (%rdi), %ecx
 ; CHECK64-NEXT:    movzwl (%rdi), %eax
 ; CHECK64-NEXT:    movl $0, (%rsi)
-; CHECK64-NEXT:    movl (%rdi), %ecx
 ; CHECK64-NEXT:    movl %ecx, (%rdi)
 ; CHECK64-NEXT:    retq
   %load = load <4 x i8>, ptr %ptr, align 16