Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Miscompile in DAGCombine #80911

Closed
cbeuw opened this issue Feb 6, 2024 · 9 comments · Fixed by #81586
Closed

Miscompile in DAGCombine #80911

cbeuw opened this issue Feb 6, 2024 · 9 comments · Fixed by #81586

Comments

@cbeuw
Copy link

cbeuw commented Feb 6, 2024

Reproduction:

; ModuleID = '9897542_debug.4c1401909d89b8aa-cgu.0'
source_filename = "9897542_debug.4c1401909d89b8aa-cgu.0"
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

; Named struct types referenced by the functions below.
; PtrComponents / PtrRepr: 16-byte stack temporaries (pointer + i64, or two i64s)
; used by as_mut_slice and next to assemble a (data pointer, length) pair.
%"core::ptr::metadata::PtrComponents<[core::mem::maybe_uninit::MaybeUninit<bool>]>" = type { ptr, i64 }
%"core::ptr::metadata::PtrRepr<[core::mem::maybe_uninit::MaybeUninit<bool>]>" = type { [2 x i64] }
; IndexRange: two i64s; as_mut_slice offsets by the first and computes len = second - first.
%"core::ops::index_range::IndexRange" = type { i64, i64 }
; IntoIter<bool, 6>: an IndexRange, the 6 element bytes, and 2 trailing bytes (24 bytes copied as a unit in p1).
%"core::array::iter::IntoIter<bool, 6>" = type { %"core::ops::index_range::IndexRange", [6 x i8], [2 x i8] }
; Option<usize>: i64 at offset 0 is tested against 0 in next; i64 at offset 8 carries the index.
%"core::option::Option<usize>" = type { i64, [1 x i64] }
; Adt54: the 64-byte tuple layout followed by an i32 (the value switched on in fn13) and 3 more i32s.
%Adt54 = type { { [4 x i64], i128, [6 x i8], i8, i8, [8 x i8] }, i32, [3 x i32] }
; Adt59: wrapper around six i16 values.
%Adt59 = type { [6 x i16] }

; Vtable-shaped constant: drop_in_place pointer, 16 raw bytes (two little-endian 8s),
; then the call_once vtable shim and the lang_start closure (twice).
; No function in this listing references @vtable.0.
@vtable.0 = private unnamed_addr constant <{ ptr, [16 x i8], ptr, ptr, ptr }> <{ ptr @"_ZN4core3ptr85drop_in_place$LT$std..rt..lang_start$LT$$LP$$RP$$GT$..$u7b$$u7b$closure$u7d$$u7d$$GT$17h0fc6fafa5a71668aE", [16 x i8] c"\08\00\00\00\00\00\00\00\08\00\00\00\00\00\00\00", ptr @"_ZN4core3ops8function6FnOnce40call_once$u7b$$u7b$vtable.shim$u7d$$u7d$17he4c1df2bff0cc47aE", ptr @"_ZN3std2rt10lang_start28_$u7b$$u7b$closure$u7d$$u7d$17hee5ebfeab04283b9E", ptr @"_ZN3std2rt10lang_start28_$u7b$$u7b$closure$u7d$$u7d$17hee5ebfeab04283b9E" }>, align 8
; 16-byte constant loaded by IntoIter::next on its "range exhausted" path:
; first 8 bytes are zero (read with !range !7), second 8 bytes are undef.
@0 = private unnamed_addr constant <{ [8 x i8], [8 x i8] }> <{ [8 x i8] zeroinitializer, [8 x i8] undef }>, align 8
; The 16-character source file name.
@alloc_5ac49220fc835d33b7e580146744cd0f = private unnamed_addr constant <{ [16 x i8] }> <{ [16 x i8] c"9897542-debug.rs" }>, align 1
; Four records passed to panic_misaligned_pointer_dereference in fn13. Each holds a pointer
; to the file name plus 16 bytes: an i64 16, then two i32s (24/25/29/41 and 5).
; NOTE(review): presumably { file ptr, file len, line, column } panic locations - confirm.
@alloc_e5eed6cfbd8269f8f7433869267020a5 = private unnamed_addr constant <{ ptr, [16 x i8] }> <{ ptr @alloc_5ac49220fc835d33b7e580146744cd0f, [16 x i8] c"\10\00\00\00\00\00\00\00\18\00\00\00\05\00\00\00" }>, align 8
@alloc_40b8598efcc1c71f5a957cd65a4a1dda = private unnamed_addr constant <{ ptr, [16 x i8] }> <{ ptr @alloc_5ac49220fc835d33b7e580146744cd0f, [16 x i8] c"\10\00\00\00\00\00\00\00\19\00\00\00\05\00\00\00" }>, align 8
@alloc_c40e8a599f3a3d41612c45a675185c8e = private unnamed_addr constant <{ ptr, [16 x i8] }> <{ ptr @alloc_5ac49220fc835d33b7e580146744cd0f, [16 x i8] c"\10\00\00\00\00\00\00\00\1D\00\00\00\05\00\00\00" }>, align 8
@alloc_23cf86cb6a1d3396212d84acc0e8430c = private unnamed_addr constant <{ ptr, [16 x i8] }> <{ ptr @alloc_5ac49220fc835d33b7e580146744cd0f, [16 x i8] c"\10\00\00\00\00\00\00\00)\00\00\00\05\00\00\00" }>, align 8
; NUL-terminated printf format "%d\n" used by print_bool.
@alloc_87551382a9de3243abbfdbda2f0b586b = private unnamed_addr constant <{ [4 x i8] }> <{ [4 x i8] c"%d\0A\00" }>, align 1

; std::sys_common::backtrace::__rust_begin_short_backtrace
; Invokes the callable %f through FnOnce::call_once, then executes an empty
; inline-asm statement that clobbers memory before returning.
; Function Attrs: noinline nonlazybind uwtable
define internal void @_ZN3std10sys_common9backtrace28__rust_begin_short_backtrace17h0e4292621a5a6bc8E(ptr %f) unnamed_addr #0 {
start:
; call core::ops::function::FnOnce::call_once
  call void @_ZN4core3ops8function6FnOnce9call_once17h6c19189960566713E(ptr %f)
; empty asm with a memory clobber: emits no instructions but acts as a compiler-level memory barrier
  call void asm sideeffect "", "~{memory}"(), !srcloc !4
  ret void
}

; std::rt::lang_start::{{closure}}
; Loads the function pointer stored at %_1, runs it via __rust_begin_short_backtrace,
; then returns the i8 produced by Termination::report zero-extended to i32.
; Function Attrs: inlinehint nonlazybind uwtable
define internal i32 @"_ZN3std2rt10lang_start28_$u7b$$u7b$closure$u7d$$u7d$17hee5ebfeab04283b9E"(ptr align 8 %_1) unnamed_addr #2 {
start:
  %self = alloca i8, align 1
  %_4 = load ptr, ptr %_1, align 8, !nonnull !5, !noundef !5
; call std::sys_common::backtrace::__rust_begin_short_backtrace
  call void @_ZN3std10sys_common9backtrace28__rust_begin_short_backtrace17h0e4292621a5a6bc8E(ptr %_4)
; call <() as std::process::Termination>::report
  %0 = call i8 @"_ZN54_$LT$$LP$$RP$$u20$as$u20$std..process..Termination$GT$6report17h6aa2dda252264378E"()
; the result is spilled to a stack slot and reloaded (unoptimized codegen)
  store i8 %0, ptr %self, align 1
  %_6 = load i8, ptr %self, align 1, !noundef !5
  %_0 = zext i8 %_6 to i32
  ret i32 %_0
}

; core::ops::function::FnOnce::call_once{{vtable.shim}}
; Loads the pointer stored at %_1 and forwards it to the i32-returning call_once,
; returning that function's result unchanged.
; Function Attrs: inlinehint nonlazybind uwtable
define internal i32 @"_ZN4core3ops8function6FnOnce40call_once$u7b$$u7b$vtable.shim$u7d$$u7d$17he4c1df2bff0cc47aE"(ptr %_1) unnamed_addr #2 {
start:
; zero-sized alloca that is never read or written
  %_2 = alloca {}, align 1
  %0 = load ptr, ptr %_1, align 8, !nonnull !5, !noundef !5
; call core::ops::function::FnOnce::call_once
  %_0 = call i32 @_ZN4core3ops8function6FnOnce9call_once17h049c2d45012bed40E(ptr %0)
  ret i32 %_0
}

; core::ops::function::FnOnce::call_once
; Spills the incoming pointer %0 to a stack slot and invokes the lang_start closure
; with the address of that slot. On normal return the closure's i32 is returned;
; on unwind the landing-pad values are saved to %1 and the exception is resumed.
; Function Attrs: inlinehint nonlazybind uwtable
define internal i32 @_ZN4core3ops8function6FnOnce9call_once17h049c2d45012bed40E(ptr %0) unnamed_addr #2 personality ptr @rust_eh_personality {
start:
; %1 holds the saved { exception pointer, selector } pair between cleanup and bb3
  %1 = alloca { ptr, i32, [1 x i32] }, align 8
  %_2 = alloca {}, align 1
  %_1 = alloca ptr, align 8
  store ptr %0, ptr %_1, align 8
; invoke std::rt::lang_start::{{closure}}
  %_0 = invoke i32 @"_ZN3std2rt10lang_start28_$u7b$$u7b$closure$u7d$$u7d$17hee5ebfeab04283b9E"(ptr align 8 %_1)
          to label %bb1 unwind label %cleanup

; rebuild the { ptr, i32 } pair from the stack slot and continue unwinding
bb3:                                              ; preds = %cleanup
  %2 = load ptr, ptr %1, align 8, !noundef !5
  %3 = getelementptr inbounds i8, ptr %1, i64 8
  %4 = load i32, ptr %3, align 8, !noundef !5
  %5 = insertvalue { ptr, i32 } poison, ptr %2, 0
  %6 = insertvalue { ptr, i32 } %5, i32 %4, 1
  resume { ptr, i32 } %6

; landing pad: stash both landing-pad components in %1
cleanup:                                          ; preds = %start
  %7 = landingpad { ptr, i32 }
          cleanup
  %8 = extractvalue { ptr, i32 } %7, 0
  %9 = extractvalue { ptr, i32 } %7, 1
  store ptr %8, ptr %1, align 8
  %10 = getelementptr inbounds i8, ptr %1, i64 8
  store i32 %9, ptr %10, align 8
  br label %bb3

bb1:                                              ; preds = %start
  ret i32 %_0
}

; core::ops::function::FnOnce::call_once
; Treats %_1 as a function taking no arguments and returning void, and calls it indirectly.
; Function Attrs: inlinehint nonlazybind uwtable
define internal void @_ZN4core3ops8function6FnOnce9call_once17h6c19189960566713E(ptr %_1) unnamed_addr #2 {
start:
; zero-sized alloca that is never read or written
  %_2 = alloca {}, align 1
  call void %_1()
  ret void
}

; core::ptr::drop_in_place<core::array::iter::IntoIter<bool,6_usize>>
; Thin wrapper: forwards the iterator pointer %_1 to IntoIter's Drop::drop and returns.
; Function Attrs: nonlazybind uwtable
define internal void @"_ZN4core3ptr70drop_in_place$LT$core..array..iter..IntoIter$LT$bool$C$6_usize$GT$$GT$17h18eaf80e6c26917aE"(ptr align 8 %_1) unnamed_addr #1 {
start:
; call <core::array::iter::IntoIter<T,_> as core::ops::drop::Drop>::drop
  call void @"_ZN82_$LT$core..array..iter..IntoIter$LT$T$C$_$GT$$u20$as$u20$core..ops..drop..Drop$GT$4drop17h64283180b0e22285E"(ptr align 8 %_1)
  ret void
}

; core::ptr::drop_in_place<std::rt::lang_start<()>::{{closure}}>
; No-op: ignores %_1 and returns immediately. Referenced as the first entry of @vtable.0.
; Function Attrs: inlinehint nonlazybind uwtable
define internal void @"_ZN4core3ptr85drop_in_place$LT$std..rt..lang_start$LT$$LP$$RP$$GT$..$u7b$$u7b$closure$u7d$$u7d$$GT$17h0fc6fafa5a71668aE"(ptr align 8 %_1) unnamed_addr #2 {
start:
  ret void
}

; core::hint::black_box
; Argument-less variant (called from fn14): only executes an empty inline-asm
; statement with a memory clobber.
; Function Attrs: inlinehint nonlazybind uwtable
define internal void @_ZN4core4hint9black_box17h2686b859af4d873bE() unnamed_addr #2 {
start:
  call void asm sideeffect "", "~{memory}"(), !srcloc !4
  ret void
}

; core::hint::black_box
; [6 x i16] variant: copies the 12 bytes at %dummy into the sret result %_0, then
; passes the address of %_0 to an empty inline-asm statement with a memory clobber.
; Function Attrs: inlinehint nonlazybind uwtable
define internal void @_ZN4core4hint9black_box17h7d644462e0d08799E(ptr sret([6 x i16]) align 2 %_0, ptr align 2 %dummy) unnamed_addr #2 {
start:
  call void @llvm.memcpy.p0.p0.i64(ptr align 2 %_0, ptr align 2 %dummy, i64 12, i1 false)
; the "r" operand hands the result address to the asm, so the compiler must assume the bytes may be read or modified
  call void asm sideeffect "", "r,~{memory}"(ptr %_0), !srcloc !4
  ret void
}

; core::hint::black_box
; 6-byte variant: the value arrives and leaves as an i48. It is stored to a stack slot,
; copied byte-wise (6 bytes) into %dummy and then %_0, exposed to an empty inline-asm
; statement, and finally reloaded from %_0 as an i48.
; Function Attrs: inlinehint nonlazybind uwtable
define internal i48 @_ZN4core4hint9black_box17h97c81746903a7167E(i48 %0) unnamed_addr #2 {
start:
  %_0 = alloca [6 x i8], align 1
  %1 = alloca i48, align 8
  %dummy = alloca [6 x i8], align 1
  store i48 %0, ptr %1, align 8
  call void @llvm.memcpy.p0.p0.i64(ptr align 1 %dummy, ptr align 8 %1, i64 6, i1 false)
  call void @llvm.memcpy.p0.p0.i64(ptr align 1 %_0, ptr align 1 %dummy, i64 6, i1 false)
; the asm receives the address of %_0 and clobbers memory
  call void asm sideeffect "", "r,~{memory}"(ptr %_0), !srcloc !4
  %2 = load i48, ptr %_0, align 1
  ret i48 %2
}

; core::hint::black_box
; 64-byte tuple variant: copies 64 bytes from %dummy into the sret result %_0, then
; passes the address of %_0 to an empty inline-asm statement with a memory clobber.
; Function Attrs: inlinehint nonlazybind uwtable
define internal void @_ZN4core4hint9black_box17ha404caf2dba75efcE(ptr sret({ [4 x i64], i128, [6 x i8], i8, i8, [8 x i8] }) align 16 %_0, ptr align 16 %dummy) unnamed_addr #2 {
start:
  call void @llvm.memcpy.p0.p0.i64(ptr align 16 %_0, ptr align 16 %dummy, i64 64, i1 false)
  call void asm sideeffect "", "r,~{memory}"(ptr %_0), !srcloc !4
  ret void
}

; core::array::iter::IntoIter<T,_>::as_mut_slice
; Returns a { pointer, length } pair describing part of the iterator's byte array:
; the pointer is field 1 of %self advanced by the first i64 of %self, and the length
; is the second i64 minus the first. The values pass through several stack
; temporaries (%index, %_26, %_25) before being packed into the return aggregate.
; Function Attrs: nonlazybind uwtable
define internal { ptr, i64 } @"_ZN4core5array4iter21IntoIter$LT$T$C$_$GT$12as_mut_slice17h4742ae0321f108b6E"(ptr align 8 %self) unnamed_addr #1 {
start:
  %_26 = alloca %"core::ptr::metadata::PtrComponents<[core::mem::maybe_uninit::MaybeUninit<bool>]>", align 8
  %_25 = alloca %"core::ptr::metadata::PtrRepr<[core::mem::maybe_uninit::MaybeUninit<bool>]>", align 8
  %index = alloca %"core::ops::index_range::IndexRange", align 8
; %_3 = address of the [6 x i8] data field
  %_3 = getelementptr inbounds %"core::array::iter::IntoIter<bool, 6>", ptr %self, i32 0, i32 1
  %_6 = load i64, ptr %self, align 8, !noundef !5
; %self1 is computed but never used
  %self1 = getelementptr inbounds i8, ptr %self, i64 8
  %0 = getelementptr inbounds i8, ptr %self, i64 8
  %_8 = load i64, ptr %0, align 8, !noundef !5
; copy both range words into the local %index
  store i64 %_6, ptr %index, align 8
  %1 = getelementptr inbounds i8, ptr %index, i64 8
  store i64 %_8, ptr %1, align 8
; data pointer = data field + first range word
  %count = load i64, ptr %index, align 8, !noundef !5
  %data = getelementptr inbounds i8, ptr %_3, i64 %count
  %2 = getelementptr inbounds i8, ptr %index, i64 8
  %_22 = load i64, ptr %2, align 8, !noundef !5
  %_23 = load i64, ptr %index, align 8, !noundef !5
; length = second range word - first (nuw: asserted not to wrap)
  %len = sub nuw i64 %_22, %_23
  store ptr %data, ptr %_26, align 8
  %3 = getelementptr inbounds i8, ptr %_26, i64 8
  store i64 %len, ptr %3, align 8
; round-trip the pair from %_26 through %_25
  %4 = load ptr, ptr %_26, align 8, !noundef !5
  %5 = getelementptr inbounds i8, ptr %_26, i64 8
  %6 = load i64, ptr %5, align 8, !noundef !5
  store ptr %4, ptr %_25, align 8
  %7 = getelementptr inbounds i8, ptr %_25, i64 8
  store i64 %6, ptr %7, align 8
  %slice.0 = load ptr, ptr %_25, align 8, !noundef !5
  %8 = getelementptr inbounds i8, ptr %_25, i64 8
  %slice.1 = load i64, ptr %8, align 8, !noundef !5
  %9 = insertvalue { ptr, i64 } poison, ptr %slice.0, 0
  %10 = insertvalue { ptr, i64 } %9, i64 %slice.1, 1
  ret { ptr, i64 } %10
}

; core::array::iter::<impl core::iter::traits::collect::IntoIterator for [T; N]>::into_iter
; Builds an IntoIter<bool, 6> in the sret slot %_0 from a 6-byte array passed as i48:
; the 6 bytes go into field 1 and the range words are initialised to 0 and 6.
; Function Attrs: nonlazybind uwtable
define internal void @"_ZN4core5array4iter94_$LT$impl$u20$core..iter..traits..collect..IntoIterator$u20$for$u20$$u5b$T$u3b$$u20$N$u5d$$GT$9into_iter17h847bd2095a5f5cdcE"(ptr sret(%"core::array::iter::IntoIter<bool, 6>") align 8 %_0, i48 %0) unnamed_addr #1 {
start:
  %_3 = alloca %"core::ops::index_range::IndexRange", align 8
  %data = alloca [6 x i8], align 1
  %1 = alloca i48, align 8
  %self = alloca [6 x i8], align 1
; spill the i48 and copy its 6 bytes through %self into %data
  store i48 %0, ptr %1, align 8
  call void @llvm.memcpy.p0.p0.i64(ptr align 1 %self, ptr align 8 %1, i64 6, i1 false)
  call void @llvm.memcpy.p0.p0.i64(ptr align 1 %data, ptr align 1 %self, i64 6, i1 false)
; local range = { 0, 6 }
  store i64 0, ptr %_3, align 8
  %2 = getelementptr inbounds i8, ptr %_3, i64 8
  store i64 6, ptr %2, align 8
; write the element bytes into the result's data field
  %3 = getelementptr inbounds %"core::array::iter::IntoIter<bool, 6>", ptr %_0, i32 0, i32 1
  call void @llvm.memcpy.p0.p0.i64(ptr align 8 %3, ptr align 1 %data, i64 6, i1 false)
; copy the two range words into the first 16 bytes of the result
  %4 = load i64, ptr %_3, align 8, !noundef !5
  %5 = getelementptr inbounds i8, ptr %_3, i64 8
  %6 = load i64, ptr %5, align 8, !noundef !5
  store i64 %4, ptr %_0, align 8
  %7 = getelementptr inbounds i8, ptr %_0, i64 8
  store i64 %6, ptr %7, align 8
  ret void
}

; <() as std::process::Termination>::report
; Function Attrs: inlinehint nonlazybind uwtable
; Always returns the i8 value 0; takes no arguments.
define internal i8 @"_ZN54_$LT$$LP$$RP$$u20$as$u20$std..process..Termination$GT$6report17h6aa2dda252264378E"() unnamed_addr #2 {
start:
  ret i8 0
}

; <core::array::iter::IntoIter<T,_> as core::ops::drop::Drop>::drop
; Function Attrs: nonlazybind uwtable
; Calls as_mut_slice on %self and extracts both components of the result, but
; does nothing further with them before returning.
define internal void @"_ZN82_$LT$core..array..iter..IntoIter$LT$T$C$_$GT$$u20$as$u20$core..ops..drop..Drop$GT$4drop17h64283180b0e22285E"(ptr align 8 %self) unnamed_addr #1 {
start:
; call core::array::iter::IntoIter<T,_>::as_mut_slice
  %0 = call { ptr, i64 } @"_ZN4core5array4iter21IntoIter$LT$T$C$_$GT$12as_mut_slice17h4742ae0321f108b6E"(ptr align 8 %self)
; both extracted values are unused
  %_3.0 = extractvalue { ptr, i64 } %0, 0
  %_3.1 = extractvalue { ptr, i64 } %0, 1
  ret void
}

; <core::array::iter::IntoIter<T,_> as core::iter::traits::iterator::Iterator>::next
; Function Attrs: nonlazybind uwtable
; Advances the iterator at %self and returns an i8 in the range [0, 3) (see !range !9):
;   2      - the range is exhausted (second range word - first range word == 0)
;   0 or 1 - the low bit of the data byte at the index that was just consumed
; When an element is available, the first range word is incremented in place.
define internal i8 @"_ZN99_$LT$core..array..iter..IntoIter$LT$T$C$_$GT$$u20$as$u20$core..iter..traits..iterator..Iterator$GT$4next17h885781a4a2dba8d1E"(ptr align 8 %self) unnamed_addr #1 {
start:
  %_24 = alloca %"core::ptr::metadata::PtrRepr<[core::mem::maybe_uninit::MaybeUninit<bool>]>", align 8
  %self2 = alloca ptr, align 8
  %self1 = alloca %"core::option::Option<usize>", align 8
  %_0 = alloca i8, align 1
  store ptr %self, ptr %self2, align 8
  %self3 = load ptr, ptr %self2, align 8, !nonnull !5, !align !6, !noundef !5
  %0 = getelementptr inbounds i8, ptr %self3, i64 8
  %_11 = load i64, ptr %0, align 8, !noundef !5
  %_12 = load i64, ptr %self3, align 8, !noundef !5
; remaining = second range word - first range word
  %_6 = sub nuw i64 %_11, %_12
  %_5 = icmp ugt i64 %_6, 0
  br i1 %_5, label %bb1, label %bb2

; exhausted: copy the 16-byte constant @0 (zero word + undef word) into %self1
bb2:                                              ; preds = %start
  %1 = load i64, ptr @0, align 8, !range !7, !noundef !5
  %2 = load i64, ptr getelementptr inbounds (i8, ptr @0, i64 8), align 8
  store i64 %1, ptr %self1, align 8
  %3 = getelementptr inbounds i8, ptr %self1, i64 8
  store i64 %2, ptr %3, align 8
  br label %bb3

; element available: bump the first range word and record { 1, old index } in %self1
bb1:                                              ; preds = %start
  %self4 = load ptr, ptr %self2, align 8, !nonnull !5, !align !6, !noundef !5
  %value = load i64, ptr %self4, align 8, !noundef !5
  %_13 = add nuw i64 %value, 1
  store i64 %_13, ptr %self4, align 8
  %4 = getelementptr inbounds i8, ptr %self1, i64 8
  store i64 %value, ptr %4, align 8
  store i64 1, ptr %self1, align 8
  br label %bb3

; dispatch on the first word of %self1 (0 -> bb4, otherwise bb5)
bb3:                                              ; preds = %bb1, %bb2
  %f = getelementptr inbounds %"core::array::iter::IntoIter<bool, 6>", ptr %self, i32 0, i32 1
  %_14 = load i64, ptr %self1, align 8, !range !7, !noundef !5
  %5 = icmp eq i64 %_14, 0
  br i1 %5, label %bb4, label %bb5

; no element: result byte is 2
bb4:                                              ; preds = %bb3
  store i8 2, ptr %_0, align 1
  br label %bb6

; element: read data byte %x, after telling the optimizer that %x < 6
bb5:                                              ; preds = %bb3
  %6 = getelementptr inbounds i8, ptr %self1, i64 8
  %x = load i64, ptr %6, align 8, !noundef !5
; %_24 temporarily holds { data pointer, 6 }; only the length is read back
  store ptr %f, ptr %_24, align 8
  %7 = getelementptr inbounds i8, ptr %_24, i64 8
  store i64 6, ptr %7, align 8
  %8 = getelementptr inbounds i8, ptr %_24, i64 8
  %_22 = load i64, ptr %8, align 8, !noundef !5
  %cond = icmp ult i64 %x, %_22
  call void @llvm.assume(i1 %cond)
  %self5 = getelementptr inbounds i8, ptr %f, i64 %x
; !range !8 asserts the loaded byte is 0 or 1
  %9 = load i8, ptr %self5, align 1, !range !8, !noundef !5
  %_16 = trunc i8 %9 to i1
  %10 = zext i1 %_16 to i8
  store i8 %10, ptr %_0, align 1
  br label %bb6

bb6:                                              ; preds = %bb5, %bb4
  %11 = load i8, ptr %_0, align 1, !range !9, !noundef !5
  ret i8 %11

bb8:                                              ; No predecessors!
  unreachable
}

; _9897542_debug::enter
; Builds an all-zero 64-byte tuple in %_3 field by field, passes it through
; black_box into %_2, and calls fn13 with a pointer to %_2.
; Function Attrs: nonlazybind uwtable
define internal void @_ZN14_9897542_debug5enter17hf6945812e170275dE() unnamed_addr #1 {
start:
  %_5 = alloca [4 x i64], align 8
  %_4 = alloca [6 x i8], align 1
  %_3 = alloca { [4 x i64], i128, [6 x i8], i8, i8, [8 x i8] }, align 16
  %_2 = alloca { [4 x i64], i128, [6 x i8], i8, i8, [8 x i8] }, align 16
; zero-fill the 6-byte and 32-byte temporaries
  %0 = getelementptr inbounds [6 x i8], ptr %_4, i64 0, i64 0
  call void @llvm.memset.p0.i64(ptr align 1 %0, i8 0, i64 6, i1 false)
  %1 = getelementptr inbounds [4 x i64], ptr %_5, i64 0, i64 0
  call void @llvm.memset.p0.i64(ptr align 8 %1, i8 0, i64 32, i1 false)
; populate %_3: field 3 (i8), field 2 ([6 x i8]), field 0 ([4 x i64], copied as one <4 x i64>),
; field 1 (i128) and field 4 (i8)
  %2 = getelementptr inbounds { [4 x i64], i128, [6 x i8], i8, i8, [8 x i8] }, ptr %_3, i32 0, i32 3
  store i8 0, ptr %2, align 2
  %3 = getelementptr inbounds { [4 x i64], i128, [6 x i8], i8, i8, [8 x i8] }, ptr %_3, i32 0, i32 2
  call void @llvm.memcpy.p0.p0.i64(ptr align 16 %3, ptr align 1 %_4, i64 6, i1 false)
  %4 = load <4 x i64>, ptr %_5, align 8
  store <4 x i64> %4, ptr %_3, align 16
  %5 = getelementptr inbounds { [4 x i64], i128, [6 x i8], i8, i8, [8 x i8] }, ptr %_3, i32 0, i32 1
  store i128 0, ptr %5, align 16
  %6 = getelementptr inbounds { [4 x i64], i128, [6 x i8], i8, i8, [8 x i8] }, ptr %_3, i32 0, i32 4
  store i8 0, ptr %6, align 1
; call core::hint::black_box
  call void @_ZN4core4hint9black_box17ha404caf2dba75efcE(ptr sret({ [4 x i64], i128, [6 x i8], i8, i8, [8 x i8] }) align 16 %_2, ptr align 16 %_3)
; call _9897542_debug::fn13
  call void @_ZN14_9897542_debug4fn1317h464291bc457b7dd1E(ptr align 16 %_2)
  ret void
}

; _9897542_debug::fn13
; Core of the reproducer. %_1 points at a 64-byte tuple; %_4 is a stack slot holding a
; pointer that first aliases %_1 and is later redirected to the local %_6.
; Sequence of visible effects:
;   1. %_2 := [0,1,1,1,1,1]; %_5 := six i16 constants.
;   2. %_2 := field 2 of *%_4 (6 bytes); field 2 of *%_4 := [0,1,1,0,0,1].
;   3. fn14 is called with field 2 of %_1 loaded as an i48.
;   4. field 2 of *%_4 := %_2 (the bytes saved in step 2); %_3 := %_5; %_4 := &%_6.
;   5. %_7 (Adt54) gets its i32 field set to -1228700359, then its first 64 bytes
;      overwritten from %_1; the i32 is reloaded and switched on.
;   6. On the -1228700359 arm, *%_4 (= %_6) is assembled from fields of %_1 and %_7,
;      field 2 of %_6 is copied to %_2 and printed via p1, and hide is called.
; Each dereference of %_4 is preceded by a 16-byte alignment check that branches to a
; panic block on failure.
; Function Attrs: nonlazybind uwtable
define internal void @_ZN14_9897542_debug4fn1317h464291bc457b7dd1E(ptr align 16 %_1) unnamed_addr #1 {
start:
  %0 = alloca [6 x i8], align 1
  %_7 = alloca %Adt54, align 16
  %_6 = alloca { [4 x i64], i128, [6 x i8], i8, i8, [8 x i8] }, align 16
  %_5 = alloca %Adt59, align 2
  %_4 = alloca ptr, align 8
  %_3 = alloca [6 x i16], align 2
  %_2 = alloca [6 x i8], align 1
; %_2 := [0,1,1,1,1,1], one byte store per element
  %1 = getelementptr inbounds [6 x i8], ptr %_2, i64 0, i64 0
  store i8 0, ptr %1, align 1
  %2 = getelementptr inbounds [6 x i8], ptr %_2, i64 0, i64 1
  store i8 1, ptr %2, align 1
  %3 = getelementptr inbounds [6 x i8], ptr %_2, i64 0, i64 2
  store i8 1, ptr %3, align 1
  %4 = getelementptr inbounds [6 x i8], ptr %_2, i64 0, i64 3
  store i8 1, ptr %4, align 1
  %5 = getelementptr inbounds [6 x i8], ptr %_2, i64 0, i64 4
  store i8 1, ptr %5, align 1
  %6 = getelementptr inbounds [6 x i8], ptr %_2, i64 0, i64 5
  store i8 1, ptr %6, align 1
; %_5 := six i16 constants
  %7 = getelementptr inbounds [6 x i16], ptr %_5, i64 0, i64 0
  store i16 25264, ptr %7, align 2
  %8 = getelementptr inbounds [6 x i16], ptr %_5, i64 0, i64 1
  store i16 -10872, ptr %8, align 2
  %9 = getelementptr inbounds [6 x i16], ptr %_5, i64 0, i64 2
  store i16 -18504, ptr %9, align 2
  %10 = getelementptr inbounds [6 x i16], ptr %_5, i64 0, i64 3
  store i16 -10083, ptr %10, align 2
  %11 = getelementptr inbounds [6 x i16], ptr %_5, i64 0, i64 4
  store i16 -32025, ptr %11, align 2
  %12 = getelementptr inbounds [6 x i16], ptr %_5, i64 0, i64 5
  store i16 -22493, ptr %12, align 2
; %_4 := %_1, followed by the first alignment check (low 4 address bits must be zero)
  store ptr %_1, ptr %_4, align 8
  %_27 = load ptr, ptr %_4, align 8, !noundef !5
  %_28 = ptrtoint ptr %_27 to i64
  %_31 = and i64 %_28, 15
  %_32 = icmp eq i64 %_31, 0
  %13 = call i1 @llvm.expect.i1(i1 %_32, i1 true)
  br i1 %13, label %bb8, label %panic

; save the incoming field 2 (6 bytes) of *%_4 into %_2, then re-check alignment
bb8:                                              ; preds = %start
  %14 = load ptr, ptr %_4, align 8, !noundef !5
  %15 = getelementptr inbounds { [4 x i64], i128, [6 x i8], i8, i8, [8 x i8] }, ptr %14, i32 0, i32 2
  call void @llvm.memcpy.p0.p0.i64(ptr align 1 %_2, ptr align 16 %15, i64 6, i1 false)
  %_21 = load ptr, ptr %_4, align 8, !noundef !5
  %_22 = ptrtoint ptr %_21 to i64
  %_25 = and i64 %_22, 15
  %_26 = icmp eq i64 %_25, 0
  %16 = call i1 @llvm.expect.i1(i1 %_26, i1 true)
  br i1 %16, label %bb7, label %panic1

panic:                                            ; preds = %start
; call core::panicking::panic_misaligned_pointer_dereference
  call void @_ZN4core9panicking36panic_misaligned_pointer_dereference17h93252408cf5d3556E(i64 16, i64 %_28, ptr align 8 @alloc_e5eed6cfbd8269f8f7433869267020a5) #9
  unreachable

; overwrite field 2 of *%_4 with [0,1,1,0,0,1] via six byte stores, then pass field 2
; of %_1 (same object at this point) to fn14 as an i48
bb7:                                              ; preds = %bb8
  %17 = load ptr, ptr %_4, align 8, !noundef !5
  %18 = getelementptr inbounds { [4 x i64], i128, [6 x i8], i8, i8, [8 x i8] }, ptr %17, i32 0, i32 2
  %19 = getelementptr inbounds [6 x i8], ptr %18, i64 0, i64 0
  store i8 0, ptr %19, align 16
  %20 = getelementptr inbounds [6 x i8], ptr %18, i64 0, i64 1
  store i8 1, ptr %20, align 1
  %21 = getelementptr inbounds [6 x i8], ptr %18, i64 0, i64 2
  store i8 1, ptr %21, align 2
  %22 = getelementptr inbounds [6 x i8], ptr %18, i64 0, i64 3
  store i8 0, ptr %22, align 1
  %23 = getelementptr inbounds [6 x i8], ptr %18, i64 0, i64 4
  store i8 0, ptr %23, align 4
  %24 = getelementptr inbounds [6 x i8], ptr %18, i64 0, i64 5
  store i8 1, ptr %24, align 1
  %25 = getelementptr inbounds { [4 x i64], i128, [6 x i8], i8, i8, [8 x i8] }, ptr %_1, i32 0, i32 2
  call void @llvm.memcpy.p0.p0.i64(ptr align 1 %0, ptr align 16 %25, i64 6, i1 false)
  %26 = load i48, ptr %0, align 1
; call _9897542_debug::fn14
  call void @_ZN14_9897542_debug4fn1417hec83b423637ab076E(i48 %26)
  br label %bb1

panic1:                                           ; preds = %bb8
; call core::panicking::panic_misaligned_pointer_dereference
  call void @_ZN4core9panicking36panic_misaligned_pointer_dereference17h93252408cf5d3556E(i64 16, i64 %_22, ptr align 8 @alloc_40b8598efcc1c71f5a957cd65a4a1dda) #9
  unreachable

; loop header (also reached from bb6 when the switch value is 0): alignment check on *%_4
bb1:                                              ; preds = %bb6, %bb7
  %_15 = load ptr, ptr %_4, align 8, !noundef !5
  %_16 = ptrtoint ptr %_15 to i64
  %_19 = and i64 %_16, 15
  %_20 = icmp eq i64 %_19, 0
  %27 = call i1 @llvm.expect.i1(i1 %_20, i1 true)
  br i1 %27, label %bb6, label %panic2

; restore field 2 of *%_4 from %_2, copy %_5 into %_3, retarget %_4 at the local %_6,
; then initialise %_7 and switch on its i32 field
bb6:                                              ; preds = %bb1
  %28 = load ptr, ptr %_4, align 8, !noundef !5
  %29 = getelementptr inbounds { [4 x i64], i128, [6 x i8], i8, i8, [8 x i8] }, ptr %28, i32 0, i32 2
  call void @llvm.memcpy.p0.p0.i64(ptr align 16 %29, ptr align 1 %_2, i64 6, i1 false)
  call void @llvm.memcpy.p0.p0.i64(ptr align 2 %_3, ptr align 2 %_5, i64 12, i1 false)
  store ptr %_6, ptr %_4, align 8
  %30 = getelementptr inbounds %Adt54, ptr %_7, i32 0, i32 1
  store i32 -1228700359, ptr %30, align 16
; 64-byte copy of the whole tuple at %_1 into the start of %_7
  call void @llvm.memcpy.p0.p0.i64(ptr align 16 %_7, ptr align 16 %_1, i64 64, i1 false)
  %31 = getelementptr inbounds %Adt54, ptr %_7, i32 0, i32 1
  %32 = load i32, ptr %31, align 16, !noundef !5
  switch i32 %32, label %bb4 [
    i32 0, label %bb1
    i32 -1228700359, label %bb2
  ]

panic2:                                           ; preds = %bb1
; call core::panicking::panic_misaligned_pointer_dereference
  call void @_ZN4core9panicking36panic_misaligned_pointer_dereference17h93252408cf5d3556E(i64 16, i64 %_16, ptr align 8 @alloc_c40e8a599f3a3d41612c45a675185c8e) #9
  unreachable

bb4:                                              ; preds = %bb5, %bb6
  ret void

; alignment check on *%_4, which now points at %_6
bb2:                                              ; preds = %bb6
  %_9 = load ptr, ptr %_4, align 8, !noundef !5
  %_10 = ptrtoint ptr %_9 to i64
  %_13 = and i64 %_10, 15
  %_14 = icmp eq i64 %_13, 0
  %33 = call i1 @llvm.expect.i1(i1 %_14, i1 true)
  br i1 %33, label %bb5, label %panic3

; assemble *%34 field by field: fields 3, 2, 0 and 4 come from %_1, field 1 (i128) from %_7;
; then field 2 of %_6 is copied into %_2 and handed to p1 as an i48
bb5:                                              ; preds = %bb2
  %34 = load ptr, ptr %_4, align 8, !noundef !5
  %35 = getelementptr inbounds { [4 x i64], i128, [6 x i8], i8, i8, [8 x i8] }, ptr %_1, i32 0, i32 3
  %36 = load i8, ptr %35, align 2, !noundef !5
  %37 = getelementptr inbounds { [4 x i64], i128, [6 x i8], i8, i8, [8 x i8] }, ptr %34, i32 0, i32 3
  store i8 %36, ptr %37, align 2
  %38 = getelementptr inbounds { [4 x i64], i128, [6 x i8], i8, i8, [8 x i8] }, ptr %_1, i32 0, i32 2
  %39 = getelementptr inbounds { [4 x i64], i128, [6 x i8], i8, i8, [8 x i8] }, ptr %34, i32 0, i32 2
  call void @llvm.memcpy.p0.p0.i64(ptr align 16 %39, ptr align 16 %38, i64 6, i1 false)
  %40 = load <4 x i64>, ptr %_1, align 16
  store <4 x i64> %40, ptr %34, align 16
  %41 = getelementptr inbounds { [4 x i64], i128, [6 x i8], i8, i8, [8 x i8] }, ptr %_7, i32 0, i32 1
  %42 = load i128, ptr %41, align 16, !noundef !5
  %43 = getelementptr inbounds { [4 x i64], i128, [6 x i8], i8, i8, [8 x i8] }, ptr %34, i32 0, i32 1
  store i128 %42, ptr %43, align 16
  %44 = getelementptr inbounds { [4 x i64], i128, [6 x i8], i8, i8, [8 x i8] }, ptr %_1, i32 0, i32 4
  %45 = load i8, ptr %44, align 1, !noundef !5
  %46 = getelementptr inbounds { [4 x i64], i128, [6 x i8], i8, i8, [8 x i8] }, ptr %34, i32 0, i32 4
  store i8 %45, ptr %46, align 1
  %47 = getelementptr inbounds { [4 x i64], i128, [6 x i8], i8, i8, [8 x i8] }, ptr %_6, i32 0, i32 2
  call void @llvm.memcpy.p0.p0.i64(ptr align 1 %_2, ptr align 16 %47, i64 6, i1 false)
  %48 = load i48, ptr %_2, align 1
; call _9897542_debug::p1
  call void @_ZN14_9897542_debug2p117he5f07e46e9affd21E(i48 %48)
; call _9897542_debug::hide
  call void @_ZN14_9897542_debug4hide17h536849b57a0e6c96E(i64 13, i64 25, i64 0, i64 11, i64 0, i64 14, ptr align 2 %_3, i64 6, ptr align 16 %_1)
  br label %bb4

panic3:                                           ; preds = %bb2
; call core::panicking::panic_misaligned_pointer_dereference
  call void @_ZN4core9panicking36panic_misaligned_pointer_dereference17h93252408cf5d3556E(i64 16, i64 %_10, ptr align 8 @alloc_23cf86cb6a1d3396212d84acc0e8430c) #9
  unreachable
}

; _9897542_debug::fn14
; Receives a 6-byte array as an i48, calls the argument-less black_box, then passes
; the 6 bytes through the i48 black_box and stores the result into field 2 of the
; local tuple %_2. %_2 is not read afterwards and nothing is returned.
; Function Attrs: nonlazybind uwtable
define internal void @_ZN14_9897542_debug4fn1417hec83b423637ab076E(i48 %0) unnamed_addr #1 {
start:
  %1 = alloca i48, align 8
  %_2 = alloca { [4 x i64], i128, [6 x i8], i8, i8, [8 x i8] }, align 16
  %2 = alloca i48, align 8
  %_1 = alloca [6 x i8], align 1
; spill the argument and copy its 6 bytes into %_1
  store i48 %0, ptr %2, align 8
  call void @llvm.memcpy.p0.p0.i64(ptr align 1 %_1, ptr align 8 %2, i64 6, i1 false)
; call core::hint::black_box
  call void @_ZN4core4hint9black_box17h2686b859af4d873bE()
  %3 = getelementptr inbounds { [4 x i64], i128, [6 x i8], i8, i8, [8 x i8] }, ptr %_2, i32 0, i32 2
  %4 = load i48, ptr %_1, align 1
; call core::hint::black_box
  %5 = call i48 @_ZN4core4hint9black_box17h97c81746903a7167E(i48 %4)
; write the 6 returned bytes into field 2 of %_2
  store i48 %5, ptr %1, align 8
  call void @llvm.memcpy.p0.p0.i64(ptr align 1 %3, ptr align 8 %1, i64 6, i1 false)
  ret void
}

; _9897542_debug::main
; Rust-level main: just calls enter. It is internal and is not called by the
; exported @main in this listing, which calls enter directly.
; Function Attrs: nonlazybind uwtable
define internal void @_ZN14_9897542_debug4main17h887f27fa905e292cE() unnamed_addr #1 {
start:
; call _9897542_debug::enter
  call void @_ZN14_9897542_debug5enter17hf6945812e170275dE()
  ret void
}

; _9897542_debug::print_bool
; Prints the boolean %x as "0" or "1" followed by a newline, using printf with the
; "%d\n" format string. The printf return value is ignored.
; Function Attrs: nonlazybind uwtable
define internal void @_ZN14_9897542_debug10print_bool17h7b0ec7a0b1285f19E(i1 zeroext %x) unnamed_addr #1 {
start:
  %_8 = zext i1 %x to i32
  %_2 = call i32 (ptr, ...) @printf(ptr @alloc_87551382a9de3243abbfdbda2f0b586b, i32 %_8)
  ret void
}

; _9897542_debug::p1
; Prints each element of a 6-byte bool array (passed as an i48), one per line.
; It builds an IntoIter from the array, then loops: call next; if the result is 2 the
; iterator is dropped and the function returns, otherwise the value is printed with
; print_bool. Unwind edges save the landing-pad pair in %1, drop the iterator and
; resume; an unwind out of that cleanup drop reaches panic_in_cleanup.
; Function Attrs: noinline nonlazybind uwtable
define internal void @_ZN14_9897542_debug2p117he5f07e46e9affd21E(i48 %0) unnamed_addr #0 personality ptr @rust_eh_personality {
start:
  %1 = alloca { ptr, i32, [1 x i32] }, align 8
  %_7 = alloca i8, align 1
  %iter = alloca %"core::array::iter::IntoIter<bool, 6>", align 8
  %_3 = alloca [6 x i8], align 1
  %_2 = alloca %"core::array::iter::IntoIter<bool, 6>", align 8
  %2 = alloca i48, align 8
  %x = alloca [6 x i8], align 1
; spill the argument, copy its 6 bytes through %x into %_3, and reload as i48
  store i48 %0, ptr %2, align 8
  call void @llvm.memcpy.p0.p0.i64(ptr align 1 %x, ptr align 8 %2, i64 6, i1 false)
  call void @llvm.memcpy.p0.p0.i64(ptr align 1 %_3, ptr align 1 %x, i64 6, i1 false)
  %3 = load i48, ptr %_3, align 1
; call core::array::iter::<impl core::iter::traits::collect::IntoIterator for [T; N]>::into_iter
  call void @"_ZN4core5array4iter94_$LT$impl$u20$core..iter..traits..collect..IntoIterator$u20$for$u20$$u5b$T$u3b$$u20$N$u5d$$GT$9into_iter17h847bd2095a5f5cdcE"(ptr sret(%"core::array::iter::IntoIter<bool, 6>") align 8 %_2, i48 %3)
; move the freshly built 24-byte iterator into %iter
  call void @llvm.memcpy.p0.p0.i64(ptr align 8 %iter, ptr align 8 %_2, i64 24, i1 false)
  br label %bb2

; loop header: fetch the next element (result constrained to [0, 3) by !range !9)
bb2:                                              ; preds = %bb7, %start
; invoke <core::array::iter::IntoIter<T,_> as core::iter::traits::iterator::Iterator>::next
  %4 = invoke i8 @"_ZN99_$LT$core..array..iter..IntoIter$LT$T$C$_$GT$$u20$as$u20$core..iter..traits..iterator..Iterator$GT$4next17h885781a4a2dba8d1E"(ptr align 8 %iter)
          to label %bb3 unwind label %cleanup, !range !9

; unwind path: drop the iterator before resuming
bb9:                                              ; preds = %cleanup
; invoke core::ptr::drop_in_place<core::array::iter::IntoIter<bool,6_usize>>
  invoke void @"_ZN4core3ptr70drop_in_place$LT$core..array..iter..IntoIter$LT$bool$C$6_usize$GT$$GT$17h18eaf80e6c26917aE"(ptr align 8 %iter) #10
          to label %bb10 unwind label %terminate

; landing pad for unwinds out of next or print_bool
cleanup:                                          ; preds = %bb4, %bb2
  %5 = landingpad { ptr, i32 }
          cleanup
  %6 = extractvalue { ptr, i32 } %5, 0
  %7 = extractvalue { ptr, i32 } %5, 1
  store ptr %6, ptr %1, align 8
  %8 = getelementptr inbounds i8, ptr %1, i64 8
  store i32 %7, ptr %8, align 8
  br label %bb9

; classify the result: 2 -> finished (bb6), anything else -> print it (bb4)
bb3:                                              ; preds = %bb2
  store i8 %4, ptr %_7, align 1
  %9 = load i8, ptr %_7, align 1, !range !9, !noundef !5
  %10 = icmp eq i8 %9, 2
  %_10 = select i1 %10, i64 0, i64 1
  %11 = icmp eq i64 %_10, 0
  br i1 %11, label %bb6, label %bb4

; normal exit: drop the iterator, then return
bb6:                                              ; preds = %bb3
; invoke core::ptr::drop_in_place<core::array::iter::IntoIter<bool,6_usize>>
  invoke void @"_ZN4core3ptr70drop_in_place$LT$core..array..iter..IntoIter$LT$bool$C$6_usize$GT$$GT$17h18eaf80e6c26917aE"(ptr align 8 %iter)
          to label %bb8 unwind label %cleanup1

; reload the element (now known to be 0 or 1 per !range !8) and print it
bb4:                                              ; preds = %bb3
  %12 = load i8, ptr %_7, align 1, !range !8, !noundef !5
  %b = trunc i8 %12 to i1
; invoke _9897542_debug::print_bool
  invoke void @_ZN14_9897542_debug10print_bool17h7b0ec7a0b1285f19E(i1 zeroext %b)
          to label %bb7 unwind label %cleanup

; rebuild the saved { ptr, i32 } pair and continue unwinding
bb10:                                             ; preds = %bb9, %cleanup1
  %13 = load ptr, ptr %1, align 8, !noundef !5
  %14 = getelementptr inbounds i8, ptr %1, i64 8
  %15 = load i32, ptr %14, align 8, !noundef !5
  %16 = insertvalue { ptr, i32 } poison, ptr %13, 0
  %17 = insertvalue { ptr, i32 } %16, i32 %15, 1
  resume { ptr, i32 } %17

; landing pad for an unwind out of the normal-exit drop
cleanup1:                                         ; preds = %bb6
  %18 = landingpad { ptr, i32 }
          cleanup
  %19 = extractvalue { ptr, i32 } %18, 0
  %20 = extractvalue { ptr, i32 } %18, 1
  store ptr %19, ptr %1, align 8
  %21 = getelementptr inbounds i8, ptr %1, i64 8
  store i32 %20, ptr %21, align 8
  br label %bb10

bb8:                                              ; preds = %bb6
  ret void

bb7:                                              ; preds = %bb4
  br label %bb2

bb5:                                              ; No predecessors!
  unreachable

; an unwind during the cleanup-path drop ends here
terminate:                                        ; preds = %bb9
  %22 = landingpad { ptr, i32 }
          filter [0 x ptr] zeroinitializer
  %23 = extractvalue { ptr, i32 } %22, 0
  %24 = extractvalue { ptr, i32 } %22, 1
; call core::panicking::panic_in_cleanup
  call void @_ZN4core9panicking16panic_in_cleanup17h8f1b458bda2e1ba8E() #11
  unreachable
}

; _9897542_debug::hide
; Copies the 12 bytes at %val2 and the 64 bytes at %val3 into locals and passes each
; copy through the matching black_box variant. The six i64 parameters (%f, %var0,
; %val0, %var1, %val1, %var2) and %var3 are not used in the body.
; Function Attrs: noinline nonlazybind uwtable
define internal void @_ZN14_9897542_debug4hide17h536849b57a0e6c96E(i64 %f, i64 %var0, i64 %val0, i64 %var1, i64 %val1, i64 %var2, ptr align 2 %val2, i64 %var3, ptr align 16 %val3) unnamed_addr #0 {
start:
  %_13 = alloca { [4 x i64], i128, [6 x i8], i8, i8, [8 x i8] }, align 16
  %_12 = alloca { [4 x i64], i128, [6 x i8], i8, i8, [8 x i8] }, align 16
  %_11 = alloca [6 x i16], align 2
  %_10 = alloca [6 x i16], align 2
  call void @llvm.memcpy.p0.p0.i64(ptr align 2 %_11, ptr align 2 %val2, i64 12, i1 false)
; call core::hint::black_box
  call void @_ZN4core4hint9black_box17h7d644462e0d08799E(ptr sret([6 x i16]) align 2 %_10, ptr align 2 %_11)
  call void @llvm.memcpy.p0.p0.i64(ptr align 16 %_13, ptr align 16 %val3, i64 64, i1 false)
; call core::hint::black_box
  call void @_ZN4core4hint9black_box17ha404caf2dba75efcE(ptr sret({ [4 x i64], i128, [6 x i8], i8, i8, [8 x i8] }) align 16 %_12, ptr align 16 %_13)
  ret void
}

; Function Attrs: nonlazybind uwtable
; Stub personality routine: ignores all five arguments and returns 0. It exists so
; the functions that name it in a `personality` clause link without the Rust runtime.
define i32 @rust_eh_personality(i32, i32, i64, ptr, ptr) unnamed_addr #1 {
start:
  ret i32 0
}

; Declarations of the LLVM intrinsics used by this module.

; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: readwrite)
; memcpy(dest, src, byte count, is-volatile); every call in this module passes i1 false.
declare void @llvm.memcpy.p0.p0.i64(ptr noalias nocapture writeonly, ptr noalias nocapture readonly, i64, i1 immarg) #3

; Function Attrs: nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite)
; Used once, in IntoIter::next, to assert that the element index is below 6.
declare void @llvm.assume(i1 noundef) #4

; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: write)
; memset(dest, byte value, byte count, is-volatile); used in enter to zero-fill temporaries.
declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg) #5

; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none)
; Wraps each alignment-check condition in fn13 with an expected value of true.
declare i1 @llvm.expect.i1(i1, i1) #6

; core::panicking::panic_misaligned_pointer_dereference
; Hand-written shim: ignores its three arguments and returns immediately.
; NOTE(review): attribute group #7 marks this function noreturn, yet the body executes
; `ret void`; every call site in fn13 is followed by `unreachable`. This only matters
; if an alignment check ever fails - left as-is because this is a reproducer.
; Function Attrs: cold noinline noreturn nounwind nonlazybind uwtable
define void @_ZN4core9panicking36panic_misaligned_pointer_dereference17h93252408cf5d3556E(i64, i64, ptr align 8) unnamed_addr #7 {
start:
  ret void
}

; Function Attrs: nonlazybind uwtable
; External variadic printf; the only caller in this module is print_bool.
declare i32 @printf(ptr, ...) unnamed_addr #1

; core::panicking::panic_in_cleanup
; Hand-written shim: takes no arguments and returns immediately.
; NOTE(review): attribute group #7 marks this function noreturn, yet the body executes
; `ret void`; the single call site (p1's terminate block) is followed by `unreachable`.
; Function Attrs: cold noinline noreturn nounwind nonlazybind uwtable
define void @_ZN4core9panicking16panic_in_cleanup17h8f1b458bda2e1ba8E() unnamed_addr #7 {
start:
  ret void
}

; Function Attrs: nonlazybind
; Process entry point: ignores both arguments, calls enter directly (the lang_start
; functions defined above are not invoked from here), and returns 0.
define i32 @main(i32 %0, ptr %1) unnamed_addr #8 {
top:
  call void @_ZN14_9897542_debug5enter17hf6945812e170275dE()
  ret i32 0
}

; Attribute groups referenced by the definitions, declarations and call sites above.
; #0-#2: ordinary Rust functions (#0 adds noinline, #2 adds inlinehint).
attributes #0 = { noinline nonlazybind uwtable "probe-stack"="inline-asm" "target-cpu"="x86-64" }
attributes #1 = { nonlazybind uwtable "probe-stack"="inline-asm" "target-cpu"="x86-64" }
attributes #2 = { inlinehint nonlazybind uwtable "probe-stack"="inline-asm" "target-cpu"="x86-64" }
; #3-#6: the memcpy, assume, memset and expect intrinsic declarations, respectively.
attributes #3 = { nocallback nofree nounwind willreturn memory(argmem: readwrite) }
attributes #4 = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) }
attributes #5 = { nocallback nofree nounwind willreturn memory(argmem: write) }
attributes #6 = { nocallback nofree nosync nounwind willreturn memory(none) }
; #7: the two panic shims; #8: the exported @main.
attributes #7 = { cold noinline noreturn nounwind nonlazybind uwtable "probe-stack"="inline-asm" "target-cpu"="x86-64" }
attributes #8 = { nonlazybind "target-cpu"="x86-64" }
; #9-#11: call-site attributes (misaligned-pointer panic calls, cleanup-path drop, panic_in_cleanup call).
attributes #9 = { noreturn nounwind }
attributes #10 = { cold }
attributes #11 = { cold noreturn nounwind }

; Module flags, producer identification and the metadata nodes attached to instructions above.
!llvm.module.flags = !{!0, !1, !2}
!llvm.ident = !{!3}

!0 = !{i32 8, !"PIC Level", i32 2}
!1 = !{i32 7, !"PIE Level", i32 2}
!2 = !{i32 2, !"RtLibUseGOT", i32 1}
!3 = !{!"rustc version 1.77.0-dev"}
; !4: !srcloc operand attached to every inline-asm call
!4 = !{i32 1517090}
; !5: empty node used as the operand of !noundef and !nonnull
!5 = !{}
; !6: !align operand (8 bytes)
!6 = !{i64 8}
; !7: i64 !range [0, 2) - used on the first word of Option<usize> in next
!7 = !{i64 0, i64 2}
; !8: i8 !range [0, 2) - used on loaded bool bytes
!8 = !{i8 0, i8 2}
; !9: i8 !range [0, 3) - used on the result of next (0, 1, or 2 for "no element")
!9 = !{i8 0, i8 3}

Right:

$ clang -O1 9897542-debug.ll && ./a.out
0
0
0
0
0
0

Wrong:

$ clang -O2 9897542-debug.ll && ./a.out
0
0
1
0
0
0

Bisected down to

BISECT: running pass (1121) SLPVectorizerPass on main

On trunk 5a9af39

clang version 19.0.0git (https://github.com/llvm/llvm-project.git 5a9af39aab40bba52d4e46cabf4b1ab47f614fa2)
Target: x86_64-unknown-linux-gnu
Thread model: posix
InstalledDir: /opt/compiler-explorer/clang-trunk/bin
Compiler returned: 0

This reproduction comes from fuzzer-generated Rust. The IR was emitted with -Zmir-opt-level=0 -Copt-level=0, and the Rust std symbols were then manually shimmed.

Rust MIR
#![feature(custom_mir, core_intrinsics)]
#![allow(unused_parens, unused_assignments, overflowing_literals)]
extern crate core;
use core::intrinsics::mir::*;

// Entry point of the reproducer: calls fn13 with a tuple whose integer fields
// are 0 and whose bool array is all `false`. The tuple is passed through
// core::hint::black_box first, presumably so the optimizer cannot treat the
// argument as a known constant.
pub fn enter() {
    fn13(core::hint::black_box((0, [false; 6], [0; 4], 0, 0)));
}

// Hand-written custom-MIR body at the centre of the reproducer.
//
// It saves the bool array `_6.1`, temporarily overwrites it, calls fn14 with
// the temporary value, restores the saved array, and finally prints the
// restored array through p1. Because `enter` passes an all-`false` array, the
// report's "Right" output is six `0` lines.
#[custom_mir(dialect = "runtime", phase = "initial")]
fn fn13(mut _6: (i8, [bool; 6], [i64; 4], i128, u8)) {
    mir! {
    let _1: [bool; 6];
    let _14: [u16; 6];
    let _17: *const (i8, [bool; 6], [i64; 4], i128, u8);
    let _19: Adt59;
    let _27: (i8, [bool; 6], [i64; 4], i128, u8);
    let _34: Adt54;
    let _49: ();
    {
    // This first value of _1 is overwritten two statements below before it is read.
    _1 = [false,true,true,true,true,true];
    _19.fld4 = [25264_u16,54664_u16,47032_u16,55453_u16,33511_u16,43043_u16];
    // _17 points at _6, so (*_17).1 and _6.1 name the same memory.
    _17 = core::ptr::addr_of!(_6);
    // Save the incoming bool array ...
    _1 = (*_17).1;
    // ... and temporarily replace it with a different pattern.
    (*_17).1 = [false,true,true,false,false,true];
    Call(_49 = fn14(_6.1), ReturnTo(bb3), UnwindUnreachable())
    }
    bb3 = {
    // Restore the saved array into _6.1 (still through the pointer to _6).
    (*_17).1 = _1;
    _14 = _19.fld4;
    // From here on _17 points at _27 instead of _6.
    _17 = core::ptr::addr_of!(_27);
    _34.fld5 = (-1228700359_i32);
    _34.fld1 = _6;
    // The large literal equals 2^128 - 1228700359, i.e. -1228700359 viewed as an
    // unsigned 128-bit value, so the bb12 arm is presumably the one taken (the
    // reported output shows p1 printing six values).
    match _34.fld5 {
    0 => bb3,
    340282366920938463463374607430539511097 => bb12,
    _ => bb18
    }
    }
    bb12 = {
    // Build _27 (through _17) from the restored _6, then print its bool array.
    (*_17) = (_6.0, _6.1, _6.2, _34.fld1.3, _6.4);
    _1 = _27.1;
    Call(_49 = p1(Move(_1)), ReturnTo(bb17), UnwindUnreachable())
    }
    bb17 = {
    // Hand _14 and _6 to `hide`, which passes them to black_box.
    Call(_49 = hide(13_usize, 25_usize, 0_usize, 11_usize, 0_usize, 14_usize, Move(_14), 6_usize, Move(_6)), ReturnTo(bb18), UnwindUnreachable())
    }
    bb18 = {
    Return()
    }

    }
}

// Custom-MIR helper called from fn13 with the temporary bool array.
//
// It makes two black_box calls: one on the local `unit` and one on the
// argument `_16`. The second result is stored in `_20.1`, which is not read
// again inside this function.
#[custom_mir(dialect = "runtime", phase = "initial")]
pub fn fn14(mut _16: [bool; 6]) {
    mir! {
    let _20: (i8, [bool; 6], [i64; 4], i128, u8);
    let unit: ();
    let _44: ();
    {
    Call(_44 = core::hint::black_box(unit), ReturnTo(bb17), UnwindUnreachable())
    }
    bb17 = {
    // Pass the array argument through black_box.
    Call(_20.1 = core::hint::black_box(Move(_16)), ReturnTo(bb18), UnwindUnreachable())
    }
    bb18 = {
    Return()
    }

    }
}
// Program entry point; only forwards to `enter`.
pub fn main() {
    enter();
}

// Aggregate used for local `_34` in fn13.
#[derive(Debug)]
pub struct Adt54 {
    // Same tuple type as fn13's argument; fn13 assigns `_6` to it.
    fld1: (i8, [bool; 6], [i64; 4], i128, u8),
    // Value that fn13 assigns and then matches on.
    fld5: i32,
}
// Aggregate used for local `_19` in fn13.
#[derive(Debug)]
pub struct Adt59 {
    // Six u16 constants written by fn13 and later copied into `_14`.
    fld4: [u16; 6],
}

// Declaration of the C variadic `printf`, used by the non-miri print_bool.
extern "C" {
    fn printf(fmt: *const core::ffi::c_char, ...) -> core::ffi::c_int;
}
// Non-miri variant: prints `x` as an integer followed by a newline using C printf.
#[cfg(not(miri))]
fn print_bool(x: bool) {
    unsafe {
        // The format string is a NUL-terminated byte literal, as printf requires.
        printf(b"%d\n\0".as_ptr().cast(), x as core::ffi::c_int);
    }
}

// Miri variant: prints `x` as an integer on its own line using println!
// instead of the C printf call.
#[cfg(miri)]
fn print_bool(x: bool) {
    println!("{}", x as i32);
}

// Prints each of the six bools in `x`, one per line, via print_bool.
// Marked #[inline(never)], so it is not inlined into its caller.
#[inline(never)]
pub fn p1(x: [bool; 6]) {
    for b in x {
        print_bool(b);
    }
}

// Passes `val2` and `val3` to core::hint::black_box. The remaining parameters
// are not used in the body. Marked #[inline(never)].
#[inline(never)]
fn hide(
    f: usize,
    var0: usize,
    val0: usize,
    var1: usize,
    val1: usize,
    var2: usize,
    val2: [u16; 6],
    var3: usize,
    val3: (i8, [bool; 6], [i64; 4], i128, u8),
) {
    core::hint::black_box(val2);
    core::hint::black_box(val3);
}
@alexey-bataev
Copy link
Member

alexey-bataev commented Feb 8, 2024

Are you sure your code is correct? There are some issues with alignment, and because of them this code returns different results. Please check how you transformed your test.

@cbeuw
Copy link
Author

cbeuw commented Feb 8, 2024

The LLVM IR was generated from Rust's MIR code. The only change I made was replacing the declare'd Rust std symbols with stub definitions (rust_eh_personality, panic_in_cleanup, and panic_misaligned_pointer_dereference return immediately, and main calls my entry function directly instead of going through Rust's lang_start).

It's also possible that the bug is in Rust's codegen. Can you point out where the alignment issue is?

@alexey-bataev
Copy link
Member

alexey-bataev commented Feb 9, 2024

Here is the IR I got after transformation:

; ModuleID = 'repro.ll'
source_filename = "9897542_debug.4c1401909d89b8aa-cgu.0"
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

@alloc_87551382a9de3243abbfdbda2f0b586b = private unnamed_addr constant <{ [4 x i8] }> <{ [4 x i8] c"%d\0A\00" }>, align 1

; Optimized form of p1: the six bytes arrive packed in a single i48 argument,
; and each byte is printed with printf, lowest byte first.
; Function Attrs: nofree noinline nounwind nonlazybind uwtable
define internal fastcc void @_ZN14_9897542_debug2p117he5f07e46e9affd21E(i48 %0) unnamed_addr #0 personality ptr @rust_eh_personality {
bb6:
  ; Byte 5 (bits 40..47) and byte 4 (bits 32..39, masked further below).
  %iter.sroa.9.16.extract.shift = lshr i48 %0, 40
  %iter.sroa.9.16.extract.trunc = trunc i48 %iter.sroa.9.16.extract.shift to i32
  %iter.sroa.8.16.extract.shift = lshr i48 %0, 32
  %iter.sroa.8.16.extract.trunc = trunc i48 %iter.sroa.8.16.extract.shift to i32
  ; Bytes 3, 2 and 1 are taken from the low 32 bits by shifting right.
  %1 = trunc i48 %0 to i32
  %2 = lshr i32 %1, 24
  %3 = trunc i48 %0 to i32
  %4 = lshr i32 %3, 16
  %5 = trunc i48 %0 to i32
  %6 = lshr i32 %5, 8
  ; Byte 0 is the low 8 bits.
  %iter.sroa.3.16.extract.trunc = trunc i48 %0 to i32
  %_8.i = and i32 %iter.sroa.3.16.extract.trunc, 255
  %_2.i = tail call i32 (ptr, ...) @printf(ptr nonnull dereferenceable(1) @alloc_87551382a9de3243abbfdbda2f0b586b, i32 %_8.i)
  %_8.i.1 = and i32 %6, 255
  %_2.i.1 = tail call i32 (ptr, ...) @printf(ptr nonnull dereferenceable(1) @alloc_87551382a9de3243abbfdbda2f0b586b, i32 %_8.i.1)
  %_8.i.2 = and i32 %4, 255
  %_2.i.2 = tail call i32 (ptr, ...) @printf(ptr nonnull dereferenceable(1) @alloc_87551382a9de3243abbfdbda2f0b586b, i32 %_8.i.2)
  %_2.i.3 = tail call i32 (ptr, ...) @printf(ptr nonnull dereferenceable(1) @alloc_87551382a9de3243abbfdbda2f0b586b, i32 %2)
  %_8.i.4 = and i32 %iter.sroa.8.16.extract.trunc, 255
  %_2.i.4 = tail call i32 (ptr, ...) @printf(ptr nonnull dereferenceable(1) @alloc_87551382a9de3243abbfdbda2f0b586b, i32 %_8.i.4)
  %_2.i.5 = tail call i32 (ptr, ...) @printf(ptr nonnull dereferenceable(1) @alloc_87551382a9de3243abbfdbda2f0b586b, i32 %iter.sroa.9.16.extract.trunc)
  ret void
}

; Optimized form of hide: copies %val2 (12 bytes) and %val3 (64 bytes) into
; local allocas and passes each alloca to an empty inline asm that clobbers
; memory (presumably the lowering of core::hint::black_box).
; Function Attrs: noinline nonlazybind uwtable
define internal fastcc void @_ZN14_9897542_debug4hide17h536849b57a0e6c96E(ptr nocapture readonly align 2 %val2, ptr nocapture readonly align 16 %val3) unnamed_addr #1 {
start:
  %_12 = alloca { [4 x i64], i128, [6 x i8], i8, i8, [8 x i8] }, align 16
  %_10 = alloca [6 x i16], align 2
  call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 2 dereferenceable(12) %_10, ptr noundef nonnull align 2 dereferenceable(12) %val2, i64 12, i1 false)
  call void asm sideeffect "", "r,~{memory}"(ptr nonnull %_10) #9, !srcloc !4
  call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 16 dereferenceable(64) %_12, ptr noundef nonnull align 16 dereferenceable(64) %val3, i64 64, i1 false)
  call void asm sideeffect "", "r,~{memory}"(ptr nonnull %_12) #9, !srcloc !4
  ret void
}

; Stub personality function: ignores its arguments and returns 0.
; Function Attrs: mustprogress nofree norecurse nosync nounwind nonlazybind willreturn memory(none) uwtable
define noundef i32 @rust_eh_personality(i32 %0, i32 %1, i64 %2, ptr nocapture readnone %3, ptr nocapture readnone %4) unnamed_addr #2 {
start:
  ret i32 0
}

; Function Attrs: mustprogress nocallback nofree nounwind willreturn memory(argmem: readwrite)
declare void @llvm.memcpy.p0.p0.i64(ptr noalias nocapture writeonly, ptr noalias nocapture readonly, i64, i1 immarg) #3

; Function Attrs: mustprogress nocallback nofree nounwind willreturn memory(argmem: write)
declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg) #4

; Stub that returns immediately without using its arguments.
; NOTE(review): attribute group #5 lists noreturn although this body returns;
; this function is not called from @main in this listing.
; Function Attrs: cold mustprogress nofree noinline norecurse noreturn nosync nounwind nonlazybind willreturn memory(none) uwtable
define void @_ZN4core9panicking36panic_misaligned_pointer_dereference17h93252408cf5d3556E(i64 %0, i64 %1, ptr nocapture readnone align 8 %2) unnamed_addr #5 {
start:
  ret void
}

; Function Attrs: nofree nounwind nonlazybind uwtable
declare noundef i32 @printf(ptr nocapture noundef readonly, ...) unnamed_addr #6

; Stub that returns immediately.
; NOTE(review): attribute group #5 lists noreturn although this body returns;
; this function is not called from @main in this listing.
; Function Attrs: cold mustprogress nofree noinline norecurse noreturn nosync nounwind nonlazybind willreturn memory(none) uwtable
define void @_ZN4core9panicking16panic_in_cleanup17h8f1b458bda2e1ba8E() unnamed_addr #5 {
start:
  ret void
}

; @main after optimization, with the Rust-level calls inlined. It zeroes the
; tuple in %_2.i, saves the six bool bytes at offset 48, writes a temporary
; pattern over them, passes the temporary through an opaque asm, restores the
; saved bytes, and finally packs the six bytes into an i48 for the print routine.
; Function Attrs: nonlazybind
define noundef i32 @main(i32 %0, ptr nocapture readnone %1) unnamed_addr #7 {
top:
  %_0.i.i.i.i = alloca [6 x i8], align 8
  %_3.i.i = alloca [6 x i16], align 2
  %_2.i = alloca { [4 x i64], i128, [6 x i8], i8, i8, [8 x i8] }, align 16
  call void @llvm.lifetime.start.p0(i64 64, ptr nonnull %_2.i)
  ; Byte offset 48 of %_2.i: with the struct type above this is where the
  ; [6 x i8] field begins ([4 x i64] is 32 bytes, i128 is 16 bytes).
  %_3.sroa.3.0._2.sroa_idx.i = getelementptr inbounds i8, ptr %_2.i, i64 48
  ; Zero the first 56 bytes, then make the object opaque to the optimizer.
  call void @llvm.memset.p0.i64(ptr noundef nonnull align 16 dereferenceable(56) %_2.i, i8 0, i64 56, i1 false)
  call void asm sideeffect "", "r,~{memory}"(ptr nonnull %_2.i) #9, !srcloc !4
  call void @llvm.lifetime.start.p0(i64 12, ptr nonnull %_3.i.i)
  ; Pointers to bytes 1..5 of the six-byte array (offsets 49..53).
  %_2.sroa.5.0..sroa_idx3.i.i = getelementptr inbounds i8, ptr %_2.i, i64 49
  %_2.sroa.6.0..sroa_idx7.i.i = getelementptr inbounds i8, ptr %_2.i, i64 50
  %_2.sroa.7.0..sroa_idx11.i.i = getelementptr inbounds i8, ptr %_2.i, i64 51
  %_2.sroa.8.0..sroa_idx15.i.i = getelementptr inbounds i8, ptr %_2.i, i64 52
  ; Save bytes 4 and 5 individually, then overwrite them with the temporary values.
  %_2.sroa.8.0.copyload16.i.i = load i8, ptr %_2.sroa.8.0..sroa_idx15.i.i, align 4
  %_2.sroa.9.0..sroa_idx19.i.i = getelementptr inbounds i8, ptr %_2.i, i64 53
  %_2.sroa.9.0.copyload20.i.i = load i8, ptr %_2.sroa.9.0..sroa_idx19.i.i, align 1
  store i8 0, ptr %_2.sroa.8.0..sroa_idx15.i.i, align 4
  store i8 1, ptr %_2.sroa.9.0..sroa_idx19.i.i, align 1
  ; Save bytes 0..3 as one vector load; this must happen before the four
  ; byte stores below overwrite them.
  %2 = load <4 x i8>, ptr %_3.sroa.3.0._2.sroa_idx.i, align 16 ; works with align 8
  store i8 0, ptr %_3.sroa.3.0._2.sroa_idx.i, align 16
  store i8 1, ptr %_2.sroa.5.0..sroa_idx3.i.i, align 1
  store i8 1, ptr %_2.sroa.6.0..sroa_idx7.i.i, align 2
  store i8 0, ptr %_2.sroa.7.0..sroa_idx11.i.i, align 1
  ; Read the temporary six-byte pattern and pass a copy through an opaque asm.
  %.sroa.0.0.copyload.i.i = load i48, ptr %_3.sroa.3.0._2.sroa_idx.i, align 16
  call void asm sideeffect "", "~{memory}"() #9, !srcloc !4
  call void @llvm.lifetime.start.p0(i64 6, ptr nonnull %_0.i.i.i.i)
  store i48 %.sroa.0.0.copyload.i.i, ptr %_0.i.i.i.i, align 8
  call void asm sideeffect "", "r,~{memory}"(ptr nonnull %_0.i.i.i.i) #9, !srcloc !4
  call void @llvm.lifetime.end.p0(i64 6, ptr nonnull %_0.i.i.i.i)
  ; Restore the saved bytes 0..3, 4 and 5.
  store <4 x i8> %2, ptr %_3.sroa.3.0._2.sroa_idx.i, align 16
  store i8 %_2.sroa.8.0.copyload16.i.i, ptr %_2.sroa.8.0..sroa_idx15.i.i, align 4
  store i8 %_2.sroa.9.0.copyload20.i.i, ptr %_2.sroa.9.0..sroa_idx19.i.i, align 1
  ; Fill the [6 x i16] local with six constants.
  store i16 25264, ptr %_3.i.i, align 2
  %_5.sroa.2.0._3.sroa_idx.i.i = getelementptr inbounds i8, ptr %_3.i.i, i64 2
  store i16 -10872, ptr %_5.sroa.2.0._3.sroa_idx.i.i, align 2
  %_5.sroa.3.0._3.sroa_idx.i.i = getelementptr inbounds i8, ptr %_3.i.i, i64 4
  store i16 -18504, ptr %_5.sroa.3.0._3.sroa_idx.i.i, align 2
  %_5.sroa.4.0._3.sroa_idx.i.i = getelementptr inbounds i8, ptr %_3.i.i, i64 6
  store i16 -10083, ptr %_5.sroa.4.0._3.sroa_idx.i.i, align 2
  %_5.sroa.5.0._3.sroa_idx.i.i = getelementptr inbounds i8, ptr %_3.i.i, i64 8
  store i16 -32025, ptr %_5.sroa.5.0._3.sroa_idx.i.i, align 2
  %_5.sroa.6.0._3.sroa_idx.i.i = getelementptr inbounds i8, ptr %_3.i.i, i64 10
  store i16 -22493, ptr %_5.sroa.6.0._3.sroa_idx.i.i, align 2
  ; Bytes 2..5 are reloaded from memory (after the restore) ...
  %_6.sroa.5.48.copyload.i.i = load i32, ptr %_2.sroa.6.0..sroa_idx7.i.i, align 2
  %3 = zext i32 %_6.sroa.5.48.copyload.i.i to i48
  %4 = shl nuw i48 %3, 16
  ; ... while bytes 0 and 1 come from the saved vector %2. This
  ; extract/zext/shl/or sequence over a multi-use vector load appears to be the
  ; pattern the issue discussion identifies as the trigger for the faulty combine.
  %5 = extractelement <4 x i8> %2, i32 1
  %_2.sroa.5.0.insert.ext.i.i = zext i8 %5 to i48
  %_2.sroa.5.0.insert.shift.i.i = shl nuw nsw i48 %_2.sroa.5.0.insert.ext.i.i, 8
  %6 = extractelement <4 x i8> %2, i32 0
  %_2.sroa.0.0.insert.ext.i.i = zext i8 %6 to i48
  %_2.sroa.5.0.insert.insert.i.i = or disjoint i48 %_2.sroa.5.0.insert.shift.i.i, %_2.sroa.0.0.insert.ext.i.i
  %_2.sroa.0.0.insert.insert.i.i = or disjoint i48 %_2.sroa.5.0.insert.insert.i.i, %4
  call fastcc void @_ZN14_9897542_debug2p117he5f07e46e9affd21E(i48 %_2.sroa.0.0.insert.insert.i.i)
  call fastcc void @_ZN14_9897542_debug4hide17h536849b57a0e6c96E(ptr nonnull align 2 %_3.i.i, ptr nonnull align 16 %_2.i)
  call void @llvm.lifetime.end.p0(i64 12, ptr nonnull %_3.i.i)
  call void @llvm.lifetime.end.p0(i64 64, ptr nonnull %_2.i)
  ret i32 0
}

; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #8

; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #8

attributes #0 = { nofree noinline nounwind nonlazybind uwtable "probe-stack"="inline-asm" "target-cpu"="x86-64" }
attributes #1 = { noinline nonlazybind uwtable "probe-stack"="inline-asm" "target-cpu"="x86-64" }
attributes #2 = { mustprogress nofree norecurse nosync nounwind nonlazybind willreturn memory(none) uwtable "probe-stack"="inline-asm" "target-cpu"="x86-64" }
attributes #3 = { mustprogress nocallback nofree nounwind willreturn memory(argmem: readwrite) }
attributes #4 = { mustprogress nocallback nofree nounwind willreturn memory(argmem: write) }
attributes #5 = { cold mustprogress nofree noinline norecurse noreturn nosync nounwind nonlazybind willreturn memory(none) uwtable "probe-stack"="inline-asm" "target-cpu"="x86-64" }
attributes #6 = { nofree nounwind nonlazybind uwtable "probe-stack"="inline-asm" "target-cpu"="x86-64" }
attributes #7 = { nonlazybind "probe-stack"="inline-asm" "target-cpu"="x86-64" }
attributes #8 = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }
attributes #9 = { nounwind }

!llvm.module.flags = !{!0, !1, !2}
!llvm.ident = !{!3}

!0 = !{i32 8, !"PIC Level", i32 2}
!1 = !{i32 7, !"PIE Level", i32 2}
!2 = !{i32 2, !"RtLibUseGOT", i32 1}
!3 = !{!"rustc version 1.77.0-dev"}
!4 = !{i32 1517090}

See the comment ; works with align 8. To me, this seems like an issue with the %_2.i = alloca { [4 x i64], i128, [6 x i8], i8, i8, [8 x i8] }, align 16. It looks like the fields in the struct are not aligned properly: they are actually aligned to 8, not 16.

@nikic
Copy link
Contributor

nikic commented Feb 9, 2024

I think this is a bug in DAGCombine, specifically with this code in MatchLoadCombine: https://github.com/llvm/llvm-project/blame/7ddc32052546abd41656d2e670f3902b1bf805a7/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp#L9248-L9255

That "transfer chain users" logic does not look correct to me for the cases where the original loads do not get removed. In that case, their chains will no longer be used at all, which will permit those loads to be incorrectly reordered.

@nikic nikic added the llvm:SelectionDAG SelectionDAGISel as well label Feb 9, 2024
@RalfJung
Copy link
Contributor

See the comment ; works with align 8. To me, seems like the issue with the %_2.i = alloca { [4 x i64], i128, [6 x i8], i8, i8, [8 x i8] }, align 16. Looks like the fields in the struct are not aligned properly, they actually aligned by alignment 8, not 16.

The alloca line asks LLVM to put this alloca at alignment 16. This is something LLVM must respect even if the type of the alloca would not inherently require 16-byte alignment. Therefore, %_2.i will be 16-aligned. Then %_3.sroa.3.0._2.sroa_idx.i is computed by adding 48, which is a multiple of 16, so the result is still 16-aligned. Therefore, I think the align annotation on the load is correct.

@nikic
Copy link
Contributor

nikic commented Feb 12, 2024

I believe this is a fairly minimal reproducer:

; Minimal reproducer: a <4 x i8> load from %ptr is used both by a later store
; back to %ptr and by two extractelement instructions whose results are
; combined into one integer. A store to %clobber sits between the load and its uses.
define i32 @test(ptr %ptr, ptr %clobber) {
  %load = load <4 x i8>, ptr %ptr, align 16
  ; Nothing in this function establishes that %clobber and %ptr are distinct,
  ; so the load above has to stay ahead of this store.
  store i32 0, ptr %clobber
  store <4 x i8> %load, ptr %ptr, align 16
  ; Combine element 1 (shifted left by 8) with element 0.
  %e1 = extractelement <4 x i8> %load, i64 1
  %e1.ext = zext i8 %e1 to i32
  %e1.ext.shift = shl nuw nsw i32 %e1.ext, 8
  %e0 = extractelement <4 x i8> %load, i64 0
  %e0.ext = zext i8 %e0 to i32
  %res = or i32 %e1.ext.shift, %e0.ext
  ret i32 %res
}

Produces:

	# 16-bit load from (%rdi), zero-extended into the return register.
	movzwl	(%rdi), %eax
	# 32-bit store of 0 to (%rsi).
	movl	$0, (%rsi)
	# The 32-bit load from (%rdi) is emitted after the store above, whereas in
	# the IR the vector load comes before the store to %clobber.
	movl	(%rdi), %ecx
	movl	%ecx, (%rdi)
	retq

nikic added a commit that referenced this issue Feb 12, 2024
nikic added a commit to nikic/llvm-project that referenced this issue Feb 12, 2024
The load combine replaces a number of original loads with one
new load and also replaces the output chains of the original loads
with the output chain of the new load. This is only correct if
the old loads actually get removed, otherwise they may get
incorrectly reordered.

The code did enforce that all involved operations are one-use
(which also guarantees that the loads will be removed), with one
exception: For vector loads, multi-use was allowed to support
multiple extract elements from one load.

This patch collects these extract elements, and then validates
that the loads are only used inside them.

I think an alternative fix would be to replace the uses of the old
output chains with TokenFactors that include both the old output
chains and the new output chain. However, I think the proposed
patch is preferable, as the profitability of the transform in the
general multi-use case is unclear, as it may increase the overall
number of loads.

Fixes llvm#80911.
nikic added a commit that referenced this issue Feb 13, 2024
The load combine replaces a number of original loads with one new load
and also replaces the output chains of the original loads with the
output chain of the new load. This is incorrect if the original load is
retained (due to multi-use), as it may get incorrectly reordered.

Fix this by using makeEquivalentMemoryOrdering() instead, which will
create a TokenFactor with both chains.

Fixes #80911.
@nikic nikic added this to the LLVM 18.X Release milestone Feb 13, 2024
@nikic
Copy link
Contributor

nikic commented Feb 13, 2024

/cherry-pick 25b9ed6

@llvmbot
Copy link
Collaborator

llvmbot commented Feb 13, 2024

Failed to cherry-pick: 25b9ed6

https://github.com/llvm/llvm-project/actions/runs/7889150888

Please manually backport the fix and push it to your github fork. Once this is done, please create a pull request

@EugeneZelenko EugeneZelenko reopened this Feb 13, 2024
@cbeuw cbeuw changed the title Miscompile in SLPVectorizerPass Miscompile in DAGCombine Feb 13, 2024
nikic added a commit to nikic/llvm-project that referenced this issue Feb 13, 2024
The load combine replaces a number of original loads with one new load
and also replaces the output chains of the original loads with the
output chain of the new load. This is incorrect if the original load is
retained (due to multi-use), as it may get incorrectly reordered.

Fix this by using makeEquivalentMemoryOrdering() instead, which will
create a TokenFactor with both chains.

Fixes llvm#80911.

(cherry picked from commit 25b9ed6)
@nikic
Copy link
Contributor

nikic commented Feb 13, 2024

Backport PR opened at #81633.

@nikic nikic closed this as completed Feb 13, 2024
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment