diff --git a/compiler/rustc_middle/src/ty/layout.rs b/compiler/rustc_middle/src/ty/layout.rs
index c77047f8a2ec2..7495449da4c4f 100644
--- a/compiler/rustc_middle/src/ty/layout.rs
+++ b/compiler/rustc_middle/src/ty/layout.rs
@@ -3226,12 +3226,8 @@ impl<'tcx> LayoutCx<'tcx, TyCtxt<'tcx>> {
                     _ => return,
                 }
 
-                // Pass and return structures up to 2 pointers in size by value, matching `ScalarPair`.
-                // LLVM will usually pass these in 2 registers, which is more efficient than by-ref.
-                let max_by_val_size = Pointer.size(self) * 2;
                 let size = arg.layout.size;
-
-                if arg.layout.is_unsized() || size > max_by_val_size {
+                if arg.layout.is_unsized() || size > Pointer.size(self) {
                     arg.make_indirect();
                 } else {
                     // We want to pass small aggregates as immediates, but using
diff --git a/src/test/codegen/arg-return-value-in-reg.rs b/src/test/codegen/arg-return-value-in-reg.rs
deleted file mode 100644
index a69291d47821a..0000000000000
--- a/src/test/codegen/arg-return-value-in-reg.rs
+++ /dev/null
@@ -1,32 +0,0 @@
-//! Check that types of up to 128 bits are passed and returned by-value instead of via pointer.
-
-// compile-flags: -C no-prepopulate-passes -O
-// only-x86_64
-
-#![crate_type = "lib"]
-
-pub struct S {
-    a: u64,
-    b: u32,
-    c: u32,
-}
-
-// CHECK: define i128 @modify(i128{{( %0)?}})
-#[no_mangle]
-pub fn modify(s: S) -> S {
-    S { a: s.a + s.a, b: s.b + s.b, c: s.c + s.c }
-}
-
-#[repr(packed)]
-pub struct TooBig {
-    a: u64,
-    b: u32,
-    c: u32,
-    d: u8,
-}
-
-// CHECK: define void @m_big(%TooBig* [[ATTRS:.*sret.*]], %TooBig* [[ATTRS2:.*]] %s)
-#[no_mangle]
-pub fn m_big(s: TooBig) -> TooBig {
-    TooBig { a: s.a + s.a, b: s.b + s.b, c: s.c + s.c, d: s.d + s.d }
-}
diff --git a/src/test/codegen/array-equality.rs b/src/test/codegen/array-equality.rs
index fefc232b49040..8dce004b54a1b 100644
--- a/src/test/codegen/array-equality.rs
+++ b/src/test/codegen/array-equality.rs
@@ -5,20 +5,20 @@
 
 // CHECK-LABEL: @array_eq_value
 #[no_mangle]
-pub fn array_eq_value(a: [u16; 6], b: [u16; 6]) -> bool {
+pub fn array_eq_value(a: [u16; 3], b: [u16; 3]) -> bool {
     // CHECK-NEXT: start:
-    // CHECK-NEXT: %2 = icmp eq i96 %0, %1
+    // CHECK-NEXT: %2 = icmp eq i48 %0, %1
     // CHECK-NEXT: ret i1 %2
     a == b
 }
 
 // CHECK-LABEL: @array_eq_ref
 #[no_mangle]
-pub fn array_eq_ref(a: &[u16; 6], b: &[u16; 6]) -> bool {
+pub fn array_eq_ref(a: &[u16; 3], b: &[u16; 3]) -> bool {
     // CHECK: start:
-    // CHECK: load i96, i96* %{{.+}}, align 2
-    // CHECK: load i96, i96* %{{.+}}, align 2
-    // CHECK: icmp eq i96
+    // CHECK: load i48, i48* %{{.+}}, align 2
+    // CHECK: load i48, i48* %{{.+}}, align 2
+    // CHECK: icmp eq i48
     // CHECK-NEXT: ret
     a == b
 }
@@ -47,11 +47,33 @@ pub fn array_eq_long(a: &[u16; 1234], b: &[u16; 1234]) -> bool {
     a == b
 }
 
-// CHECK-LABEL: @array_eq_zero(i128 %0)
+// CHECK-LABEL: @array_eq_zero_short(i48
 #[no_mangle]
-pub fn array_eq_zero(x: [u16; 8]) -> bool {
+pub fn array_eq_zero_short(x: [u16; 3]) -> bool {
     // CHECK-NEXT: start:
-    // CHECK-NEXT: %[[EQ:.+]] = icmp eq i128 %0, 0
+    // CHECK-NEXT: %[[EQ:.+]] = icmp eq i48 %0, 0
+    // CHECK-NEXT: ret i1 %[[EQ]]
+    x == [0; 3]
+}
+
+// CHECK-LABEL: @array_eq_zero_mid([8 x i16]*
+#[no_mangle]
+pub fn array_eq_zero_mid(x: [u16; 8]) -> bool {
+    // CHECK-NEXT: start:
+    // CHECK-NEXT: bitcast
+    // CHECK-NEXT: %[[LOAD:.+]] = load i128,
+    // CHECK-NEXT: %[[EQ:.+]] = icmp eq i128 %[[LOAD]], 0
     // CHECK-NEXT: ret i1 %[[EQ]]
     x == [0; 8]
 }
+
+// CHECK-LABEL: @array_eq_zero_long([1234 x i16]*
+#[no_mangle]
+pub fn array_eq_zero_long(x: [u16; 1234]) -> bool {
+    // CHECK-NEXT: start:
+    // CHECK-NOT: alloca
+    // CHECK: %[[CMP:.+]] = tail call i32 @{{bcmp|memcmp}}(
+    // CHECK-NEXT: %[[EQ:.+]] = icmp eq i32 %[[CMP]], 0
+    // CHECK-NEXT: ret i1 %[[EQ]]
+    x == [0; 1234]
+}
diff --git a/src/test/codegen/autovectorize-f32x4.rs b/src/test/codegen/autovectorize-f32x4.rs
new file mode 100644
index 0000000000000..6b09c8fc99860
--- /dev/null
+++ b/src/test/codegen/autovectorize-f32x4.rs
@@ -0,0 +1,32 @@
+// compile-flags: -C opt-level=3
+// only-x86_64
+#![crate_type = "lib"]
+
+// CHECK-LABEL: @auto_vectorize_direct
+#[no_mangle]
+pub fn auto_vectorize_direct(a: [f32; 4], b: [f32; 4]) -> [f32; 4] {
+// CHECK: load <4 x float>
+// CHECK: load <4 x float>
+// CHECK: fadd <4 x float>
+// CHECK: store <4 x float>
+    [
+        a[0] + b[0],
+        a[1] + b[1],
+        a[2] + b[2],
+        a[3] + b[3],
+    ]
+}
+
+// CHECK-LABEL: @auto_vectorize_loop
+#[no_mangle]
+pub fn auto_vectorize_loop(a: [f32; 4], b: [f32; 4]) -> [f32; 4] {
+// CHECK: load <4 x float>
+// CHECK: load <4 x float>
+// CHECK: fadd <4 x float>
+// CHECK: store <4 x float>
+    let mut c = [0.0; 4];
+    for i in 0..4 {
+        c[i] = a[i] + b[i];
+    }
+    c
+}
diff --git a/src/test/codegen/union-abi.rs b/src/test/codegen/union-abi.rs
index 99576a5f57e90..01f917b291094 100644
--- a/src/test/codegen/union-abi.rs
+++ b/src/test/codegen/union-abi.rs
@@ -63,16 +63,11 @@ pub union UnionU128{a:u128}
 #[no_mangle]
 pub fn test_UnionU128(_: UnionU128) -> UnionU128 { loop {} }
 
-pub union UnionU128x2{a:(u128, u128)}
-// CHECK: define void @test_UnionU128x2(i128 %_1.0, i128 %_1.1)
-#[no_mangle]
-pub fn test_UnionU128x2(_: UnionU128x2) { loop {} }
-
 #[repr(C)]
-pub union CUnionU128x2{a:(u128, u128)}
-// CHECK: define void @test_CUnionU128x2(%CUnionU128x2* {{.*}} %_1)
+pub union CUnionU128{a:u128}
+// CHECK: define void @test_CUnionU128(%CUnionU128* {{.*}} %_1)
 #[no_mangle]
-pub fn test_CUnionU128x2(_: CUnionU128x2) { loop {} }
+pub fn test_CUnionU128(_: CUnionU128) { loop {} }
 
 pub union UnionBool { b:bool }
 // CHECK: define noundef zeroext i1 @test_UnionBool(i8 %b)