Skip to content

Commit

Permalink
Auto merge of #126444 - tesuji:gvn-const-arrays, r=<try>
Browse files Browse the repository at this point in the history
gvn: Promote/propagate const local array

Rewriting of #125916 which used `PromoteTemps` pass.

This allows promoting constant local arrays as anonymous constants. So that's in codegen for
a local array, rustc outputs `llvm.memcpy` (which is easy for LLVM to optimize) instead of a series
of `store` on stack (a.k.a in-place initialization). This makes rustc on par with clang on this specific case.
See more in #73825 or [zulip][opsem] for more info.

[Here is a simple micro benchmark][bench] that shows the performance differences between promoting arrays or not.

[Prior discussions on zulip][opsem].

This patch [saves about 600 KB][perf] (~0.5%) of `librustc_driver.so`.
![image](https://github.com/rust-lang/rust/assets/15225902/0e37559c-f5d9-4cdf-b7e3-a2956fd17bc1)

Fix #73825

r? cjgillot

### Unresolved questions
- [ ] Should we ignore nested arrays?
    I think that promoting nested arrays is bloating codegen.
- [ ] Should stack_threshold be at least 32 bytes? Like the benchmark showed.
    If yes, the test should be updated to make arrays larger than 32 bytes.
- [x] ~Is this concerning that  `call(move _1)` is now `call(const [array])`?~
  It reverted back to `call(move _1)`

[opsem]: https://rust-lang.zulipchat.com/#narrow/stream/136281-t-opsem/topic/Could.20const.20read-only.20arrays.20be.20const.20promoted.3F
[bench]: rust-lang/rust-clippy#12854 (comment)
[perf]: https://perf.rust-lang.org/compare.html?start=f9515fdd5aa132e27d9b580a35b27f4b453251c1&end=7e160d4b55bb5a27be0696f45db247ccc2e166d9&stat=size%3Alinked_artifact&tab=artifact-size
  • Loading branch information
bors committed Jul 14, 2024
2 parents 88fa119 + c15eb60 commit b6d6d25
Show file tree
Hide file tree
Showing 10 changed files with 266 additions and 16 deletions.
36 changes: 24 additions & 12 deletions compiler/rustc_mir_transform/src/gvn.rs
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,7 @@ fn propagate_ssa<'tcx>(tcx: TyCtxt<'tcx>, body: &mut Body<'tcx>) {
let value = state.simplify_rvalue(rvalue, location);
// FIXME(#112651) `rvalue` may have a subtype to `local`. We can only mark `local` as
// reusable if we have an exact type match.
if state.local_decls[local].ty != rvalue.ty(state.local_decls, tcx) {
if state.local_decls[local].ty != rvalue.ty(state.local_decls, state.tcx) {
return;
}
value
Expand Down Expand Up @@ -382,7 +382,8 @@ impl<'body, 'tcx> VnState<'body, 'tcx> {
let ty = match kind {
AggregateTy::Array => {
assert!(fields.len() > 0);
Ty::new_array(self.tcx, fields[0].layout.ty, fields.len() as u64)
let field_ty = fields[0].layout.ty;
Ty::new_array(self.tcx, field_ty, fields.len() as u64)
}
AggregateTy::Tuple => {
Ty::new_tup_from_iter(self.tcx, fields.iter().map(|f| f.layout.ty))
Expand All @@ -406,7 +407,9 @@ impl<'body, 'tcx> VnState<'body, 'tcx> {
};
let ptr_imm = Immediate::new_pointer_with_meta(data, meta, &self.ecx);
ImmTy::from_immediate(ptr_imm, ty).into()
} else if matches!(ty.abi, Abi::Scalar(..) | Abi::ScalarPair(..)) {
} else if matches!(kind, AggregateTy::Array)
|| matches!(ty.abi, Abi::Scalar(..) | Abi::ScalarPair(..))
{
let dest = self.ecx.allocate(ty, MemoryKind::Stack).ok()?;
let variant_dest = if let Some(variant) = variant {
self.ecx.project_downcast(&dest, variant).ok()?
Expand All @@ -418,9 +421,7 @@ impl<'body, 'tcx> VnState<'body, 'tcx> {
self.ecx.copy_op(op, &field_dest).ok()?;
}
self.ecx.write_discriminant(variant.unwrap_or(FIRST_VARIANT), &dest).ok()?;
self.ecx
.alloc_mark_immutable(dest.ptr().provenance.unwrap().alloc_id())
.ok()?;
let dest = dest.map_provenance(|prov| prov.as_immutable());
dest.into()
} else {
return None;
Expand Down Expand Up @@ -704,7 +705,7 @@ impl<'body, 'tcx> VnState<'body, 'tcx> {
place.projection = self.tcx.mk_place_elems(&projection);
}

trace!(?place);
trace!(after_place = ?place);
}

/// Represent the *value* which would be read from `place`, and point `place` to a preexisting
Expand Down Expand Up @@ -884,7 +885,7 @@ impl<'body, 'tcx> VnState<'body, 'tcx> {
}

let (mut ty, variant_index) = match *kind {
AggregateKind::Array(..) => {
AggregateKind::Array(_) => {
assert!(!field_ops.is_empty());
(AggregateTy::Array, FIRST_VARIANT)
}
Expand Down Expand Up @@ -1347,6 +1348,7 @@ impl<'body, 'tcx> VnState<'body, 'tcx> {
}
}

#[instrument(level = "trace", skip(ecx), ret)]
fn op_to_prop_const<'tcx>(
ecx: &mut InterpCx<'tcx, DummyMachine>,
op: &OpTy<'tcx>,
Expand All @@ -1361,8 +1363,11 @@ fn op_to_prop_const<'tcx>(
return Some(ConstValue::ZeroSized);
}

// Do not synthetize too large constants. Codegen will just memcpy them, which we'd like to avoid.
if !matches!(op.layout.abi, Abi::Scalar(..) | Abi::ScalarPair(..)) {
// Do not synthesize too large constants, except constant arrays.
// For arrays, codegen will just memcpy them, but LLVM will optimize out those unneeded memcpy.
// For others, we'd prefer in-place initialization over memcpy them.
if !(op.layout.ty.is_array() || matches!(op.layout.abi, Abi::Scalar(..) | Abi::ScalarPair(..)))
{
return None;
}

Expand Down Expand Up @@ -1433,6 +1438,7 @@ impl<'tcx> VnState<'_, 'tcx> {
}

/// If `index` is a `Value::Constant`, return the `Constant` to be put in the MIR.
#[instrument(level = "trace", skip(self, index), ret)]
fn try_as_constant(&mut self, index: VnIndex) -> Option<ConstOperand<'tcx>> {
// This was already constant in MIR, do not change it.
if let Value::Constant { value, disambiguator: _ } = *self.get(index)
Expand All @@ -1444,8 +1450,13 @@ impl<'tcx> VnState<'_, 'tcx> {
}

let op = self.evaluated[index].as_ref()?;
if op.layout.is_unsized() {
// Do not attempt to propagate unsized locals.

// Ignore promoted arrays. Promoted arrays are already placed in `.rodata`.
// Which is what we try to archive for running gvn on constant local arrays.
if let Either::Left(mplace) = op.as_mplace_or_imm()
&& mplace.layout.ty.is_array()
&& let Value::Projection(_index, ProjectionElem::Deref) = self.get(index)
{
return None;
}

Expand Down Expand Up @@ -1484,6 +1495,7 @@ impl<'tcx> MutVisitor<'tcx> for VnState<'_, 'tcx> {
self.simplify_operand(operand, location);
}

#[instrument(level = "trace", skip(self, stmt))]
fn visit_statement(&mut self, stmt: &mut Statement<'tcx>, location: Location) {
if let StatementKind::Assign(box (ref mut lhs, ref mut rvalue)) = stmt.kind {
self.simplify_place_projection(lhs, location);
Expand Down
1 change: 1 addition & 0 deletions compiler/rustc_mir_transform/src/ssa.rs
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,7 @@ impl SsaLocals {
})
}

#[instrument(level = "trace", skip_all)]
pub fn for_each_assignment_mut<'tcx>(
&self,
basic_blocks: &mut IndexSlice<BasicBlock, BasicBlockData<'tcx>>,
Expand Down
51 changes: 51 additions & 0 deletions tests/codegen/issue-73825-gvn-const-local-array.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
// issue: <https://github.com/rust-lang/rust/issues/73825>
//@ compile-flags: -C opt-level=1
#![crate_type = "lib"]

// CHECK-LABEL: @foo
// CHECK-NEXT: start:
// CHECK-NEXT: %_3 = and i64 %x, 63
// CHECK-NEXT: %0 = getelementptr inbounds [64 x i32], ptr @0, i64 0, i64 %_3
// CHECK-NEXT: %_0 = load i32, ptr %0, align 4
// CHECK-NEXT: ret i32 %_0
#[no_mangle]
#[rustfmt::skip]
pub fn foo(x: usize) -> i32 {
let base: [i32; 64] = [
67, 754, 860, 559, 368, 870, 548, 972,
141, 731, 351, 664, 32, 4, 996, 741,
203, 292, 237, 480, 151, 940, 777, 540,
143, 587, 747, 65, 152, 517, 882, 880,
712, 595, 370, 901, 237, 53, 789, 785,
912, 650, 896, 367, 316, 392, 62, 473,
675, 691, 281, 192, 445, 970, 225, 425,
628, 324, 322, 206, 912, 867, 462, 92
];
base[x % 64]
}

// This checks whether LLVM de-duplicates `promoted` array and `base` array.
// Because in MIR, `&[..]` is already promoted by promote pass. GVN keeps promoting
// `*&[..]` to `const [..]` again.
//
// CHECK-LABEL: @deduplicability
// CHECK-NEXT: start:
// CHECK-NEXT: %_3 = and i64 %x, 63
// CHECK-NEXT: %0 = getelementptr inbounds [64 x i32], ptr @0, i64 0, i64 %_3
// CHECK-NEXT: %_0 = load i32, ptr %0, align 4
// CHECK-NEXT: ret i32 %_0
#[no_mangle]
#[rustfmt::skip]
pub fn deduplicability(x: usize) -> i32 {
let promoted = *&[
67i32, 754, 860, 559, 368, 870, 548, 972,
141, 731, 351, 664, 32, 4, 996, 741,
203, 292, 237, 480, 151, 940, 777, 540,
143, 587, 747, 65, 152, 517, 882, 880,
712, 595, 370, 901, 237, 53, 789, 785,
912, 650, 896, 367, 316, 392, 62, 473,
675, 691, 281, 192, 445, 970, 225, 425,
628, 324, 322, 206, 912, 867, 462, 92
];
promoted[x % 64]
}
127 changes: 127 additions & 0 deletions tests/mir-opt/const_array_locals.main.GVN.diff
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
- // MIR for `main` before GVN
+ // MIR for `main` after GVN

fn main() -> () {
let mut _0: ();
let _1: [i32; 5];
let mut _4: [i32; 5];
let mut _5: [i32; 5];
let mut _7: &[i32; 5];
let _8: [i32; 5];
let _9: ();
let mut _10: [u32; 5];
let mut _12: [f32; 8];
let _13: [[i32; 3]; 3];
let mut _14: [i32; 3];
let mut _15: [i32; 3];
let mut _16: [i32; 3];
scope 1 {
debug _arr => _1;
let _2: [i32; 5];
scope 2 {
debug _duplicated_arr => _2;
let _3: [[i32; 5]; 2];
scope 3 {
debug _foo => _3;
let _6: [i32; 5];
let mut _17: &[i32; 5];
scope 4 {
debug _darr => _6;
let _11: F32x8;
scope 5 {
debug _f => _11;
}
}
}
}
}

bb0: {
StorageLive(_1);
- _1 = [const 255_i32, const 105_i32, const 15_i32, const 39_i32, const 62_i32];
+ _1 = const [255_i32, 105_i32, 15_i32, 39_i32, 62_i32];
StorageLive(_2);
- _2 = [const 255_i32, const 105_i32, const 15_i32, const 39_i32, const 62_i32];
+ _2 = const [255_i32, 105_i32, 15_i32, 39_i32, 62_i32];
StorageLive(_3);
StorageLive(_4);
- _4 = [const 178_i32, const 9_i32, const 4_i32, const 56_i32, const 221_i32];
+ _4 = const [178_i32, 9_i32, 4_i32, 56_i32, 221_i32];
StorageLive(_5);
- _5 = [const 193_i32, const 164_i32, const 194_i32, const 197_i32, const 6_i32];
- _3 = [move _4, move _5];
+ _5 = const [193_i32, 164_i32, 194_i32, 197_i32, 6_i32];
+ _3 = const [[178_i32, 9_i32, 4_i32, 56_i32, 221_i32], [193_i32, 164_i32, 194_i32, 197_i32, 6_i32]];
StorageDead(_5);
StorageDead(_4);
StorageLive(_6);
StorageLive(_7);
_17 = const main::promoted[0];
_7 = &(*_17);
- _6 = (*_7);
+ _6 = (*_17);
StorageDead(_7);
StorageLive(_9);
StorageLive(_10);
- _10 = [const 31_u32, const 96_u32, const 173_u32, const 50_u32, const 1_u32];
- _9 = consume(move _10) -> [return: bb1, unwind continue];
+ _10 = const [31_u32, 96_u32, 173_u32, 50_u32, 1_u32];
+ _9 = consume(const [31_u32, 96_u32, 173_u32, 50_u32, 1_u32]) -> [return: bb1, unwind continue];
}

bb1: {
StorageDead(_10);
StorageDead(_9);
StorageLive(_11);
StorageLive(_12);
- _12 = [const 1f32, const 2f32, const 3f32, const 1f32, const 1f32, const 1f32, const 1f32, const 42f32];
- _11 = F32x8(move _12);
+ _12 = const [1f32, 2f32, 3f32, 1f32, 1f32, 1f32, 1f32, 42f32];
+ _11 = F32x8(const [1f32, 2f32, 3f32, 1f32, 1f32, 1f32, 1f32, 42f32]);
StorageDead(_12);
StorageLive(_13);
StorageLive(_14);
- _14 = [const 1_i32, const 0_i32, const 0_i32];
+ _14 = const [1_i32, 0_i32, 0_i32];
StorageLive(_15);
- _15 = [const 0_i32, const 1_i32, const 0_i32];
+ _15 = const [0_i32, 1_i32, 0_i32];
StorageLive(_16);
- _16 = [const 0_i32, const 0_i32, const 1_i32];
- _13 = [move _14, move _15, move _16];
+ _16 = const [0_i32, 0_i32, 1_i32];
+ _13 = const [[1_i32, 0_i32, 0_i32], [0_i32, 1_i32, 0_i32], [0_i32, 0_i32, 1_i32]];
StorageDead(_16);
StorageDead(_15);
StorageDead(_14);
StorageDead(_13);
_0 = const ();
StorageDead(_11);
StorageDead(_6);
StorageDead(_3);
StorageDead(_2);
StorageDead(_1);
return;
}
}
+
+ ALLOC0 (size: 36, align: 4) { .. }
+
+ ALLOC1 (size: 12, align: 4) { .. }
+
+ ALLOC2 (size: 12, align: 4) { .. }
+
+ ALLOC3 (size: 12, align: 4) { .. }
+
+ ALLOC4 (size: 32, align: 4) { .. }
+
+ ALLOC5 (size: 20, align: 4) { .. }
+
+ ALLOC6 (size: 40, align: 4) { .. }
+
+ ALLOC7 (size: 20, align: 4) { .. }
+
+ ALLOC8 (size: 20, align: 4) { .. }
+
+ ALLOC9 (size: 20, align: 4) { .. }

46 changes: 46 additions & 0 deletions tests/mir-opt/const_array_locals.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
//@ test-mir-pass: GVN
//@ compile-flags: -Zdump-mir-exclude-alloc-bytes
#![feature(repr_simd)]

#[repr(simd)]
struct F32x8([f32; 8]);

// EMIT_MIR const_array_locals.main.GVN.diff
// CHECK-LABEL: fn main(
// CHECK: debug _arr => [[_arr:_[0-9]+]];
// CHECK: debug _duplicated_arr => [[_duplicated_arr:_[0-9]+]];
// CHECK: debug _foo => [[_foo:_[0-9]+]];
// CHECK: debug _darr => [[_darr:_[0-9]+]];
// CHECK: debug _f => [[_f:_[0-9]+]];
pub fn main() {
// CHECK: [[_arr]] = const [255_i32, 105_i32, 15_i32, 39_i32, 62_i32];
let _arr = [255, 105, 15, 39, 62];
// CHECK: [[_duplicated_arr]] = const [255_i32, 105_i32, 15_i32, 39_i32, 62_i32];
let _duplicated_arr = [255, 105, 15, 39, 62];
// CHECK: [[subarray1:_[0-9]+]] = const [178_i32, 9_i32, 4_i32, 56_i32, 221_i32];
// CHECK: [[subarray2:_[0-9]+]] = const [193_i32, 164_i32, 194_i32, 197_i32, 6_i32];
// CHECK{LITERAL}: const [[178_i32, 9_i32, 4_i32, 56_i32, 221_i32], [193_i32, 164_i32, 194_i32, 197_i32, 6_i32]];
let _foo = [[178, 9, 4, 56, 221], [193, 164, 194, 197, 6]];
// CHECK: [[PROMOTED:_[0-9]+]] = const main::promoted[0];
// CHECK: [[_darr]] = (*[[PROMOTED]]);
let _darr = *&[254, 42, 15, 39, 62];

// CHECK: [[ARG:_[0-9]+]] = const [31_u32, 96_u32, 173_u32, 50_u32, 1_u32];
// CHECK: consume(const [31_u32, 96_u32, 173_u32, 50_u32, 1_u32])
consume([31, 96, 173, 50, 1]);

// CHECK: [[OP:_[0-9]+]] = const [1f32, 2f32, 3f32, 1f32, 1f32, 1f32, 1f32, 42f32];
// CHECK: [[_f]] = F32x8(const [1f32, 2f32, 3f32, 1f32, 1f32, 1f32, 1f32, 42f32]);
let _f = F32x8([1.0, 2.0, 3.0, 1.0, 1.0, 1.0, 1.0, 42.0]);

// ice with small arrays
// CHECK: [[A:_[0-9]+]] = const [1_i32, 0_i32, 0_i32];
// CHECK: [[B:_[0-9]+]] = const [0_i32, 1_i32, 0_i32];
// CHECK: [[C:_[0-9]+]] = const [0_i32, 0_i32, 1_i32];
// CHECK{LITERAL}: const [[1_i32, 0_i32, 0_i32], [0_i32, 1_i32, 0_i32], [0_i32, 0_i32, 1_i32]];
[[1, 0, 0], [0, 1, 0], [0, 0, 1]]; // 2D array
}

fn consume(_arr: [u32; 5]) {
unimplemented!()
}
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,8 @@
bb0: {
StorageLive(_1);
StorageLive(_2);
_2 = [const 0_u32, const 1_u32, const 2_u32, const 3_u32];
- _2 = [const 0_u32, const 1_u32, const 2_u32, const 3_u32];
+ _2 = const [0_u32, 1_u32, 2_u32, 3_u32];
StorageLive(_3);
_3 = const 2_usize;
- _4 = Len(_2);
Expand All @@ -36,4 +37,6 @@
return;
}
}
+
+ ALLOC0 (size: 16, align: 4) { .. }

Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,8 @@
bb0: {
StorageLive(_1);
StorageLive(_2);
_2 = [const 0_u32, const 1_u32, const 2_u32, const 3_u32];
- _2 = [const 0_u32, const 1_u32, const 2_u32, const 3_u32];
+ _2 = const [0_u32, 1_u32, 2_u32, 3_u32];
StorageLive(_3);
_3 = const 2_usize;
- _4 = Len(_2);
Expand All @@ -36,4 +37,6 @@
return;
}
}
+
+ ALLOC0 (size: 16, align: 4) { .. }

1 change: 1 addition & 0 deletions tests/mir-opt/const_prop/array_index.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
//@ test-mir-pass: GVN
//@ compile-flags: -Zdump-mir-exclude-alloc-bytes
// EMIT_MIR_FOR_EACH_PANIC_STRATEGY
// EMIT_MIR_FOR_EACH_BIT_WIDTH

Expand Down
Loading

0 comments on commit b6d6d25

Please sign in to comment.