From 7a0e1ef6dc6b5b2d58ed782ac17aca3fed2f04b4 Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Thu, 30 Dec 2021 12:43:00 -0800 Subject: [PATCH] Fix spillslot size bug in SIMD by removing type-dependent spillslot allocation. This patch makes spillslot allocation, spilling and reloading all based on register class only. Hence when we have a 32- or 64-bit value in a 128-bit XMM register on x86-64 or vector register on aarch64, this results in larger spillslots and spills/restores. Why make this change, if it results in less efficient stack-frame usage? Simply put, it is safer: there is always a risk when allocating spillslots or spilling/reloading that we get the wrong type and make the spillslot or the store/load too small. This was one contributing factor to CVE-2021-32629, and is now the source of a fuzzbug in SIMD code that puns an arbitrary user-controlled vector constant over another stackslot. (If this were a pointer, that could result in RCE. SIMD is not yet on by default in a release, fortunately. In particular, we have not been particularly careful about using moves between values of different types, for example with `raw_bitcast` or with certain SIMD operations, and such moves indicate to regalloc.rs that vregs are in equivalence classes and some arbitrary vreg in the class is provided when allocating the spillslot or spilling/reloading. Since regalloc.rs does not track actual type, and since we haven't been careful about moves, we can't really trust this "arbitrary vreg in equivalence class" to provide accurate type information. In the fix to CVE-2021-32629 we fixed this for integer registers by always spilling/reloading 64 bits; this fix can be seen as the analogous change for FP/vector regs. --- cranelift/codegen/src/isa/aarch64/abi.rs | 9 +-- cranelift/codegen/src/isa/arm32/abi.rs | 2 +- cranelift/codegen/src/isa/s390x/abi.rs | 8 +- cranelift/codegen/src/isa/x64/abi.rs | 9 +-- cranelift/codegen/src/machinst/abi.rs | 13 +--- cranelift/codegen/src/machinst/abi_impl.rs | 73 +++++-------------- cranelift/codegen/src/machinst/vcode.rs | 15 ++-- .../filetests/filetests/isa/aarch64/call.clif | 32 ++++---- .../filetests/filetests/isa/x64/fastcall.clif | 60 +++++++-------- .../spillslot-size-fuzzbug.wast | 11 +++ 10 files changed, 99 insertions(+), 133 deletions(-) create mode 100644 tests/misc_testsuite/spillslot-size-fuzzbug.wast diff --git a/cranelift/codegen/src/isa/aarch64/abi.rs b/cranelift/codegen/src/isa/aarch64/abi.rs index 2a869185d454..44b848edc547 100644 --- a/cranelift/codegen/src/isa/aarch64/abi.rs +++ b/cranelift/codegen/src/isa/aarch64/abi.rs @@ -1145,12 +1145,11 @@ impl ABIMachineSpec for AArch64MachineDeps { insts } - fn get_number_of_spillslots_for_value(rc: RegClass, ty: Type) -> u32 { + fn get_number_of_spillslots_for_value(rc: RegClass) -> u32 { // We allocate in terms of 8-byte slots. - match (rc, ty) { - (RegClass::I64, _) => 1, - (RegClass::V128, F32) | (RegClass::V128, F64) => 1, - (RegClass::V128, _) => 2, + match rc { + RegClass::I64 => 1, + RegClass::V128 => 2, _ => panic!("Unexpected register class!"), } } diff --git a/cranelift/codegen/src/isa/arm32/abi.rs b/cranelift/codegen/src/isa/arm32/abi.rs index aba6e548d854..4fcf42386027 100644 --- a/cranelift/codegen/src/isa/arm32/abi.rs +++ b/cranelift/codegen/src/isa/arm32/abi.rs @@ -436,7 +436,7 @@ impl ABIMachineSpec for Arm32MachineDeps { unimplemented!("StructArgs not implemented for ARM32 yet"); } - fn get_number_of_spillslots_for_value(rc: RegClass, _ty: Type) -> u32 { + fn get_number_of_spillslots_for_value(rc: RegClass) -> u32 { match rc { RegClass::I32 => 1, _ => panic!("Unexpected register class!"), diff --git a/cranelift/codegen/src/isa/s390x/abi.rs b/cranelift/codegen/src/isa/s390x/abi.rs index 8712d0de2818..5dd7dd55c688 100644 --- a/cranelift/codegen/src/isa/s390x/abi.rs +++ b/cranelift/codegen/src/isa/s390x/abi.rs @@ -685,11 +685,11 @@ impl ABIMachineSpec for S390xMachineDeps { unimplemented!("StructArgs not implemented for S390X yet"); } - fn get_number_of_spillslots_for_value(rc: RegClass, ty: Type) -> u32 { + fn get_number_of_spillslots_for_value(rc: RegClass) -> u32 { // We allocate in terms of 8-byte slots. - match (rc, ty) { - (RegClass::I64, _) => 1, - (RegClass::F64, _) => 1, + match rc { + RegClass::I64 => 1, + RegClass::F64 => 1, _ => panic!("Unexpected register class!"), } } diff --git a/cranelift/codegen/src/isa/x64/abi.rs b/cranelift/codegen/src/isa/x64/abi.rs index 7a4352b38f18..9687ac5f26c4 100644 --- a/cranelift/codegen/src/isa/x64/abi.rs +++ b/cranelift/codegen/src/isa/x64/abi.rs @@ -719,12 +719,11 @@ impl ABIMachineSpec for X64ABIMachineSpec { insts } - fn get_number_of_spillslots_for_value(rc: RegClass, ty: Type) -> u32 { + fn get_number_of_spillslots_for_value(rc: RegClass) -> u32 { // We allocate in terms of 8-byte slots. - match (rc, ty) { - (RegClass::I64, _) => 1, - (RegClass::V128, types::F32) | (RegClass::V128, types::F64) => 1, - (RegClass::V128, _) => 2, + match rc { + RegClass::I64 => 1, + RegClass::V128 => 2, _ => panic!("Unexpected register class!"), } } diff --git a/cranelift/codegen/src/machinst/abi.rs b/cranelift/codegen/src/machinst/abi.rs index d9e1e3c6ae3b..d0e60a9ef838 100644 --- a/cranelift/codegen/src/machinst/abi.rs +++ b/cranelift/codegen/src/machinst/abi.rs @@ -161,21 +161,16 @@ pub trait ABICallee { fn stack_args_size(&self) -> u32; /// Get the spill-slot size. - fn get_spillslot_size(&self, rc: RegClass, ty: Type) -> u32; + fn get_spillslot_size(&self, rc: RegClass) -> u32; - /// Generate a spill. The type, if known, is given; this can be used to - /// generate a store instruction optimized for the particular type rather - /// than the RegClass (e.g., only F64 that resides in a V128 register). If - /// no type is given, the implementation should spill the whole register. - fn gen_spill(&self, to_slot: SpillSlot, from_reg: RealReg, ty: Option) -> Self::I; + /// Generate a spill. + fn gen_spill(&self, to_slot: SpillSlot, from_reg: RealReg) -> Self::I; - /// Generate a reload (fill). As for spills, the type may be given to allow - /// a more optimized load instruction to be generated. + /// Generate a reload (fill). fn gen_reload( &self, to_reg: Writable, from_slot: SpillSlot, - ty: Option, ) -> Self::I; } diff --git a/cranelift/codegen/src/machinst/abi_impl.rs b/cranelift/codegen/src/machinst/abi_impl.rs index 0da169b42cc2..5126752db02c 100644 --- a/cranelift/codegen/src/machinst/abi_impl.rs +++ b/cranelift/codegen/src/machinst/abi_impl.rs @@ -502,9 +502,8 @@ pub trait ABIMachineSpec { size: usize, ) -> SmallVec<[Self::I; 8]>; - /// Get the number of spillslots required for the given register-class and - /// type. - fn get_number_of_spillslots_for_value(rc: RegClass, ty: Type) -> u32; + /// Get the number of spillslots required for the given register-class. + fn get_number_of_spillslots_for_value(rc: RegClass) -> u32; /// Get the current virtual-SP offset from an instruction-emission state. fn get_virtual_sp_offset_from_state(s: &::State) -> i64; @@ -658,6 +657,17 @@ fn get_special_purpose_param_register( } } +fn ty_from_class(class: RegClass) -> Type { + match class { + RegClass::I32 => I32, + RegClass::I64 => I64, + RegClass::F32 => F32, + RegClass::F64 => F64, + RegClass::V128 => I8X16, + _ => panic!("Unknown regclass: {:?}", class), + } +} + impl ABICalleeImpl { /// Create a new body ABI instance. pub fn new(f: &ir::Function, flags: settings::Flags) -> CodegenResult { @@ -856,26 +866,6 @@ fn generate_gv( } } -/// Return a type either from an optional type hint, or if not, from the default -/// type associated with the given register's class. This is used to generate -/// loads/spills appropriately given the type of value loaded/stored (which may -/// be narrower than the spillslot). We usually have the type because the -/// regalloc usually provides the vreg being spilled/reloaded, and we know every -/// vreg's type. However, the regalloc *can* request a spill/reload without an -/// associated vreg when needed to satisfy a safepoint (which requires all -/// ref-typed values, even those in real registers in the original vcode, to be -/// in spillslots). -fn ty_from_ty_hint_or_reg_class(r: Reg, ty: Option) -> Type { - match (ty, r.get_class()) { - // If the type is provided - (Some(t), _) => t, - // If no type is provided, this should be a register spill for a - // safepoint, so we only expect I32/I64 (integer) registers. - (None, rc) if rc == M::word_reg_class() => M::word_type(), - _ => panic!("Unexpected register class!"), - } -} - fn gen_load_stack_multi( from: StackAMode, dst: ValueRegs>, @@ -1203,14 +1193,6 @@ impl ABICallee for ABICalleeImpl { let sp_off = self.stackslots_size as i64 + spill_off; log::trace!("load_spillslot: slot {:?} -> sp_off {}", slot, sp_off); - // Integer types smaller than word size have been spilled as words below, - // and therefore must be reloaded in the same type. - let ty = if ty.is_int() && ty.bytes() < M::word_bytes() { - M::word_type() - } else { - ty - }; - gen_load_stack_multi::(StackAMode::NominalSPOffset(sp_off, ty), into_regs, ty) } @@ -1227,18 +1209,6 @@ impl ABICallee for ABICalleeImpl { let sp_off = self.stackslots_size as i64 + spill_off; log::trace!("store_spillslot: slot {:?} -> sp_off {}", slot, sp_off); - // When reloading from a spill slot, we might have lost information about real integer - // types. For instance, on the x64 backend, a zero-extension can become spurious and - // optimized into a move, causing vregs of types I32 and I64 to share the same coalescing - // equivalency class. As a matter of fact, such a value can be spilled as an I32 and later - // reloaded as an I64; to make sure the high bits are always defined, do a word-sized store - // all the time, in this case. - let ty = if ty.is_int() && ty.bytes() < M::word_bytes() { - M::word_type() - } else { - ty - }; - gen_store_stack_multi::(StackAMode::NominalSPOffset(sp_off, ty), from_regs, ty) } @@ -1383,25 +1353,20 @@ impl ABICallee for ABICalleeImpl { self.sig.stack_arg_space as u32 } - fn get_spillslot_size(&self, rc: RegClass, ty: Type) -> u32 { - M::get_number_of_spillslots_for_value(rc, ty) + fn get_spillslot_size(&self, rc: RegClass) -> u32 { + M::get_number_of_spillslots_for_value(rc) } - fn gen_spill(&self, to_slot: SpillSlot, from_reg: RealReg, ty: Option) -> Self::I { - let ty = ty_from_ty_hint_or_reg_class::(from_reg.to_reg(), ty); + fn gen_spill(&self, to_slot: SpillSlot, from_reg: RealReg) -> Self::I { + let ty = ty_from_class(from_reg.to_reg().get_class()); self.store_spillslot(to_slot, ty, ValueRegs::one(from_reg.to_reg())) .into_iter() .next() .unwrap() } - fn gen_reload( - &self, - to_reg: Writable, - from_slot: SpillSlot, - ty: Option, - ) -> Self::I { - let ty = ty_from_ty_hint_or_reg_class::(to_reg.to_reg().to_reg(), ty); + fn gen_reload(&self, to_reg: Writable, from_slot: SpillSlot) -> Self::I { + let ty = ty_from_class(to_reg.to_reg().get_class()); self.load_spillslot( from_slot, ty, diff --git a/cranelift/codegen/src/machinst/vcode.rs b/cranelift/codegen/src/machinst/vcode.rs index 96a1acc90978..0e8300f49d58 100644 --- a/cranelift/codegen/src/machinst/vcode.rs +++ b/cranelift/codegen/src/machinst/vcode.rs @@ -688,24 +688,21 @@ impl RegallocFunction for VCode { self.vreg_types.len() } - fn get_spillslot_size(&self, regclass: RegClass, vreg: VirtualReg) -> u32 { - let ty = self.vreg_type(vreg); - self.abi.get_spillslot_size(regclass, ty) + fn get_spillslot_size(&self, regclass: RegClass, _: VirtualReg) -> u32 { + self.abi.get_spillslot_size(regclass) } - fn gen_spill(&self, to_slot: SpillSlot, from_reg: RealReg, vreg: Option) -> I { - let ty = vreg.map(|v| self.vreg_type(v)); - self.abi.gen_spill(to_slot, from_reg, ty) + fn gen_spill(&self, to_slot: SpillSlot, from_reg: RealReg, _: Option) -> I { + self.abi.gen_spill(to_slot, from_reg) } fn gen_reload( &self, to_reg: Writable, from_slot: SpillSlot, - vreg: Option, + _: Option, ) -> I { - let ty = vreg.map(|v| self.vreg_type(v)); - self.abi.gen_reload(to_reg, from_slot, ty) + self.abi.gen_reload(to_reg, from_slot) } fn gen_move(&self, to_reg: Writable, from_reg: RealReg, vreg: VirtualReg) -> I { diff --git a/cranelift/filetests/filetests/isa/aarch64/call.clif b/cranelift/filetests/filetests/isa/aarch64/call.clif index dcbf1edbc443..21ba70c98745 100644 --- a/cranelift/filetests/filetests/isa/aarch64/call.clif +++ b/cranelift/filetests/filetests/isa/aarch64/call.clif @@ -133,28 +133,28 @@ block0: ; check: stp fp, lr, [sp, #-16]! ; nextln: mov fp, sp -; nextln: sub sp, sp, #32 +; nextln: sub sp, sp, #48 ; nextln: ldr x0, 8 ; b 12 ; data ; nextln: blr x0 -; nextln: str s0, [sp] +; nextln: str q0, [sp] ; nextln: ldr x0, 8 ; b 12 ; data ; nextln: blr x0 -; nextln: str d0, [sp, #8] +; nextln: str q0, [sp, #16] ; nextln: ldr x0, 8 ; b 12 ; data ; nextln: blr x0 -; nextln: str d0, [sp, #16] +; nextln: str q0, [sp, #32] ; nextln: ldr x0, 8 ; b 12 ; data ; nextln: blr x0 -; nextln: ldr s0, [sp] +; nextln: ldr q0, [sp] ; nextln: ldr x0, 8 ; b 12 ; data ; nextln: blr x0 -; nextln: ldr d0, [sp, #8] +; nextln: ldr q0, [sp, #16] ; nextln: ldr x0, 8 ; b 12 ; data ; nextln: blr x0 -; nextln: ldr d0, [sp, #16] +; nextln: ldr q0, [sp, #32] ; nextln: ldr x0, 8 ; b 12 ; data ; nextln: blr x0 -; nextln: add sp, sp, #32 +; nextln: add sp, sp, #48 ; nextln: ldp fp, lr, [sp], #16 ; nextln: ret @@ -223,28 +223,28 @@ block0: ; check: stp fp, lr, [sp, #-16]! ; nextln: mov fp, sp -; nextln: sub sp, sp, #32 -; nextln: ldr x0, 8 ; b 12 ; data -; nextln: blr x0 -; nextln: str s0, [sp] +; nextln: sub sp, sp, #48 ; nextln: ldr x0, 8 ; b 12 ; data ; nextln: blr x0 -; nextln: str d0, [sp, #8] +; nextln: str q0, [sp] ; nextln: ldr x0, 8 ; b 12 ; data ; nextln: blr x0 ; nextln: str q0, [sp, #16] ; nextln: ldr x0, 8 ; b 12 ; data ; nextln: blr x0 -; nextln: ldr s0, [sp] +; nextln: str q0, [sp, #32] ; nextln: ldr x0, 8 ; b 12 ; data ; nextln: blr x0 -; nextln: ldr d0, [sp, #8] +; nextln: ldr q0, [sp] ; nextln: ldr x0, 8 ; b 12 ; data ; nextln: blr x0 ; nextln: ldr q0, [sp, #16] ; nextln: ldr x0, 8 ; b 12 ; data ; nextln: blr x0 -; nextln: add sp, sp, #32 +; nextln: ldr q0, [sp, #32] +; nextln: ldr x0, 8 ; b 12 ; data +; nextln: blr x0 +; nextln: add sp, sp, #48 ; nextln: ldp fp, lr, [sp], #16 ; nextln: ret diff --git a/cranelift/filetests/filetests/isa/x64/fastcall.clif b/cranelift/filetests/filetests/isa/x64/fastcall.clif index 2081bf5ebb5a..bead5b77f4c7 100644 --- a/cranelift/filetests/filetests/isa/x64/fastcall.clif +++ b/cranelift/filetests/filetests/isa/x64/fastcall.clif @@ -238,34 +238,34 @@ block0(v0: i64): ; nextln: unwind PushFrameRegs { offset_upward_to_caller_sp: 16 } ; nextln: movq %rsp, %rbp ; nextln: unwind DefineNewFrame { offset_upward_to_caller_sp: 16, offset_downward_to_clobbers: 160 } -; nextln: subq $$192, %rsp -; nextln: movdqu %xmm6, 32(%rsp) +; nextln: subq $$224, %rsp +; nextln: movdqu %xmm6, 64(%rsp) ; nextln: unwind SaveReg { clobber_offset: 0, reg: r6V } -; nextln: movdqu %xmm7, 48(%rsp) +; nextln: movdqu %xmm7, 80(%rsp) ; nextln: unwind SaveReg { clobber_offset: 16, reg: r7V } -; nextln: movdqu %xmm8, 64(%rsp) +; nextln: movdqu %xmm8, 96(%rsp) ; nextln: unwind SaveReg { clobber_offset: 32, reg: r8V } -; nextln: movdqu %xmm9, 80(%rsp) +; nextln: movdqu %xmm9, 112(%rsp) ; nextln: unwind SaveReg { clobber_offset: 48, reg: r9V } -; nextln: movdqu %xmm10, 96(%rsp) +; nextln: movdqu %xmm10, 128(%rsp) ; nextln: unwind SaveReg { clobber_offset: 64, reg: r10V } -; nextln: movdqu %xmm11, 112(%rsp) +; nextln: movdqu %xmm11, 144(%rsp) ; nextln: unwind SaveReg { clobber_offset: 80, reg: r11V } -; nextln: movdqu %xmm12, 128(%rsp) +; nextln: movdqu %xmm12, 160(%rsp) ; nextln: unwind SaveReg { clobber_offset: 96, reg: r12V } -; nextln: movdqu %xmm13, 144(%rsp) +; nextln: movdqu %xmm13, 176(%rsp) ; nextln: unwind SaveReg { clobber_offset: 112, reg: r13V } -; nextln: movdqu %xmm14, 160(%rsp) +; nextln: movdqu %xmm14, 192(%rsp) ; nextln: unwind SaveReg { clobber_offset: 128, reg: r14V } -; nextln: movdqu %xmm15, 176(%rsp) +; nextln: movdqu %xmm15, 208(%rsp) ; nextln: unwind SaveReg { clobber_offset: 144, reg: r15V } ; nextln: movsd 0(%rcx), %xmm4 ; nextln: movsd 8(%rcx), %xmm1 ; nextln: movsd 16(%rcx), %xmm0 -; nextln: movsd %xmm0, rsp(16 + virtual offset) +; nextln: movdqu %xmm0, rsp(32 + virtual offset) ; nextln: movsd 24(%rcx), %xmm3 ; nextln: movsd 32(%rcx), %xmm0 -; nextln: movsd %xmm0, rsp(24 + virtual offset) +; nextln: movdqu %xmm0, rsp(48 + virtual offset) ; nextln: movsd 40(%rcx), %xmm5 ; nextln: movsd 48(%rcx), %xmm6 ; nextln: movsd 56(%rcx), %xmm7 @@ -278,24 +278,24 @@ block0(v0: i64): ; nextln: movsd 112(%rcx), %xmm14 ; nextln: movsd 120(%rcx), %xmm15 ; nextln: movsd 128(%rcx), %xmm0 -; nextln: movsd %xmm0, rsp(0 + virtual offset) +; nextln: movdqu %xmm0, rsp(0 + virtual offset) ; nextln: movsd 136(%rcx), %xmm0 ; nextln: movsd 144(%rcx), %xmm2 -; nextln: movsd %xmm2, rsp(8 + virtual offset) +; nextln: movdqu %xmm2, rsp(16 + virtual offset) ; nextln: movsd 152(%rcx), %xmm2 ; nextln: addsd %xmm1, %xmm4 -; nextln: movsd rsp(16 + virtual offset), %xmm1 +; nextln: movdqu rsp(32 + virtual offset), %xmm1 ; nextln: addsd %xmm3, %xmm1 -; nextln: movsd rsp(24 + virtual offset), %xmm3 +; nextln: movdqu rsp(48 + virtual offset), %xmm3 ; nextln: addsd %xmm5, %xmm3 ; nextln: addsd %xmm7, %xmm6 ; nextln: addsd %xmm9, %xmm8 ; nextln: addsd %xmm11, %xmm10 ; nextln: addsd %xmm13, %xmm12 ; nextln: addsd %xmm15, %xmm14 -; nextln: movsd rsp(0 + virtual offset), %xmm5 +; nextln: movdqu rsp(0 + virtual offset), %xmm5 ; nextln: addsd %xmm0, %xmm5 -; nextln: movsd rsp(8 + virtual offset), %xmm0 +; nextln: movdqu rsp(16 + virtual offset), %xmm0 ; nextln: addsd %xmm2, %xmm0 ; nextln: addsd %xmm1, %xmm4 ; nextln: addsd %xmm6, %xmm3 @@ -307,17 +307,17 @@ block0(v0: i64): ; nextln: addsd %xmm8, %xmm4 ; nextln: addsd %xmm5, %xmm4 ; nextln: movaps %xmm4, %xmm0 -; nextln: movdqu 32(%rsp), %xmm6 -; nextln: movdqu 48(%rsp), %xmm7 -; nextln: movdqu 64(%rsp), %xmm8 -; nextln: movdqu 80(%rsp), %xmm9 -; nextln: movdqu 96(%rsp), %xmm10 -; nextln: movdqu 112(%rsp), %xmm11 -; nextln: movdqu 128(%rsp), %xmm12 -; nextln: movdqu 144(%rsp), %xmm13 -; nextln: movdqu 160(%rsp), %xmm14 -; nextln: movdqu 176(%rsp), %xmm15 -; nextln: addq $$192, %rsp +; nextln: movdqu 64(%rsp), %xmm6 +; nextln: movdqu 80(%rsp), %xmm7 +; nextln: movdqu 96(%rsp), %xmm8 +; nextln: movdqu 112(%rsp), %xmm9 +; nextln: movdqu 128(%rsp), %xmm10 +; nextln: movdqu 144(%rsp), %xmm11 +; nextln: movdqu 160(%rsp), %xmm12 +; nextln: movdqu 176(%rsp), %xmm13 +; nextln: movdqu 192(%rsp), %xmm14 +; nextln: movdqu 208(%rsp), %xmm15 +; nextln: addq $$224, %rsp ; nextln: movq %rbp, %rsp ; nextln: popq %rbp ; nextln: ret diff --git a/tests/misc_testsuite/spillslot-size-fuzzbug.wast b/tests/misc_testsuite/spillslot-size-fuzzbug.wast new file mode 100644 index 000000000000..287b9274474a --- /dev/null +++ b/tests/misc_testsuite/spillslot-size-fuzzbug.wast @@ -0,0 +1,11 @@ +(module + (func (export "test") (result f32 f32) + i32.const 0 + f32.convert_i32_s + v128.const i32x4 0 0 0 0 + data.drop 0 + f32x4.extract_lane 0 + data.drop 0) + (data "")) + +(assert_return (invoke "test") (f32.const 0.0) (f32.const 0.0))