Register allocation failure on x86_64 with simd enabled #3160

alexcrichton · 2021-08-06T15:52:54Z

Found via fuzz-bugs last night. Running this test case yields:

$ cargo run testcase0.wat --enable-simd
    Finished dev [unoptimized + debuginfo] target(s) in 0.23s
     Running `target/debug/wasmtime testcase0.wat --enable-simd`
 ERROR cranelift_codegen::machinst::compile > Register allocation error for vcode
VCode_ShowWithRRU {{
  Entry block: 0
Block 0:
  (original IR block: block0)
  (successor: Block 1)
  (successor: Block 11)
  (instruction range: 0 .. 6)
  Inst 0:   movq    %rdi, %v0J
  Inst 1:   movq    %rsi, %v1J
  Inst 2:   movq    %rdx, %v2J
  Inst 3:   movl    208(%v0J), %v4Jl
  Inst 4:   testl   %v4Jl, %v4Jl
  Inst 5:   jnz     label1; j label11
Block 1:
  (original IR block: block2)
  (successor: Block 2)
  (successor: Block 10)
  (instruction range: 6 .. 15)
  Inst 6:   movl    $-1, %v133Jl
  Inst 7:   movq    %v133J, %v8J
  Inst 8:   addl    208(%v0J), %v8Jl
  Inst 9:   movl    %v8Jl, 208(%v0J)
  Inst 10:   movl    $1835102836, %v132Jl
  Inst 11:   movl    %v132Jl, %v11Jl
  Inst 12:   movl    $-2147483648, %v131Jl
  Inst 13:   cmpq    %v131J, %v11J
  Inst 14:   jbe     label2; j label10
Block 2:
  (original IR block: block5)
  (successor: Block 3)
  (successor: Block 9)
  (instruction range: 15 .. 69)
  Inst 15:   movq    68(%v0J), %v14J
  Inst 16:   movq    %v14J, %v15J
  Inst 17:   addq    %v11J, %v15J
  Inst 18:   xorq    %v129J, %v129J
  Inst 19:   movl    $-2147483648, %v130Jl
  Inst 20:   cmpq    %v130J, %v11J
  Inst 21:   movq    %v15J, %v17J
  Inst 22:   cmovnbeq %v129J, %v17J
  Inst 23:   movl    $-2147483648, %v128Jl
  Inst 24:   movzbq  25701677(%v17J,%v128J,1), %v19J
  Inst 25:   movq    %v19J, %v20J
  Inst 26:   movsbq  %v20Jb, %v21J
  Inst 27:   movl    $-606395173, %v127Jl
  Inst 28:   movl    %v127Jl, %v22Jl
  Inst 29:   movw    %v21Jw, 419430398(%v14J,%v22J,1)
  Inst 30:   movl    $694575416, %v125Jl
  Inst 31:   movd    %v125Jl, %v124V
  Inst 32:   movl    $2147483647, %v126Jl
  Inst 33:   movd    %v126Jl, %v25V
  Inst 34:   andps   %v124V, %v25V
  Inst 35:   movl    $-2147483648, %v123Jl
  Inst 36:   movd    %v123Jl, %v26V
  Inst 37:   xorps   %v25V, %v26V
  Inst 38:   movaps  %v26V, %v120V
  Inst 39:   cvt_float32_to_uint64_seq %v120V, %v27J
  Inst 40:   movl    $-404232217, %v116Jl
  Inst 41:   movd    %v116Jl, %v115V
  Inst 42:   movaps  %v115V, %v117V
  Inst 43:   cvt_float32_to_sint32_seq %v117V, %v29Jl
  Inst 44:   movl    %v29Jl, %v30Jl
  Inst 45:   movq    757935405(%v14J,%v30J,1), %v32J
  Inst 46:   cmpq    %v32J, %v27J
  Inst 47:   setz    %v33Jb
  Inst 48:   movq    %v33J, %v34J
  Inst 49:   andq    $1, %v34J
  Inst 50:   movl    %v34Jl, %v35Jl
  Inst 51:   uninit  %v38V
  Inst 52:   pinsrd.w $0, 757935405(%v14J,%v35J,1), %v38V
  Inst 53:   pinsrd.w $1, 757935405(%v14J,%v35J,1), %v38V
  Inst 54:   movdqa  %v38V, %v39V
  Inst 55:   movdqa  %v39V, %v40V
  Inst 56:   palignr $8, %v39V, %v40V
  Inst 57:   pmovsxbw %v40V, %v40V
  Inst 58:   movdqa  %v40V, %v41V
  Inst 59:   pxor    %v114V, %v114V
  Inst 60:   pcmpeqd %v41V, %v114V
  Inst 61:   ptest   %v114V, %v114V
  Inst 62:   setz    %v42Jb
  Inst 63:   movq    %v42J, %v43J
  Inst 64:   andq    $1, %v43J
  Inst 65:   movl    %v43Jl, %v44Jl
  Inst 66:   movl    $-2147483648, %v113Jl
  Inst 67:   cmpq    %v113J, %v44J
  Inst 68:   jbe     label3; j label9
Block 3:
  (original IR block: block7)
  (successor: Block 4)
  (successor: Block 8)
  (instruction range: 69 .. 99)
  Inst 69:   movq    %v14J, %v46J
  Inst 70:   addq    %v44J, %v46J
  Inst 71:   xorq    %v111J, %v111J
  Inst 72:   movl    $-2147483648, %v112Jl
  Inst 73:   cmpq    %v112J, %v44J
  Inst 74:   movq    %v46J, %v48J
  Inst 75:   cmovnbeq %v111J, %v48J
  Inst 76:   movl    $-2147483648, %v110Jl
  Inst 77:   movq    26291501(%v48J,%v110J,1), %v50J
  Inst 78:   movq    76(%v0J), %v51J
  Inst 79:   movq    %v51J, %v53J
  Inst 80:   shrq    $16, %v53J
  Inst 81:   movq    %v53J, %v54J
  Inst 82:   movl    %v54Jl, %v55Jl
  Inst 83:   movl    1667575909(%v14J,%v55J,1), %v57Jl
  Inst 84:   movl    96(%v0J), %v64Jl
  Inst 85:   pmovsxbw 808597554(%v14J,%v64J,1), %v66V
  Inst 86:   movdqa  %v67V, %v68V
  Inst 87:   pextrb  $13, %v68V, %v69J
  Inst 88:   movsbl  %v69Jb, %v70Jl
  Inst 89:   movl    %v70Jl, %v71Jl
  Inst 90:   movq    770941057(%v14J,%v71J,1), %v73J
  Inst 91:   movl    $-757935404, %v106Jl
  Inst 92:   movd    %v106Jl, %v105V
  Inst 93:   movaps  %v105V, %v107V
  Inst 94:   cvt_float32_to_uint32_seq %v107V, %v75Jl
  Inst 95:   movl    %v75Jl, %v76Jl
  Inst 96:   movl    $-2147483648, %v104Jl
  Inst 97:   cmpq    %v104J, %v76J
  Inst 98:   jbe     label4; j label8
Block 4:
  (original IR block: block9)
  (successor: Block 5)
  (successor: Block 7)
  (instruction range: 99 .. 116)
  Inst 99:   movq    %v14J, %v78J
  Inst 100:   addq    %v76J, %v78J
  Inst 101:   xorq    %v102J, %v102J
  Inst 102:   movl    $-2147483648, %v103Jl
  Inst 103:   cmpq    %v103J, %v76J
  Inst 104:   movq    %v78J, %v80J
  Inst 105:   cmovnbeq %v102J, %v80J
  Inst 106:   movl    $-2147483648, %v101Jl
  Inst 107:   uninit  %v83V
  Inst 108:   pinsrd.w $0, 1541135323(%v80J,%v101J,1), %v83V
  Inst 109:   pinsrd.w $1, 1541135323(%v80J,%v101J,1), %v83V
  Inst 110:   pextrd.w $1, %v83V, %v84J
  Inst 111:   movq    %v84J, %v85J
  Inst 112:   movl    %v85Jl, %v86Jl
  Inst 113:   movl    $-2147483648, %v100Jl
  Inst 114:   cmpq    %v100J, %v86J
  Inst 115:   jbe     label5; j label7
Block 5:
  (original IR block: block11)
  (successor: Block 6)
  (instruction range: 116 .. 126)
  Inst 116:   movq    %v14J, %v88J
  Inst 117:   addq    %v86J, %v88J
  Inst 118:   xorq    %v98J, %v98J
  Inst 119:   movl    $-2147483648, %v99Jl
  Inst 120:   cmpq    %v99J, %v86J
  Inst 121:   movq    %v88J, %v90J
  Inst 122:   cmovnbeq %v98J, %v90J
  Inst 123:   movl    $-2147483648, %v97Jl
  Inst 124:   movq    1528889120(%v90J,%v97J,1), %v92J
  Inst 125:   jmp     label6
Block 6:
  (original IR block: block1)
  (instruction range: 126 .. 133)
  Inst 126:   xorl    %v95Jl, %v95Jl
  Inst 127:   movq    %v95J, %v93J
  Inst 128:   xorl    %v96Jl, %v96Jl
  Inst 129:   movq    %v96J, %v94J
  Inst 130:   movq    %v93J, %rax
  Inst 131:   movq    %v94J, %rdx
  Inst 132:   ret
Block 7:
  (original IR block: block10)
  (instruction range: 133 .. 134)
  Inst 133:   ud2 heap_oob
Block 8:
  (original IR block: block8)
  (instruction range: 134 .. 135)
  Inst 134:   ud2 heap_oob
Block 9:
  (original IR block: block6)
  (instruction range: 135 .. 136)
  Inst 135:   ud2 heap_oob
Block 10:
  (original IR block: block4)
  (instruction range: 136 .. 137)
  Inst 136:   ud2 heap_oob
Block 11:
  (original IR block: block3)
  (instruction range: 137 .. 138)
  Inst 137:   ud2 unreachable
}}

Error: Analysis(EntryLiveinValues([v67V]))
thread '<unnamed>' panicked at 'register allocation: Analysis(EntryLiveinValues([v67V]))', cranelift/codegen/src/machinst/compile.rs:96:10
note: run with `RUST_BACKTRACE=1` environment variable to display a backtrace

I'll try to reduce this in a bit

The text was updated successfully, but these errors were encountered:

alexcrichton · 2021-08-06T16:26:17Z

A reduction of this appears to be:

$ cat foo.wat
(module
  (func (result v128)
    v128.const i64x2 0 0
    i16x8.extend_high_i8x16_s
    v128.const i64x2 0 0
    i16x8.mul)
  (memory 1))
$ cargo run --features all-arch compile --enable-simd --target x86_64 ./foo.wat
    Finished dev [unoptimized] target(s) in 0.12s
     Running `target/debug/wasmtime compile --enable-simd --target x86_64 ./foo.wat`
 ERROR cranelift_codegen::machinst::compile > Register allocation error for vcode
VCode_ShowWithRRU {{
  Entry block: 0
Block 0:
  (original IR block: block0)
  (successor: Block 1)
  (instruction range: 0 .. 4)
  Inst 0:   movq    %rdi, %v0J
  Inst 1:   movq    %rsi, %v1J
  Inst 2:   movdqa  %v5V, %v6V
  Inst 3:   jmp     label1
Block 1:
  (original IR block: block1)
  (instruction range: 4 .. 7)
  Inst 4:   movdqa  %v6V, %v7V
  Inst 5:   movdqa  %v7V, %xmm0
  Inst 6:   ret
}}

Error: Analysis(EntryLiveinValues([v5V]))
thread 'main' panicked at 'register allocation: Analysis(EntryLiveinValues([v5V]))', cranelift/codegen/src/machinst/compile.rs:96:10
note: run with `RUST_BACKTRACE=1` environment variable to display a backtrace

bjorn3 · 2021-08-06T16:53:54Z

function u0:0(i64 vmctx) -> i8x16 fast {
    const0 = 0x00000000000000000000000000000000

                                block0(v0: i64):
@001d                               v2 = vconst.i8x16 const0
@002f                               v3 = swiden_high v2
@0032                               v4 = vconst.i8x16 const0
@0044                               v5 = raw_bitcast.i16x8 v4
@0044                               v6 = imul v3, v5
@0047                               v7 = raw_bitcast.i8x16 v6
@0047                               jump block1(v7)

                                block1(v1: i8x16):
@0047                               return v1
}

cfallin · 2021-08-06T18:10:51Z

The `Analysis(EntryLiveinValues(...))` error means a register is used without having been written to first. Here, the movdqa reads v5 and nothing produced it, so the lowering is missing an instruction somewhere. Digging into this imminently...

cfallin · 2021-08-06T18:19:32Z

The issue appears to be that the lowering for imul with widening ops on its inputs pattern-matches a swiden_high on one input but not on the other. This is supposed to be an unsupported case at the CLIF level, but because of the two nested if-lets here, we emit nothing at all during lowering when one input matches and the other does not:

                if let Some(swiden0_high) = matches_input(ctx, inputs[0], Opcode::SwidenHigh) {
                    if let Some(swiden1_high) = matches_input(ctx, inputs[1], Opcode::SwidenHigh) {

with an else on the outer but not the inner if.

Separately, I think that the discussion that led to "we don't generate this CLIF from any Wasm so we don't need to support the case with zero or one extends" is somehow wrong -- we're clearly getting it here. @jlb6740 I think we may need to just implement the general case?

cfallin · 2021-08-06T18:20:10Z

(For clarity, the "unsupported case" is supposed to hit the panic a bit further down, rather than silently not generate instructions)

cfallin · 2021-08-16T16:19:48Z

@jlb6740 there was another fuzzbug that came in today related to this -- would you mind taking a look when you're able?

jlb6740 · 2021-08-18T22:27:56Z

@cfallin Sorry guys. I don't notice alerts when I'm tagged, so I missed this, but I did see #3161 and submitted a fix for it, which fixes this too.

jlb6740 · 2021-08-28T21:54:17Z

@alexcrichton I can no longer reproduce this after #3209. I will close, but please reopen as necessary.

alexcrichton added the labels bug (Incorrect behavior in the current implementation that needs fixing), fuzz-bug (Bugs found by a fuzzer), and wasm-proposal:simd (Issues related to the WebAssembly SIMD proposal) on Aug 6, 2021

jlb6740 mentioned this issue Aug 18, 2021

Remove unnecessary, too strict assertion. Fix for 3161. #3209

Merged

jlb6740 closed this as completed Aug 28, 2021

jlb6740 self-assigned this Aug 28, 2021

alexcrichton mentioned this issue Sep 12, 2021

x64: Register allocation failure with simd instructions #3337

Closed

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Register allocation failure on x86_64 with simd enabled #3160

Register allocation failure on x86_64 with simd enabled #3160

alexcrichton commented Aug 6, 2021

alexcrichton commented Aug 6, 2021

bjorn3 commented Aug 6, 2021

cfallin commented Aug 6, 2021

cfallin commented Aug 6, 2021

cfallin commented Aug 6, 2021

cfallin commented Aug 16, 2021

jlb6740 commented Aug 18, 2021

jlb6740 commented Aug 28, 2021

Register allocation failure on x86_64 with simd enabled #3160

Register allocation failure on x86_64 with simd enabled #3160

Comments

alexcrichton commented Aug 6, 2021

alexcrichton commented Aug 6, 2021

bjorn3 commented Aug 6, 2021

cfallin commented Aug 6, 2021

cfallin commented Aug 6, 2021

cfallin commented Aug 6, 2021

cfallin commented Aug 16, 2021

jlb6740 commented Aug 18, 2021

jlb6740 commented Aug 28, 2021