Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Register allocation failure on x86_64 with simd enabled #3160

Closed
alexcrichton opened this issue Aug 6, 2021 · 8 comments
Closed

Register allocation failure on x86_64 with simd enabled #3160

alexcrichton opened this issue Aug 6, 2021 · 8 comments
Assignees
Labels
bug Incorrect behavior in the current implementation that needs fixing fuzz-bug Bugs found by a fuzzer wasm-proposal:simd Issues related to the WebAssembly SIMD proposal

Comments

@alexcrichton
Copy link
Member

Found via fuzz-bugs last night; running this test case yields:

$ cargo run testcase0.wat --enable-simd
    Finished dev [unoptimized + debuginfo] target(s) in 0.23s
     Running `target/debug/wasmtime testcase0.wat --enable-simd`
 ERROR cranelift_codegen::machinst::compile > Register allocation error for vcode
VCode_ShowWithRRU {{
  Entry block: 0
Block 0:
  (original IR block: block0)
  (successor: Block 1)
  (successor: Block 11)
  (instruction range: 0 .. 6)
  Inst 0:   movq    %rdi, %v0J
  Inst 1:   movq    %rsi, %v1J
  Inst 2:   movq    %rdx, %v2J
  Inst 3:   movl    208(%v0J), %v4Jl
  Inst 4:   testl   %v4Jl, %v4Jl
  Inst 5:   jnz     label1; j label11
Block 1:
  (original IR block: block2)
  (successor: Block 2)
  (successor: Block 10)
  (instruction range: 6 .. 15)
  Inst 6:   movl    $-1, %v133Jl
  Inst 7:   movq    %v133J, %v8J
  Inst 8:   addl    208(%v0J), %v8Jl
  Inst 9:   movl    %v8Jl, 208(%v0J)
  Inst 10:   movl    $1835102836, %v132Jl
  Inst 11:   movl    %v132Jl, %v11Jl
  Inst 12:   movl    $-2147483648, %v131Jl
  Inst 13:   cmpq    %v131J, %v11J
  Inst 14:   jbe     label2; j label10
Block 2:
  (original IR block: block5)
  (successor: Block 3)
  (successor: Block 9)
  (instruction range: 15 .. 69)
  Inst 15:   movq    68(%v0J), %v14J
  Inst 16:   movq    %v14J, %v15J
  Inst 17:   addq    %v11J, %v15J
  Inst 18:   xorq    %v129J, %v129J
  Inst 19:   movl    $-2147483648, %v130Jl
  Inst 20:   cmpq    %v130J, %v11J
  Inst 21:   movq    %v15J, %v17J
  Inst 22:   cmovnbeq %v129J, %v17J
  Inst 23:   movl    $-2147483648, %v128Jl
  Inst 24:   movzbq  25701677(%v17J,%v128J,1), %v19J
  Inst 25:   movq    %v19J, %v20J
  Inst 26:   movsbq  %v20Jb, %v21J
  Inst 27:   movl    $-606395173, %v127Jl
  Inst 28:   movl    %v127Jl, %v22Jl
  Inst 29:   movw    %v21Jw, 419430398(%v14J,%v22J,1)
  Inst 30:   movl    $694575416, %v125Jl
  Inst 31:   movd    %v125Jl, %v124V
  Inst 32:   movl    $2147483647, %v126Jl
  Inst 33:   movd    %v126Jl, %v25V
  Inst 34:   andps   %v124V, %v25V
  Inst 35:   movl    $-2147483648, %v123Jl
  Inst 36:   movd    %v123Jl, %v26V
  Inst 37:   xorps   %v25V, %v26V
  Inst 38:   movaps  %v26V, %v120V
  Inst 39:   cvt_float32_to_uint64_seq %v120V, %v27J
  Inst 40:   movl    $-404232217, %v116Jl
  Inst 41:   movd    %v116Jl, %v115V
  Inst 42:   movaps  %v115V, %v117V
  Inst 43:   cvt_float32_to_sint32_seq %v117V, %v29Jl
  Inst 44:   movl    %v29Jl, %v30Jl
  Inst 45:   movq    757935405(%v14J,%v30J,1), %v32J
  Inst 46:   cmpq    %v32J, %v27J
  Inst 47:   setz    %v33Jb
  Inst 48:   movq    %v33J, %v34J
  Inst 49:   andq    $1, %v34J
  Inst 50:   movl    %v34Jl, %v35Jl
  Inst 51:   uninit  %v38V
  Inst 52:   pinsrd.w $0, 757935405(%v14J,%v35J,1), %v38V
  Inst 53:   pinsrd.w $1, 757935405(%v14J,%v35J,1), %v38V
  Inst 54:   movdqa  %v38V, %v39V
  Inst 55:   movdqa  %v39V, %v40V
  Inst 56:   palignr $8, %v39V, %v40V
  Inst 57:   pmovsxbw %v40V, %v40V
  Inst 58:   movdqa  %v40V, %v41V
  Inst 59:   pxor    %v114V, %v114V
  Inst 60:   pcmpeqd %v41V, %v114V
  Inst 61:   ptest   %v114V, %v114V
  Inst 62:   setz    %v42Jb
  Inst 63:   movq    %v42J, %v43J
  Inst 64:   andq    $1, %v43J
  Inst 65:   movl    %v43Jl, %v44Jl
  Inst 66:   movl    $-2147483648, %v113Jl
  Inst 67:   cmpq    %v113J, %v44J
  Inst 68:   jbe     label3; j label9
Block 3:
  (original IR block: block7)
  (successor: Block 4)
  (successor: Block 8)
  (instruction range: 69 .. 99)
  Inst 69:   movq    %v14J, %v46J
  Inst 70:   addq    %v44J, %v46J
  Inst 71:   xorq    %v111J, %v111J
  Inst 72:   movl    $-2147483648, %v112Jl
  Inst 73:   cmpq    %v112J, %v44J
  Inst 74:   movq    %v46J, %v48J
  Inst 75:   cmovnbeq %v111J, %v48J
  Inst 76:   movl    $-2147483648, %v110Jl
  Inst 77:   movq    26291501(%v48J,%v110J,1), %v50J
  Inst 78:   movq    76(%v0J), %v51J
  Inst 79:   movq    %v51J, %v53J
  Inst 80:   shrq    $16, %v53J
  Inst 81:   movq    %v53J, %v54J
  Inst 82:   movl    %v54Jl, %v55Jl
  Inst 83:   movl    1667575909(%v14J,%v55J,1), %v57Jl
  Inst 84:   movl    96(%v0J), %v64Jl
  Inst 85:   pmovsxbw 808597554(%v14J,%v64J,1), %v66V
  Inst 86:   movdqa  %v67V, %v68V
  Inst 87:   pextrb  $13, %v68V, %v69J
  Inst 88:   movsbl  %v69Jb, %v70Jl
  Inst 89:   movl    %v70Jl, %v71Jl
  Inst 90:   movq    770941057(%v14J,%v71J,1), %v73J
  Inst 91:   movl    $-757935404, %v106Jl
  Inst 92:   movd    %v106Jl, %v105V
  Inst 93:   movaps  %v105V, %v107V
  Inst 94:   cvt_float32_to_uint32_seq %v107V, %v75Jl
  Inst 95:   movl    %v75Jl, %v76Jl
  Inst 96:   movl    $-2147483648, %v104Jl
  Inst 97:   cmpq    %v104J, %v76J
  Inst 98:   jbe     label4; j label8
Block 4:
  (original IR block: block9)
  (successor: Block 5)
  (successor: Block 7)
  (instruction range: 99 .. 116)
  Inst 99:   movq    %v14J, %v78J
  Inst 100:   addq    %v76J, %v78J
  Inst 101:   xorq    %v102J, %v102J
  Inst 102:   movl    $-2147483648, %v103Jl
  Inst 103:   cmpq    %v103J, %v76J
  Inst 104:   movq    %v78J, %v80J
  Inst 105:   cmovnbeq %v102J, %v80J
  Inst 106:   movl    $-2147483648, %v101Jl
  Inst 107:   uninit  %v83V
  Inst 108:   pinsrd.w $0, 1541135323(%v80J,%v101J,1), %v83V
  Inst 109:   pinsrd.w $1, 1541135323(%v80J,%v101J,1), %v83V
  Inst 110:   pextrd.w $1, %v83V, %v84J
  Inst 111:   movq    %v84J, %v85J
  Inst 112:   movl    %v85Jl, %v86Jl
  Inst 113:   movl    $-2147483648, %v100Jl
  Inst 114:   cmpq    %v100J, %v86J
  Inst 115:   jbe     label5; j label7
Block 5:
  (original IR block: block11)
  (successor: Block 6)
  (instruction range: 116 .. 126)
  Inst 116:   movq    %v14J, %v88J
  Inst 117:   addq    %v86J, %v88J
  Inst 118:   xorq    %v98J, %v98J
  Inst 119:   movl    $-2147483648, %v99Jl
  Inst 120:   cmpq    %v99J, %v86J
  Inst 121:   movq    %v88J, %v90J
  Inst 122:   cmovnbeq %v98J, %v90J
  Inst 123:   movl    $-2147483648, %v97Jl
  Inst 124:   movq    1528889120(%v90J,%v97J,1), %v92J
  Inst 125:   jmp     label6
Block 6:
  (original IR block: block1)
  (instruction range: 126 .. 133)
  Inst 126:   xorl    %v95Jl, %v95Jl
  Inst 127:   movq    %v95J, %v93J
  Inst 128:   xorl    %v96Jl, %v96Jl
  Inst 129:   movq    %v96J, %v94J
  Inst 130:   movq    %v93J, %rax
  Inst 131:   movq    %v94J, %rdx
  Inst 132:   ret
Block 7:
  (original IR block: block10)
  (instruction range: 133 .. 134)
  Inst 133:   ud2 heap_oob
Block 8:
  (original IR block: block8)
  (instruction range: 134 .. 135)
  Inst 134:   ud2 heap_oob
Block 9:
  (original IR block: block6)
  (instruction range: 135 .. 136)
  Inst 135:   ud2 heap_oob
Block 10:
  (original IR block: block4)
  (instruction range: 136 .. 137)
  Inst 136:   ud2 heap_oob
Block 11:
  (original IR block: block3)
  (instruction range: 137 .. 138)
  Inst 137:   ud2 unreachable
}}

Error: Analysis(EntryLiveinValues([v67V]))
thread '<unnamed>' panicked at 'register allocation: Analysis(EntryLiveinValues([v67V]))', cranelift/codegen/src/machinst/compile.rs:96:10
note: run with `RUST_BACKTRACE=1` environment variable to display a backtrace

I'll try to reduce this in a bit

@alexcrichton alexcrichton added bug Incorrect behavior in the current implementation that needs fixing fuzz-bug Bugs found by a fuzzer wasm-proposal:simd Issues related to the WebAssembly SIMD proposal labels Aug 6, 2021
@alexcrichton
Copy link
Member Author

A reduction of this appears to be:

$ cat foo.wat
(module
  (func (result v128)
    v128.const i64x2 0 0
    i16x8.extend_high_i8x16_s
    v128.const i64x2 0 0
    i16x8.mul)
  (memory 1))
$ cargo run --features all-arch compile --enable-simd --target x86_64 ./foo.wat
    Finished dev [unoptimized] target(s) in 0.12s
     Running `target/debug/wasmtime compile --enable-simd --target x86_64 ./foo.wat`
 ERROR cranelift_codegen::machinst::compile > Register allocation error for vcode
VCode_ShowWithRRU {{
  Entry block: 0
Block 0:
  (original IR block: block0)
  (successor: Block 1)
  (instruction range: 0 .. 4)
  Inst 0:   movq    %rdi, %v0J
  Inst 1:   movq    %rsi, %v1J
  Inst 2:   movdqa  %v5V, %v6V
  Inst 3:   jmp     label1
Block 1:
  (original IR block: block1)
  (instruction range: 4 .. 7)
  Inst 4:   movdqa  %v6V, %v7V
  Inst 5:   movdqa  %v7V, %xmm0
  Inst 6:   ret
}}

Error: Analysis(EntryLiveinValues([v5V]))
thread 'main' panicked at 'register allocation: Analysis(EntryLiveinValues([v5V]))', cranelift/codegen/src/machinst/compile.rs:96:10
note: run with `RUST_BACKTRACE=1` environment variable to display a backtrace

@bjorn3
Copy link
Contributor

bjorn3 commented Aug 6, 2021

function u0:0(i64 vmctx) -> i8x16 fast {
    const0 = 0x00000000000000000000000000000000

                                block0(v0: i64):
@001d                               v2 = vconst.i8x16 const0
@002f                               v3 = swiden_high v2
@0032                               v4 = vconst.i8x16 const0
@0044                               v5 = raw_bitcast.i16x8 v4
@0044                               v6 = imul v3, v5
@0047                               v7 = raw_bitcast.i8x16 v6
@0047                               jump block1(v7)

                                block1(v1: i8x16):
@0047                               return v1
}

@cfallin
Copy link
Member

cfallin commented Aug 6, 2021

The `Analysis(EntryLiveinValues(...))` error means a register is used without having been written to first; it seems the movdqa reads v5V here and nothing produced it (so the lowering is missing an instruction somewhere). Digging into this imminently...

@cfallin
Copy link
Member

cfallin commented Aug 6, 2021

The issue appears to be that the lowering for imul with widening ops on the inputs does a pattern-match that gets the swiden on one input, but not on the other; this is supposed to be an unsupported case at the CLIF level, but because of two nested if-lets here, we don't do anything at all for lowering if we get one but not the other:

                if let Some(swiden0_high) = matches_input(ctx, inputs[0], Opcode::SwidenHigh) {
                    if let Some(swiden1_high) = matches_input(ctx, inputs[1], Opcode::SwidenHigh) {

with an else on the outer but not the inner if.

Separately, I think that the discussion that led to "we don't generate this CLIF from any Wasm so we don't need to support the case with zero or one extends" is somehow wrong -- we're clearly getting it here. @jlb6740 I think we may need to just implement the general case?

@cfallin
Copy link
Member

cfallin commented Aug 6, 2021

(For clarity, the "unsupported case" is supposed to hit the panic a bit further down, rather than silently not generate instructions)

@cfallin
Copy link
Member

cfallin commented Aug 16, 2021

@jlb6740 there was another fuzzbug that came in today related to this -- would you mind taking a look when you're able?

@jlb6740
Copy link
Contributor

jlb6740 commented Aug 18, 2021

@cfallin Sorry guys. I do not notice alerts when I'm tagged, so I missed this, but I did see 3161 and submitted a fix for that, which fixes this too.

@jlb6740
Copy link
Contributor

jlb6740 commented Aug 28, 2021

@alexcrichton I can no longer reproduce this after #3209. I will close, but please reopen as necessary.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
bug Incorrect behavior in the current implementation that needs fixing fuzz-bug Bugs found by a fuzzer wasm-proposal:simd Issues related to the WebAssembly SIMD proposal
Projects
None yet
Development

No branches or pull requests

4 participants