diff --git a/llvm/lib/AsmParser/LLLexer.cpp b/llvm/lib/AsmParser/LLLexer.cpp
index c82e74972b67ca..7c97f7afbe0933 100644
--- a/llvm/lib/AsmParser/LLLexer.cpp
+++ b/llvm/lib/AsmParser/LLLexer.cpp
@@ -838,8 +838,6 @@ lltok::Kind LLLexer::LexIdentifier() {
   TYPEKEYWORD("ppc_fp128", Type::getPPC_FP128Ty(Context));
   TYPEKEYWORD("label", Type::getLabelTy(Context));
   TYPEKEYWORD("metadata", Type::getMetadataTy(Context));
-  TYPEKEYWORD("x86_mmx", llvm::FixedVectorType::get(
-                             llvm::IntegerType::get(Context, 64), 1));
   TYPEKEYWORD("x86_amx", Type::getX86_AMXTy(Context));
   TYPEKEYWORD("token", Type::getTokenTy(Context));
   TYPEKEYWORD("ptr", PointerType::getUnqual(Context));
diff --git a/llvm/test/Bindings/llvm-c/echo.ll b/llvm/test/Bindings/llvm-c/echo.ll
index ab9acbc0a66a5a..45e3d0357ebdf2 100644
--- a/llvm/test/Bindings/llvm-c/echo.ll
+++ b/llvm/test/Bindings/llvm-c/echo.ll
@@ -70,7 +70,7 @@ define void @types() {
   %9 = alloca [3 x i22], align 4
   %10 = alloca ptr addrspace(5), align 8
   %11 = alloca <5 x ptr>, align 64
-  %12 = alloca x86_mmx, align 8
+  %12 = alloca <1 x i64>, align 8
   ret void
 }
 
diff --git a/llvm/test/Bitcode/compatibility.ll b/llvm/test/Bitcode/compatibility.ll
index a7567038b7a7ba..e5592b347425a2 100644
--- a/llvm/test/Bitcode/compatibility.ll
+++ b/llvm/test/Bitcode/compatibility.ll
@@ -1112,8 +1112,6 @@ define void @typesystem() {
   ; CHECK: %t5 = alloca x86_fp80
   %t6 = alloca ppc_fp128
   ; CHECK: %t6 = alloca ppc_fp128
-  %t7 = alloca x86_mmx
-  ; CHECK: %t7 = alloca <1 x i64>
   %t8 = alloca ptr
   ; CHECK: %t8 = alloca ptr
   %t9 = alloca <4 x i32>
diff --git a/llvm/test/CodeGen/X86/2007-05-15-maskmovq.ll b/llvm/test/CodeGen/X86/2007-05-15-maskmovq.ll
index 69f733461efc77..ba40c5c4627d95 100644
--- a/llvm/test/CodeGen/X86/2007-05-15-maskmovq.ll
+++ b/llvm/test/CodeGen/X86/2007-05-15-maskmovq.ll
@@ -25,10 +25,8 @@ define void @test(<1 x i64> %c64, <1 x i64> %mask1, ptr %P) {
 ; CHECK-NEXT: popl %edi
 ; CHECK-NEXT: retl
 entry:
-  %tmp4 = bitcast <1 x i64> %mask1 to x86_mmx ; [#uses=1]
-  %tmp6 = bitcast <1 x i64> %c64 to x86_mmx ; [#uses=1]
-  tail call void @llvm.x86.mmx.maskmovq( x86_mmx %tmp4, x86_mmx %tmp6, ptr %P )
+  tail call void @llvm.x86.mmx.maskmovq( <1 x i64> %mask1, <1 x i64> %c64, ptr %P )
   ret void
 }
 
-declare void @llvm.x86.mmx.maskmovq(x86_mmx, x86_mmx, ptr)
+declare void @llvm.x86.mmx.maskmovq(<1 x i64>, <1 x i64>, ptr)
diff --git a/llvm/test/CodeGen/X86/2007-07-03-GR64ToVR64.ll b/llvm/test/CodeGen/X86/2007-07-03-GR64ToVR64.ll
index 79b06ba836af29..6c586782420e15 100644
--- a/llvm/test/CodeGen/X86/2007-07-03-GR64ToVR64.ll
+++ b/llvm/test/CodeGen/X86/2007-07-03-GR64ToVR64.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+mmx | FileCheck %s
 
-@R = external global x86_mmx ; [#uses=1]
+@R = external global <1 x i64> ; [#uses=1]
 
 define void @foo(<1 x i64> %A, <1 x i64> %B) nounwind {
 ; CHECK-LABEL: foo:
@@ -14,13 +14,11 @@ define void @foo(<1 x i64> %A, <1 x i64> %B) nounwind {
 ; CHECK-NEXT: emms
 ; CHECK-NEXT: retq
 entry:
-  %tmp4 = bitcast <1 x i64> %B to x86_mmx ; <<4 x i16>> [#uses=1]
-  %tmp6 = bitcast <1 x i64> %A to x86_mmx ; <<4 x i16>> [#uses=1]
-  %tmp7 = tail call x86_mmx @llvm.x86.mmx.paddus.w( x86_mmx %tmp6, x86_mmx %tmp4 ) ; [#uses=1]
-  store x86_mmx %tmp7, ptr @R
+  %tmp7 = tail call <1 x i64> @llvm.x86.mmx.paddus.w( <1 x i64> %A, <1 x i64> %B ) ; <<1 x i64>> [#uses=1]
+  store <1 x i64> %tmp7, ptr @R
   tail call void @llvm.x86.mmx.emms( )
   ret void
 }
 
-declare x86_mmx @llvm.x86.mmx.paddus.w(x86_mmx, x86_mmx)
+declare <1 x i64> @llvm.x86.mmx.paddus.w(<1 x i64>, <1 x i64>)
 declare void @llvm.x86.mmx.emms()
diff --git a/llvm/test/CodeGen/X86/2008-04-08-CoalescerCrash.ll b/llvm/test/CodeGen/X86/2008-04-08-CoalescerCrash.ll
index d439e827e81994..0c792644fc5c8a 100644
--- a/llvm/test/CodeGen/X86/2008-04-08-CoalescerCrash.ll
+++ b/llvm/test/CodeGen/X86/2008-04-08-CoalescerCrash.ll
@@ -5,15 +5,15 @@ entry:
   tail call void asm sideeffect "# top of block", "~{dirflag},~{fpsr},~{flags},~{di},~{si},~{dx},~{cx},~{ax}"( ) nounwind
   tail call void asm sideeffect ".file \224443946.c\22", "~{dirflag},~{fpsr},~{flags}"( ) nounwind
   tail call void asm sideeffect ".line 8", "~{dirflag},~{fpsr},~{flags}"( ) nounwind
-  %tmp1 = tail call x86_mmx asm sideeffect "movd $1, $0", "=={mm4},{bp},~{dirflag},~{fpsr},~{flags},~{memory}"( i32 undef ) nounwind ; [#uses=1]
+  %tmp1 = tail call <1 x i64> asm sideeffect "movd $1, $0", "=={mm4},{bp},~{dirflag},~{fpsr},~{flags},~{memory}"( i32 undef ) nounwind ; <<1 x i64>> [#uses=1]
   tail call void asm sideeffect ".file \224443946.c\22", "~{dirflag},~{fpsr},~{flags}"( ) nounwind
   tail call void asm sideeffect ".line 9", "~{dirflag},~{fpsr},~{flags}"( ) nounwind
-  %tmp3 = tail call i32 asm sideeffect "movd $1, $0", "=={bp},{mm3},~{dirflag},~{fpsr},~{flags},~{memory}"( x86_mmx undef ) nounwind ; [#uses=1]
+  %tmp3 = tail call i32 asm sideeffect "movd $1, $0", "=={bp},{mm3},~{dirflag},~{fpsr},~{flags},~{memory}"( <1 x i64> undef ) nounwind ; [#uses=1]
   tail call void asm sideeffect ".file \224443946.c\22", "~{dirflag},~{fpsr},~{flags}"( ) nounwind
   tail call void asm sideeffect ".line 10", "~{dirflag},~{fpsr},~{flags}"( ) nounwind
-  tail call void asm sideeffect "movntq $0, 0($1,$2)", "{mm0},{di},{bp},~{dirflag},~{fpsr},~{flags},~{memory}"( x86_mmx undef, i32 undef, i32 %tmp3 ) nounwind
+  tail call void asm sideeffect "movntq $0, 0($1,$2)", "{mm0},{di},{bp},~{dirflag},~{fpsr},~{flags},~{memory}"( <1 x i64> undef, i32 undef, i32 %tmp3 ) nounwind
   tail call void asm sideeffect ".file \224443946.c\22", "~{dirflag},~{fpsr},~{flags}"( ) nounwind
   tail call void asm sideeffect ".line 11", "~{dirflag},~{fpsr},~{flags}"( ) nounwind
-  %tmp8 = tail call i32 asm sideeffect "movd $1, $0", "=={bp},{mm4},~{dirflag},~{fpsr},~{flags},~{memory}"( x86_mmx %tmp1 ) nounwind ; [#uses=0]
+  %tmp8 = tail call i32 asm sideeffect "movd $1, $0", "=={bp},{mm4},~{dirflag},~{fpsr},~{flags},~{memory}"( <1 x i64> %tmp1 ) nounwind ; [#uses=0]
   ret i32 undef
 }
diff --git a/llvm/test/CodeGen/X86/2008-08-23-64Bit-maskmovq.ll b/llvm/test/CodeGen/X86/2008-08-23-64Bit-maskmovq.ll
index 594edbaad29441..4a4477823a61d3 100644
--- a/llvm/test/CodeGen/X86/2008-08-23-64Bit-maskmovq.ll
+++ b/llvm/test/CodeGen/X86/2008-08-23-64Bit-maskmovq.ll
@@ -17,13 +17,13 @@ entry:
   br i1 false, label %bb.nph144.split, label %bb133
 
 bb.nph144.split: ; preds = %entry
-  %tmp = bitcast <8 x i8> zeroinitializer to x86_mmx
-  %tmp2 = bitcast <8 x i8> zeroinitializer to x86_mmx
-  tail call void @llvm.x86.mmx.maskmovq( x86_mmx %tmp, x86_mmx %tmp2, ptr null ) nounwind
+  %tmp = bitcast <8 x i8> zeroinitializer to <1 x i64>
+  %tmp2 = bitcast <8 x i8> zeroinitializer to <1 x i64>
+  tail call void @llvm.x86.mmx.maskmovq( <1 x i64> %tmp, <1 x i64> %tmp2, ptr null ) nounwind
   unreachable
 
 bb133: ; preds = %entry
   ret void
 }
 
-declare void @llvm.x86.mmx.maskmovq(x86_mmx, x86_mmx, ptr) nounwind
+declare void @llvm.x86.mmx.maskmovq(<1 x i64>, <1 x i64>, ptr) nounwind
diff --git a/llvm/test/CodeGen/X86/2008-09-05-sinttofp-2xi32.ll b/llvm/test/CodeGen/X86/2008-09-05-sinttofp-2xi32.ll
index 3a112ae2a2113e..20673a177ac31f 100644
--- a/llvm/test/CodeGen/X86/2008-09-05-sinttofp-2xi32.ll
+++ b/llvm/test/CodeGen/X86/2008-09-05-sinttofp-2xi32.ll
@@ -26,7 +26,7 @@ entry:
 
 ; This is how to get MMX instructions.
 
-define <2 x double> @a2(x86_mmx %x) nounwind {
+define <2 x double> @a2(<1 x i64> %x) nounwind {
 ; CHECK-LABEL: a2:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: pushl %ebp
@@ -42,11 +42,11 @@ define <2 x double> @a2(<1 x i64> %x) nounwind {
 ; CHECK-NEXT: popl %ebp
 ; CHECK-NEXT: retl
 entry:
-  %y = tail call <2 x double> @llvm.x86.sse.cvtpi2pd(x86_mmx %x)
+  %y = tail call <2 x double> @llvm.x86.sse.cvtpi2pd(<1 x i64> %x)
   ret <2 x double> %y
 }
 
-define x86_mmx @b2(<2 x double> %x) nounwind {
+define <1 x i64> @b2(<2 x double> %x) nounwind {
 ; CHECK-LABEL: b2:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: pushl %ebp
@@ -61,9 +61,9 @@ define <1 x i64> @b2(<2 x double> %x) nounwind {
 ; CHECK-NEXT: popl %ebp
 ; CHECK-NEXT: retl
 entry:
-  %y = tail call x86_mmx @llvm.x86.sse.cvttpd2pi (<2 x double> %x)
-  ret x86_mmx %y
+  %y = tail call <1 x i64> @llvm.x86.sse.cvttpd2pi (<2 x double> %x)
+  ret <1 x i64> %y
 }
 
-declare <2 x double> @llvm.x86.sse.cvtpi2pd(x86_mmx)
-declare x86_mmx @llvm.x86.sse.cvttpd2pi(<2 x double>)
+declare <2 x double> @llvm.x86.sse.cvtpi2pd(<1 x i64>)
+declare <1 x i64> @llvm.x86.sse.cvttpd2pi(<2 x double>)
diff --git a/llvm/test/CodeGen/X86/2011-06-14-mmx-inlineasm.ll b/llvm/test/CodeGen/X86/2011-06-14-mmx-inlineasm.ll
index 306aeed1ace3e1..582ebb9bdcfd15 100644
--- a/llvm/test/CodeGen/X86/2011-06-14-mmx-inlineasm.ll
+++ b/llvm/test/CodeGen/X86/2011-06-14-mmx-inlineasm.ll
@@ -3,14 +3,14 @@
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32"
 target triple = "i386-apple-macosx10.6.6"
 
-%0 = type { x86_mmx, x86_mmx, x86_mmx, x86_mmx, x86_mmx, x86_mmx, x86_mmx }
+%0 = type { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }
 
 define i32 @pixman_fill_mmx(ptr nocapture %bits, i32 %stride, i32 %bpp, i32 %x, i32 %y, i32 %width, i32 %height, i32 %xor) nounwind ssp {
 entry:
   %conv = zext i32 %xor to i64
   %shl = shl nuw i64 %conv, 32
   %or = or i64 %shl, %conv
-  %0 = bitcast i64 %or to x86_mmx
+  %0 = bitcast i64 %or to <1 x i64>
 ; CHECK: movq [[MMXR:%mm[0-7],]] {{%mm[0-7]}}
 ; CHECK-NEXT: movq [[MMXR]] {{%mm[0-7]}}
 ; CHECK-NEXT: movq [[MMXR]] {{%mm[0-7]}}
@@ -18,7 +18,7 @@ entry:
 ; CHECK-NEXT: movq [[MMXR]] {{%mm[0-7]}}
 ; CHECK-NEXT: movq [[MMXR]] {{%mm[0-7]}}
 ; CHECK-NEXT: movq [[MMXR]] {{%mm[0-7]}}
-  %1 = tail call %0 asm "movq\09\09$7,\09$0\0Amovq\09\09$7,\09$1\0Amovq\09\09$7,\09$2\0Amovq\09\09$7,\09$3\0Amovq\09\09$7,\09$4\0Amovq\09\09$7,\09$5\0Amovq\09\09$7,\09$6\0A", "=&y,=&y,=&y,=&y,=&y,=&y,=y,y,~{dirflag},~{fpsr},~{flags}"(x86_mmx %0) nounwind, !srcloc !0
+  %1 = tail call %0 asm "movq\09\09$7,\09$0\0Amovq\09\09$7,\09$1\0Amovq\09\09$7,\09$2\0Amovq\09\09$7,\09$3\0Amovq\09\09$7,\09$4\0Amovq\09\09$7,\09$5\0Amovq\09\09$7,\09$6\0A", "=&y,=&y,=&y,=&y,=&y,=&y,=y,y,~{dirflag},~{fpsr},~{flags}"(<1 x i64> %0) nounwind, !srcloc !0
   %asmresult = extractvalue %0 %1, 0
   %asmresult6 = extractvalue %0 %1, 1
   %asmresult7 = extractvalue %0 %1, 2
@@ -34,7 +34,7 @@ entry:
 ; CHECK-NEXT: movq {{%mm[0-7]}},
 ; CHECK-NEXT: movq {{%mm[0-7]}},
 ; CHECK-NEXT: movq {{%mm[0-7]}},
-  tail call void asm sideeffect "movq\09$1,\09 ($0)\0Amovq\09$2,\09 8($0)\0Amovq\09$3,\0916($0)\0Amovq\09$4,\0924($0)\0Amovq\09$5,\0932($0)\0Amovq\09$6,\0940($0)\0Amovq\09$7,\0948($0)\0Amovq\09$8,\0956($0)\0A", "r,y,y,y,y,y,y,y,y,~{memory},~{dirflag},~{fpsr},~{flags}"(ptr undef, x86_mmx %0, x86_mmx %asmresult, x86_mmx %asmresult6, x86_mmx %asmresult7, x86_mmx %asmresult8, x86_mmx %asmresult9, x86_mmx %asmresult10, x86_mmx %asmresult11) nounwind, !srcloc !1
+  tail call void asm sideeffect "movq\09$1,\09 ($0)\0Amovq\09$2,\09 8($0)\0Amovq\09$3,\0916($0)\0Amovq\09$4,\0924($0)\0Amovq\09$5,\0932($0)\0Amovq\09$6,\0940($0)\0Amovq\09$7,\0948($0)\0Amovq\09$8,\0956($0)\0A", "r,y,y,y,y,y,y,y,y,~{memory},~{dirflag},~{fpsr},~{flags}"(ptr undef, <1 x i64> %0, <1 x i64> %asmresult, <1 x i64> %asmresult6, <1 x i64> %asmresult7, <1 x i64> %asmresult8, <1 x i64> %asmresult9, <1 x i64> %asmresult10, <1 x i64> %asmresult11) nounwind, !srcloc !1
   tail call void @llvm.x86.mmx.emms() nounwind
   ret i32 1
 }
diff --git a/llvm/test/CodeGen/X86/avx-vbroadcast.ll b/llvm/test/CodeGen/X86/avx-vbroadcast.ll
index 3f6f8c01b9049f..c69886df82bdfb 100644
--- a/llvm/test/CodeGen/X86/avx-vbroadcast.ll
+++ b/llvm/test/CodeGen/X86/avx-vbroadcast.ll
@@ -1011,7 +1011,7 @@ define float @broadcast_lifetime() nounwind {
   ret float %7
 }
 
-define <8 x i16> @broadcast_x86_mmx(x86_mmx %tmp) nounwind {
+define <8 x i16> @broadcast_x86_mmx(<1 x i64> %tmp) nounwind {
 ; X86-LABEL: broadcast_x86_mmx:
 ; X86: ## %bb.0: ## %bb
 ; X86-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0]
@@ -1023,7 +1023,7 @@ define <8 x i16> @broadcast_x86_mmx(<1 x i64> %tmp) nounwind {
 ; X64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
 ; X64-NEXT: retq
 bb:
-  %tmp1 = bitcast x86_mmx %tmp to i64
+  %tmp1 = bitcast <1 x i64> %tmp to i64
   %tmp2 = insertelement <2 x i64> undef, i64 %tmp1, i32 0
   %tmp3 = bitcast <2 x i64> %tmp2 to <8 x i16>
   %tmp4 = shufflevector <8 x i16> %tmp3, <8 x i16> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
diff --git a/llvm/test/CodeGen/X86/avx2-vbroadcast.ll b/llvm/test/CodeGen/X86/avx2-vbroadcast.ll
index fed6c2eb8ba0a2..9ac0503831eb7f 100644
--- a/llvm/test/CodeGen/X86/avx2-vbroadcast.ll
+++ b/llvm/test/CodeGen/X86/avx2-vbroadcast.ll
@@ -1449,7 +1449,7 @@ eintry:
   ret void
 }
 
-define <8 x i16> @broadcast_x86_mmx(x86_mmx %tmp) nounwind {
+define <8 x i16> @broadcast_x86_mmx(<1 x i64> %tmp) nounwind {
 ; X86-LABEL: broadcast_x86_mmx:
 ; X86: ## %bb.0: ## %bb
 ; X86-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0]
@@ -1466,7 +1466,7 @@ define <8 x i16> @broadcast_x86_mmx(<1 x i64> %tmp) nounwind {
 ; X64-AVX512VL-NEXT: vpbroadcastq %rdi, %xmm0
 ; X64-AVX512VL-NEXT: retq
 bb:
-  %tmp1 = bitcast x86_mmx %tmp to i64
+  %tmp1 = bitcast <1 x i64> %tmp to i64
   %tmp2 = insertelement <2 x i64> undef, i64 %tmp1, i32 0
   %tmp3 = bitcast <2 x i64> %tmp2 to <8 x i16>
   %tmp4 = shufflevector <8 x i16> %tmp3, <8 x i16> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
diff --git a/llvm/test/CodeGen/X86/bitcast-mmx.ll b/llvm/test/CodeGen/X86/bitcast-mmx.ll
index 061723a0966e2b..fe48a96a51d3ec 100644
--- a/llvm/test/CodeGen/X86/bitcast-mmx.ll
+++ b/llvm/test/CodeGen/X86/bitcast-mmx.ll
@@ -17,9 +17,9 @@ define i32 @t0(i64 %x) nounwind {
 ; X64-NEXT: retq
 entry:
   %0 = bitcast i64 %x to <4 x i16>
-  %1 = bitcast <4 x i16> %0 to x86_mmx
-  %2 = tail call x86_mmx @llvm.x86.sse.pshuf.w(x86_mmx %1, i8 -18)
-  %3 = bitcast x86_mmx %2 to <4 x i16>
+  %1 = bitcast <4 x i16> %0 to <1 x i64>
+  %2 = tail call <1 x i64> @llvm.x86.sse.pshuf.w(<1 x i64> %1, i8 -18)
+  %3 = bitcast <1 x i64> %2 to <4 x i16>
   %4 = bitcast <4 x i16> %3 to <1 x i64>
   %5 = extractelement <1 x i64> %4, i32 0
   %6 = bitcast i64 %5 to <2 x i32>
@@ -52,9 +52,9 @@ define i64 @t1(i64 %x, i32 %n) nounwind {
 ; X64-NEXT: movq %mm0, %rax
 ; X64-NEXT: retq
 entry:
-  %0 = bitcast i64 %x to x86_mmx
-  %1 = tail call x86_mmx @llvm.x86.mmx.pslli.q(x86_mmx %0, i32 %n)
-  %2 = bitcast x86_mmx %1 to i64
+  %0 = bitcast i64 %x to <1 x i64>
+  %1 = tail call <1 x i64> @llvm.x86.mmx.pslli.q(<1 x i64> %0, i32 %n)
+  %2 = bitcast <1 x i64> %1 to i64
   ret i64 %2
 }
 
@@ -88,11 +88,11 @@ define i64 @t2(i64 %x, i32 %n, i32 %w) nounwind {
 entry:
   %0 = insertelement <2 x i32> undef, i32 %w, i32 0
   %1 = insertelement <2 x i32> %0, i32 0, i32 1
-  %2 = bitcast <2 x i32> %1 to x86_mmx
-  %3 = tail call x86_mmx @llvm.x86.mmx.pslli.q(x86_mmx %2, i32 %n)
-  %4 = bitcast i64 %x to x86_mmx
-  %5 = tail call x86_mmx @llvm.x86.mmx.por(x86_mmx %4, x86_mmx %3)
-  %6 = bitcast x86_mmx %5 to i64
+  %2 = bitcast <2 x i32> %1 to <1 x i64>
+  %3 = tail call <1 x i64> @llvm.x86.mmx.pslli.q(<1 x i64> %2, i32 %n)
+  %4 = bitcast i64 %x to <1 x i64>
+  %5 = tail call <1 x i64> @llvm.x86.mmx.por(<1 x i64> %4, <1 x i64> %3)
+  %6 = bitcast <1 x i64> %5 to i64
   ret i64 %6
 }
 
@@ -123,14 +123,14 @@ define i64 @t3(ptr %y, ptr %n) nounwind {
 ; X64-NEXT: movq %mm0, %rax
 ; X64-NEXT: retq
 entry:
-  %0 = load x86_mmx, ptr %y, align 8
+  %0 = load <1 x i64>, ptr %y, align 8
   %1 = load i32, ptr %n, align 4
-  %2 = tail call x86_mmx @llvm.x86.mmx.pslli.q(x86_mmx %0, i32 %1)
-  %3 = bitcast x86_mmx %2 to i64
+  %2 = tail call <1 x i64> @llvm.x86.mmx.pslli.q(<1 x i64> %0, i32 %1)
+  %3 = bitcast <1 x i64> %2 to i64
   ret i64 %3
 }
 
-declare x86_mmx @llvm.x86.sse.pshuf.w(x86_mmx, i8)
-declare x86_mmx @llvm.x86.mmx.pslli.q(x86_mmx, i32)
-declare x86_mmx @llvm.x86.mmx.por(x86_mmx, x86_mmx)
+declare <1 x i64> @llvm.x86.sse.pshuf.w(<1 x i64>, i8)
+declare <1 x i64> @llvm.x86.mmx.pslli.q(<1 x i64>, i32)
+declare <1 x i64> @llvm.x86.mmx.por(<1 x i64>, <1 x i64>)
 
diff --git a/llvm/test/CodeGen/X86/expand-vr64-gr64-copy.mir b/llvm/test/CodeGen/X86/expand-vr64-gr64-copy.mir
index 559560ac20f8af..aa637e7408f22a 100644
--- a/llvm/test/CodeGen/X86/expand-vr64-gr64-copy.mir
+++ b/llvm/test/CodeGen/X86/expand-vr64-gr64-copy.mir
@@ -6,9 +6,9 @@
 
   define <2 x i32> @test_paddw(<2 x i32> %a) nounwind readnone {
   entry:
-    %0 = bitcast <2 x i32> %a to x86_mmx
-    %1 = tail call x86_mmx @llvm.x86.mmx.padd.w(x86_mmx %0, x86_mmx %0)
-    %2 = bitcast x86_mmx %1 to <2 x i32>
+    %0 = bitcast <2 x i32> %a to <1 x i64>
+    %1 = tail call <1 x i64> @llvm.x86.mmx.padd.w(<1 x i64> %0, <1 x i64> %0)
+    %2 = bitcast <1 x i64> %1 to <2 x i32>
     ret <2 x i32> %2
   }
 
diff --git a/llvm/test/CodeGen/X86/fast-isel-bc.ll b/llvm/test/CodeGen/X86/fast-isel-bc.ll
index e3bb5e7176e57b..64bdfd6d4f8632 100644
--- a/llvm/test/CodeGen/X86/fast-isel-bc.ll
+++ b/llvm/test/CodeGen/X86/fast-isel-bc.ll
@@ -4,7 +4,7 @@
 
 ; PR4684
 
-declare void @func2(x86_mmx)
+declare void @func2(<1 x i64>)
 
 ; This isn't spectacular, but it's MMX code at -O0...
 
@@ -28,7 +28,7 @@ define void @func1() nounwind {
 ; X64-NEXT: callq _func2
 ; X64-NEXT: popq %rax
 ; X64-NEXT: retq
-  %tmp0 = bitcast <2 x i32> <i32 0, i32 2> to x86_mmx
-  call void @func2(x86_mmx %tmp0)
+  %tmp0 = bitcast <2 x i32> <i32 0, i32 2> to <1 x i64>
+  call void @func2(<1 x i64> %tmp0)
   ret void
 }
diff --git a/llvm/test/CodeGen/X86/fast-isel-nontemporal.ll b/llvm/test/CodeGen/X86/fast-isel-nontemporal.ll
index c13fdae540d0b8..3b1a8f541b4902 100644
--- a/llvm/test/CodeGen/X86/fast-isel-nontemporal.ll
+++ b/llvm/test/CodeGen/X86/fast-isel-nontemporal.ll
@@ -104,12 +104,12 @@ define void @test_mmx(ptr nocapture %a0, ptr nocapture %a1) {
 ; ALL-NEXT: movntq %mm0, (%rsi)
 ; ALL-NEXT: retq
 entry:
-  %0 = load x86_mmx, ptr %a0
-  %1 = call x86_mmx @llvm.x86.mmx.psrli.q(x86_mmx %0, i32 3)
-  store x86_mmx %1, ptr %a1, align 8, !nontemporal !1
+  %0 = load <1 x i64>, ptr %a0
+  %1 = call <1 x i64> @llvm.x86.mmx.psrli.q(<1 x i64> %0, i32 3)
+  store <1 x i64> %1, ptr %a1, align 8, !nontemporal !1
   ret void
 }
-declare x86_mmx @llvm.x86.mmx.psrli.q(x86_mmx, i32) nounwind readnone
+declare <1 x i64> @llvm.x86.mmx.psrli.q(<1 x i64>, i32) nounwind readnone
 
 ;
 ; 128-bit Vector Stores
diff --git a/llvm/test/CodeGen/X86/mmx-arg-passing-x86-64.ll b/llvm/test/CodeGen/X86/mmx-arg-passing-x86-64.ll
index 54f048eb697f6c..439d7efc2d7551 100644
--- a/llvm/test/CodeGen/X86/mmx-arg-passing-x86-64.ll
+++ b/llvm/test/CodeGen/X86/mmx-arg-passing-x86-64.ll
@@ -14,12 +14,12 @@ define void @t3() nounwind {
 ; X86-64-NEXT: xorl %eax, %eax
 ; X86-64-NEXT: jmp _pass_v8qi ## TAILCALL
   %tmp3 = load <8 x i8>, ptr @g_v8qi, align 8
-  %tmp3a = bitcast <8 x i8> %tmp3 to x86_mmx
-  %tmp4 = tail call i32 (...) @pass_v8qi( x86_mmx %tmp3a ) nounwind
+  %tmp3a = bitcast <8 x i8> %tmp3 to <1 x i64>
+  %tmp4 = tail call i32 (...) @pass_v8qi( <1 x i64> %tmp3a ) nounwind
   ret void
 }
 
-define void @t4(x86_mmx %v1, x86_mmx %v2) nounwind {
+define void @t4(<1 x i64> %v1, <1 x i64> %v2) nounwind {
 ; X86-64-LABEL: t4:
 ; X86-64: ## %bb.0:
 ; X86-64-NEXT: movq %rdi, %xmm0
@@ -28,11 +28,11 @@ define void @t4(<1 x i64> %v1, <1 x i64> %v2) nounwind {
 ; X86-64-NEXT: movq %xmm1, %rdi
 ; X86-64-NEXT: xorl %eax, %eax
 ; X86-64-NEXT: jmp _pass_v8qi ## TAILCALL
-  %v1a = bitcast x86_mmx %v1 to <8 x i8>
-  %v2b = bitcast x86_mmx %v2 to <8 x i8>
+  %v1a = bitcast <1 x i64> %v1 to <8 x i8>
+  %v2b = bitcast <1 x i64> %v2 to <8 x i8>
   %tmp3 = add <8 x i8> %v1a, %v2b
-  %tmp3a = bitcast <8 x i8> %tmp3 to x86_mmx
-  %tmp4 = tail call i32 (...) @pass_v8qi( x86_mmx %tmp3a ) nounwind
+  %tmp3a = bitcast <8 x i8> %tmp3 to <1 x i64>
+  %tmp4 = tail call i32 (...) @pass_v8qi( <1 x i64> %tmp3a ) nounwind
   ret void
 }
 
diff --git a/llvm/test/CodeGen/X86/mmx-arg-passing.ll b/llvm/test/CodeGen/X86/mmx-arg-passing.ll
index 1ae9920873fafc..d933149c5e027e 100644
--- a/llvm/test/CodeGen/X86/mmx-arg-passing.ll
+++ b/llvm/test/CodeGen/X86/mmx-arg-passing.ll
@@ -8,9 +8,9 @@
 ; On Darwin x86-64, v8i8, v4i16, v2i32 values are passed in XMM[0-7].
 ; On Darwin x86-64, v1i64 values are passed in 64-bit GPRs.
 
-@u1 = external global x86_mmx
+@u1 = external global <1 x i64>
 
-define void @t1(x86_mmx %v1) nounwind {
+define void @t1(<1 x i64> %v1) nounwind {
 ; X86-32-LABEL: t1:
 ; X86-32: ## %bb.0:
 ; X86-32-NEXT: movl {{[0-9]+}}(%esp), %eax
@@ -25,11 +25,11 @@ define void @t1(<1 x i64> %v1) nounwind {
 ; X86-64-NEXT: movq _u1@GOTPCREL(%rip), %rax
 ; X86-64-NEXT: movq %rdi, (%rax)
 ; X86-64-NEXT: retq
-  store x86_mmx %v1, ptr @u1, align 8
+  store <1 x i64> %v1, ptr @u1, align 8
   ret void
 }
 
-@u2 = external global x86_mmx
+@u2 = external global <1 x i64>
 
 define void @t2(<1 x i64> %v1) nounwind {
 ; X86-32-LABEL: t2:
@@ -46,7 +46,6 @@ define void @t2(<1 x i64> %v1) nounwind {
 ; X86-64-NEXT: movq _u2@GOTPCREL(%rip), %rax
 ; X86-64-NEXT: movq %rdi, (%rax)
 ; X86-64-NEXT: retq
-  %tmp = bitcast <1 x i64> %v1 to x86_mmx
-  store x86_mmx %tmp, ptr @u2, align 8
+  store <1 x i64> %v1, ptr @u2, align 8
   ret void
 }
diff --git a/llvm/test/CodeGen/X86/mmx-arith.ll b/llvm/test/CodeGen/X86/mmx-arith.ll
index 230e763a7c7340..5bb3b17fdf4219 100644
--- a/llvm/test/CodeGen/X86/mmx-arith.ll
+++ b/llvm/test/CodeGen/X86/mmx-arith.ll
@@ -88,53 +88,53 @@ define void @test0(ptr %A, ptr %B) nounwind {
 ; X64-NEXT: emms
 ; X64-NEXT: retq
 entry:
-  %tmp1 = load x86_mmx, ptr %A
-  %tmp3 = load x86_mmx, ptr %B
-  %tmp1a = bitcast x86_mmx %tmp1 to <8 x i8>
-  %tmp3a = bitcast x86_mmx %tmp3 to <8 x i8>
+  %tmp1 = load <1 x i64>, ptr %A
+  %tmp3 = load <1 x i64>, ptr %B
+  %tmp1a = bitcast <1 x i64> %tmp1 to <8 x i8>
+  %tmp3a = bitcast <1 x i64> %tmp3 to <8 x i8>
   %tmp4 = add <8 x i8> %tmp1a, %tmp3a
-  %tmp4a = bitcast <8 x i8> %tmp4 to x86_mmx
-  store x86_mmx %tmp4a, ptr %A
-  %tmp7 = load x86_mmx, ptr %B
-  %tmp12 = tail call x86_mmx @llvm.x86.mmx.padds.b(x86_mmx %tmp4a, x86_mmx %tmp7)
-  store x86_mmx %tmp12, ptr %A
-  %tmp16 = load x86_mmx, ptr %B
-  %tmp21 = tail call x86_mmx @llvm.x86.mmx.paddus.b(x86_mmx %tmp12, x86_mmx %tmp16)
-  store x86_mmx %tmp21, ptr %A
-  %tmp27 = load x86_mmx, ptr %B
-  %tmp21a = bitcast x86_mmx %tmp21 to <8 x i8>
-  %tmp27a = bitcast x86_mmx %tmp27 to <8 x i8>
+  %tmp4a = bitcast <8 x i8> %tmp4 to <1 x i64>
+  store <1 x i64> %tmp4a, ptr %A
+  %tmp7 = load <1 x i64>, ptr %B
+  %tmp12 = tail call <1 x i64> @llvm.x86.mmx.padds.b(<1 x i64> %tmp4a, <1 x i64> %tmp7)
+  store <1 x i64> %tmp12, ptr %A
+  %tmp16 = load <1 x i64>, ptr %B
+  %tmp21 = tail call <1 x i64> @llvm.x86.mmx.paddus.b(<1 x i64> %tmp12, <1 x i64> %tmp16)
+  store <1 x i64> %tmp21, ptr %A
+  %tmp27 = load <1 x i64>, ptr %B
+  %tmp21a = bitcast <1 x i64> %tmp21 to <8 x i8>
+  %tmp27a = bitcast <1 x i64> %tmp27 to <8 x i8>
   %tmp28 = sub <8 x i8> %tmp21a, %tmp27a
-  %tmp28a = bitcast <8 x i8> %tmp28 to x86_mmx
-  store x86_mmx %tmp28a, ptr %A
-  %tmp31 = load x86_mmx, ptr %B
-  %tmp36 = tail call x86_mmx @llvm.x86.mmx.psubs.b(x86_mmx %tmp28a, x86_mmx %tmp31)
-  store x86_mmx %tmp36, ptr %A
-  %tmp40 = load x86_mmx, ptr %B
-  %tmp45 = tail call x86_mmx @llvm.x86.mmx.psubus.b(x86_mmx %tmp36, x86_mmx %tmp40)
-  store x86_mmx %tmp45, ptr %A
-  %tmp51 = load x86_mmx, ptr %B
-  %tmp45a = bitcast x86_mmx %tmp45 to <8 x i8>
-  %tmp51a = bitcast x86_mmx %tmp51 to <8 x i8>
+  %tmp28a = bitcast <8 x i8> %tmp28 to <1 x i64>
+  store <1 x i64> %tmp28a, ptr %A
+  %tmp31 = load <1 x i64>, ptr %B
+  %tmp36 = tail call <1 x i64> @llvm.x86.mmx.psubs.b(<1 x i64> %tmp28a, <1 x i64> %tmp31)
+  store <1 x i64> %tmp36, ptr %A
+  %tmp40 = load <1 x i64>, ptr %B
+  %tmp45 = tail call <1 x i64> @llvm.x86.mmx.psubus.b(<1 x i64> %tmp36, <1 x i64> %tmp40)
+  store <1 x i64> %tmp45, ptr %A
+  %tmp51 = load <1 x i64>, ptr %B
+  %tmp45a = bitcast <1 x i64> %tmp45 to <8 x i8>
+  %tmp51a = bitcast <1 x i64> %tmp51 to <8 x i8>
   %tmp52 = mul <8 x i8> %tmp45a, %tmp51a
-  %tmp52a = bitcast <8 x i8> %tmp52 to x86_mmx
-  store x86_mmx %tmp52a, ptr %A
-  %tmp57 = load x86_mmx, ptr %B
-  %tmp57a = bitcast x86_mmx %tmp57 to <8 x i8>
+  %tmp52a = bitcast <8 x i8> %tmp52 to <1 x i64>
+  store <1 x i64> %tmp52a, ptr %A
+  %tmp57 = load <1 x i64>, ptr %B
+  %tmp57a = bitcast <1 x i64> %tmp57 to <8 x i8>
   %tmp58 = and <8 x i8> %tmp52, %tmp57a
-  %tmp58a = bitcast <8 x i8> %tmp58 to x86_mmx
-  store x86_mmx %tmp58a, ptr %A
-  %tmp63 = load x86_mmx, ptr %B
-  %tmp63a = bitcast x86_mmx %tmp63 to <8 x i8>
+  %tmp58a = bitcast <8 x i8> %tmp58 to <1 x i64>
+  store <1 x i64> %tmp58a, ptr %A
+  %tmp63 = load <1 x i64>, ptr %B
+  %tmp63a = bitcast <1 x i64> %tmp63 to <8 x i8>
   %tmp64 = or <8 x i8> %tmp58, %tmp63a
-  %tmp64a = bitcast <8 x i8> %tmp64 to x86_mmx
-  store x86_mmx %tmp64a, ptr %A
-  %tmp69 = load x86_mmx, ptr %B
-  %tmp69a = bitcast x86_mmx %tmp69 to <8 x i8>
-  %tmp64b = bitcast x86_mmx %tmp64a to <8 x i8>
+  %tmp64a = bitcast <8 x i8> %tmp64 to <1 x i64>
+  store <1 x i64> %tmp64a, ptr %A
+  %tmp69 = load <1 x i64>, ptr %B
+  %tmp69a = bitcast <1 x i64> %tmp69 to <8 x i8>
+  %tmp64b = bitcast <1 x i64> %tmp64a to <8 x i8>
   %tmp70 = xor <8 x i8> %tmp64b, %tmp69a
-  %tmp70a = bitcast <8 x i8> %tmp70 to x86_mmx
-  store x86_mmx %tmp70a, ptr %A
+  %tmp70a = bitcast <8 x i8> %tmp70 to <1 x i64>
+  store <1 x i64> %tmp70a, ptr %A
   tail call void @llvm.x86.mmx.emms()
   ret void
 }
@@ -196,42 +196,42 @@ define void @test1(ptr %A, ptr %B) nounwind {
 ; X64-NEXT: emms
 ; X64-NEXT: retq
 entry:
-  %tmp1 = load x86_mmx, ptr %A
-  %tmp3 = load x86_mmx, ptr %B
-  %tmp1a = bitcast x86_mmx %tmp1 to <2 x i32>
-  %tmp3a = bitcast x86_mmx %tmp3 to <2 x i32>
+  %tmp1 = load <1 x i64>, ptr %A
+  %tmp3 = load <1 x i64>, ptr %B
+  %tmp1a = bitcast <1 x i64> %tmp1 to <2 x i32>
+  %tmp3a = bitcast <1 x i64> %tmp3 to <2 x i32>
   %tmp4 = add <2 x i32> %tmp1a, %tmp3a
-  %tmp4a = bitcast <2 x i32> %tmp4 to x86_mmx
-  store x86_mmx %tmp4a, ptr %A
-  %tmp9 = load x86_mmx, ptr %B
-  %tmp9a = bitcast x86_mmx %tmp9 to <2 x i32>
+  %tmp4a = bitcast <2 x i32> %tmp4 to <1 x i64>
+  store <1 x i64> %tmp4a, ptr %A
+  %tmp9 = load <1 x i64>, ptr %B
+  %tmp9a = bitcast <1 x i64> %tmp9 to <2 x i32>
   %tmp10 = sub <2 x i32> %tmp4, %tmp9a
-  %tmp10a = bitcast <2 x i32> %tmp4 to x86_mmx
-  store x86_mmx %tmp10a, ptr %A
-  %tmp15 = load x86_mmx, ptr %B
-  %tmp10b = bitcast x86_mmx %tmp10a to <2 x i32>
-  %tmp15a = bitcast x86_mmx %tmp15 to <2 x i32>
+  %tmp10a = bitcast <2 x i32> %tmp4 to <1 x i64>
+  store <1 x i64> %tmp10a, ptr %A
+  %tmp15 = load <1 x i64>, ptr %B
+  %tmp10b = bitcast <1 x i64> %tmp10a to <2 x i32>
+  %tmp15a = bitcast <1 x i64> %tmp15 to <2 x i32>
   %tmp16 = mul <2 x i32> %tmp10b, %tmp15a
-  %tmp16a = bitcast <2 x i32> %tmp16 to x86_mmx
-  store x86_mmx %tmp16a, ptr %A
-  %tmp21 = load x86_mmx, ptr %B
-  %tmp16b = bitcast x86_mmx %tmp16a to <2 x i32>
-  %tmp21a = bitcast x86_mmx %tmp21 to <2 x i32>
+  %tmp16a = bitcast <2 x i32> %tmp16 to <1 x i64>
+  store <1 x i64> %tmp16a, ptr %A
+  %tmp21 = load <1 x i64>, ptr %B
+  %tmp16b = bitcast <1 x i64> %tmp16a to <2 x i32>
+  %tmp21a = bitcast <1 x i64> %tmp21 to <2 x i32>
   %tmp22 = and <2 x i32> %tmp16b, %tmp21a
-  %tmp22a = bitcast <2 x i32> %tmp22 to x86_mmx
-  store x86_mmx %tmp22a, ptr %A
-  %tmp27 = load x86_mmx, ptr %B
-  %tmp22b = bitcast x86_mmx %tmp22a to <2 x i32>
-  %tmp27a = bitcast x86_mmx %tmp27 to <2 x i32>
+  %tmp22a = bitcast <2 x i32> %tmp22 to <1 x i64>
+  store <1 x i64> %tmp22a, ptr %A
+  %tmp27 = load <1 x i64>, ptr %B
+  %tmp22b = bitcast <1 x i64> %tmp22a to <2 x i32>
+  %tmp27a = bitcast <1 x i64> %tmp27 to <2 x i32>
   %tmp28 = or <2 x i32> %tmp22b, %tmp27a
-  %tmp28a = bitcast <2 x i32> %tmp28 to x86_mmx
-  store x86_mmx %tmp28a, ptr %A
-  %tmp33 = load x86_mmx, ptr %B
-  %tmp28b = bitcast x86_mmx %tmp28a to <2 x i32>
-  %tmp33a = bitcast x86_mmx %tmp33 to <2 x i32>
+  %tmp28a = bitcast <2 x i32> %tmp28 to <1 x i64>
+  store <1 x i64> %tmp28a, ptr %A
+  %tmp33 = load <1 x i64>, ptr %B
+  %tmp28b = bitcast <1 x i64> %tmp28a to <2 x i32>
+  %tmp33a = bitcast <1 x i64> %tmp33 to <2 x i32>
   %tmp34 = xor <2 x i32> %tmp28b, %tmp33a
-  %tmp34a = bitcast <2 x i32> %tmp34 to x86_mmx
-  store x86_mmx %tmp34a, ptr %A
+  %tmp34a = bitcast <2 x i32> %tmp34 to <1 x i64>
+  store <1 x i64> %tmp34a, ptr %A
   tail call void @llvm.x86.mmx.emms( )
   ret void
 }
@@ -336,62 +336,61 @@ define void @test2(ptr %A, ptr %B) nounwind {
 ; X64-NEXT: emms
 ; X64-NEXT: retq
 entry:
-  %tmp1 = load x86_mmx, ptr %A
-  %tmp3 = load x86_mmx, ptr %B
-  %tmp1a = bitcast x86_mmx %tmp1 to <4 x i16>
-  %tmp3a = bitcast x86_mmx %tmp3 to <4 x i16>
+  %tmp1 = load <1 x i64>, ptr %A
+  %tmp3 = load <1 x i64>, ptr %B
+  %tmp1a = bitcast <1 x i64> %tmp1 to <4 x i16>
+  %tmp3a = bitcast <1 x i64> %tmp3 to <4 x i16>
   %tmp4 = add <4 x i16> %tmp1a, %tmp3a
-  %tmp4a = bitcast <4 x i16> %tmp4 to x86_mmx
-  store x86_mmx %tmp4a, ptr %A
-  %tmp7 = load x86_mmx, ptr %B
-  %tmp12 = tail call x86_mmx @llvm.x86.mmx.padds.w(x86_mmx %tmp4a, x86_mmx %tmp7)
-  store x86_mmx %tmp12, ptr %A
-  %tmp16 = load x86_mmx, ptr %B
-  %tmp21 = tail call x86_mmx @llvm.x86.mmx.paddus.w(x86_mmx %tmp12, x86_mmx %tmp16)
-  store x86_mmx %tmp21, ptr %A
-  %tmp27 = load x86_mmx, ptr %B
-  %tmp21a = bitcast x86_mmx %tmp21 to <4 x i16>
-  %tmp27a = bitcast x86_mmx %tmp27 to <4 x i16>
+  %tmp4a = bitcast <4 x i16> %tmp4 to <1 x i64>
+  store <1 x i64> %tmp4a, ptr %A
+  %tmp7 = load <1 x i64>, ptr %B
+  %tmp12 = tail call <1 x i64> @llvm.x86.mmx.padds.w(<1 x i64> %tmp4a, <1 x i64> %tmp7)
+  store <1 x i64> %tmp12, ptr %A
+  %tmp16 = load <1 x i64>, ptr %B
+  %tmp21 = tail call <1 x i64> @llvm.x86.mmx.paddus.w(<1 x i64> %tmp12, <1 x i64> %tmp16)
+  store <1 x i64> %tmp21, ptr %A
+  %tmp27 = load <1 x i64>, ptr %B
+  %tmp21a = bitcast <1 x i64> %tmp21 to <4 x i16>
+  %tmp27a = bitcast <1 x i64> %tmp27 to <4 x i16>
   %tmp28 = sub <4 x i16> %tmp21a, %tmp27a
-  %tmp28a = bitcast <4 x i16> %tmp28 to x86_mmx
-  store x86_mmx %tmp28a, ptr %A
-  %tmp31 = load x86_mmx, ptr %B
-  %tmp36 = tail call x86_mmx @llvm.x86.mmx.psubs.w(x86_mmx %tmp28a, x86_mmx %tmp31)
-  store x86_mmx %tmp36, ptr %A
-  %tmp40 = load x86_mmx, ptr %B
-  %tmp45 = tail call x86_mmx @llvm.x86.mmx.psubus.w(x86_mmx %tmp36, x86_mmx %tmp40)
-  store x86_mmx %tmp45, ptr %A
-  %tmp51 = load x86_mmx, ptr %B
-  %tmp45a = bitcast x86_mmx %tmp45 to <4 x i16>
-  %tmp51a = bitcast x86_mmx %tmp51 to <4 x i16>
+  %tmp28a = bitcast <4 x i16> %tmp28 to <1 x i64>
+  store <1 x i64> %tmp28a, ptr %A
+  %tmp31 = load <1 x i64>, ptr %B
+  %tmp36 = tail call <1 x i64> @llvm.x86.mmx.psubs.w(<1 x i64> %tmp28a, <1 x i64> %tmp31)
+  store <1 x i64> %tmp36, ptr %A
+  %tmp40 = load <1 x i64>, ptr %B
+  %tmp45 = tail call <1 x i64> @llvm.x86.mmx.psubus.w(<1 x i64> %tmp36, <1 x i64> %tmp40)
+  store <1 x i64> %tmp45, ptr %A
+  %tmp51 = load <1 x i64>, ptr %B
+  %tmp45a = bitcast <1 x i64> %tmp45 to <4 x i16>
+  %tmp51a = bitcast <1 x i64> %tmp51 to <4 x i16>
   %tmp52 = mul <4 x i16> %tmp45a, %tmp51a
-  %tmp52a = bitcast <4 x i16> %tmp52 to x86_mmx
-  store x86_mmx %tmp52a, ptr %A
-  %tmp55 = load x86_mmx, ptr %B
-  %tmp60 = tail call x86_mmx @llvm.x86.mmx.pmulh.w(x86_mmx %tmp52a, x86_mmx %tmp55)
-  store x86_mmx %tmp60, ptr %A
-  %tmp64 = load x86_mmx, ptr %B
-  %tmp69 = tail call x86_mmx @llvm.x86.mmx.pmadd.wd(x86_mmx %tmp60, x86_mmx %tmp64)
-  %tmp70 = bitcast x86_mmx %tmp69 to x86_mmx
-  store x86_mmx %tmp70, ptr %A
-  %tmp75 = load x86_mmx, ptr %B
-  %tmp70a = bitcast x86_mmx %tmp70 to <4 x i16>
-  %tmp75a = bitcast x86_mmx %tmp75 to <4 x i16>
+  %tmp52a = bitcast <4 x i16> %tmp52 to <1 x i64>
+  store <1 x i64> %tmp52a, ptr %A
+  %tmp55 = load <1 x i64>, ptr %B
+  %tmp60 = tail call <1 x i64> @llvm.x86.mmx.pmulh.w(<1 x i64> %tmp52a, <1 x i64> %tmp55)
+  store <1 x i64> %tmp60, ptr %A
+  %tmp64 = load <1 x i64>, ptr %B
+  %tmp69 = tail call <1 x i64> @llvm.x86.mmx.pmadd.wd(<1 x i64> %tmp60, <1 x i64> %tmp64)
+  store <1 x i64> %tmp69, ptr %A
+  %tmp75 = load <1 x i64>, ptr %B
+  %tmp70a = bitcast <1 x i64> %tmp69 to <4 x i16>
+  %tmp75a = bitcast <1 x i64> %tmp75 to <4 x i16>
   %tmp76 = and <4 x i16> %tmp70a, %tmp75a
-  %tmp76a = bitcast <4 x i16> %tmp76 to x86_mmx
-  store x86_mmx %tmp76a, ptr %A
-  %tmp81 = load x86_mmx, ptr %B
-  %tmp76b = bitcast x86_mmx %tmp76a to <4 x i16>
-  %tmp81a = bitcast x86_mmx %tmp81 to <4 x i16>
+  %tmp76a = bitcast <4 x i16> %tmp76 to <1 x i64>
+  store <1 x i64> %tmp76a, ptr %A
+  %tmp81 = load <1 x i64>, ptr %B
+  %tmp76b = bitcast <1 x i64> %tmp76a to <4 x i16>
+  %tmp81a = bitcast <1 x i64> %tmp81 to <4 x i16>
   %tmp82 = or <4 x i16> %tmp76b, %tmp81a
-  %tmp82a = bitcast <4 x i16> %tmp82 to x86_mmx
-  store x86_mmx %tmp82a, ptr %A
-  %tmp87 = load x86_mmx, ptr %B
-  %tmp82b = bitcast x86_mmx %tmp82a to <4 x i16>
-  %tmp87a = bitcast x86_mmx %tmp87 to <4 x i16>
+  %tmp82a = bitcast <4 x i16> %tmp82 to <1 x i64>
+  store <1 x i64> %tmp82a, ptr %A
+  %tmp87 = load <1 x i64>, ptr %B
+  %tmp82b = bitcast <1 x i64> %tmp82a to <4 x i16>
+  %tmp87a = bitcast <1 x i64> %tmp87 to <4 x i16>
   %tmp88 = xor <4 x i16> %tmp82b, %tmp87a
-  %tmp88a = bitcast <4 x i16> %tmp88 to x86_mmx
-  store x86_mmx %tmp88a, ptr %A
+  %tmp88a = bitcast <4 x i16> %tmp88 to <1 x i64>
+  store <1 x i64> %tmp88a, ptr %A
   tail call void @llvm.x86.mmx.emms( )
   ret void
 }
@@ -587,10 +586,10 @@ define void @ti8a(double %a, double %b) nounwind {
 ; X64-NEXT: movq %mm1, 0
 ; X64-NEXT: retq
 entry:
-  %tmp1 = bitcast double %a to x86_mmx
-  %tmp2 = bitcast double %b to x86_mmx
-  %tmp3 = tail call x86_mmx @llvm.x86.mmx.padd.b(x86_mmx %tmp1, x86_mmx %tmp2)
-  store x86_mmx %tmp3, ptr null
+  %tmp1 = bitcast double %a to <1 x i64>
+  %tmp2 = bitcast double %b to <1 x i64>
+  %tmp3 = tail call <1 x i64> @llvm.x86.mmx.padd.b(<1 x i64> %tmp1, <1 x i64> %tmp2)
+  store <1 x i64> %tmp3, ptr null
   ret void
 }
 
@@ -610,10 +609,10 @@ define void @ti16a(double %a, double %b) nounwind {
 ; X64-NEXT: movq %mm1, 0
 ; X64-NEXT: retq
 entry:
-  %tmp1 = bitcast double %a to x86_mmx
-  %tmp2 = bitcast double %b to x86_mmx
-  %tmp3 = tail call x86_mmx @llvm.x86.mmx.padd.w(x86_mmx %tmp1, x86_mmx %tmp2)
-  store x86_mmx %tmp3, ptr null
+  %tmp1 = bitcast double %a to <1 x i64>
+  %tmp2 = bitcast double %b to <1 x i64>
+  %tmp3 = tail call <1 x i64> @llvm.x86.mmx.padd.w(<1 x i64> %tmp1, <1 x i64> %tmp2)
+  store <1 x i64> %tmp3, ptr null
   ret void
 }
 
@@ -633,10 +632,10 @@ define void @ti32a(double %a, double %b) nounwind {
 ; X64-NEXT: movq %mm1, 0
 ; X64-NEXT: retq
 entry:
-  %tmp1 = bitcast double %a to x86_mmx
-  %tmp2 = bitcast double %b to x86_mmx
-  %tmp3 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %tmp1, x86_mmx %tmp2)
-  store x86_mmx %tmp3, ptr null
+  %tmp1 = bitcast double %a to <1 x i64>
+  %tmp2 = bitcast double %b to <1 x i64>
+  %tmp3 = tail call <1 x i64> @llvm.x86.mmx.padd.d(<1 x i64> %tmp1, <1 x i64> %tmp2)
+  store <1 x i64> %tmp3, ptr null
   ret void
 }
 
@@ -656,10 +655,10 @@ define void @ti64a(double %a, double %b) nounwind {
 ; X64-NEXT: movq %mm1, 0
 ; X64-NEXT: retq
 entry:
-  %tmp1 = bitcast double %a to x86_mmx
-  %tmp2 = bitcast double %b to x86_mmx
-  %tmp3 = tail call x86_mmx @llvm.x86.mmx.padd.q(x86_mmx %tmp1, x86_mmx %tmp2)
-  store x86_mmx %tmp3, ptr null
+  %tmp1 = bitcast double %a to <1 x i64>
+  %tmp2 = bitcast double %b to <1 x i64>
+  %tmp3 = tail call <1 x i64> @llvm.x86.mmx.padd.q(<1 x i64> %tmp1, <1 x i64> %tmp2)
+  store <1 x i64> %tmp3, ptr null
   ret void
 }
 
@@ -687,28 +686,28 @@ define i64 @pr43922() nounwind {
 ; X64-NEXT: movq %mm0, %rax
 ; X64-NEXT: retq
 entry:
-  %0 = tail call x86_mmx @llvm.x86.mmx.psrai.d(x86_mmx bitcast (<2 x i32> <i32 2143289344, i32 2143289344> to x86_mmx), i32 268435456)
-  %1 = bitcast x86_mmx %0 to i64
+  %0 = tail call <1 x i64> @llvm.x86.mmx.psrai.d(<1 x i64> bitcast (<2 x i32> <i32 2143289344, i32 2143289344> to <1 x i64>), i32 268435456)
+  %1 = bitcast <1 x i64> %0 to i64
   ret i64 %1
 }
 
-declare x86_mmx @llvm.x86.mmx.psrai.d(x86_mmx, i32)
+declare <1 x i64> @llvm.x86.mmx.psrai.d(<1 x i64>, i32)
 
-declare x86_mmx @llvm.x86.mmx.padd.b(x86_mmx, x86_mmx)
-declare x86_mmx @llvm.x86.mmx.padd.w(x86_mmx, x86_mmx)
-declare x86_mmx @llvm.x86.mmx.padd.d(x86_mmx, x86_mmx)
-declare x86_mmx @llvm.x86.mmx.padd.q(x86_mmx, x86_mmx)
+declare <1 x i64> @llvm.x86.mmx.padd.b(<1 x i64>, <1 x i64>)
+declare <1 x i64> @llvm.x86.mmx.padd.w(<1 x i64>, <1 x i64>)
+declare <1 x i64> @llvm.x86.mmx.padd.d(<1 x i64>, <1 x i64>)
+declare <1 x i64> @llvm.x86.mmx.padd.q(<1 x i64>, <1 x i64>)
 
-declare x86_mmx @llvm.x86.mmx.paddus.b(x86_mmx, x86_mmx)
-declare x86_mmx @llvm.x86.mmx.psubus.b(x86_mmx, x86_mmx)
-declare x86_mmx @llvm.x86.mmx.paddus.w(x86_mmx, x86_mmx)
-declare x86_mmx @llvm.x86.mmx.psubus.w(x86_mmx, x86_mmx)
-declare x86_mmx @llvm.x86.mmx.pmulh.w(x86_mmx, x86_mmx)
-declare x86_mmx @llvm.x86.mmx.pmadd.wd(x86_mmx, x86_mmx)
+declare <1 x i64> @llvm.x86.mmx.paddus.b(<1 x i64>, <1 x i64>)
+declare <1 x i64> @llvm.x86.mmx.psubus.b(<1 x i64>, <1 x i64>)
+declare <1 x i64> @llvm.x86.mmx.paddus.w(<1 x i64>, <1 x i64>)
+declare <1 x i64> @llvm.x86.mmx.psubus.w(<1 x i64>, <1 x i64>)
+declare <1 x i64> @llvm.x86.mmx.pmulh.w(<1 x i64>, <1 x i64>)
+declare <1 x i64> @llvm.x86.mmx.pmadd.wd(<1 x i64>, <1 x i64>)
 
 declare void @llvm.x86.mmx.emms()
 
-declare x86_mmx @llvm.x86.mmx.padds.b(x86_mmx, x86_mmx)
-declare x86_mmx @llvm.x86.mmx.padds.w(x86_mmx, x86_mmx)
-declare x86_mmx @llvm.x86.mmx.psubs.b(x86_mmx, x86_mmx)
-declare x86_mmx @llvm.x86.mmx.psubs.w(x86_mmx, x86_mmx)
+declare <1 x i64> @llvm.x86.mmx.padds.b(<1 x i64>, <1 x i64>)
+declare <1 x i64> @llvm.x86.mmx.padds.w(<1 x i64>, <1 x i64>)
+declare <1 x i64> @llvm.x86.mmx.psubs.b(<1 x i64>, <1 x i64>)
+declare <1 x i64> @llvm.x86.mmx.psubs.w(<1 x i64>, <1 x i64>)
diff --git a/llvm/test/CodeGen/X86/mmx-bitcast-fold.ll b/llvm/test/CodeGen/X86/mmx-bitcast-fold.ll
index a1240911cd36a1..fb2517f5a891be 100644
--- a/llvm/test/CodeGen/X86/mmx-bitcast-fold.ll
+++ b/llvm/test/CodeGen/X86/mmx-bitcast-fold.ll
@@ -4,9 +4,9 @@
 
 define void @bar() {
 entry:
-  %0 = bitcast double 0.0 to x86_mmx
-  %1 = call x86_mmx @foo(x86_mmx %0)
+  %0 = bitcast double 0.0 to <1 x i64>
+  %1 = call <1 x i64> @foo(<1 x i64> %0)
   ret void
 }
 
-declare x86_mmx @foo(x86_mmx)
+declare <1 x i64> @foo(<1 x i64>)
diff --git a/llvm/test/CodeGen/X86/mmx-bitcast.ll b/llvm/test/CodeGen/X86/mmx-bitcast.ll
index 49c2027f06604e..5e5be820dd5b42 100644
--- a/llvm/test/CodeGen/X86/mmx-bitcast.ll
+++ b/llvm/test/CodeGen/X86/mmx-bitcast.ll
@@ -8,9 +8,9 @@ define i64 @t0(ptr %p) {
 ; CHECK-NEXT: paddq %mm0, %mm0
 ; CHECK-NEXT: movq %mm0, %rax
 ; CHECK-NEXT: retq
-  %t = load x86_mmx, ptr %p
-  %u = tail call x86_mmx @llvm.x86.mmx.padd.q(x86_mmx %t, x86_mmx %t)
-  %s = bitcast x86_mmx %u to i64
+  %t = load <1 x i64>, ptr %p
+  %u = tail call <1 x i64> @llvm.x86.mmx.padd.q(<1 x i64> %t, <1 x i64> %t)
+  %s = bitcast <1 x i64> %u to i64
   ret i64 %s
 }
 
@@ -21,9 +21,9 @@ define i64 @t1(ptr %p) {
 ; CHECK-NEXT: paddd %mm0, %mm0
 ; CHECK-NEXT: movq %mm0, %rax
 ; CHECK-NEXT: retq
-  %t = load x86_mmx, ptr %p
-  %u = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %t, x86_mmx %t)
-  %s = bitcast x86_mmx %u to i64
+  %t = load <1 x i64>, ptr %p
+  %u = tail call <1 x i64> @llvm.x86.mmx.padd.d(<1 x i64> %t, <1 x i64> %t)
+  %s = bitcast <1 x i64> %u to i64
   ret i64 %s
 }
 
@@ -34,9 +34,9 @@ define i64 @t2(ptr %p) {
 ; CHECK-NEXT: paddw %mm0, %mm0
 ; CHECK-NEXT: movq %mm0, %rax
 ; CHECK-NEXT: retq
-  %t = load x86_mmx, ptr %p
-  %u = tail call x86_mmx @llvm.x86.mmx.padd.w(x86_mmx %t, x86_mmx %t)
-  %s = bitcast x86_mmx %u to i64
+  %t = load <1 x i64>, ptr %p
+  %u = tail call <1 x i64> @llvm.x86.mmx.padd.w(<1 x i64> %t, <1 x i64> %t)
+  %s = bitcast <1 x i64> %u to i64
   ret i64 %s
 }
 
@@ -47,13 +47,13 @@ define i64 @t3(ptr %p) {
 ; CHECK-NEXT: paddb %mm0, %mm0
 ; CHECK-NEXT: movq %mm0, %rax
 ; CHECK-NEXT: retq
-  %t = load x86_mmx, ptr %p
-  %u = tail call x86_mmx @llvm.x86.mmx.padd.b(x86_mmx %t, x86_mmx %t)
-  %s = bitcast x86_mmx %u to i64
+  %t = load <1 x i64>, ptr %p
+  %u = tail call <1 x i64> @llvm.x86.mmx.padd.b(<1 x i64> %t, <1 x i64> %t)
+  %s = bitcast <1 x i64> %u to i64
   ret i64 %s
 }
 
-@R = external global x86_mmx
+@R = external global <1 x i64>
 
 define void @t4(<1 x i64> %A, <1 x i64> %B) {
 ; CHECK-LABEL: t4:
@@ -66,10 +66,8 @@ define void @t4(<1 x i64> %A, <1 x i64> %B) {
 ; CHECK-NEXT: emms
 ; CHECK-NEXT: retq
 entry:
-  %tmp2 = bitcast <1 x i64> %A to x86_mmx
-  %tmp3 = bitcast <1 x i64> %B to x86_mmx
-  %tmp7 = tail call x86_mmx @llvm.x86.mmx.paddus.w(x86_mmx %tmp2, x86_mmx %tmp3)
-  store x86_mmx %tmp7, ptr @R
+  %tmp7 = tail call <1 x i64> @llvm.x86.mmx.paddus.w(<1 x i64> %A, <1 x i64> %B)
+  store <1 x i64> %tmp7, ptr @R
   tail call void @llvm.x86.mmx.emms()
   ret void
 }
@@ -88,7 +86,7 @@ define i64 @t5(i32 %a, i32 %b) nounwind readnone {
   ret i64 %conv
 }
 
-declare x86_mmx @llvm.x86.mmx.pslli.q(x86_mmx, i32)
+declare <1 x i64> @llvm.x86.mmx.pslli.q(<1 x i64>, i32)
 
 define <1 x i64> @t6(i64 %t) {
 ; CHECK-LABEL: t6:
@@ -98,16 +96,14 @@ define <1 x i64> @t6(i64 %t) {
 ; CHECK-NEXT: movq %mm0, %rax
 ; CHECK-NEXT: retq
   %t1 = insertelement <1 x i64> undef, i64 %t, i32 0
-  %t0 = bitcast <1 x i64> %t1 to x86_mmx
-  %t2 = tail call x86_mmx @llvm.x86.mmx.pslli.q(x86_mmx %t0, i32 48)
-  %t3 = bitcast x86_mmx %t2 to <1 x i64>
-  ret <1 x i64> %t3
+  %t2 = tail call <1 x i64> @llvm.x86.mmx.pslli.q(<1 x i64> %t1, i32 48)
+  ret <1 x i64> %t2
 }
 
-declare x86_mmx @llvm.x86.mmx.paddus.w(x86_mmx, x86_mmx)
-declare x86_mmx @llvm.x86.mmx.padd.b(x86_mmx, x86_mmx)
-declare x86_mmx @llvm.x86.mmx.padd.w(x86_mmx, x86_mmx)
-declare x86_mmx @llvm.x86.mmx.padd.d(x86_mmx, x86_mmx)
-declare x86_mmx @llvm.x86.mmx.padd.q(x86_mmx, x86_mmx)
+declare <1 x i64> @llvm.x86.mmx.paddus.w(<1 x i64>, <1 x i64>)
+declare <1 x i64> @llvm.x86.mmx.padd.b(<1 x i64>, <1 x i64>)
+declare <1 x i64> @llvm.x86.mmx.padd.w(<1 x i64>, <1 x i64>)
+declare <1 x i64> @llvm.x86.mmx.padd.d(<1 x i64>, <1 x i64>)
+declare <1 x i64> @llvm.x86.mmx.padd.q(<1 x i64>, <1 x i64>)
 
 declare void @llvm.x86.mmx.emms()
diff --git a/llvm/test/CodeGen/X86/mmx-build-vector.ll b/llvm/test/CodeGen/X86/mmx-build-vector.ll
index b919c9a33ea2f9..d8a010bacc683d 100644
--- a/llvm/test/CodeGen/X86/mmx-build-vector.ll
+++ b/llvm/test/CodeGen/X86/mmx-build-vector.ll
@@ -8,7 +8,7 @@
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+mmx,+avx2 | FileCheck %s --check-prefix=X64
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+mmx,+avx512f | FileCheck %s --check-prefix=X64
 
-declare x86_mmx @llvm.x86.mmx.padd.d(x86_mmx, x86_mmx)
+declare <1 x i64> @llvm.x86.mmx.padd.d(<1 x i64>, <1 x i64>)
 
 ;
 ; v2i32
@@ -35,9 +35,9 @@ define void @build_v2i32_01(ptr%p0, i32 %a0, i32 %a1) nounwind {
 ; X64-NEXT: retq
   %1 = insertelement <2 x i32> undef, i32 %a0, i32 0
   %2 = insertelement <2 x i32> %1, i32 %a1, i32 1
-  %3 = bitcast <2 x i32> %2 to x86_mmx
-  %4 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %3, x86_mmx %3)
-  store x86_mmx %4, ptr%p0
+  %3 = bitcast <2 x i32> %2 to <1 x i64>
+  %4 = tail call <1 x i64> @llvm.x86.mmx.padd.d(<1 x i64> %3, <1 x i64> %3)
+  store <1 x i64> %4, ptr%p0
   ret void
 }
 
@@ -58,9 +58,9 @@ define void @build_v2i32_0z(ptr%p0, i32 %a0, i32 %a1) nounwind {
 ; X64-NEXT: retq
   %1 = insertelement <2 x i32> undef, i32 %a0, i32 0
   %2 = insertelement <2 x i32> %1, i32 0, i32 1
-  %3 = bitcast <2 x i32> %2 to x86_mmx
-  %4 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %3, x86_mmx %3)
-  store x86_mmx %4, ptr%p0
+  %3 = bitcast <2 x i32> %2 to <1 x i64>
+  %4 = tail call <1 x i64> @llvm.x86.mmx.padd.d(<1 x i64> %3, <1 x i64> %3)
+  store <1 x i64> %4, ptr%p0
   ret void
 }
 
@@ -92,9 +92,9 @@ define void @build_v2i32_u1(ptr%p0, i32 %a0, i32 %a1) nounwind {
 ; X64-NEXT: retq
   %1 = insertelement <2 x i32> undef, i32 undef, i32 0
   %2 = insertelement <2 x i32> %1, i32 %a1, i32 1
-  %3 = bitcast <2 x i32> %2 to x86_mmx
-  %4 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %3, x86_mmx %3)
-  store x86_mmx %4, ptr%p0
+  %3 = bitcast <2 x i32> %2 to <1 x i64>
+  %4 = tail call <1 x i64> @llvm.x86.mmx.padd.d(<1 x i64> %3, <1 x i64> %3)
+  store <1 x i64> %4, ptr%p0
   ret void
 }
 
@@ -119,9 +119,9 @@ define void @build_v2i32_z1(ptr%p0, i32 %a0, i32 %a1) nounwind {
 ; X64-NEXT: retq
   %1 = insertelement <2 x i32> undef, i32 0, i32 0
   %2 = insertelement <2 x i32> %1, i32 %a1, i32 1
-  %3 = bitcast <2 x i32> %2 to x86_mmx
-  %4 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %3, x86_mmx %3)
-  store x86_mmx %4, ptr%p0
+  %3 = bitcast <2 x i32> %2 to <1 x i64>
+  %4 = tail call <1 x i64> @llvm.x86.mmx.padd.d(<1 x i64> %3, <1 x i64> %3)
+  store <1 x i64> %4, ptr%p0
   ret void
 }
 
@@ -153,9 +153,9 @@ define void @build_v2i32_00(ptr%p0, i32 %a0, i32 %a1) nounwind {
 ; X64-NEXT: retq
   %1 = insertelement <2 x i32> undef, i32 %a0, i32 0
   %2 = insertelement <2 x i32> %1, i32 %a0, i32 1
-  %3 = bitcast <2 x i32> %2 to x86_mmx
-  %4 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %3, x86_mmx %3)
-  store x86_mmx %4, ptr%p0
+  %3 = bitcast <2 x i32> %2 to <1 x i64>
+  %4 = tail call <1 x i64> @llvm.x86.mmx.padd.d(<1 x i64> %3, <1 x i64> %3)
+  store <1 x i64> %4, ptr%p0
   ret void
 }
 
@@ -194,9 +194,9 @@ define void @build_v4i16_0123(ptr%p0, i16 %a0, i16 %a1, i16 %a2, i16 %a3) nounwi
   %2 = insertelement <4 x i16> %1, i16 %a1, i32 1
   %3 = insertelement <4 x i16> %2, i16 %a2, i32 2
   %4 = insertelement <4 x i16> %3, i16 %a3, i32 3
-  %5 = bitcast <4 x i16> %4 to x86_mmx
-  %6 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %5, x86_mmx %5)
-  store x86_mmx %6, ptr%p0
+  %5 = bitcast <4 x i16> %4 to <1 x i64>
+ %6 = tail call <1 x i64> @llvm.x86.mmx.padd.d(<1 x i64> %5, <1 x i64> %5) + store <1 x i64> %6, ptr%p0 ret void } @@ -229,9 +229,9 @@ define void @build_v4i16_01zz(ptr%p0, i16 %a0, i16 %a1, i16 %a2, i16 %a3) nounwi %2 = insertelement <4 x i16> %1, i16 %a1, i32 1 %3 = insertelement <4 x i16> %2, i16 0, i32 2 %4 = insertelement <4 x i16> %3, i16 0, i32 3 - %5 = bitcast <4 x i16> %4 to x86_mmx - %6 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %5, x86_mmx %5) - store x86_mmx %6, ptr%p0 + %5 = bitcast <4 x i16> %4 to <1 x i64> + %6 = tail call <1 x i64> @llvm.x86.mmx.padd.d(<1 x i64> %5, <1 x i64> %5) + store <1 x i64> %6, ptr%p0 ret void } @@ -254,9 +254,9 @@ define void @build_v4i16_0uuz(ptr%p0, i16 %a0, i16 %a1, i16 %a2, i16 %a3) nounwi %2 = insertelement <4 x i16> %1, i16 undef, i32 1 %3 = insertelement <4 x i16> %2, i16 undef, i32 2 %4 = insertelement <4 x i16> %3, i16 0, i32 3 - %5 = bitcast <4 x i16> %4 to x86_mmx - %6 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %5, x86_mmx %5) - store x86_mmx %6, ptr%p0 + %5 = bitcast <4 x i16> %4 to <1 x i64> + %6 = tail call <1 x i64> @llvm.x86.mmx.padd.d(<1 x i64> %5, <1 x i64> %5) + store <1 x i64> %6, ptr%p0 ret void } @@ -281,9 +281,9 @@ define void @build_v4i16_0zuz(ptr%p0, i16 %a0, i16 %a1, i16 %a2, i16 %a3) nounwi %2 = insertelement <4 x i16> %1, i16 0, i32 1 %3 = insertelement <4 x i16> %2, i16 undef, i32 2 %4 = insertelement <4 x i16> %3, i16 0, i32 3 - %5 = bitcast <4 x i16> %4 to x86_mmx - %6 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %5, x86_mmx %5) - store x86_mmx %6, ptr%p0 + %5 = bitcast <4 x i16> %4 to <1 x i64> + %6 = tail call <1 x i64> @llvm.x86.mmx.padd.d(<1 x i64> %5, <1 x i64> %5) + store <1 x i64> %6, ptr%p0 ret void } @@ -316,9 +316,9 @@ define void @build_v4i16_012u(ptr%p0, i16 %a0, i16 %a1, i16 %a2, i16 %a3) nounwi %2 = insertelement <4 x i16> %1, i16 %a1, i32 1 %3 = insertelement <4 x i16> %2, i16 %a2, i32 2 %4 = insertelement <4 x i16> %3, i16 undef, i32 3 - %5 = bitcast <4 x i16> %4 to x86_mmx - %6 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %5, x86_mmx %5) - store x86_mmx %6, ptr%p0 + %5 = bitcast <4 x i16> %4 to <1 x i64> + %6 = tail call <1 x i64> @llvm.x86.mmx.padd.d(<1 x i64> %5, <1 x i64> %5) + store <1 x i64> %6, ptr%p0 ret void } @@ -353,9 +353,9 @@ define void @build_v4i16_0u00(ptr%p0, i16 %a0, i16 %a1, i16 %a2, i16 %a3) nounwi %2 = insertelement <4 x i16> %1, i16 undef, i32 1 %3 = insertelement <4 x i16> %2, i16 %a0, i32 2 %4 = insertelement <4 x i16> %3, i16 %a0, i32 3 - %5 = bitcast <4 x i16> %4 to x86_mmx - %6 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %5, x86_mmx %5) - store x86_mmx %6, ptr%p0 + %5 = bitcast <4 x i16> %4 to <1 x i64> + %6 = tail call <1 x i64> @llvm.x86.mmx.padd.d(<1 x i64> %5, <1 x i64> %5) + store <1 x i64> %6, ptr%p0 ret void } @@ -414,9 +414,9 @@ define void @build_v8i8_01234567(ptr%p0, i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, %6 = insertelement <8 x i8> %5, i8 %a5, i32 5 %7 = insertelement <8 x i8> %6, i8 %a6, i32 6 %8 = insertelement <8 x i8> %7, i8 %a7, i32 7 - %9 = bitcast <8 x i8> %8 to x86_mmx - %10 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %9, x86_mmx %9) - store x86_mmx %10, ptr%p0 + %9 = bitcast <8 x i8> %8 to <1 x i64> + %10 = tail call <1 x i64> @llvm.x86.mmx.padd.d(<1 x i64> %9, <1 x i64> %9) + store <1 x i64> %10, ptr%p0 ret void } @@ -469,9 +469,9 @@ define void @build_v8i8_0u2345z7(ptr%p0, i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, %6 = insertelement <8 x i8> %5, i8 %a5, i32 5 %7 = insertelement <8 x i8> %6, i8 0, i32 6 %8 = insertelement <8 x 
i8> %7, i8 %a7, i32 7 - %9 = bitcast <8 x i8> %8 to x86_mmx - %10 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %9, x86_mmx %9) - store x86_mmx %10, ptr%p0 + %9 = bitcast <8 x i8> %8 to <1 x i64> + %10 = tail call <1 x i64> @llvm.x86.mmx.padd.d(<1 x i64> %9, <1 x i64> %9) + store <1 x i64> %10, ptr%p0 ret void } @@ -522,9 +522,9 @@ define void @build_v8i8_0123zzzu(ptr%p0, i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, %6 = insertelement <8 x i8> %5, i8 0, i32 5 %7 = insertelement <8 x i8> %6, i8 0, i32 6 %8 = insertelement <8 x i8> %7, i8 undef, i32 7 - %9 = bitcast <8 x i8> %8 to x86_mmx - %10 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %9, x86_mmx %9) - store x86_mmx %10, ptr%p0 + %9 = bitcast <8 x i8> %8 to <1 x i64> + %10 = tail call <1 x i64> @llvm.x86.mmx.padd.d(<1 x i64> %9, <1 x i64> %9) + store <1 x i64> %10, ptr%p0 ret void } @@ -551,9 +551,9 @@ define void @build_v8i8_0uuuuzzz(ptr%p0, i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, %6 = insertelement <8 x i8> %5, i8 0, i32 5 %7 = insertelement <8 x i8> %6, i8 0, i32 6 %8 = insertelement <8 x i8> %7, i8 0, i32 7 - %9 = bitcast <8 x i8> %8 to x86_mmx - %10 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %9, x86_mmx %9) - store x86_mmx %10, ptr%p0 + %9 = bitcast <8 x i8> %8 to <1 x i64> + %10 = tail call <1 x i64> @llvm.x86.mmx.padd.d(<1 x i64> %9, <1 x i64> %9) + store <1 x i64> %10, ptr%p0 ret void } @@ -582,9 +582,9 @@ define void @build_v8i8_0zzzzzzu(ptr%p0, i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, %6 = insertelement <8 x i8> %5, i8 0, i32 5 %7 = insertelement <8 x i8> %6, i8 0, i32 6 %8 = insertelement <8 x i8> %7, i8 undef, i32 7 - %9 = bitcast <8 x i8> %8 to x86_mmx - %10 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %9, x86_mmx %9) - store x86_mmx %10, ptr%p0 + %9 = bitcast <8 x i8> %8 to <1 x i64> + %10 = tail call <1 x i64> @llvm.x86.mmx.padd.d(<1 x i64> %9, <1 x i64> %9) + store <1 x i64> %10, ptr%p0 ret void } @@ -626,9 +626,9 @@ define void @build_v8i8_00000000(ptr%p0, i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, %6 = insertelement <8 x i8> %5, i8 %a0, i32 5 %7 = insertelement <8 x i8> %6, i8 %a0, i32 6 %8 = insertelement <8 x i8> %7, i8 %a0, i32 7 - %9 = bitcast <8 x i8> %8 to x86_mmx - %10 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %9, x86_mmx %9) - store x86_mmx %10, ptr%p0 + %9 = bitcast <8 x i8> %8 to <1 x i64> + %10 = tail call <1 x i64> @llvm.x86.mmx.padd.d(<1 x i64> %9, <1 x i64> %9) + store <1 x i64> %10, ptr%p0 ret void } @@ -669,9 +669,9 @@ define void @build_v2f32_01(ptr%p0, float %a0, float %a1) nounwind { ; X64-NEXT: retq %1 = insertelement <2 x float> undef, float %a0, i32 0 %2 = insertelement <2 x float> %1, float %a1, i32 1 - %3 = bitcast <2 x float> %2 to x86_mmx - %4 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %3, x86_mmx %3) - store x86_mmx %4, ptr%p0 + %3 = bitcast <2 x float> %2 to <1 x i64> + %4 = tail call <1 x i64> @llvm.x86.mmx.padd.d(<1 x i64> %3, <1 x i64> %3) + store <1 x i64> %4, ptr%p0 ret void } @@ -707,9 +707,9 @@ define void @build_v2f32_0z(ptr%p0, float %a0, float %a1) nounwind { ; X64-NEXT: retq %1 = insertelement <2 x float> undef, float %a0, i32 0 %2 = insertelement <2 x float> %1, float 0.0, i32 1 - %3 = bitcast <2 x float> %2 to x86_mmx - %4 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %3, x86_mmx %3) - store x86_mmx %4, ptr%p0 + %3 = bitcast <2 x float> %2 to <1 x i64> + %4 = tail call <1 x i64> @llvm.x86.mmx.padd.d(<1 x i64> %3, <1 x i64> %3) + store <1 x i64> %4, ptr%p0 ret void } @@ -742,9 +742,9 @@ define void @build_v2f32_u1(ptr%p0, float %a0, float %a1) nounwind { ; 
X64-NEXT: retq %1 = insertelement <2 x float> undef, float undef, i32 0 %2 = insertelement <2 x float> %1, float %a1, i32 1 - %3 = bitcast <2 x float> %2 to x86_mmx - %4 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %3, x86_mmx %3) - store x86_mmx %4, ptr%p0 + %3 = bitcast <2 x float> %2 to <1 x i64> + %4 = tail call <1 x i64> @llvm.x86.mmx.padd.d(<1 x i64> %3, <1 x i64> %3) + store <1 x i64> %4, ptr%p0 ret void } @@ -780,9 +780,9 @@ define void @build_v2f32_z1(ptr%p0, float %a0, float %a1) nounwind { ; X64-NEXT: retq %1 = insertelement <2 x float> undef, float 0.0, i32 0 %2 = insertelement <2 x float> %1, float %a1, i32 1 - %3 = bitcast <2 x float> %2 to x86_mmx - %4 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %3, x86_mmx %3) - store x86_mmx %4, ptr%p0 + %3 = bitcast <2 x float> %2 to <1 x i64> + %4 = tail call <1 x i64> @llvm.x86.mmx.padd.d(<1 x i64> %3, <1 x i64> %3) + store <1 x i64> %4, ptr%p0 ret void } @@ -815,8 +815,8 @@ define void @build_v2f32_00(ptr%p0, float %a0, float %a1) nounwind { ; X64-NEXT: retq %1 = insertelement <2 x float> undef, float %a0, i32 0 %2 = insertelement <2 x float> %1, float %a0, i32 1 - %3 = bitcast <2 x float> %2 to x86_mmx - %4 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %3, x86_mmx %3) - store x86_mmx %4, ptr%p0 + %3 = bitcast <2 x float> %2 to <1 x i64> + %4 = tail call <1 x i64> @llvm.x86.mmx.padd.d(<1 x i64> %3, <1 x i64> %3) + store <1 x i64> %4, ptr%p0 ret void } diff --git a/llvm/test/CodeGen/X86/mmx-coalescing.ll b/llvm/test/CodeGen/X86/mmx-coalescing.ll index dac526fe20bbf0..589f5af4bb4d64 100644 --- a/llvm/test/CodeGen/X86/mmx-coalescing.ll +++ b/llvm/test/CodeGen/X86/mmx-coalescing.ll @@ -42,9 +42,9 @@ entry: %SA2 = getelementptr inbounds %SA, ptr %pSA, i64 0, i32 4 %v3 = load ptr, ptr %SA2, align 8 %v4 = bitcast <1 x i64> %v0 to <4 x i16> - %v5 = bitcast <4 x i16> %v4 to x86_mmx - %v6 = tail call x86_mmx @llvm.x86.sse.pshuf.w(x86_mmx %v5, i8 -18) - %v7 = bitcast x86_mmx %v6 to <4 x i16> + %v5 = bitcast <4 x i16> %v4 to <1 x i64> + %v6 = tail call <1 x i64> @llvm.x86.sse.pshuf.w(<1 x i64> %v5, i8 -18) + %v7 = bitcast <1 x i64> %v6 to <4 x i16> %v8 = bitcast <4 x i16> %v7 to <1 x i64> %v9 = extractelement <1 x i64> %v8, i32 0 %v10 = bitcast i64 %v9 to <2 x i32> @@ -55,18 +55,18 @@ entry: if.A: %pa = phi <1 x i64> [ %v8, %entry ], [ %vx, %if.C ] %v17 = extractelement <1 x i64> %pa, i32 0 - %v18 = bitcast i64 %v17 to x86_mmx - %v19 = tail call x86_mmx @llvm.x86.mmx.pslli.q(x86_mmx %v18, i32 %B) #2 - %v20 = bitcast x86_mmx %v19 to i64 + %v18 = bitcast i64 %v17 to <1 x i64> + %v19 = tail call <1 x i64> @llvm.x86.mmx.pslli.q(<1 x i64> %v18, i32 %B) #2 + %v20 = bitcast <1 x i64> %v19 to i64 %v21 = insertelement <1 x i64> undef, i64 %v20, i32 0 %cmp3 = icmp eq i64 %v20, 0 br i1 %cmp3, label %if.C, label %merge if.B: %v34 = bitcast <1 x i64> %v8 to <4 x i16> - %v35 = bitcast <4 x i16> %v34 to x86_mmx - %v36 = tail call x86_mmx @llvm.x86.sse.pshuf.w(x86_mmx %v35, i8 -18) - %v37 = bitcast x86_mmx %v36 to <4 x i16> + %v35 = bitcast <4 x i16> %v34 to <1 x i64> + %v36 = tail call <1 x i64> @llvm.x86.sse.pshuf.w(<1 x i64> %v35, i8 -18) + %v37 = bitcast <1 x i64> %v36 to <4 x i16> %v38 = bitcast <4 x i16> %v37 to <1 x i64> br label %if.C @@ -80,9 +80,9 @@ if.C: merge: %vy = phi <1 x i64> [ %v21, %if.A ], [ %vx, %if.C ] %v130 = bitcast <1 x i64> %vy to <4 x i16> - %v131 = bitcast <4 x i16> %v130 to x86_mmx - %v132 = tail call x86_mmx @llvm.x86.sse.pshuf.w(x86_mmx %v131, i8 -18) - %v133 = bitcast x86_mmx %v132 to <4 x i16> + %v131 = bitcast <4 x 
i16> %v130 to <1 x i64> + %v132 = tail call <1 x i64> @llvm.x86.sse.pshuf.w(<1 x i64> %v131, i8 -18) + %v133 = bitcast <1 x i64> %v132 to <4 x i16> %v134 = bitcast <4 x i16> %v133 to <1 x i64> %v135 = extractelement <1 x i64> %v134, i32 0 %v136 = bitcast i64 %v135 to <2 x i32> @@ -91,5 +91,5 @@ merge: } -declare x86_mmx @llvm.x86.sse.pshuf.w(x86_mmx, i8) -declare x86_mmx @llvm.x86.mmx.pslli.q(x86_mmx, i32) +declare <1 x i64> @llvm.x86.sse.pshuf.w(<1 x i64>, i8) +declare <1 x i64> @llvm.x86.mmx.pslli.q(<1 x i64>, i32) diff --git a/llvm/test/CodeGen/X86/mmx-cvt.ll b/llvm/test/CodeGen/X86/mmx-cvt.ll index 11473f3f6c2363..51a71dab37f6da 100644 --- a/llvm/test/CodeGen/X86/mmx-cvt.ll +++ b/llvm/test/CodeGen/X86/mmx-cvt.ll @@ -23,9 +23,9 @@ define void @cvt_v2f64_v2i32(<2 x double>, ptr) nounwind { %3 = tail call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> %0) %4 = bitcast <4 x i32> %3 to <2 x i64> %5 = extractelement <2 x i64> %4, i32 0 - %6 = bitcast i64 %5 to x86_mmx - %7 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %6, x86_mmx %6) - %8 = bitcast x86_mmx %7 to i64 + %6 = bitcast i64 %5 to <1 x i64> + %7 = tail call <1 x i64> @llvm.x86.mmx.padd.d(<1 x i64> %6, <1 x i64> %6) + %8 = bitcast <1 x i64> %7 to i64 %9 = insertelement <1 x i64> undef, i64 %8, i32 0 store <1 x i64> %9, ptr %1 ret void @@ -49,9 +49,9 @@ define void @cvtt_v2f64_v2i32(<2 x double>, ptr) nounwind { %3 = tail call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> %0) %4 = bitcast <4 x i32> %3 to <2 x i64> %5 = extractelement <2 x i64> %4, i32 0 - %6 = bitcast i64 %5 to x86_mmx - %7 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %6, x86_mmx %6) - %8 = bitcast x86_mmx %7 to i64 + %6 = bitcast i64 %5 to <1 x i64> + %7 = tail call <1 x i64> @llvm.x86.mmx.padd.d(<1 x i64> %6, <1 x i64> %6) + %8 = bitcast <1 x i64> %7 to i64 %9 = insertelement <1 x i64> undef, i64 %8, i32 0 store <1 x i64> %9, ptr %1 ret void @@ -73,9 +73,9 @@ define void @fptosi_v2f64_v2i32(<2 x double>, ptr) nounwind { ; X64-NEXT: movq %mm0, (%rdi) ; X64-NEXT: retq %3 = fptosi <2 x double> %0 to <2 x i32> - %4 = bitcast <2 x i32> %3 to x86_mmx - %5 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %4, x86_mmx %4) - %6 = bitcast x86_mmx %5 to i64 + %4 = bitcast <2 x i32> %3 to <1 x i64> + %5 = tail call <1 x i64> @llvm.x86.mmx.padd.d(<1 x i64> %4, <1 x i64> %4) + %6 = bitcast <1 x i64> %5 to i64 %7 = insertelement <1 x i64> undef, i64 %6, i32 0 store <1 x i64> %7, ptr %1 ret void @@ -99,9 +99,9 @@ define void @cvt_v2f32_v2i32(<4 x float>, ptr) nounwind { %3 = tail call <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float> %0) %4 = bitcast <4 x i32> %3 to <2 x i64> %5 = extractelement <2 x i64> %4, i32 0 - %6 = bitcast i64 %5 to x86_mmx - %7 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %6, x86_mmx %6) - %8 = bitcast x86_mmx %7 to i64 + %6 = bitcast i64 %5 to <1 x i64> + %7 = tail call <1 x i64> @llvm.x86.mmx.padd.d(<1 x i64> %6, <1 x i64> %6) + %8 = bitcast <1 x i64> %7 to i64 %9 = insertelement <1 x i64> undef, i64 %8, i32 0 store <1 x i64> %9, ptr %1 ret void @@ -125,9 +125,9 @@ define void @cvtt_v2f32_v2i32(<4 x float>, ptr) nounwind { %3 = tail call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> %0) %4 = bitcast <4 x i32> %3 to <2 x i64> %5 = extractelement <2 x i64> %4, i32 0 - %6 = bitcast i64 %5 to x86_mmx - %7 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %6, x86_mmx %6) - %8 = bitcast x86_mmx %7 to i64 + %6 = bitcast i64 %5 to <1 x i64> + %7 = tail call <1 x i64> @llvm.x86.mmx.padd.d(<1 x i64> %6, <1 x i64> %6) + %8 = bitcast <1 x i64> %7 to i64 %9 = 
insertelement <1 x i64> undef, i64 %8, i32 0 store <1 x i64> %9, ptr %1 ret void @@ -150,9 +150,9 @@ define void @fptosi_v4f32_v4i32(<4 x float>, ptr) nounwind { ; X64-NEXT: retq %3 = fptosi <4 x float> %0 to <4 x i32> %4 = shufflevector <4 x i32> %3, <4 x i32> undef, <2 x i32> <i32 0, i32 1> - %5 = bitcast <2 x i32> %4 to x86_mmx - %6 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %5, x86_mmx %5) - %7 = bitcast x86_mmx %6 to i64 + %5 = bitcast <2 x i32> %4 to <1 x i64> + %6 = tail call <1 x i64> @llvm.x86.mmx.padd.d(<1 x i64> %5, <1 x i64> %5) + %7 = bitcast <1 x i64> %6 to i64 %8 = insertelement <1 x i64> undef, i64 %7, i32 0 store <1 x i64> %8, ptr %1 ret void @@ -176,9 +176,9 @@ define void @fptosi_v2f32_v2i32(<4 x float>, ptr) nounwind { %3 = fptosi <4 x float> %0 to <4 x i32> %4 = bitcast <4 x i32> %3 to <2 x i64> %5 = extractelement <2 x i64> %4, i32 0 - %6 = bitcast i64 %5 to x86_mmx - %7 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %6, x86_mmx %6) - %8 = bitcast x86_mmx %7 to i64 + %6 = bitcast i64 %5 to <1 x i64> + %7 = tail call <1 x i64> @llvm.x86.mmx.padd.d(<1 x i64> %6, <1 x i64> %6) + %8 = bitcast <1 x i64> %7 to i64 %9 = insertelement <1 x i64> undef, i64 %8, i32 0 store <1 x i64> %9, ptr %1 ret void @@ -210,9 +210,9 @@ define <2 x double> @sitofp_v2i32_v2f64(ptr) nounwind { ; X64-NEXT: movq2dq %mm0, %xmm0 ; X64-NEXT: cvtdq2pd %xmm0, %xmm0 ; X64-NEXT: retq - %2 = load x86_mmx, ptr %0, align 8 - %3 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %2, x86_mmx %2) - %4 = bitcast x86_mmx %3 to i64 + %2 = load <1 x i64>, ptr %0, align 8 + %3 = tail call <1 x i64> @llvm.x86.mmx.padd.d(<1 x i64> %2, <1 x i64> %2) + %4 = bitcast <1 x i64> %3 to i64 %5 = insertelement <2 x i64> undef, i64 %4, i32 0 %6 = bitcast <2 x i64> %5 to <4 x i32> %7 = shufflevector <4 x i32> %6, <4 x i32> undef, <2 x i32> <i32 0, i32 1> @@ -237,9 +237,9 @@ define <4 x float> @sitofp_v2i32_v2f32(ptr) nounwind { ; X64-NEXT: movq2dq %mm0, %xmm0 ; X64-NEXT: cvtdq2ps %xmm0, %xmm0 ; X64-NEXT: retq - %2 = load x86_mmx, ptr %0, align 8 - %3 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %2, x86_mmx %2) - %4 = bitcast x86_mmx %3 to <2 x i32> + %2 = load <1 x i64>, ptr %0, align 8 + %3 = tail call <1 x i64> @llvm.x86.mmx.padd.d(<1 x i64> %2, <1 x i64> %2) + %4 = bitcast <1 x i64> %3 to <2 x i32> %5 = shufflevector <2 x i32> %4, <2 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3> %6 = sitofp <4 x i32> %5 to <4 x float> ret <4 x float> %6 @@ -269,9 +269,9 @@ define <4 x float> @cvt_v2i32_v2f32(ptr) nounwind { ; X64-NEXT: movq2dq %mm0, %xmm0 ; X64-NEXT: cvtdq2ps %xmm0, %xmm0 ; X64-NEXT: retq - %2 = load x86_mmx, ptr %0, align 8 - %3 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %2, x86_mmx %2) - %4 = bitcast x86_mmx %3 to i64 + %2 = load <1 x i64>, ptr %0, align 8 + %3 = tail call <1 x i64> @llvm.x86.mmx.padd.d(<1 x i64> %2, <1 x i64> %2) + %4 = bitcast <1 x i64> %3 to i64 %5 = insertelement <2 x i64> undef, i64 %4, i32 0 %6 = insertelement <2 x i64> %5, i64 0, i32 1 %7 = bitcast <2 x i64> %6 to <4 x i32> @@ -279,7 +279,7 @@ define <4 x float> @cvt_v2i32_v2f32(ptr) nounwind { ret <4 x float> %8 } -declare x86_mmx @llvm.x86.mmx.padd.d(x86_mmx, x86_mmx) +declare <1 x i64> @llvm.x86.mmx.padd.d(<1 x i64>, <1 x i64>) declare <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double>) declare <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double>) declare <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float>) diff --git a/llvm/test/CodeGen/X86/mmx-fold-load.ll index a31339902bb645..6fe3bc4973185f 100644 --- a/llvm/test/CodeGen/X86/mmx-fold-load.ll
+++ b/llvm/test/CodeGen/X86/mmx-fold-load.ll @@ -29,13 +29,13 @@ define i64 @t0(ptr %a, ptr %b) nounwind { ; X64-NEXT: movq %mm0, %rax ; X64-NEXT: retq entry: - %0 = load x86_mmx, ptr %a, align 8 + %0 = load <1 x i64>, ptr %a, align 8 %1 = load i32, ptr %b, align 4 - %2 = tail call x86_mmx @llvm.x86.mmx.pslli.q(x86_mmx %0, i32 %1) - %3 = bitcast x86_mmx %2 to i64 + %2 = tail call <1 x i64> @llvm.x86.mmx.pslli.q(<1 x i64> %0, i32 %1) + %3 = bitcast <1 x i64> %2 to i64 ret i64 %3 } -declare x86_mmx @llvm.x86.mmx.pslli.q(x86_mmx, i32) +declare <1 x i64> @llvm.x86.mmx.pslli.q(<1 x i64>, i32) define i64 @t1(ptr %a, ptr %b) nounwind { ; X86-LABEL: t1: @@ -64,13 +64,13 @@ define i64 @t1(ptr %a, ptr %b) nounwind { ; X64-NEXT: movq %mm0, %rax ; X64-NEXT: retq entry: - %0 = load x86_mmx, ptr %a, align 8 + %0 = load <1 x i64>, ptr %a, align 8 %1 = load i32, ptr %b, align 4 - %2 = tail call x86_mmx @llvm.x86.mmx.psrli.q(x86_mmx %0, i32 %1) - %3 = bitcast x86_mmx %2 to i64 + %2 = tail call <1 x i64> @llvm.x86.mmx.psrli.q(<1 x i64> %0, i32 %1) + %3 = bitcast <1 x i64> %2 to i64 ret i64 %3 } -declare x86_mmx @llvm.x86.mmx.psrli.q(x86_mmx, i32) +declare <1 x i64> @llvm.x86.mmx.psrli.q(<1 x i64>, i32) define i64 @t2(ptr %a, ptr %b) nounwind { ; X86-LABEL: t2: @@ -99,13 +99,13 @@ define i64 @t2(ptr %a, ptr %b) nounwind { ; X64-NEXT: movq %mm0, %rax ; X64-NEXT: retq entry: - %0 = load x86_mmx, ptr %a, align 8 + %0 = load <1 x i64>, ptr %a, align 8 %1 = load i32, ptr %b, align 4 - %2 = tail call x86_mmx @llvm.x86.mmx.pslli.w(x86_mmx %0, i32 %1) - %3 = bitcast x86_mmx %2 to i64 + %2 = tail call <1 x i64> @llvm.x86.mmx.pslli.w(<1 x i64> %0, i32 %1) + %3 = bitcast <1 x i64> %2 to i64 ret i64 %3 } -declare x86_mmx @llvm.x86.mmx.pslli.w(x86_mmx, i32) +declare <1 x i64> @llvm.x86.mmx.pslli.w(<1 x i64>, i32) define i64 @t3(ptr %a, ptr %b) nounwind { ; X86-LABEL: t3: @@ -134,13 +134,13 @@ define i64 @t3(ptr %a, ptr %b) nounwind { ; X64-NEXT: movq %mm0, %rax ; X64-NEXT: retq entry: - %0 = load x86_mmx, ptr %a, align 8 + %0 = load <1 x i64>, ptr %a, align 8 %1 = load i32, ptr %b, align 4 - %2 = tail call x86_mmx @llvm.x86.mmx.psrli.w(x86_mmx %0, i32 %1) - %3 = bitcast x86_mmx %2 to i64 + %2 = tail call <1 x i64> @llvm.x86.mmx.psrli.w(<1 x i64> %0, i32 %1) + %3 = bitcast <1 x i64> %2 to i64 ret i64 %3 } -declare x86_mmx @llvm.x86.mmx.psrli.w(x86_mmx, i32) +declare <1 x i64> @llvm.x86.mmx.psrli.w(<1 x i64>, i32) define i64 @t4(ptr %a, ptr %b) nounwind { ; X86-LABEL: t4: @@ -169,13 +169,13 @@ define i64 @t4(ptr %a, ptr %b) nounwind { ; X64-NEXT: movq %mm0, %rax ; X64-NEXT: retq entry: - %0 = load x86_mmx, ptr %a, align 8 + %0 = load <1 x i64>, ptr %a, align 8 %1 = load i32, ptr %b, align 4 - %2 = tail call x86_mmx @llvm.x86.mmx.pslli.d(x86_mmx %0, i32 %1) - %3 = bitcast x86_mmx %2 to i64 + %2 = tail call <1 x i64> @llvm.x86.mmx.pslli.d(<1 x i64> %0, i32 %1) + %3 = bitcast <1 x i64> %2 to i64 ret i64 %3 } -declare x86_mmx @llvm.x86.mmx.pslli.d(x86_mmx, i32) +declare <1 x i64> @llvm.x86.mmx.pslli.d(<1 x i64>, i32) define i64 @t5(ptr %a, ptr %b) nounwind { ; X86-LABEL: t5: @@ -204,13 +204,13 @@ define i64 @t5(ptr %a, ptr %b) nounwind { ; X64-NEXT: movq %mm0, %rax ; X64-NEXT: retq entry: - %0 = load x86_mmx, ptr %a, align 8 + %0 = load <1 x i64>, ptr %a, align 8 %1 = load i32, ptr %b, align 4 - %2 = tail call x86_mmx @llvm.x86.mmx.psrli.d(x86_mmx %0, i32 %1) - %3 = bitcast x86_mmx %2 to i64 + %2 = tail call <1 x i64> @llvm.x86.mmx.psrli.d(<1 x i64> %0, i32 %1) + %3 = bitcast <1 x i64> %2 to i64 ret i64 %3 } -declare x86_mmx 
@llvm.x86.mmx.psrli.d(x86_mmx, i32) +declare <1 x i64> @llvm.x86.mmx.psrli.d(<1 x i64>, i32) define i64 @t6(ptr %a, ptr %b) nounwind { ; X86-LABEL: t6: @@ -239,13 +239,13 @@ define i64 @t6(ptr %a, ptr %b) nounwind { ; X64-NEXT: movq %mm0, %rax ; X64-NEXT: retq entry: - %0 = load x86_mmx, ptr %a, align 8 + %0 = load <1 x i64>, ptr %a, align 8 %1 = load i32, ptr %b, align 4 - %2 = tail call x86_mmx @llvm.x86.mmx.psrai.w(x86_mmx %0, i32 %1) - %3 = bitcast x86_mmx %2 to i64 + %2 = tail call <1 x i64> @llvm.x86.mmx.psrai.w(<1 x i64> %0, i32 %1) + %3 = bitcast <1 x i64> %2 to i64 ret i64 %3 } -declare x86_mmx @llvm.x86.mmx.psrai.w(x86_mmx, i32) +declare <1 x i64> @llvm.x86.mmx.psrai.w(<1 x i64>, i32) define i64 @t7(ptr %a, ptr %b) nounwind { ; X86-LABEL: t7: @@ -274,15 +274,15 @@ define i64 @t7(ptr %a, ptr %b) nounwind { ; X64-NEXT: movq %mm0, %rax ; X64-NEXT: retq entry: - %0 = load x86_mmx, ptr %a, align 8 + %0 = load <1 x i64>, ptr %a, align 8 %1 = load i32, ptr %b, align 4 - %2 = tail call x86_mmx @llvm.x86.mmx.psrai.d(x86_mmx %0, i32 %1) - %3 = bitcast x86_mmx %2 to i64 + %2 = tail call <1 x i64> @llvm.x86.mmx.psrai.d(<1 x i64> %0, i32 %1) + %3 = bitcast <1 x i64> %2 to i64 ret i64 %3 } -declare x86_mmx @llvm.x86.mmx.psrai.d(x86_mmx, i32) +declare <1 x i64> @llvm.x86.mmx.psrai.d(<1 x i64>, i32) -define i64 @tt0(x86_mmx %t, ptr %q) nounwind { +define i64 @tt0(<1 x i64> %t, ptr %q) nounwind { ; X86-LABEL: tt0: ; X86: # %bb.0: # %entry ; X86-NEXT: pushl %ebp @@ -312,16 +312,16 @@ define i64 @tt0(x86_mmx %t, ptr %q) nounwind { ; X64-NEXT: emms ; X64-NEXT: retq entry: - %v = load x86_mmx, ptr %q - %u = tail call x86_mmx @llvm.x86.mmx.padd.b(x86_mmx %t, x86_mmx %v) - %s = bitcast x86_mmx %u to i64 + %v = load <1 x i64>, ptr %q + %u = tail call <1 x i64> @llvm.x86.mmx.padd.b(<1 x i64> %t, <1 x i64> %v) + %s = bitcast <1 x i64> %u to i64 call void @llvm.x86.mmx.emms() ret i64 %s } -declare x86_mmx @llvm.x86.mmx.padd.b(x86_mmx, x86_mmx) +declare <1 x i64> @llvm.x86.mmx.padd.b(<1 x i64>, <1 x i64>) declare void @llvm.x86.mmx.emms() -define i64 @tt1(x86_mmx %t, ptr %q) nounwind { +define i64 @tt1(<1 x i64> %t, ptr %q) nounwind { ; X86-LABEL: tt1: ; X86: # %bb.0: # %entry ; X86-NEXT: pushl %ebp @@ -351,15 +351,15 @@ define i64 @tt1(x86_mmx %t, ptr %q) nounwind { ; X64-NEXT: emms ; X64-NEXT: retq entry: - %v = load x86_mmx, ptr %q - %u = tail call x86_mmx @llvm.x86.mmx.padd.w(x86_mmx %t, x86_mmx %v) - %s = bitcast x86_mmx %u to i64 + %v = load <1 x i64>, ptr %q + %u = tail call <1 x i64> @llvm.x86.mmx.padd.w(<1 x i64> %t, <1 x i64> %v) + %s = bitcast <1 x i64> %u to i64 call void @llvm.x86.mmx.emms() ret i64 %s } -declare x86_mmx @llvm.x86.mmx.padd.w(x86_mmx, x86_mmx) +declare <1 x i64> @llvm.x86.mmx.padd.w(<1 x i64>, <1 x i64>) -define i64 @tt2(x86_mmx %t, ptr %q) nounwind { +define i64 @tt2(<1 x i64> %t, ptr %q) nounwind { ; X86-LABEL: tt2: ; X86: # %bb.0: # %entry ; X86-NEXT: pushl %ebp @@ -389,15 +389,15 @@ define i64 @tt2(x86_mmx %t, ptr %q) nounwind { ; X64-NEXT: emms ; X64-NEXT: retq entry: - %v = load x86_mmx, ptr %q - %u = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %t, x86_mmx %v) - %s = bitcast x86_mmx %u to i64 + %v = load <1 x i64>, ptr %q + %u = tail call <1 x i64> @llvm.x86.mmx.padd.d(<1 x i64> %t, <1 x i64> %v) + %s = bitcast <1 x i64> %u to i64 call void @llvm.x86.mmx.emms() ret i64 %s } -declare x86_mmx @llvm.x86.mmx.padd.d(x86_mmx, x86_mmx) +declare <1 x i64> @llvm.x86.mmx.padd.d(<1 x i64>, <1 x i64>) -define i64 @tt3(x86_mmx %t, ptr %q) nounwind { +define i64 @tt3(<1 x i64> %t, 
ptr %q) nounwind { ; X86-LABEL: tt3: ; X86: # %bb.0: # %entry ; X86-NEXT: pushl %ebp @@ -427,15 +427,15 @@ define i64 @tt3(x86_mmx %t, ptr %q) nounwind { ; X64-NEXT: emms ; X64-NEXT: retq entry: - %v = load x86_mmx, ptr %q - %u = tail call x86_mmx @llvm.x86.mmx.padd.q(x86_mmx %t, x86_mmx %v) - %s = bitcast x86_mmx %u to i64 + %v = load <1 x i64>, ptr %q + %u = tail call <1 x i64> @llvm.x86.mmx.padd.q(<1 x i64> %t, <1 x i64> %v) + %s = bitcast <1 x i64> %u to i64 call void @llvm.x86.mmx.emms() ret i64 %s } -declare x86_mmx @llvm.x86.mmx.padd.q(x86_mmx, x86_mmx) +declare <1 x i64> @llvm.x86.mmx.padd.q(<1 x i64>, <1 x i64>) -define i64 @tt4(x86_mmx %t, ptr %q) nounwind { +define i64 @tt4(<1 x i64> %t, ptr %q) nounwind { ; X86-LABEL: tt4: ; X86: # %bb.0: # %entry ; X86-NEXT: pushl %ebp @@ -465,15 +465,15 @@ define i64 @tt4(x86_mmx %t, ptr %q) nounwind { ; X64-NEXT: emms ; X64-NEXT: retq entry: - %v = load x86_mmx, ptr %q - %u = tail call x86_mmx @llvm.x86.mmx.paddus.b(x86_mmx %t, x86_mmx %v) - %s = bitcast x86_mmx %u to i64 + %v = load <1 x i64>, ptr %q + %u = tail call <1 x i64> @llvm.x86.mmx.paddus.b(<1 x i64> %t, <1 x i64> %v) + %s = bitcast <1 x i64> %u to i64 call void @llvm.x86.mmx.emms() ret i64 %s } -declare x86_mmx @llvm.x86.mmx.paddus.b(x86_mmx, x86_mmx) +declare <1 x i64> @llvm.x86.mmx.paddus.b(<1 x i64>, <1 x i64>) -define i64 @tt5(x86_mmx %t, ptr %q) nounwind { +define i64 @tt5(<1 x i64> %t, ptr %q) nounwind { ; X86-LABEL: tt5: ; X86: # %bb.0: # %entry ; X86-NEXT: pushl %ebp @@ -503,15 +503,15 @@ define i64 @tt5(x86_mmx %t, ptr %q) nounwind { ; X64-NEXT: emms ; X64-NEXT: retq entry: - %v = load x86_mmx, ptr %q - %u = tail call x86_mmx @llvm.x86.mmx.paddus.w(x86_mmx %t, x86_mmx %v) - %s = bitcast x86_mmx %u to i64 + %v = load <1 x i64>, ptr %q + %u = tail call <1 x i64> @llvm.x86.mmx.paddus.w(<1 x i64> %t, <1 x i64> %v) + %s = bitcast <1 x i64> %u to i64 call void @llvm.x86.mmx.emms() ret i64 %s } -declare x86_mmx @llvm.x86.mmx.paddus.w(x86_mmx, x86_mmx) +declare <1 x i64> @llvm.x86.mmx.paddus.w(<1 x i64>, <1 x i64>) -define i64 @tt6(x86_mmx %t, ptr %q) nounwind { +define i64 @tt6(<1 x i64> %t, ptr %q) nounwind { ; X86-LABEL: tt6: ; X86: # %bb.0: # %entry ; X86-NEXT: pushl %ebp @@ -541,15 +541,15 @@ define i64 @tt6(x86_mmx %t, ptr %q) nounwind { ; X64-NEXT: emms ; X64-NEXT: retq entry: - %v = load x86_mmx, ptr %q - %u = tail call x86_mmx @llvm.x86.mmx.psrl.w(x86_mmx %t, x86_mmx %v) - %s = bitcast x86_mmx %u to i64 + %v = load <1 x i64>, ptr %q + %u = tail call <1 x i64> @llvm.x86.mmx.psrl.w(<1 x i64> %t, <1 x i64> %v) + %s = bitcast <1 x i64> %u to i64 call void @llvm.x86.mmx.emms() ret i64 %s } -declare x86_mmx @llvm.x86.mmx.psrl.w(x86_mmx, x86_mmx) +declare <1 x i64> @llvm.x86.mmx.psrl.w(<1 x i64>, <1 x i64>) -define i64 @tt7(x86_mmx %t, ptr %q) nounwind { +define i64 @tt7(<1 x i64> %t, ptr %q) nounwind { ; X86-LABEL: tt7: ; X86: # %bb.0: # %entry ; X86-NEXT: pushl %ebp @@ -579,15 +579,15 @@ define i64 @tt7(x86_mmx %t, ptr %q) nounwind { ; X64-NEXT: emms ; X64-NEXT: retq entry: - %v = load x86_mmx, ptr %q - %u = tail call x86_mmx @llvm.x86.mmx.psrl.d(x86_mmx %t, x86_mmx %v) - %s = bitcast x86_mmx %u to i64 + %v = load <1 x i64>, ptr %q + %u = tail call <1 x i64> @llvm.x86.mmx.psrl.d(<1 x i64> %t, <1 x i64> %v) + %s = bitcast <1 x i64> %u to i64 call void @llvm.x86.mmx.emms() ret i64 %s } -declare x86_mmx @llvm.x86.mmx.psrl.d(x86_mmx, x86_mmx) +declare <1 x i64> @llvm.x86.mmx.psrl.d(<1 x i64>, <1 x i64>) -define i64 @tt8(x86_mmx %t, ptr %q) nounwind { +define i64 @tt8(<1 x i64> 
%t, ptr %q) nounwind { ; X86-LABEL: tt8: ; X86: # %bb.0: # %entry ; X86-NEXT: pushl %ebp @@ -617,13 +617,13 @@ define i64 @tt8(x86_mmx %t, ptr %q) nounwind { ; X64-NEXT: emms ; X64-NEXT: retq entry: - %v = load x86_mmx, ptr %q - %u = tail call x86_mmx @llvm.x86.mmx.psrl.q(x86_mmx %t, x86_mmx %v) - %s = bitcast x86_mmx %u to i64 + %v = load <1 x i64>, ptr %q + %u = tail call <1 x i64> @llvm.x86.mmx.psrl.q(<1 x i64> %t, <1 x i64> %v) + %s = bitcast <1 x i64> %u to i64 call void @llvm.x86.mmx.emms() ret i64 %s } -declare x86_mmx @llvm.x86.mmx.psrl.q(x86_mmx, x86_mmx) +declare <1 x i64> @llvm.x86.mmx.psrl.q(<1 x i64>, <1 x i64>) define void @test_psrlq_by_volatile_shift_amount(ptr %t) nounwind { ; X86-LABEL: test_psrlq_by_volatile_shift_amount: @@ -653,8 +653,8 @@ entry: call void @llvm.lifetime.start(i64 4, ptr nonnull %0) store volatile i32 1, ptr %0, align 4 %1 = load volatile i32, ptr %0, align 4 - %2 = tail call x86_mmx @llvm.x86.mmx.psrli.q(x86_mmx bitcast (<1 x i64> <i64 255> to x86_mmx), i32 %1) - store x86_mmx %2, ptr %t, align 8 + %2 = tail call <1 x i64> @llvm.x86.mmx.psrli.q(<1 x i64> <i64 255>, i32 %1) + store <1 x i64> %2, ptr %t, align 8 call void @llvm.lifetime.end(i64 4, ptr nonnull %0) ret void } @@ -663,7 +663,7 @@ declare void @llvm.lifetime.start(i64, ptr nocapture) declare void @llvm.lifetime.end(i64, ptr nocapture) ; Make sure we shrink this vector load and fold it. -define x86_mmx @vec_load(ptr %x) { +define <1 x i64> @vec_load(ptr %x) { ; X86-LABEL: vec_load: ; X86: # %bb.0: ; X86-NEXT: pushl %ebp @@ -694,10 +694,10 @@ define x86_mmx @vec_load(ptr %x) { %y = extractelement <4 x float> %z, i32 0 %a = insertelement <2 x float> undef, float %y, i32 0 %b = insertelement <2 x float> %a, float %y, i32 1 - %c = bitcast <2 x float> %b to x86_mmx - %d = tail call x86_mmx @llvm.x86.mmx.padds.b(x86_mmx %c, x86_mmx %c) - ret x86_mmx %d + %c = bitcast <2 x float> %b to <1 x i64> + %d = tail call <1 x i64> @llvm.x86.mmx.padds.b(<1 x i64> %c, <1 x i64> %c) + ret <1 x i64> %d } -declare x86_mmx @llvm.x86.mmx.padds.b(x86_mmx, x86_mmx) +declare <1 x i64> @llvm.x86.mmx.padds.b(<1 x i64>, <1 x i64>) diff --git a/llvm/test/CodeGen/X86/mmx-fold-zero.ll index b2c94e3aaa3a65..a6e1275875dbc8 100644 --- a/llvm/test/CodeGen/X86/mmx-fold-zero.ll +++ b/llvm/test/CodeGen/X86/mmx-fold-zero.ll @@ -115,32 +115,32 @@ define double @mmx_zero(double, double, double, double) nounwind { ; X64-LARGE-NEXT: paddw %mm2, %mm0 ; X64-LARGE-NEXT: movq2dq %mm0, %xmm0 ; X64-LARGE-NEXT: retq - %5 = bitcast double %0 to x86_mmx - %6 = bitcast double %1 to x86_mmx - %7 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %5, x86_mmx %6) - %8 = tail call x86_mmx @llvm.x86.mmx.pmulu.dq(x86_mmx %7, x86_mmx bitcast (double 0.000000e+00 to x86_mmx)) - %9 = bitcast double %2 to x86_mmx - %10 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %8, x86_mmx %9) - %11 = tail call x86_mmx @llvm.x86.mmx.padd.w(x86_mmx %5, x86_mmx %10) - %12 = tail call x86_mmx @llvm.x86.mmx.padd.w(x86_mmx %6, x86_mmx %11) - %13 = bitcast double %3 to x86_mmx - %14 = tail call x86_mmx @llvm.x86.mmx.pmulu.dq(x86_mmx %12, x86_mmx %13) - %15 = tail call x86_mmx @llvm.x86.mmx.padd.w(x86_mmx %14, x86_mmx %9) - %16 = tail call x86_mmx @llvm.x86.mmx.padd.w(x86_mmx %15, x86_mmx %13) - %17 = tail call x86_mmx @llvm.x86.mmx.padd.w(x86_mmx %16, x86_mmx %10) - %18 = tail call x86_mmx @llvm.x86.mmx.padd.w(x86_mmx %17, x86_mmx %11) - %19 = tail call x86_mmx @llvm.x86.mmx.padd.w(x86_mmx %18, x86_mmx %8) - %20 = tail call x86_mmx
@llvm.x86.mmx.pmulu.dq(x86_mmx %19, x86_mmx %7) - %21 = tail call x86_mmx @llvm.x86.mmx.padd.w(x86_mmx %20, x86_mmx bitcast (double 0.000000e+00 to x86_mmx)) - %22 = tail call x86_mmx @llvm.x86.mmx.padd.w(x86_mmx %21, x86_mmx %12) - %23 = tail call x86_mmx @llvm.x86.mmx.pmulu.dq(x86_mmx %22, x86_mmx %15) - %24 = tail call x86_mmx @llvm.x86.mmx.pmulu.dq(x86_mmx %23, x86_mmx %6) - %25 = tail call x86_mmx @llvm.x86.mmx.padd.w(x86_mmx %24, x86_mmx %16) - %26 = tail call x86_mmx @llvm.x86.mmx.padd.w(x86_mmx %25, x86_mmx %17) - %27 = bitcast x86_mmx %26 to double + %5 = bitcast double %0 to <1 x i64> + %6 = bitcast double %1 to <1 x i64> + %7 = tail call <1 x i64> @llvm.x86.mmx.padd.d(<1 x i64> %5, <1 x i64> %6) + %8 = tail call <1 x i64> @llvm.x86.mmx.pmulu.dq(<1 x i64> %7, <1 x i64> bitcast (double 0.000000e+00 to <1 x i64>)) + %9 = bitcast double %2 to <1 x i64> + %10 = tail call <1 x i64> @llvm.x86.mmx.padd.d(<1 x i64> %8, <1 x i64> %9) + %11 = tail call <1 x i64> @llvm.x86.mmx.padd.w(<1 x i64> %5, <1 x i64> %10) + %12 = tail call <1 x i64> @llvm.x86.mmx.padd.w(<1 x i64> %6, <1 x i64> %11) + %13 = bitcast double %3 to <1 x i64> + %14 = tail call <1 x i64> @llvm.x86.mmx.pmulu.dq(<1 x i64> %12, <1 x i64> %13) + %15 = tail call <1 x i64> @llvm.x86.mmx.padd.w(<1 x i64> %14, <1 x i64> %9) + %16 = tail call <1 x i64> @llvm.x86.mmx.padd.w(<1 x i64> %15, <1 x i64> %13) + %17 = tail call <1 x i64> @llvm.x86.mmx.padd.w(<1 x i64> %16, <1 x i64> %10) + %18 = tail call <1 x i64> @llvm.x86.mmx.padd.w(<1 x i64> %17, <1 x i64> %11) + %19 = tail call <1 x i64> @llvm.x86.mmx.padd.w(<1 x i64> %18, <1 x i64> %8) + %20 = tail call <1 x i64> @llvm.x86.mmx.pmulu.dq(<1 x i64> %19, <1 x i64> %7) + %21 = tail call <1 x i64> @llvm.x86.mmx.padd.w(<1 x i64> %20, <1 x i64> bitcast (double 0.000000e+00 to <1 x i64>)) + %22 = tail call <1 x i64> @llvm.x86.mmx.padd.w(<1 x i64> %21, <1 x i64> %12) + %23 = tail call <1 x i64> @llvm.x86.mmx.pmulu.dq(<1 x i64> %22, <1 x i64> %15) + %24 = tail call <1 x i64> @llvm.x86.mmx.pmulu.dq(<1 x i64> %23, <1 x i64> %6) + %25 = tail call <1 x i64> @llvm.x86.mmx.padd.w(<1 x i64> %24, <1 x i64> %16) + %26 = tail call <1 x i64> @llvm.x86.mmx.padd.w(<1 x i64> %25, <1 x i64> %17) + %27 = bitcast <1 x i64> %26 to double ret double %27 } -declare x86_mmx @llvm.x86.mmx.padd.d(x86_mmx, x86_mmx) -declare x86_mmx @llvm.x86.mmx.padd.w(x86_mmx, x86_mmx) -declare x86_mmx @llvm.x86.mmx.pmulu.dq(x86_mmx, x86_mmx) +declare <1 x i64> @llvm.x86.mmx.padd.d(<1 x i64>, <1 x i64>) +declare <1 x i64> @llvm.x86.mmx.padd.w(<1 x i64>, <1 x i64>) +declare <1 x i64> @llvm.x86.mmx.pmulu.dq(<1 x i64>, <1 x i64>) diff --git a/llvm/test/CodeGen/X86/mmx-intrinsics.ll b/llvm/test/CodeGen/X86/mmx-intrinsics.ll index 69fc6361075449..a7b6ed416622ef 100644 --- a/llvm/test/CodeGen/X86/mmx-intrinsics.ll +++ b/llvm/test/CodeGen/X86/mmx-intrinsics.ll @@ -4,7 +4,7 @@ ; RUN: llc < %s -mtriple=x86_64-- -mattr=+mmx,+ssse3,-avx | FileCheck %s --check-prefixes=ALL,X64 ; RUN: llc < %s -mtriple=x86_64-- -mattr=+mmx,+avx | FileCheck %s --check-prefixes=ALL,X64 -declare x86_mmx @llvm.x86.ssse3.phadd.w(x86_mmx, x86_mmx) nounwind readnone +declare <1 x i64> @llvm.x86.ssse3.phadd.w(<1 x i64>, <1 x i64>) nounwind readnone define i64 @test1(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { ; X86-LABEL: test1: @@ -40,16 +40,16 @@ define i64 @test1(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { entry: %0 = bitcast <1 x i64> %b to <4 x i16> %1 = bitcast <1 x i64> %a to <4 x i16> - %2 = bitcast <4 x i16> %1 to x86_mmx - %3 = 
bitcast <4 x i16> %0 to x86_mmx - %4 = tail call x86_mmx @llvm.x86.ssse3.phadd.w(x86_mmx %2, x86_mmx %3) nounwind readnone - %5 = bitcast x86_mmx %4 to <4 x i16> + %2 = bitcast <4 x i16> %1 to <1 x i64> + %3 = bitcast <4 x i16> %0 to <1 x i64> + %4 = tail call <1 x i64> @llvm.x86.ssse3.phadd.w(<1 x i64> %2, <1 x i64> %3) nounwind readnone + %5 = bitcast <1 x i64> %4 to <4 x i16> %6 = bitcast <4 x i16> %5 to <1 x i64> %7 = extractelement <1 x i64> %6, i32 0 ret i64 %7 } -declare x86_mmx @llvm.x86.mmx.pcmpgt.d(x86_mmx, x86_mmx) nounwind readnone +declare <1 x i64> @llvm.x86.mmx.pcmpgt.d(<1 x i64>, <1 x i64>) nounwind readnone define i64 @test88(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { ; X86-LABEL: test88: @@ -85,16 +85,16 @@ define i64 @test88(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { entry: %0 = bitcast <1 x i64> %b to <2 x i32> %1 = bitcast <1 x i64> %a to <2 x i32> - %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx - %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx - %2 = tail call x86_mmx @llvm.x86.mmx.pcmpgt.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind - %3 = bitcast x86_mmx %2 to <2 x i32> + %mmx_var.i = bitcast <2 x i32> %1 to <1 x i64> + %mmx_var1.i = bitcast <2 x i32> %0 to <1 x i64> + %2 = tail call <1 x i64> @llvm.x86.mmx.pcmpgt.d(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind + %3 = bitcast <1 x i64> %2 to <2 x i32> %4 = bitcast <2 x i32> %3 to <1 x i64> %5 = extractelement <1 x i64> %4, i32 0 ret i64 %5 } -declare x86_mmx @llvm.x86.mmx.pcmpgt.w(x86_mmx, x86_mmx) nounwind readnone +declare <1 x i64> @llvm.x86.mmx.pcmpgt.w(<1 x i64>, <1 x i64>) nounwind readnone define i64 @test87(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { ; X86-LABEL: test87: @@ -130,16 +130,16 @@ define i64 @test87(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { entry: %0 = bitcast <1 x i64> %b to <4 x i16> %1 = bitcast <1 x i64> %a to <4 x i16> - %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx - %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx - %2 = tail call x86_mmx @llvm.x86.mmx.pcmpgt.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind - %3 = bitcast x86_mmx %2 to <4 x i16> + %mmx_var.i = bitcast <4 x i16> %1 to <1 x i64> + %mmx_var1.i = bitcast <4 x i16> %0 to <1 x i64> + %2 = tail call <1 x i64> @llvm.x86.mmx.pcmpgt.w(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind + %3 = bitcast <1 x i64> %2 to <4 x i16> %4 = bitcast <4 x i16> %3 to <1 x i64> %5 = extractelement <1 x i64> %4, i32 0 ret i64 %5 } -declare x86_mmx @llvm.x86.mmx.pcmpgt.b(x86_mmx, x86_mmx) nounwind readnone +declare <1 x i64> @llvm.x86.mmx.pcmpgt.b(<1 x i64>, <1 x i64>) nounwind readnone define i64 @test86(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { ; X86-LABEL: test86: @@ -175,16 +175,16 @@ define i64 @test86(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { entry: %0 = bitcast <1 x i64> %b to <8 x i8> %1 = bitcast <1 x i64> %a to <8 x i8> - %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx - %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx - %2 = tail call x86_mmx @llvm.x86.mmx.pcmpgt.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind - %3 = bitcast x86_mmx %2 to <8 x i8> + %mmx_var.i = bitcast <8 x i8> %1 to <1 x i64> + %mmx_var1.i = bitcast <8 x i8> %0 to <1 x i64> + %2 = tail call <1 x i64> @llvm.x86.mmx.pcmpgt.b(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind + %3 = bitcast <1 x i64> %2 to <8 x i8> %4 = bitcast <8 x i8> %3 to <1 x i64> %5 = extractelement <1 x i64> %4, i32 0 ret i64 %5 } -declare x86_mmx @llvm.x86.mmx.pcmpeq.d(x86_mmx, 
x86_mmx) nounwind readnone +declare <1 x i64> @llvm.x86.mmx.pcmpeq.d(<1 x i64>, <1 x i64>) nounwind readnone define i64 @test85(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { ; X86-LABEL: test85: @@ -220,16 +220,16 @@ define i64 @test85(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { entry: %0 = bitcast <1 x i64> %b to <2 x i32> %1 = bitcast <1 x i64> %a to <2 x i32> - %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx - %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx - %2 = tail call x86_mmx @llvm.x86.mmx.pcmpeq.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind - %3 = bitcast x86_mmx %2 to <2 x i32> + %mmx_var.i = bitcast <2 x i32> %1 to <1 x i64> + %mmx_var1.i = bitcast <2 x i32> %0 to <1 x i64> + %2 = tail call <1 x i64> @llvm.x86.mmx.pcmpeq.d(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind + %3 = bitcast <1 x i64> %2 to <2 x i32> %4 = bitcast <2 x i32> %3 to <1 x i64> %5 = extractelement <1 x i64> %4, i32 0 ret i64 %5 } -declare x86_mmx @llvm.x86.mmx.pcmpeq.w(x86_mmx, x86_mmx) nounwind readnone +declare <1 x i64> @llvm.x86.mmx.pcmpeq.w(<1 x i64>, <1 x i64>) nounwind readnone define i64 @test84(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { ; X86-LABEL: test84: @@ -265,16 +265,16 @@ define i64 @test84(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { entry: %0 = bitcast <1 x i64> %b to <4 x i16> %1 = bitcast <1 x i64> %a to <4 x i16> - %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx - %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx - %2 = tail call x86_mmx @llvm.x86.mmx.pcmpeq.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind - %3 = bitcast x86_mmx %2 to <4 x i16> + %mmx_var.i = bitcast <4 x i16> %1 to <1 x i64> + %mmx_var1.i = bitcast <4 x i16> %0 to <1 x i64> + %2 = tail call <1 x i64> @llvm.x86.mmx.pcmpeq.w(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind + %3 = bitcast <1 x i64> %2 to <4 x i16> %4 = bitcast <4 x i16> %3 to <1 x i64> %5 = extractelement <1 x i64> %4, i32 0 ret i64 %5 } -declare x86_mmx @llvm.x86.mmx.pcmpeq.b(x86_mmx, x86_mmx) nounwind readnone +declare <1 x i64> @llvm.x86.mmx.pcmpeq.b(<1 x i64>, <1 x i64>) nounwind readnone define i64 @test83(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { ; X86-LABEL: test83: @@ -310,16 +310,16 @@ define i64 @test83(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { entry: %0 = bitcast <1 x i64> %b to <8 x i8> %1 = bitcast <1 x i64> %a to <8 x i8> - %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx - %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx - %2 = tail call x86_mmx @llvm.x86.mmx.pcmpeq.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind - %3 = bitcast x86_mmx %2 to <8 x i8> + %mmx_var.i = bitcast <8 x i8> %1 to <1 x i64> + %mmx_var1.i = bitcast <8 x i8> %0 to <1 x i64> + %2 = tail call <1 x i64> @llvm.x86.mmx.pcmpeq.b(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind + %3 = bitcast <1 x i64> %2 to <8 x i8> %4 = bitcast <8 x i8> %3 to <1 x i64> %5 = extractelement <1 x i64> %4, i32 0 ret i64 %5 } -declare x86_mmx @llvm.x86.mmx.punpckldq(x86_mmx, x86_mmx) nounwind readnone +declare <1 x i64> @llvm.x86.mmx.punpckldq(<1 x i64>, <1 x i64>) nounwind readnone define i64 @test82(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { ; X86-LABEL: test82: @@ -355,16 +355,16 @@ define i64 @test82(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { entry: %0 = bitcast <1 x i64> %b to <2 x i32> %1 = bitcast <1 x i64> %a to <2 x i32> - %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx - %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx - %2 = tail call x86_mmx 
@llvm.x86.mmx.punpckldq(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind - %3 = bitcast x86_mmx %2 to <2 x i32> + %mmx_var.i = bitcast <2 x i32> %1 to <1 x i64> + %mmx_var1.i = bitcast <2 x i32> %0 to <1 x i64> + %2 = tail call <1 x i64> @llvm.x86.mmx.punpckldq(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind + %3 = bitcast <1 x i64> %2 to <2 x i32> %4 = bitcast <2 x i32> %3 to <1 x i64> %5 = extractelement <1 x i64> %4, i32 0 ret i64 %5 } -declare x86_mmx @llvm.x86.mmx.punpcklwd(x86_mmx, x86_mmx) nounwind readnone +declare <1 x i64> @llvm.x86.mmx.punpcklwd(<1 x i64>, <1 x i64>) nounwind readnone define i64 @test81(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { ; X86-LABEL: test81: @@ -400,16 +400,16 @@ define i64 @test81(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { entry: %0 = bitcast <1 x i64> %b to <4 x i16> %1 = bitcast <1 x i64> %a to <4 x i16> - %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx - %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx - %2 = tail call x86_mmx @llvm.x86.mmx.punpcklwd(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind - %3 = bitcast x86_mmx %2 to <4 x i16> + %mmx_var.i = bitcast <4 x i16> %1 to <1 x i64> + %mmx_var1.i = bitcast <4 x i16> %0 to <1 x i64> + %2 = tail call <1 x i64> @llvm.x86.mmx.punpcklwd(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind + %3 = bitcast <1 x i64> %2 to <4 x i16> %4 = bitcast <4 x i16> %3 to <1 x i64> %5 = extractelement <1 x i64> %4, i32 0 ret i64 %5 } -declare x86_mmx @llvm.x86.mmx.punpcklbw(x86_mmx, x86_mmx) nounwind readnone +declare <1 x i64> @llvm.x86.mmx.punpcklbw(<1 x i64>, <1 x i64>) nounwind readnone define i64 @test80(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { ; X86-LABEL: test80: @@ -445,16 +445,16 @@ define i64 @test80(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { entry: %0 = bitcast <1 x i64> %b to <8 x i8> %1 = bitcast <1 x i64> %a to <8 x i8> - %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx - %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx - %2 = tail call x86_mmx @llvm.x86.mmx.punpcklbw(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind - %3 = bitcast x86_mmx %2 to <8 x i8> + %mmx_var.i = bitcast <8 x i8> %1 to <1 x i64> + %mmx_var1.i = bitcast <8 x i8> %0 to <1 x i64> + %2 = tail call <1 x i64> @llvm.x86.mmx.punpcklbw(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind + %3 = bitcast <1 x i64> %2 to <8 x i8> %4 = bitcast <8 x i8> %3 to <1 x i64> %5 = extractelement <1 x i64> %4, i32 0 ret i64 %5 } -declare x86_mmx @llvm.x86.mmx.punpckhdq(x86_mmx, x86_mmx) nounwind readnone +declare <1 x i64> @llvm.x86.mmx.punpckhdq(<1 x i64>, <1 x i64>) nounwind readnone define i64 @test79(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { ; X86-LABEL: test79: @@ -490,16 +490,16 @@ define i64 @test79(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { entry: %0 = bitcast <1 x i64> %b to <2 x i32> %1 = bitcast <1 x i64> %a to <2 x i32> - %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx - %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx - %2 = tail call x86_mmx @llvm.x86.mmx.punpckhdq(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind - %3 = bitcast x86_mmx %2 to <2 x i32> + %mmx_var.i = bitcast <2 x i32> %1 to <1 x i64> + %mmx_var1.i = bitcast <2 x i32> %0 to <1 x i64> + %2 = tail call <1 x i64> @llvm.x86.mmx.punpckhdq(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind + %3 = bitcast <1 x i64> %2 to <2 x i32> %4 = bitcast <2 x i32> %3 to <1 x i64> %5 = extractelement <1 x i64> %4, i32 0 ret i64 %5 } -declare x86_mmx @llvm.x86.mmx.punpckhwd(x86_mmx, x86_mmx) 
nounwind readnone +declare <1 x i64> @llvm.x86.mmx.punpckhwd(<1 x i64>, <1 x i64>) nounwind readnone define i64 @test78(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { ; X86-LABEL: test78: @@ -535,16 +535,16 @@ define i64 @test78(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { entry: %0 = bitcast <1 x i64> %b to <4 x i16> %1 = bitcast <1 x i64> %a to <4 x i16> - %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx - %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx - %2 = tail call x86_mmx @llvm.x86.mmx.punpckhwd(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind - %3 = bitcast x86_mmx %2 to <4 x i16> + %mmx_var.i = bitcast <4 x i16> %1 to <1 x i64> + %mmx_var1.i = bitcast <4 x i16> %0 to <1 x i64> + %2 = tail call <1 x i64> @llvm.x86.mmx.punpckhwd(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind + %3 = bitcast <1 x i64> %2 to <4 x i16> %4 = bitcast <4 x i16> %3 to <1 x i64> %5 = extractelement <1 x i64> %4, i32 0 ret i64 %5 } -declare x86_mmx @llvm.x86.mmx.punpckhbw(x86_mmx, x86_mmx) nounwind readnone +declare <1 x i64> @llvm.x86.mmx.punpckhbw(<1 x i64>, <1 x i64>) nounwind readnone define i64 @test77(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { ; X86-LABEL: test77: @@ -580,16 +580,16 @@ define i64 @test77(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { entry: %0 = bitcast <1 x i64> %b to <8 x i8> %1 = bitcast <1 x i64> %a to <8 x i8> - %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx - %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx - %2 = tail call x86_mmx @llvm.x86.mmx.punpckhbw(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind - %3 = bitcast x86_mmx %2 to <8 x i8> + %mmx_var.i = bitcast <8 x i8> %1 to <1 x i64> + %mmx_var1.i = bitcast <8 x i8> %0 to <1 x i64> + %2 = tail call <1 x i64> @llvm.x86.mmx.punpckhbw(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind + %3 = bitcast <1 x i64> %2 to <8 x i8> %4 = bitcast <8 x i8> %3 to <1 x i64> %5 = extractelement <1 x i64> %4, i32 0 ret i64 %5 } -declare x86_mmx @llvm.x86.mmx.packuswb(x86_mmx, x86_mmx) nounwind readnone +declare <1 x i64> @llvm.x86.mmx.packuswb(<1 x i64>, <1 x i64>) nounwind readnone define i64 @test76(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { ; X86-LABEL: test76: @@ -625,16 +625,16 @@ define i64 @test76(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { entry: %0 = bitcast <1 x i64> %b to <4 x i16> %1 = bitcast <1 x i64> %a to <4 x i16> - %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx - %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx - %2 = tail call x86_mmx @llvm.x86.mmx.packuswb(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind - %3 = bitcast x86_mmx %2 to <8 x i8> + %mmx_var.i = bitcast <4 x i16> %1 to <1 x i64> + %mmx_var1.i = bitcast <4 x i16> %0 to <1 x i64> + %2 = tail call <1 x i64> @llvm.x86.mmx.packuswb(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind + %3 = bitcast <1 x i64> %2 to <8 x i8> %4 = bitcast <8 x i8> %3 to <1 x i64> %5 = extractelement <1 x i64> %4, i32 0 ret i64 %5 } -declare x86_mmx @llvm.x86.mmx.packssdw(x86_mmx, x86_mmx) nounwind readnone +declare <1 x i64> @llvm.x86.mmx.packssdw(<1 x i64>, <1 x i64>) nounwind readnone define i64 @test75(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { ; X86-LABEL: test75: @@ -670,16 +670,16 @@ define i64 @test75(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { entry: %0 = bitcast <1 x i64> %b to <2 x i32> %1 = bitcast <1 x i64> %a to <2 x i32> - %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx - %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx - %2 = tail call x86_mmx 
@llvm.x86.mmx.packssdw(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind - %3 = bitcast x86_mmx %2 to <4 x i16> + %mmx_var.i = bitcast <2 x i32> %1 to <1 x i64> + %mmx_var1.i = bitcast <2 x i32> %0 to <1 x i64> + %2 = tail call <1 x i64> @llvm.x86.mmx.packssdw(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind + %3 = bitcast <1 x i64> %2 to <4 x i16> %4 = bitcast <4 x i16> %3 to <1 x i64> %5 = extractelement <1 x i64> %4, i32 0 ret i64 %5 } -declare x86_mmx @llvm.x86.mmx.packsswb(x86_mmx, x86_mmx) nounwind readnone +declare <1 x i64> @llvm.x86.mmx.packsswb(<1 x i64>, <1 x i64>) nounwind readnone define i64 @test74(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { ; X86-LABEL: test74: @@ -715,16 +715,16 @@ define i64 @test74(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { entry: %0 = bitcast <1 x i64> %b to <4 x i16> %1 = bitcast <1 x i64> %a to <4 x i16> - %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx - %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx - %2 = tail call x86_mmx @llvm.x86.mmx.packsswb(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind - %3 = bitcast x86_mmx %2 to <8 x i8> + %mmx_var.i = bitcast <4 x i16> %1 to <1 x i64> + %mmx_var1.i = bitcast <4 x i16> %0 to <1 x i64> + %2 = tail call <1 x i64> @llvm.x86.mmx.packsswb(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind + %3 = bitcast <1 x i64> %2 to <8 x i8> %4 = bitcast <8 x i8> %3 to <1 x i64> %5 = extractelement <1 x i64> %4, i32 0 ret i64 %5 } -declare x86_mmx @llvm.x86.mmx.psrai.d(x86_mmx, i32) nounwind readnone +declare <1 x i64> @llvm.x86.mmx.psrai.d(<1 x i64>, i32) nounwind readnone define i64 @test73(<1 x i64> %a) nounwind readnone optsize ssp { ; X86-LABEL: test73: @@ -754,15 +754,15 @@ define i64 @test73(<1 x i64> %a) nounwind readnone optsize ssp { ; X64-NEXT: retq entry: %0 = bitcast <1 x i64> %a to <2 x i32> - %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx - %1 = tail call x86_mmx @llvm.x86.mmx.psrai.d(x86_mmx %mmx_var.i, i32 3) nounwind - %2 = bitcast x86_mmx %1 to <2 x i32> + %mmx_var.i = bitcast <2 x i32> %0 to <1 x i64> + %1 = tail call <1 x i64> @llvm.x86.mmx.psrai.d(<1 x i64> %mmx_var.i, i32 3) nounwind + %2 = bitcast <1 x i64> %1 to <2 x i32> %3 = bitcast <2 x i32> %2 to <1 x i64> %4 = extractelement <1 x i64> %3, i32 0 ret i64 %4 } -declare x86_mmx @llvm.x86.mmx.psrai.w(x86_mmx, i32) nounwind readnone +declare <1 x i64> @llvm.x86.mmx.psrai.w(<1 x i64>, i32) nounwind readnone define i64 @test72(<1 x i64> %a) nounwind readnone optsize ssp { ; X86-LABEL: test72: @@ -792,9 +792,9 @@ define i64 @test72(<1 x i64> %a) nounwind readnone optsize ssp { ; X64-NEXT: retq entry: %0 = bitcast <1 x i64> %a to <4 x i16> - %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx - %1 = tail call x86_mmx @llvm.x86.mmx.psrai.w(x86_mmx %mmx_var.i, i32 3) nounwind - %2 = bitcast x86_mmx %1 to <4 x i16> + %mmx_var.i = bitcast <4 x i16> %0 to <1 x i64> + %1 = tail call <1 x i64> @llvm.x86.mmx.psrai.w(<1 x i64> %mmx_var.i, i32 3) nounwind + %2 = bitcast <1 x i64> %1 to <4 x i16> %3 = bitcast <4 x i16> %2 to <1 x i64> %4 = extractelement <1 x i64> %3, i32 0 ret i64 %4 @@ -825,15 +825,15 @@ define i64 @test72_2(<1 x i64> %a) nounwind readnone optsize ssp { ; X64-NEXT: retq entry: %0 = bitcast <1 x i64> %a to <4 x i16> - %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx - %1 = tail call x86_mmx @llvm.x86.mmx.psrai.w(x86_mmx %mmx_var.i, i32 0) nounwind - %2 = bitcast x86_mmx %1 to <4 x i16> + %mmx_var.i = bitcast <4 x i16> %0 to <1 x i64> + %1 = tail call <1 x i64> @llvm.x86.mmx.psrai.w(<1 x i64> %mmx_var.i, i32 0) nounwind + %2 
= bitcast <1 x i64> %1 to <4 x i16> %3 = bitcast <4 x i16> %2 to <1 x i64> %4 = extractelement <1 x i64> %3, i32 0 ret i64 %4 } -declare x86_mmx @llvm.x86.mmx.psrli.q(x86_mmx, i32) nounwind readnone +declare <1 x i64> @llvm.x86.mmx.psrli.q(<1 x i64>, i32) nounwind readnone define i64 @test71(<1 x i64> %a) nounwind readnone optsize ssp { ; X86-LABEL: test71: @@ -859,13 +859,13 @@ define i64 @test71(<1 x i64> %a) nounwind readnone optsize ssp { ; X64-NEXT: retq entry: %0 = extractelement <1 x i64> %a, i32 0 - %mmx_var.i = bitcast i64 %0 to x86_mmx - %1 = tail call x86_mmx @llvm.x86.mmx.psrli.q(x86_mmx %mmx_var.i, i32 3) nounwind - %2 = bitcast x86_mmx %1 to i64 + %mmx_var.i = bitcast i64 %0 to <1 x i64> + %1 = tail call <1 x i64> @llvm.x86.mmx.psrli.q(<1 x i64> %mmx_var.i, i32 3) nounwind + %2 = bitcast <1 x i64> %1 to i64 ret i64 %2 } -declare x86_mmx @llvm.x86.mmx.psrli.d(x86_mmx, i32) nounwind readnone +declare <1 x i64> @llvm.x86.mmx.psrli.d(<1 x i64>, i32) nounwind readnone define i64 @test70(<1 x i64> %a) nounwind readnone optsize ssp { ; X86-LABEL: test70: @@ -895,9 +895,9 @@ define i64 @test70(<1 x i64> %a) nounwind readnone optsize ssp { ; X64-NEXT: retq entry: %0 = bitcast <1 x i64> %a to <2 x i32> - %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx - %1 = tail call x86_mmx @llvm.x86.mmx.psrli.d(x86_mmx %mmx_var.i, i32 3) nounwind - %2 = bitcast x86_mmx %1 to <2 x i32> + %mmx_var.i = bitcast <2 x i32> %0 to <1 x i64> + %1 = tail call <1 x i64> @llvm.x86.mmx.psrli.d(<1 x i64> %mmx_var.i, i32 3) nounwind + %2 = bitcast <1 x i64> %1 to <2 x i32> %3 = bitcast <2 x i32> %2 to <1 x i64> %4 = extractelement <1 x i64> %3, i32 0 ret i64 %4 @@ -928,15 +928,15 @@ define i64 @test70_2(<1 x i64> %a) nounwind readnone optsize ssp { ; X64-NEXT: retq entry: %0 = bitcast <1 x i64> %a to <2 x i32> - %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx - %1 = tail call x86_mmx @llvm.x86.mmx.psrli.d(x86_mmx %mmx_var.i, i32 0) nounwind - %2 = bitcast x86_mmx %1 to <2 x i32> + %mmx_var.i = bitcast <2 x i32> %0 to <1 x i64> + %1 = tail call <1 x i64> @llvm.x86.mmx.psrli.d(<1 x i64> %mmx_var.i, i32 0) nounwind + %2 = bitcast <1 x i64> %1 to <2 x i32> %3 = bitcast <2 x i32> %2 to <1 x i64> %4 = extractelement <1 x i64> %3, i32 0 ret i64 %4 } -declare x86_mmx @llvm.x86.mmx.psrli.w(x86_mmx, i32) nounwind readnone +declare <1 x i64> @llvm.x86.mmx.psrli.w(<1 x i64>, i32) nounwind readnone define i64 @test69(<1 x i64> %a) nounwind readnone optsize ssp { ; X86-LABEL: test69: @@ -966,15 +966,15 @@ define i64 @test69(<1 x i64> %a) nounwind readnone optsize ssp { ; X64-NEXT: retq entry: %0 = bitcast <1 x i64> %a to <4 x i16> - %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx - %1 = tail call x86_mmx @llvm.x86.mmx.psrli.w(x86_mmx %mmx_var.i, i32 3) nounwind - %2 = bitcast x86_mmx %1 to <4 x i16> + %mmx_var.i = bitcast <4 x i16> %0 to <1 x i64> + %1 = tail call <1 x i64> @llvm.x86.mmx.psrli.w(<1 x i64> %mmx_var.i, i32 3) nounwind + %2 = bitcast <1 x i64> %1 to <4 x i16> %3 = bitcast <4 x i16> %2 to <1 x i64> %4 = extractelement <1 x i64> %3, i32 0 ret i64 %4 } -declare x86_mmx @llvm.x86.mmx.pslli.q(x86_mmx, i32) nounwind readnone +declare <1 x i64> @llvm.x86.mmx.pslli.q(<1 x i64>, i32) nounwind readnone define i64 @test68(<1 x i64> %a) nounwind readnone optsize ssp { ; X86-LABEL: test68: @@ -1000,13 +1000,13 @@ define i64 @test68(<1 x i64> %a) nounwind readnone optsize ssp { ; X64-NEXT: retq entry: %0 = extractelement <1 x i64> %a, i32 0 - %mmx_var.i = bitcast i64 %0 to x86_mmx - %1 = tail call x86_mmx @llvm.x86.mmx.pslli.q(x86_mmx 
%mmx_var.i, i32 3) nounwind - %2 = bitcast x86_mmx %1 to i64 + %mmx_var.i = bitcast i64 %0 to <1 x i64> + %1 = tail call <1 x i64> @llvm.x86.mmx.pslli.q(<1 x i64> %mmx_var.i, i32 3) nounwind + %2 = bitcast <1 x i64> %1 to i64 ret i64 %2 } -declare x86_mmx @llvm.x86.mmx.pslli.d(x86_mmx, i32) nounwind readnone +declare <1 x i64> @llvm.x86.mmx.pslli.d(<1 x i64>, i32) nounwind readnone define i64 @test67(<1 x i64> %a) nounwind readnone optsize ssp { ; X86-LABEL: test67: @@ -1036,15 +1036,15 @@ define i64 @test67(<1 x i64> %a) nounwind readnone optsize ssp { ; X64-NEXT: retq entry: %0 = bitcast <1 x i64> %a to <2 x i32> - %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx - %1 = tail call x86_mmx @llvm.x86.mmx.pslli.d(x86_mmx %mmx_var.i, i32 3) nounwind - %2 = bitcast x86_mmx %1 to <2 x i32> + %mmx_var.i = bitcast <2 x i32> %0 to <1 x i64> + %1 = tail call <1 x i64> @llvm.x86.mmx.pslli.d(<1 x i64> %mmx_var.i, i32 3) nounwind + %2 = bitcast <1 x i64> %1 to <2 x i32> %3 = bitcast <2 x i32> %2 to <1 x i64> %4 = extractelement <1 x i64> %3, i32 0 ret i64 %4 } -declare x86_mmx @llvm.x86.mmx.pslli.w(x86_mmx, i32) nounwind readnone +declare <1 x i64> @llvm.x86.mmx.pslli.w(<1 x i64>, i32) nounwind readnone define i64 @test66(<1 x i64> %a) nounwind readnone optsize ssp { ; X86-LABEL: test66: @@ -1074,9 +1074,9 @@ define i64 @test66(<1 x i64> %a) nounwind readnone optsize ssp { ; X64-NEXT: retq entry: %0 = bitcast <1 x i64> %a to <4 x i16> - %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx - %1 = tail call x86_mmx @llvm.x86.mmx.pslli.w(x86_mmx %mmx_var.i, i32 3) nounwind - %2 = bitcast x86_mmx %1 to <4 x i16> + %mmx_var.i = bitcast <4 x i16> %0 to <1 x i64> + %1 = tail call <1 x i64> @llvm.x86.mmx.pslli.w(<1 x i64> %mmx_var.i, i32 3) nounwind + %2 = bitcast <1 x i64> %1 to <4 x i16> %3 = bitcast <4 x i16> %2 to <1 x i64> %4 = extractelement <1 x i64> %3, i32 0 ret i64 %4 @@ -1107,15 +1107,15 @@ define i64 @test66_2(<1 x i64> %a) nounwind readnone optsize ssp { ; X64-NEXT: retq entry: %0 = bitcast <1 x i64> %a to <4 x i16> - %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx - %1 = tail call x86_mmx @llvm.x86.mmx.pslli.w(x86_mmx %mmx_var.i, i32 0) nounwind - %2 = bitcast x86_mmx %1 to <4 x i16> + %mmx_var.i = bitcast <4 x i16> %0 to <1 x i64> + %1 = tail call <1 x i64> @llvm.x86.mmx.pslli.w(<1 x i64> %mmx_var.i, i32 0) nounwind + %2 = bitcast <1 x i64> %1 to <4 x i16> %3 = bitcast <4 x i16> %2 to <1 x i64> %4 = extractelement <1 x i64> %3, i32 0 ret i64 %4 } -declare x86_mmx @llvm.x86.mmx.psra.d(x86_mmx, x86_mmx) nounwind readnone +declare <1 x i64> @llvm.x86.mmx.psra.d(<1 x i64>, <1 x i64>) nounwind readnone define i64 @test65(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { ; X86-LABEL: test65: @@ -1146,17 +1146,17 @@ define i64 @test65(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { ; X64-NEXT: retq entry: %0 = bitcast <1 x i64> %a to <2 x i32> - %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx + %mmx_var.i = bitcast <2 x i32> %0 to <1 x i64> %1 = extractelement <1 x i64> %b, i32 0 - %mmx_var1.i = bitcast i64 %1 to x86_mmx - %2 = tail call x86_mmx @llvm.x86.mmx.psra.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind - %3 = bitcast x86_mmx %2 to <2 x i32> + %mmx_var1.i = bitcast i64 %1 to <1 x i64> + %2 = tail call <1 x i64> @llvm.x86.mmx.psra.d(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind + %3 = bitcast <1 x i64> %2 to <2 x i32> %4 = bitcast <2 x i32> %3 to <1 x i64> %5 = extractelement <1 x i64> %4, i32 0 ret i64 %5 } -declare x86_mmx @llvm.x86.mmx.psra.w(x86_mmx, x86_mmx) nounwind 
readnone +declare <1 x i64> @llvm.x86.mmx.psra.w(<1 x i64>, <1 x i64>) nounwind readnone define i64 @test64(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { ; X86-LABEL: test64: @@ -1187,17 +1187,17 @@ define i64 @test64(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { ; X64-NEXT: retq entry: %0 = bitcast <1 x i64> %a to <4 x i16> - %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx + %mmx_var.i = bitcast <4 x i16> %0 to <1 x i64> %1 = extractelement <1 x i64> %b, i32 0 - %mmx_var1.i = bitcast i64 %1 to x86_mmx - %2 = tail call x86_mmx @llvm.x86.mmx.psra.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind - %3 = bitcast x86_mmx %2 to <4 x i16> + %mmx_var1.i = bitcast i64 %1 to <1 x i64> + %2 = tail call <1 x i64> @llvm.x86.mmx.psra.w(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind + %3 = bitcast <1 x i64> %2 to <4 x i16> %4 = bitcast <4 x i16> %3 to <1 x i64> %5 = extractelement <1 x i64> %4, i32 0 ret i64 %5 } -declare x86_mmx @llvm.x86.mmx.psrl.q(x86_mmx, x86_mmx) nounwind readnone +declare <1 x i64> @llvm.x86.mmx.psrl.q(<1 x i64>, <1 x i64>) nounwind readnone define i64 @test63(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { ; X86-LABEL: test63: @@ -1224,15 +1224,15 @@ define i64 @test63(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { ; X64-NEXT: retq entry: %0 = extractelement <1 x i64> %a, i32 0 - %mmx_var.i = bitcast i64 %0 to x86_mmx + %mmx_var.i = bitcast i64 %0 to <1 x i64> %1 = extractelement <1 x i64> %b, i32 0 - %mmx_var1.i = bitcast i64 %1 to x86_mmx - %2 = tail call x86_mmx @llvm.x86.mmx.psrl.q(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind - %3 = bitcast x86_mmx %2 to i64 + %mmx_var1.i = bitcast i64 %1 to <1 x i64> + %2 = tail call <1 x i64> @llvm.x86.mmx.psrl.q(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind + %3 = bitcast <1 x i64> %2 to i64 ret i64 %3 } -declare x86_mmx @llvm.x86.mmx.psrl.d(x86_mmx, x86_mmx) nounwind readnone +declare <1 x i64> @llvm.x86.mmx.psrl.d(<1 x i64>, <1 x i64>) nounwind readnone define i64 @test62(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { ; X86-LABEL: test62: @@ -1263,17 +1263,17 @@ define i64 @test62(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { ; X64-NEXT: retq entry: %0 = bitcast <1 x i64> %a to <2 x i32> - %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx + %mmx_var.i = bitcast <2 x i32> %0 to <1 x i64> %1 = extractelement <1 x i64> %b, i32 0 - %mmx_var1.i = bitcast i64 %1 to x86_mmx - %2 = tail call x86_mmx @llvm.x86.mmx.psrl.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind - %3 = bitcast x86_mmx %2 to <2 x i32> + %mmx_var1.i = bitcast i64 %1 to <1 x i64> + %2 = tail call <1 x i64> @llvm.x86.mmx.psrl.d(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind + %3 = bitcast <1 x i64> %2 to <2 x i32> %4 = bitcast <2 x i32> %3 to <1 x i64> %5 = extractelement <1 x i64> %4, i32 0 ret i64 %5 } -declare x86_mmx @llvm.x86.mmx.psrl.w(x86_mmx, x86_mmx) nounwind readnone +declare <1 x i64> @llvm.x86.mmx.psrl.w(<1 x i64>, <1 x i64>) nounwind readnone define i64 @test61(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { ; X86-LABEL: test61: @@ -1304,17 +1304,17 @@ define i64 @test61(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { ; X64-NEXT: retq entry: %0 = bitcast <1 x i64> %a to <4 x i16> - %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx + %mmx_var.i = bitcast <4 x i16> %0 to <1 x i64> %1 = extractelement <1 x i64> %b, i32 0 - %mmx_var1.i = bitcast i64 %1 to x86_mmx - %2 = tail call x86_mmx @llvm.x86.mmx.psrl.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) 
nounwind - %3 = bitcast x86_mmx %2 to <4 x i16> + %mmx_var1.i = bitcast i64 %1 to <1 x i64> + %2 = tail call <1 x i64> @llvm.x86.mmx.psrl.w(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind + %3 = bitcast <1 x i64> %2 to <4 x i16> %4 = bitcast <4 x i16> %3 to <1 x i64> %5 = extractelement <1 x i64> %4, i32 0 ret i64 %5 } -declare x86_mmx @llvm.x86.mmx.psll.q(x86_mmx, x86_mmx) nounwind readnone +declare <1 x i64> @llvm.x86.mmx.psll.q(<1 x i64>, <1 x i64>) nounwind readnone define i64 @test60(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { ; X86-LABEL: test60: @@ -1341,15 +1341,15 @@ define i64 @test60(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { ; X64-NEXT: retq entry: %0 = extractelement <1 x i64> %a, i32 0 - %mmx_var.i = bitcast i64 %0 to x86_mmx + %mmx_var.i = bitcast i64 %0 to <1 x i64> %1 = extractelement <1 x i64> %b, i32 0 - %mmx_var1.i = bitcast i64 %1 to x86_mmx - %2 = tail call x86_mmx @llvm.x86.mmx.psll.q(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind - %3 = bitcast x86_mmx %2 to i64 + %mmx_var1.i = bitcast i64 %1 to <1 x i64> + %2 = tail call <1 x i64> @llvm.x86.mmx.psll.q(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind + %3 = bitcast <1 x i64> %2 to i64 ret i64 %3 } -declare x86_mmx @llvm.x86.mmx.psll.d(x86_mmx, x86_mmx) nounwind readnone +declare <1 x i64> @llvm.x86.mmx.psll.d(<1 x i64>, <1 x i64>) nounwind readnone define i64 @test59(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { ; X86-LABEL: test59: @@ -1380,17 +1380,17 @@ define i64 @test59(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { ; X64-NEXT: retq entry: %0 = bitcast <1 x i64> %a to <2 x i32> - %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx + %mmx_var.i = bitcast <2 x i32> %0 to <1 x i64> %1 = extractelement <1 x i64> %b, i32 0 - %mmx_var1.i = bitcast i64 %1 to x86_mmx - %2 = tail call x86_mmx @llvm.x86.mmx.psll.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind - %3 = bitcast x86_mmx %2 to <2 x i32> + %mmx_var1.i = bitcast i64 %1 to <1 x i64> + %2 = tail call <1 x i64> @llvm.x86.mmx.psll.d(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind + %3 = bitcast <1 x i64> %2 to <2 x i32> %4 = bitcast <2 x i32> %3 to <1 x i64> %5 = extractelement <1 x i64> %4, i32 0 ret i64 %5 } -declare x86_mmx @llvm.x86.mmx.psll.w(x86_mmx, x86_mmx) nounwind readnone +declare <1 x i64> @llvm.x86.mmx.psll.w(<1 x i64>, <1 x i64>) nounwind readnone define i64 @test58(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { ; X86-LABEL: test58: @@ -1421,17 +1421,17 @@ define i64 @test58(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { ; X64-NEXT: retq entry: %0 = bitcast <1 x i64> %a to <4 x i16> - %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx + %mmx_var.i = bitcast <4 x i16> %0 to <1 x i64> %1 = extractelement <1 x i64> %b, i32 0 - %mmx_var1.i = bitcast i64 %1 to x86_mmx - %2 = tail call x86_mmx @llvm.x86.mmx.psll.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind - %3 = bitcast x86_mmx %2 to <4 x i16> + %mmx_var1.i = bitcast i64 %1 to <1 x i64> + %2 = tail call <1 x i64> @llvm.x86.mmx.psll.w(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind + %3 = bitcast <1 x i64> %2 to <4 x i16> %4 = bitcast <4 x i16> %3 to <1 x i64> %5 = extractelement <1 x i64> %4, i32 0 ret i64 %5 } -declare x86_mmx @llvm.x86.mmx.pxor(x86_mmx, x86_mmx) nounwind readnone +declare <1 x i64> @llvm.x86.mmx.pxor(<1 x i64>, <1 x i64>) nounwind readnone define i64 @test56(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { ; X86-LABEL: test56: @@ -1467,16 +1467,16 @@ define i64 
@test56(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { entry: %0 = bitcast <1 x i64> %b to <2 x i32> %1 = bitcast <1 x i64> %a to <2 x i32> - %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx - %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx - %2 = tail call x86_mmx @llvm.x86.mmx.pxor(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind - %3 = bitcast x86_mmx %2 to <2 x i32> + %mmx_var.i = bitcast <2 x i32> %1 to <1 x i64> + %mmx_var1.i = bitcast <2 x i32> %0 to <1 x i64> + %2 = tail call <1 x i64> @llvm.x86.mmx.pxor(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind + %3 = bitcast <1 x i64> %2 to <2 x i32> %4 = bitcast <2 x i32> %3 to <1 x i64> %5 = extractelement <1 x i64> %4, i32 0 ret i64 %5 } -declare x86_mmx @llvm.x86.mmx.por(x86_mmx, x86_mmx) nounwind readnone +declare <1 x i64> @llvm.x86.mmx.por(<1 x i64>, <1 x i64>) nounwind readnone define i64 @test55(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { ; X86-LABEL: test55: @@ -1512,16 +1512,16 @@ define i64 @test55(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { entry: %0 = bitcast <1 x i64> %b to <2 x i32> %1 = bitcast <1 x i64> %a to <2 x i32> - %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx - %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx - %2 = tail call x86_mmx @llvm.x86.mmx.por(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind - %3 = bitcast x86_mmx %2 to <2 x i32> + %mmx_var.i = bitcast <2 x i32> %1 to <1 x i64> + %mmx_var1.i = bitcast <2 x i32> %0 to <1 x i64> + %2 = tail call <1 x i64> @llvm.x86.mmx.por(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind + %3 = bitcast <1 x i64> %2 to <2 x i32> %4 = bitcast <2 x i32> %3 to <1 x i64> %5 = extractelement <1 x i64> %4, i32 0 ret i64 %5 } -declare x86_mmx @llvm.x86.mmx.pandn(x86_mmx, x86_mmx) nounwind readnone +declare <1 x i64> @llvm.x86.mmx.pandn(<1 x i64>, <1 x i64>) nounwind readnone define i64 @test54(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { ; X86-LABEL: test54: @@ -1557,16 +1557,16 @@ define i64 @test54(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { entry: %0 = bitcast <1 x i64> %b to <2 x i32> %1 = bitcast <1 x i64> %a to <2 x i32> - %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx - %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx - %2 = tail call x86_mmx @llvm.x86.mmx.pandn(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind - %3 = bitcast x86_mmx %2 to <2 x i32> + %mmx_var.i = bitcast <2 x i32> %1 to <1 x i64> + %mmx_var1.i = bitcast <2 x i32> %0 to <1 x i64> + %2 = tail call <1 x i64> @llvm.x86.mmx.pandn(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind + %3 = bitcast <1 x i64> %2 to <2 x i32> %4 = bitcast <2 x i32> %3 to <1 x i64> %5 = extractelement <1 x i64> %4, i32 0 ret i64 %5 } -declare x86_mmx @llvm.x86.mmx.pand(x86_mmx, x86_mmx) nounwind readnone +declare <1 x i64> @llvm.x86.mmx.pand(<1 x i64>, <1 x i64>) nounwind readnone define i64 @test53(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { ; X86-LABEL: test53: @@ -1602,16 +1602,16 @@ define i64 @test53(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { entry: %0 = bitcast <1 x i64> %b to <2 x i32> %1 = bitcast <1 x i64> %a to <2 x i32> - %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx - %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx - %2 = tail call x86_mmx @llvm.x86.mmx.pand(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind - %3 = bitcast x86_mmx %2 to <2 x i32> + %mmx_var.i = bitcast <2 x i32> %1 to <1 x i64> + %mmx_var1.i = bitcast <2 x i32> %0 to <1 x i64> + %2 = tail call <1 x i64> @llvm.x86.mmx.pand(<1 x i64> %mmx_var.i, <1 x i64> 
%mmx_var1.i) nounwind + %3 = bitcast <1 x i64> %2 to <2 x i32> %4 = bitcast <2 x i32> %3 to <1 x i64> %5 = extractelement <1 x i64> %4, i32 0 ret i64 %5 } -declare x86_mmx @llvm.x86.mmx.pmull.w(x86_mmx, x86_mmx) nounwind readnone +declare <1 x i64> @llvm.x86.mmx.pmull.w(<1 x i64>, <1 x i64>) nounwind readnone define i64 @test52(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { ; X86-LABEL: test52: @@ -1647,10 +1647,10 @@ define i64 @test52(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { entry: %0 = bitcast <1 x i64> %b to <4 x i16> %1 = bitcast <1 x i64> %a to <4 x i16> - %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx - %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx - %2 = tail call x86_mmx @llvm.x86.mmx.pmull.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind - %3 = bitcast x86_mmx %2 to <4 x i16> + %mmx_var.i = bitcast <4 x i16> %1 to <1 x i64> + %mmx_var1.i = bitcast <4 x i16> %0 to <1 x i64> + %2 = tail call <1 x i64> @llvm.x86.mmx.pmull.w(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind + %3 = bitcast <1 x i64> %2 to <4 x i16> %4 = bitcast <4 x i16> %3 to <1 x i64> %5 = extractelement <1 x i64> %4, i32 0 ret i64 %5 @@ -1690,16 +1690,16 @@ define i64 @test51(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { entry: %0 = bitcast <1 x i64> %b to <4 x i16> %1 = bitcast <1 x i64> %a to <4 x i16> - %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx - %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx - %2 = tail call x86_mmx @llvm.x86.mmx.pmull.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind - %3 = bitcast x86_mmx %2 to <4 x i16> + %mmx_var.i = bitcast <4 x i16> %1 to <1 x i64> + %mmx_var1.i = bitcast <4 x i16> %0 to <1 x i64> + %2 = tail call <1 x i64> @llvm.x86.mmx.pmull.w(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind + %3 = bitcast <1 x i64> %2 to <4 x i16> %4 = bitcast <4 x i16> %3 to <1 x i64> %5 = extractelement <1 x i64> %4, i32 0 ret i64 %5 } -declare x86_mmx @llvm.x86.mmx.pmulh.w(x86_mmx, x86_mmx) nounwind readnone +declare <1 x i64> @llvm.x86.mmx.pmulh.w(<1 x i64>, <1 x i64>) nounwind readnone define i64 @test50(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { ; X86-LABEL: test50: @@ -1735,16 +1735,16 @@ define i64 @test50(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { entry: %0 = bitcast <1 x i64> %b to <4 x i16> %1 = bitcast <1 x i64> %a to <4 x i16> - %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx - %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx - %2 = tail call x86_mmx @llvm.x86.mmx.pmulh.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind - %3 = bitcast x86_mmx %2 to <4 x i16> + %mmx_var.i = bitcast <4 x i16> %1 to <1 x i64> + %mmx_var1.i = bitcast <4 x i16> %0 to <1 x i64> + %2 = tail call <1 x i64> @llvm.x86.mmx.pmulh.w(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind + %3 = bitcast <1 x i64> %2 to <4 x i16> %4 = bitcast <4 x i16> %3 to <1 x i64> %5 = extractelement <1 x i64> %4, i32 0 ret i64 %5 } -declare x86_mmx @llvm.x86.mmx.pmadd.wd(x86_mmx, x86_mmx) nounwind readnone +declare <1 x i64> @llvm.x86.mmx.pmadd.wd(<1 x i64>, <1 x i64>) nounwind readnone define i64 @test49(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { ; X86-LABEL: test49: @@ -1780,16 +1780,16 @@ define i64 @test49(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { entry: %0 = bitcast <1 x i64> %b to <4 x i16> %1 = bitcast <1 x i64> %a to <4 x i16> - %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx - %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx - %2 = tail call x86_mmx @llvm.x86.mmx.pmadd.wd(x86_mmx %mmx_var.i, x86_mmx 
%mmx_var1.i) nounwind - %3 = bitcast x86_mmx %2 to <2 x i32> + %mmx_var.i = bitcast <4 x i16> %1 to <1 x i64> + %mmx_var1.i = bitcast <4 x i16> %0 to <1 x i64> + %2 = tail call <1 x i64> @llvm.x86.mmx.pmadd.wd(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind + %3 = bitcast <1 x i64> %2 to <2 x i32> %4 = bitcast <2 x i32> %3 to <1 x i64> %5 = extractelement <1 x i64> %4, i32 0 ret i64 %5 } -declare x86_mmx @llvm.x86.mmx.psubus.w(x86_mmx, x86_mmx) nounwind readnone +declare <1 x i64> @llvm.x86.mmx.psubus.w(<1 x i64>, <1 x i64>) nounwind readnone define i64 @test48(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { ; X86-LABEL: test48: @@ -1825,16 +1825,16 @@ define i64 @test48(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { entry: %0 = bitcast <1 x i64> %b to <4 x i16> %1 = bitcast <1 x i64> %a to <4 x i16> - %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx - %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx - %2 = tail call x86_mmx @llvm.x86.mmx.psubus.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind - %3 = bitcast x86_mmx %2 to <4 x i16> + %mmx_var.i = bitcast <4 x i16> %1 to <1 x i64> + %mmx_var1.i = bitcast <4 x i16> %0 to <1 x i64> + %2 = tail call <1 x i64> @llvm.x86.mmx.psubus.w(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind + %3 = bitcast <1 x i64> %2 to <4 x i16> %4 = bitcast <4 x i16> %3 to <1 x i64> %5 = extractelement <1 x i64> %4, i32 0 ret i64 %5 } -declare x86_mmx @llvm.x86.mmx.psubus.b(x86_mmx, x86_mmx) nounwind readnone +declare <1 x i64> @llvm.x86.mmx.psubus.b(<1 x i64>, <1 x i64>) nounwind readnone define i64 @test47(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { ; X86-LABEL: test47: @@ -1870,16 +1870,16 @@ define i64 @test47(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { entry: %0 = bitcast <1 x i64> %b to <8 x i8> %1 = bitcast <1 x i64> %a to <8 x i8> - %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx - %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx - %2 = tail call x86_mmx @llvm.x86.mmx.psubus.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind - %3 = bitcast x86_mmx %2 to <8 x i8> + %mmx_var.i = bitcast <8 x i8> %1 to <1 x i64> + %mmx_var1.i = bitcast <8 x i8> %0 to <1 x i64> + %2 = tail call <1 x i64> @llvm.x86.mmx.psubus.b(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind + %3 = bitcast <1 x i64> %2 to <8 x i8> %4 = bitcast <8 x i8> %3 to <1 x i64> %5 = extractelement <1 x i64> %4, i32 0 ret i64 %5 } -declare x86_mmx @llvm.x86.mmx.psubs.w(x86_mmx, x86_mmx) nounwind readnone +declare <1 x i64> @llvm.x86.mmx.psubs.w(<1 x i64>, <1 x i64>) nounwind readnone define i64 @test46(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { ; X86-LABEL: test46: @@ -1915,16 +1915,16 @@ define i64 @test46(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { entry: %0 = bitcast <1 x i64> %b to <4 x i16> %1 = bitcast <1 x i64> %a to <4 x i16> - %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx - %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx - %2 = tail call x86_mmx @llvm.x86.mmx.psubs.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind - %3 = bitcast x86_mmx %2 to <4 x i16> + %mmx_var.i = bitcast <4 x i16> %1 to <1 x i64> + %mmx_var1.i = bitcast <4 x i16> %0 to <1 x i64> + %2 = tail call <1 x i64> @llvm.x86.mmx.psubs.w(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind + %3 = bitcast <1 x i64> %2 to <4 x i16> %4 = bitcast <4 x i16> %3 to <1 x i64> %5 = extractelement <1 x i64> %4, i32 0 ret i64 %5 } -declare x86_mmx @llvm.x86.mmx.psubs.b(x86_mmx, x86_mmx) nounwind readnone +declare <1 x i64> @llvm.x86.mmx.psubs.b(<1 x i64>, 
<1 x i64>) nounwind readnone define i64 @test45(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { ; X86-LABEL: test45: @@ -1960,10 +1960,10 @@ define i64 @test45(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { entry: %0 = bitcast <1 x i64> %b to <8 x i8> %1 = bitcast <1 x i64> %a to <8 x i8> - %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx - %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx - %2 = tail call x86_mmx @llvm.x86.mmx.psubs.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind - %3 = bitcast x86_mmx %2 to <8 x i8> + %mmx_var.i = bitcast <8 x i8> %1 to <1 x i64> + %mmx_var1.i = bitcast <8 x i8> %0 to <1 x i64> + %2 = tail call <1 x i64> @llvm.x86.mmx.psubs.b(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind + %3 = bitcast <1 x i64> %2 to <8 x i8> %4 = bitcast <8 x i8> %3 to <1 x i64> %5 = extractelement <1 x i64> %4, i32 0 ret i64 %5 @@ -1994,17 +1994,17 @@ define i64 @test44(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { ; X64-NEXT: retq entry: %0 = extractelement <1 x i64> %a, i32 0 - %mmx_var = bitcast i64 %0 to x86_mmx + %mmx_var = bitcast i64 %0 to <1 x i64> %1 = extractelement <1 x i64> %b, i32 0 - %mmx_var1 = bitcast i64 %1 to x86_mmx - %2 = tail call x86_mmx @llvm.x86.mmx.psub.q(x86_mmx %mmx_var, x86_mmx %mmx_var1) - %3 = bitcast x86_mmx %2 to i64 + %mmx_var1 = bitcast i64 %1 to <1 x i64> + %2 = tail call <1 x i64> @llvm.x86.mmx.psub.q(<1 x i64> %mmx_var, <1 x i64> %mmx_var1) + %3 = bitcast <1 x i64> %2 to i64 ret i64 %3 } -declare x86_mmx @llvm.x86.mmx.psub.q(x86_mmx, x86_mmx) nounwind readnone +declare <1 x i64> @llvm.x86.mmx.psub.q(<1 x i64>, <1 x i64>) nounwind readnone -declare x86_mmx @llvm.x86.mmx.psub.d(x86_mmx, x86_mmx) nounwind readnone +declare <1 x i64> @llvm.x86.mmx.psub.d(<1 x i64>, <1 x i64>) nounwind readnone define i64 @test43(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { ; X86-LABEL: test43: @@ -2040,16 +2040,16 @@ define i64 @test43(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { entry: %0 = bitcast <1 x i64> %b to <2 x i32> %1 = bitcast <1 x i64> %a to <2 x i32> - %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx - %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx - %2 = tail call x86_mmx @llvm.x86.mmx.psub.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind - %3 = bitcast x86_mmx %2 to <2 x i32> + %mmx_var.i = bitcast <2 x i32> %1 to <1 x i64> + %mmx_var1.i = bitcast <2 x i32> %0 to <1 x i64> + %2 = tail call <1 x i64> @llvm.x86.mmx.psub.d(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind + %3 = bitcast <1 x i64> %2 to <2 x i32> %4 = bitcast <2 x i32> %3 to <1 x i64> %5 = extractelement <1 x i64> %4, i32 0 ret i64 %5 } -declare x86_mmx @llvm.x86.mmx.psub.w(x86_mmx, x86_mmx) nounwind readnone +declare <1 x i64> @llvm.x86.mmx.psub.w(<1 x i64>, <1 x i64>) nounwind readnone define i64 @test42(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { ; X86-LABEL: test42: @@ -2085,16 +2085,16 @@ define i64 @test42(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { entry: %0 = bitcast <1 x i64> %b to <4 x i16> %1 = bitcast <1 x i64> %a to <4 x i16> - %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx - %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx - %2 = tail call x86_mmx @llvm.x86.mmx.psub.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind - %3 = bitcast x86_mmx %2 to <4 x i16> + %mmx_var.i = bitcast <4 x i16> %1 to <1 x i64> + %mmx_var1.i = bitcast <4 x i16> %0 to <1 x i64> + %2 = tail call <1 x i64> @llvm.x86.mmx.psub.w(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind + %3 = bitcast <1 x i64> %2 
to <4 x i16> %4 = bitcast <4 x i16> %3 to <1 x i64> %5 = extractelement <1 x i64> %4, i32 0 ret i64 %5 } -declare x86_mmx @llvm.x86.mmx.psub.b(x86_mmx, x86_mmx) nounwind readnone +declare <1 x i64> @llvm.x86.mmx.psub.b(<1 x i64>, <1 x i64>) nounwind readnone define i64 @test41(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { ; X86-LABEL: test41: @@ -2130,16 +2130,16 @@ define i64 @test41(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { entry: %0 = bitcast <1 x i64> %b to <8 x i8> %1 = bitcast <1 x i64> %a to <8 x i8> - %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx - %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx - %2 = tail call x86_mmx @llvm.x86.mmx.psub.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind - %3 = bitcast x86_mmx %2 to <8 x i8> + %mmx_var.i = bitcast <8 x i8> %1 to <1 x i64> + %mmx_var1.i = bitcast <8 x i8> %0 to <1 x i64> + %2 = tail call <1 x i64> @llvm.x86.mmx.psub.b(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind + %3 = bitcast <1 x i64> %2 to <8 x i8> %4 = bitcast <8 x i8> %3 to <1 x i64> %5 = extractelement <1 x i64> %4, i32 0 ret i64 %5 } -declare x86_mmx @llvm.x86.mmx.paddus.w(x86_mmx, x86_mmx) nounwind readnone +declare <1 x i64> @llvm.x86.mmx.paddus.w(<1 x i64>, <1 x i64>) nounwind readnone define i64 @test40(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { ; X86-LABEL: test40: @@ -2175,16 +2175,16 @@ define i64 @test40(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { entry: %0 = bitcast <1 x i64> %b to <4 x i16> %1 = bitcast <1 x i64> %a to <4 x i16> - %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx - %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx - %2 = tail call x86_mmx @llvm.x86.mmx.paddus.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind - %3 = bitcast x86_mmx %2 to <4 x i16> + %mmx_var.i = bitcast <4 x i16> %1 to <1 x i64> + %mmx_var1.i = bitcast <4 x i16> %0 to <1 x i64> + %2 = tail call <1 x i64> @llvm.x86.mmx.paddus.w(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind + %3 = bitcast <1 x i64> %2 to <4 x i16> %4 = bitcast <4 x i16> %3 to <1 x i64> %5 = extractelement <1 x i64> %4, i32 0 ret i64 %5 } -declare x86_mmx @llvm.x86.mmx.paddus.b(x86_mmx, x86_mmx) nounwind readnone +declare <1 x i64> @llvm.x86.mmx.paddus.b(<1 x i64>, <1 x i64>) nounwind readnone define i64 @test39(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { ; X86-LABEL: test39: @@ -2220,16 +2220,16 @@ define i64 @test39(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { entry: %0 = bitcast <1 x i64> %b to <8 x i8> %1 = bitcast <1 x i64> %a to <8 x i8> - %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx - %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx - %2 = tail call x86_mmx @llvm.x86.mmx.paddus.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind - %3 = bitcast x86_mmx %2 to <8 x i8> + %mmx_var.i = bitcast <8 x i8> %1 to <1 x i64> + %mmx_var1.i = bitcast <8 x i8> %0 to <1 x i64> + %2 = tail call <1 x i64> @llvm.x86.mmx.paddus.b(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind + %3 = bitcast <1 x i64> %2 to <8 x i8> %4 = bitcast <8 x i8> %3 to <1 x i64> %5 = extractelement <1 x i64> %4, i32 0 ret i64 %5 } -declare x86_mmx @llvm.x86.mmx.padds.w(x86_mmx, x86_mmx) nounwind readnone +declare <1 x i64> @llvm.x86.mmx.padds.w(<1 x i64>, <1 x i64>) nounwind readnone define i64 @test38(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { ; X86-LABEL: test38: @@ -2265,16 +2265,16 @@ define i64 @test38(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { entry: %0 = bitcast <1 x i64> %b to <4 x i16> %1 = bitcast <1 x i64> %a to 
<4 x i16> - %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx - %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx - %2 = tail call x86_mmx @llvm.x86.mmx.padds.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind - %3 = bitcast x86_mmx %2 to <4 x i16> + %mmx_var.i = bitcast <4 x i16> %1 to <1 x i64> + %mmx_var1.i = bitcast <4 x i16> %0 to <1 x i64> + %2 = tail call <1 x i64> @llvm.x86.mmx.padds.w(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind + %3 = bitcast <1 x i64> %2 to <4 x i16> %4 = bitcast <4 x i16> %3 to <1 x i64> %5 = extractelement <1 x i64> %4, i32 0 ret i64 %5 } -declare x86_mmx @llvm.x86.mmx.padds.b(x86_mmx, x86_mmx) nounwind readnone +declare <1 x i64> @llvm.x86.mmx.padds.b(<1 x i64>, <1 x i64>) nounwind readnone define i64 @test37(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { ; X86-LABEL: test37: @@ -2310,16 +2310,16 @@ define i64 @test37(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { entry: %0 = bitcast <1 x i64> %b to <8 x i8> %1 = bitcast <1 x i64> %a to <8 x i8> - %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx - %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx - %2 = tail call x86_mmx @llvm.x86.mmx.padds.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind - %3 = bitcast x86_mmx %2 to <8 x i8> + %mmx_var.i = bitcast <8 x i8> %1 to <1 x i64> + %mmx_var1.i = bitcast <8 x i8> %0 to <1 x i64> + %2 = tail call <1 x i64> @llvm.x86.mmx.padds.b(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind + %3 = bitcast <1 x i64> %2 to <8 x i8> %4 = bitcast <8 x i8> %3 to <1 x i64> %5 = extractelement <1 x i64> %4, i32 0 ret i64 %5 } -declare x86_mmx @llvm.x86.mmx.padd.q(x86_mmx, x86_mmx) nounwind readnone +declare <1 x i64> @llvm.x86.mmx.padd.q(<1 x i64>, <1 x i64>) nounwind readnone define i64 @test36(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { ; X86-LABEL: test36: @@ -2346,15 +2346,15 @@ define i64 @test36(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { ; X64-NEXT: retq entry: %0 = extractelement <1 x i64> %a, i32 0 - %mmx_var = bitcast i64 %0 to x86_mmx + %mmx_var = bitcast i64 %0 to <1 x i64> %1 = extractelement <1 x i64> %b, i32 0 - %mmx_var1 = bitcast i64 %1 to x86_mmx - %2 = tail call x86_mmx @llvm.x86.mmx.padd.q(x86_mmx %mmx_var, x86_mmx %mmx_var1) - %3 = bitcast x86_mmx %2 to i64 + %mmx_var1 = bitcast i64 %1 to <1 x i64> + %2 = tail call <1 x i64> @llvm.x86.mmx.padd.q(<1 x i64> %mmx_var, <1 x i64> %mmx_var1) + %3 = bitcast <1 x i64> %2 to i64 ret i64 %3 } -declare x86_mmx @llvm.x86.mmx.padd.d(x86_mmx, x86_mmx) nounwind readnone +declare <1 x i64> @llvm.x86.mmx.padd.d(<1 x i64>, <1 x i64>) nounwind readnone define i64 @test35(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { ; X86-LABEL: test35: @@ -2390,16 +2390,16 @@ define i64 @test35(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { entry: %0 = bitcast <1 x i64> %b to <2 x i32> %1 = bitcast <1 x i64> %a to <2 x i32> - %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx - %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx - %2 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind - %3 = bitcast x86_mmx %2 to <2 x i32> + %mmx_var.i = bitcast <2 x i32> %1 to <1 x i64> + %mmx_var1.i = bitcast <2 x i32> %0 to <1 x i64> + %2 = tail call <1 x i64> @llvm.x86.mmx.padd.d(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind + %3 = bitcast <1 x i64> %2 to <2 x i32> %4 = bitcast <2 x i32> %3 to <1 x i64> %5 = extractelement <1 x i64> %4, i32 0 ret i64 %5 } -declare x86_mmx @llvm.x86.mmx.padd.w(x86_mmx, x86_mmx) nounwind readnone +declare <1 x i64> 
@llvm.x86.mmx.padd.w(<1 x i64>, <1 x i64>) nounwind readnone define i64 @test34(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { ; X86-LABEL: test34: @@ -2435,16 +2435,16 @@ define i64 @test34(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { entry: %0 = bitcast <1 x i64> %b to <4 x i16> %1 = bitcast <1 x i64> %a to <4 x i16> - %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx - %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx - %2 = tail call x86_mmx @llvm.x86.mmx.padd.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind - %3 = bitcast x86_mmx %2 to <4 x i16> + %mmx_var.i = bitcast <4 x i16> %1 to <1 x i64> + %mmx_var1.i = bitcast <4 x i16> %0 to <1 x i64> + %2 = tail call <1 x i64> @llvm.x86.mmx.padd.w(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind + %3 = bitcast <1 x i64> %2 to <4 x i16> %4 = bitcast <4 x i16> %3 to <1 x i64> %5 = extractelement <1 x i64> %4, i32 0 ret i64 %5 } -declare x86_mmx @llvm.x86.mmx.padd.b(x86_mmx, x86_mmx) nounwind readnone +declare <1 x i64> @llvm.x86.mmx.padd.b(<1 x i64>, <1 x i64>) nounwind readnone define i64 @test33(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { ; X86-LABEL: test33: @@ -2480,16 +2480,16 @@ define i64 @test33(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { entry: %0 = bitcast <1 x i64> %b to <8 x i8> %1 = bitcast <1 x i64> %a to <8 x i8> - %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx - %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx - %2 = tail call x86_mmx @llvm.x86.mmx.padd.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind - %3 = bitcast x86_mmx %2 to <8 x i8> + %mmx_var.i = bitcast <8 x i8> %1 to <1 x i64> + %mmx_var1.i = bitcast <8 x i8> %0 to <1 x i64> + %2 = tail call <1 x i64> @llvm.x86.mmx.padd.b(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind + %3 = bitcast <1 x i64> %2 to <8 x i8> %4 = bitcast <8 x i8> %3 to <1 x i64> %5 = extractelement <1 x i64> %4, i32 0 ret i64 %5 } -declare x86_mmx @llvm.x86.mmx.psad.bw(x86_mmx, x86_mmx) nounwind readnone +declare <1 x i64> @llvm.x86.mmx.psad.bw(<1 x i64>, <1 x i64>) nounwind readnone define i64 @test32(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { ; X86-LABEL: test32: @@ -2525,14 +2525,14 @@ define i64 @test32(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { entry: %0 = bitcast <1 x i64> %b to <8 x i8> %1 = bitcast <1 x i64> %a to <8 x i8> - %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx - %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx - %2 = tail call x86_mmx @llvm.x86.mmx.psad.bw(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind - %3 = bitcast x86_mmx %2 to i64 + %mmx_var.i = bitcast <8 x i8> %1 to <1 x i64> + %mmx_var1.i = bitcast <8 x i8> %0 to <1 x i64> + %2 = tail call <1 x i64> @llvm.x86.mmx.psad.bw(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind + %3 = bitcast <1 x i64> %2 to i64 ret i64 %3 } -declare x86_mmx @llvm.x86.mmx.pmins.w(x86_mmx, x86_mmx) nounwind readnone +declare <1 x i64> @llvm.x86.mmx.pmins.w(<1 x i64>, <1 x i64>) nounwind readnone define i64 @test31(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { ; X86-LABEL: test31: @@ -2568,16 +2568,16 @@ define i64 @test31(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { entry: %0 = bitcast <1 x i64> %b to <4 x i16> %1 = bitcast <1 x i64> %a to <4 x i16> - %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx - %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx - %2 = tail call x86_mmx @llvm.x86.mmx.pmins.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind - %3 = bitcast x86_mmx %2 to <4 x i16> + %mmx_var.i = bitcast <4 x i16> %1 to <1 x i64> + 
%mmx_var1.i = bitcast <4 x i16> %0 to <1 x i64> + %2 = tail call <1 x i64> @llvm.x86.mmx.pmins.w(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind + %3 = bitcast <1 x i64> %2 to <4 x i16> %4 = bitcast <4 x i16> %3 to <1 x i64> %5 = extractelement <1 x i64> %4, i32 0 ret i64 %5 } -declare x86_mmx @llvm.x86.mmx.pminu.b(x86_mmx, x86_mmx) nounwind readnone +declare <1 x i64> @llvm.x86.mmx.pminu.b(<1 x i64>, <1 x i64>) nounwind readnone define i64 @test30(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { ; X86-LABEL: test30: @@ -2613,16 +2613,16 @@ define i64 @test30(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { entry: %0 = bitcast <1 x i64> %b to <8 x i8> %1 = bitcast <1 x i64> %a to <8 x i8> - %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx - %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx - %2 = tail call x86_mmx @llvm.x86.mmx.pminu.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind - %3 = bitcast x86_mmx %2 to <8 x i8> + %mmx_var.i = bitcast <8 x i8> %1 to <1 x i64> + %mmx_var1.i = bitcast <8 x i8> %0 to <1 x i64> + %2 = tail call <1 x i64> @llvm.x86.mmx.pminu.b(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind + %3 = bitcast <1 x i64> %2 to <8 x i8> %4 = bitcast <8 x i8> %3 to <1 x i64> %5 = extractelement <1 x i64> %4, i32 0 ret i64 %5 } -declare x86_mmx @llvm.x86.mmx.pmaxs.w(x86_mmx, x86_mmx) nounwind readnone +declare <1 x i64> @llvm.x86.mmx.pmaxs.w(<1 x i64>, <1 x i64>) nounwind readnone define i64 @test29(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { ; X86-LABEL: test29: @@ -2658,16 +2658,16 @@ define i64 @test29(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { entry: %0 = bitcast <1 x i64> %b to <4 x i16> %1 = bitcast <1 x i64> %a to <4 x i16> - %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx - %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx - %2 = tail call x86_mmx @llvm.x86.mmx.pmaxs.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind - %3 = bitcast x86_mmx %2 to <4 x i16> + %mmx_var.i = bitcast <4 x i16> %1 to <1 x i64> + %mmx_var1.i = bitcast <4 x i16> %0 to <1 x i64> + %2 = tail call <1 x i64> @llvm.x86.mmx.pmaxs.w(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind + %3 = bitcast <1 x i64> %2 to <4 x i16> %4 = bitcast <4 x i16> %3 to <1 x i64> %5 = extractelement <1 x i64> %4, i32 0 ret i64 %5 } -declare x86_mmx @llvm.x86.mmx.pmaxu.b(x86_mmx, x86_mmx) nounwind readnone +declare <1 x i64> @llvm.x86.mmx.pmaxu.b(<1 x i64>, <1 x i64>) nounwind readnone define i64 @test28(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { ; X86-LABEL: test28: @@ -2703,16 +2703,16 @@ define i64 @test28(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { entry: %0 = bitcast <1 x i64> %b to <8 x i8> %1 = bitcast <1 x i64> %a to <8 x i8> - %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx - %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx - %2 = tail call x86_mmx @llvm.x86.mmx.pmaxu.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind - %3 = bitcast x86_mmx %2 to <8 x i8> + %mmx_var.i = bitcast <8 x i8> %1 to <1 x i64> + %mmx_var1.i = bitcast <8 x i8> %0 to <1 x i64> + %2 = tail call <1 x i64> @llvm.x86.mmx.pmaxu.b(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind + %3 = bitcast <1 x i64> %2 to <8 x i8> %4 = bitcast <8 x i8> %3 to <1 x i64> %5 = extractelement <1 x i64> %4, i32 0 ret i64 %5 } -declare x86_mmx @llvm.x86.mmx.pavg.w(x86_mmx, x86_mmx) nounwind readnone +declare <1 x i64> @llvm.x86.mmx.pavg.w(<1 x i64>, <1 x i64>) nounwind readnone define i64 @test27(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { ; X86-LABEL: test27: @@ 
-2748,16 +2748,16 @@ define i64 @test27(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { entry: %0 = bitcast <1 x i64> %b to <4 x i16> %1 = bitcast <1 x i64> %a to <4 x i16> - %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx - %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx - %2 = tail call x86_mmx @llvm.x86.mmx.pavg.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind - %3 = bitcast x86_mmx %2 to <4 x i16> + %mmx_var.i = bitcast <4 x i16> %1 to <1 x i64> + %mmx_var1.i = bitcast <4 x i16> %0 to <1 x i64> + %2 = tail call <1 x i64> @llvm.x86.mmx.pavg.w(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind + %3 = bitcast <1 x i64> %2 to <4 x i16> %4 = bitcast <4 x i16> %3 to <1 x i64> %5 = extractelement <1 x i64> %4, i32 0 ret i64 %5 } -declare x86_mmx @llvm.x86.mmx.pavg.b(x86_mmx, x86_mmx) nounwind readnone +declare <1 x i64> @llvm.x86.mmx.pavg.b(<1 x i64>, <1 x i64>) nounwind readnone define i64 @test26(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { ; X86-LABEL: test26: @@ -2793,16 +2793,16 @@ define i64 @test26(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { entry: %0 = bitcast <1 x i64> %b to <8 x i8> %1 = bitcast <1 x i64> %a to <8 x i8> - %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx - %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx - %2 = tail call x86_mmx @llvm.x86.mmx.pavg.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind - %3 = bitcast x86_mmx %2 to <8 x i8> + %mmx_var.i = bitcast <8 x i8> %1 to <1 x i64> + %mmx_var1.i = bitcast <8 x i8> %0 to <1 x i64> + %2 = tail call <1 x i64> @llvm.x86.mmx.pavg.b(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind + %3 = bitcast <1 x i64> %2 to <8 x i8> %4 = bitcast <8 x i8> %3 to <1 x i64> %5 = extractelement <1 x i64> %4, i32 0 ret i64 %5 } -declare void @llvm.x86.mmx.movnt.dq(ptr, x86_mmx) nounwind +declare void @llvm.x86.mmx.movnt.dq(ptr, <1 x i64>) nounwind define void @test25(ptr %p, <1 x i64> %a) nounwind optsize ssp { ; X86-LABEL: test25: @@ -2819,12 +2819,12 @@ define void @test25(ptr %p, <1 x i64> %a) nounwind optsize ssp { ; X64-NEXT: retq entry: %0 = extractelement <1 x i64> %a, i32 0 - %mmx_var.i = bitcast i64 %0 to x86_mmx - tail call void @llvm.x86.mmx.movnt.dq(ptr %p, x86_mmx %mmx_var.i) nounwind + %mmx_var.i = bitcast i64 %0 to <1 x i64> + tail call void @llvm.x86.mmx.movnt.dq(ptr %p, <1 x i64> %mmx_var.i) nounwind ret void } -declare i32 @llvm.x86.mmx.pmovmskb(x86_mmx) nounwind readnone +declare i32 @llvm.x86.mmx.pmovmskb(<1 x i64>) nounwind readnone define i32 @test24(<1 x i64> %a) nounwind readnone optsize ssp { ; X86-LABEL: test24: @@ -2850,12 +2850,12 @@ define i32 @test24(<1 x i64> %a) nounwind readnone optsize ssp { ; X64-NEXT: retq entry: %0 = bitcast <1 x i64> %a to <8 x i8> - %mmx_var.i = bitcast <8 x i8> %0 to x86_mmx - %1 = tail call i32 @llvm.x86.mmx.pmovmskb(x86_mmx %mmx_var.i) nounwind + %mmx_var.i = bitcast <8 x i8> %0 to <1 x i64> + %1 = tail call i32 @llvm.x86.mmx.pmovmskb(<1 x i64> %mmx_var.i) nounwind ret i32 %1 } -declare void @llvm.x86.mmx.maskmovq(x86_mmx, x86_mmx, ptr) nounwind +declare void @llvm.x86.mmx.maskmovq(<1 x i64>, <1 x i64>, ptr) nounwind define void @test23(<1 x i64> %d, <1 x i64> %n, ptr %p) nounwind optsize ssp { ; X86-LABEL: test23: @@ -2892,13 +2892,13 @@ define void @test23(<1 x i64> %d, <1 x i64> %n, ptr %p) nounwind optsize ssp { entry: %0 = bitcast <1 x i64> %n to <8 x i8> %1 = bitcast <1 x i64> %d to <8 x i8> - %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx - %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx - tail call void @llvm.x86.mmx.maskmovq(x86_mmx 
%mmx_var.i, x86_mmx %mmx_var1.i, ptr %p) nounwind + %mmx_var.i = bitcast <8 x i8> %1 to <1 x i64> + %mmx_var1.i = bitcast <8 x i8> %0 to <1 x i64> + tail call void @llvm.x86.mmx.maskmovq(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i, ptr %p) nounwind ret void } -declare x86_mmx @llvm.x86.mmx.pmulhu.w(x86_mmx, x86_mmx) nounwind readnone +declare <1 x i64> @llvm.x86.mmx.pmulhu.w(<1 x i64>, <1 x i64>) nounwind readnone define i64 @test22(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { ; X86-LABEL: test22: @@ -2934,16 +2934,16 @@ define i64 @test22(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { entry: %0 = bitcast <1 x i64> %b to <4 x i16> %1 = bitcast <1 x i64> %a to <4 x i16> - %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx - %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx - %2 = tail call x86_mmx @llvm.x86.mmx.pmulhu.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind - %3 = bitcast x86_mmx %2 to <4 x i16> + %mmx_var.i = bitcast <4 x i16> %1 to <1 x i64> + %mmx_var1.i = bitcast <4 x i16> %0 to <1 x i64> + %2 = tail call <1 x i64> @llvm.x86.mmx.pmulhu.w(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind + %3 = bitcast <1 x i64> %2 to <4 x i16> %4 = bitcast <4 x i16> %3 to <1 x i64> %5 = extractelement <1 x i64> %4, i32 0 ret i64 %5 } -declare x86_mmx @llvm.x86.sse.pshuf.w(x86_mmx, i8) nounwind readnone +declare <1 x i64> @llvm.x86.sse.pshuf.w(<1 x i64>, i8) nounwind readnone define i64 @test21(<1 x i64> %a) nounwind readnone optsize ssp { ; X86-LABEL: test21: @@ -2972,9 +2972,9 @@ define i64 @test21(<1 x i64> %a) nounwind readnone optsize ssp { ; X64-NEXT: retq entry: %0 = bitcast <1 x i64> %a to <4 x i16> - %1 = bitcast <4 x i16> %0 to x86_mmx - %2 = tail call x86_mmx @llvm.x86.sse.pshuf.w(x86_mmx %1, i8 3) nounwind readnone - %3 = bitcast x86_mmx %2 to <4 x i16> + %1 = bitcast <4 x i16> %0 to <1 x i64> + %2 = tail call <1 x i64> @llvm.x86.sse.pshuf.w(<1 x i64> %1, i8 3) nounwind readnone + %3 = bitcast <1 x i64> %2 to <4 x i16> %4 = bitcast <4 x i16> %3 to <1 x i64> %5 = extractelement <1 x i64> %4, i32 0 ret i64 %5 @@ -3005,15 +3005,15 @@ define i32 @test21_2(<1 x i64> %a) nounwind readnone optsize ssp { ; X64-NEXT: retq entry: %0 = bitcast <1 x i64> %a to <4 x i16> - %1 = bitcast <4 x i16> %0 to x86_mmx - %2 = tail call x86_mmx @llvm.x86.sse.pshuf.w(x86_mmx %1, i8 3) nounwind readnone - %3 = bitcast x86_mmx %2 to <4 x i16> + %1 = bitcast <4 x i16> %0 to <1 x i64> + %2 = tail call <1 x i64> @llvm.x86.sse.pshuf.w(<1 x i64> %1, i8 3) nounwind readnone + %3 = bitcast <1 x i64> %2 to <4 x i16> %4 = bitcast <4 x i16> %3 to <2 x i32> %5 = extractelement <2 x i32> %4, i32 0 ret i32 %5 } -declare x86_mmx @llvm.x86.mmx.pmulu.dq(x86_mmx, x86_mmx) nounwind readnone +declare <1 x i64> @llvm.x86.mmx.pmulu.dq(<1 x i64>, <1 x i64>) nounwind readnone define i64 @test20(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { ; X86-LABEL: test20: @@ -3049,14 +3049,14 @@ define i64 @test20(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { entry: %0 = bitcast <1 x i64> %b to <2 x i32> %1 = bitcast <1 x i64> %a to <2 x i32> - %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx - %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx - %2 = tail call x86_mmx @llvm.x86.mmx.pmulu.dq(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind - %3 = bitcast x86_mmx %2 to i64 + %mmx_var.i = bitcast <2 x i32> %1 to <1 x i64> + %mmx_var1.i = bitcast <2 x i32> %0 to <1 x i64> + %2 = tail call <1 x i64> @llvm.x86.mmx.pmulu.dq(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind + %3 = bitcast <1 x i64> %2 
to i64 ret i64 %3 } -declare <2 x double> @llvm.x86.sse.cvtpi2pd(x86_mmx) nounwind readnone +declare <2 x double> @llvm.x86.sse.cvtpi2pd(<1 x i64>) nounwind readnone define <2 x double> @test19(<1 x i64> %a) nounwind readnone optsize ssp { ; X86-LABEL: test19: @@ -3081,12 +3081,12 @@ define <2 x double> @test19(<1 x i64> %a) nounwind readnone optsize ssp { ; X64-NEXT: retq entry: %0 = bitcast <1 x i64> %a to <2 x i32> - %1 = bitcast <2 x i32> %0 to x86_mmx - %2 = tail call <2 x double> @llvm.x86.sse.cvtpi2pd(x86_mmx %1) nounwind readnone + %1 = bitcast <2 x i32> %0 to <1 x i64> + %2 = tail call <2 x double> @llvm.x86.sse.cvtpi2pd(<1 x i64> %1) nounwind readnone ret <2 x double> %2 } -declare x86_mmx @llvm.x86.sse.cvttpd2pi(<2 x double>) nounwind readnone +declare <1 x i64> @llvm.x86.sse.cvttpd2pi(<2 x double>) nounwind readnone define i64 @test18(<2 x double> %a) nounwind readnone optsize ssp { ; X86-LABEL: test18: @@ -3109,14 +3109,14 @@ define i64 @test18(<2 x double> %a) nounwind readnone optsize ssp { ; X64-NEXT: movq %mm0, %rax ; X64-NEXT: retq entry: - %0 = tail call x86_mmx @llvm.x86.sse.cvttpd2pi(<2 x double> %a) nounwind readnone - %1 = bitcast x86_mmx %0 to <2 x i32> + %0 = tail call <1 x i64> @llvm.x86.sse.cvttpd2pi(<2 x double> %a) nounwind readnone + %1 = bitcast <1 x i64> %0 to <2 x i32> %2 = bitcast <2 x i32> %1 to <1 x i64> %3 = extractelement <1 x i64> %2, i32 0 ret i64 %3 } -declare x86_mmx @llvm.x86.sse.cvtpd2pi(<2 x double>) nounwind readnone +declare <1 x i64> @llvm.x86.sse.cvtpd2pi(<2 x double>) nounwind readnone define i64 @test17(<2 x double> %a) nounwind readnone optsize ssp { ; X86-LABEL: test17: @@ -3139,14 +3139,14 @@ define i64 @test17(<2 x double> %a) nounwind readnone optsize ssp { ; X64-NEXT: movq %mm0, %rax ; X64-NEXT: retq entry: - %0 = tail call x86_mmx @llvm.x86.sse.cvtpd2pi(<2 x double> %a) nounwind readnone - %1 = bitcast x86_mmx %0 to <2 x i32> + %0 = tail call <1 x i64> @llvm.x86.sse.cvtpd2pi(<2 x double> %a) nounwind readnone + %1 = bitcast <1 x i64> %0 to <2 x i32> %2 = bitcast <2 x i32> %1 to <1 x i64> %3 = extractelement <1 x i64> %2, i32 0 ret i64 %3 } -declare x86_mmx @llvm.x86.mmx.palignr.b(x86_mmx, x86_mmx, i8) nounwind readnone +declare <1 x i64> @llvm.x86.mmx.palignr.b(<1 x i64>, <1 x i64>, i8) nounwind readnone define i64 @test16(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { ; X86-LABEL: test16: @@ -3173,15 +3173,15 @@ define i64 @test16(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { ; X64-NEXT: retq entry: %0 = extractelement <1 x i64> %a, i32 0 - %mmx_var = bitcast i64 %0 to x86_mmx + %mmx_var = bitcast i64 %0 to <1 x i64> %1 = extractelement <1 x i64> %b, i32 0 - %mmx_var1 = bitcast i64 %1 to x86_mmx - %2 = tail call x86_mmx @llvm.x86.mmx.palignr.b(x86_mmx %mmx_var, x86_mmx %mmx_var1, i8 16) - %3 = bitcast x86_mmx %2 to i64 + %mmx_var1 = bitcast i64 %1 to <1 x i64> + %2 = tail call <1 x i64> @llvm.x86.mmx.palignr.b(<1 x i64> %mmx_var, <1 x i64> %mmx_var1, i8 16) + %3 = bitcast <1 x i64> %2 to i64 ret i64 %3 } -declare x86_mmx @llvm.x86.ssse3.pabs.d(x86_mmx) nounwind readnone +declare <1 x i64> @llvm.x86.ssse3.pabs.d(<1 x i64>) nounwind readnone define i64 @test15(<1 x i64> %a) nounwind readnone optsize ssp { ; X86-LABEL: test15: @@ -3210,15 +3210,15 @@ define i64 @test15(<1 x i64> %a) nounwind readnone optsize ssp { ; X64-NEXT: retq entry: %0 = bitcast <1 x i64> %a to <2 x i32> - %1 = bitcast <2 x i32> %0 to x86_mmx - %2 = tail call x86_mmx @llvm.x86.ssse3.pabs.d(x86_mmx %1) nounwind readnone - %3 = bitcast 
x86_mmx %2 to <2 x i32> + %1 = bitcast <2 x i32> %0 to <1 x i64> + %2 = tail call <1 x i64> @llvm.x86.ssse3.pabs.d(<1 x i64> %1) nounwind readnone + %3 = bitcast <1 x i64> %2 to <2 x i32> %4 = bitcast <2 x i32> %3 to <1 x i64> %5 = extractelement <1 x i64> %4, i32 0 ret i64 %5 } -declare x86_mmx @llvm.x86.ssse3.pabs.w(x86_mmx) nounwind readnone +declare <1 x i64> @llvm.x86.ssse3.pabs.w(<1 x i64>) nounwind readnone define i64 @test14(<1 x i64> %a) nounwind readnone optsize ssp { ; X86-LABEL: test14: @@ -3247,15 +3247,15 @@ define i64 @test14(<1 x i64> %a) nounwind readnone optsize ssp { ; X64-NEXT: retq entry: %0 = bitcast <1 x i64> %a to <4 x i16> - %1 = bitcast <4 x i16> %0 to x86_mmx - %2 = tail call x86_mmx @llvm.x86.ssse3.pabs.w(x86_mmx %1) nounwind readnone - %3 = bitcast x86_mmx %2 to <4 x i16> + %1 = bitcast <4 x i16> %0 to <1 x i64> + %2 = tail call <1 x i64> @llvm.x86.ssse3.pabs.w(<1 x i64> %1) nounwind readnone + %3 = bitcast <1 x i64> %2 to <4 x i16> %4 = bitcast <4 x i16> %3 to <1 x i64> %5 = extractelement <1 x i64> %4, i32 0 ret i64 %5 } -declare x86_mmx @llvm.x86.ssse3.pabs.b(x86_mmx) nounwind readnone +declare <1 x i64> @llvm.x86.ssse3.pabs.b(<1 x i64>) nounwind readnone define i64 @test13(<1 x i64> %a) nounwind readnone optsize ssp { ; X86-LABEL: test13: @@ -3284,15 +3284,15 @@ define i64 @test13(<1 x i64> %a) nounwind readnone optsize ssp { ; X64-NEXT: retq entry: %0 = bitcast <1 x i64> %a to <8 x i8> - %1 = bitcast <8 x i8> %0 to x86_mmx - %2 = tail call x86_mmx @llvm.x86.ssse3.pabs.b(x86_mmx %1) nounwind readnone - %3 = bitcast x86_mmx %2 to <8 x i8> + %1 = bitcast <8 x i8> %0 to <1 x i64> + %2 = tail call <1 x i64> @llvm.x86.ssse3.pabs.b(<1 x i64> %1) nounwind readnone + %3 = bitcast <1 x i64> %2 to <8 x i8> %4 = bitcast <8 x i8> %3 to <1 x i64> %5 = extractelement <1 x i64> %4, i32 0 ret i64 %5 } -declare x86_mmx @llvm.x86.ssse3.psign.d(x86_mmx, x86_mmx) nounwind readnone +declare <1 x i64> @llvm.x86.ssse3.psign.d(<1 x i64>, <1 x i64>) nounwind readnone define i64 @test12(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { ; X86-LABEL: test12: @@ -3328,16 +3328,16 @@ define i64 @test12(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { entry: %0 = bitcast <1 x i64> %b to <2 x i32> %1 = bitcast <1 x i64> %a to <2 x i32> - %2 = bitcast <2 x i32> %1 to x86_mmx - %3 = bitcast <2 x i32> %0 to x86_mmx - %4 = tail call x86_mmx @llvm.x86.ssse3.psign.d(x86_mmx %2, x86_mmx %3) nounwind readnone - %5 = bitcast x86_mmx %4 to <2 x i32> + %2 = bitcast <2 x i32> %1 to <1 x i64> + %3 = bitcast <2 x i32> %0 to <1 x i64> + %4 = tail call <1 x i64> @llvm.x86.ssse3.psign.d(<1 x i64> %2, <1 x i64> %3) nounwind readnone + %5 = bitcast <1 x i64> %4 to <2 x i32> %6 = bitcast <2 x i32> %5 to <1 x i64> %7 = extractelement <1 x i64> %6, i32 0 ret i64 %7 } -declare x86_mmx @llvm.x86.ssse3.psign.w(x86_mmx, x86_mmx) nounwind readnone +declare <1 x i64> @llvm.x86.ssse3.psign.w(<1 x i64>, <1 x i64>) nounwind readnone define i64 @test11(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { ; X86-LABEL: test11: @@ -3373,16 +3373,16 @@ define i64 @test11(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { entry: %0 = bitcast <1 x i64> %b to <4 x i16> %1 = bitcast <1 x i64> %a to <4 x i16> - %2 = bitcast <4 x i16> %1 to x86_mmx - %3 = bitcast <4 x i16> %0 to x86_mmx - %4 = tail call x86_mmx @llvm.x86.ssse3.psign.w(x86_mmx %2, x86_mmx %3) nounwind readnone - %5 = bitcast x86_mmx %4 to <4 x i16> + %2 = bitcast <4 x i16> %1 to <1 x i64> + %3 = bitcast <4 x i16> %0 to <1 x 
i64> + %4 = tail call <1 x i64> @llvm.x86.ssse3.psign.w(<1 x i64> %2, <1 x i64> %3) nounwind readnone + %5 = bitcast <1 x i64> %4 to <4 x i16> %6 = bitcast <4 x i16> %5 to <1 x i64> %7 = extractelement <1 x i64> %6, i32 0 ret i64 %7 } -declare x86_mmx @llvm.x86.ssse3.psign.b(x86_mmx, x86_mmx) nounwind readnone +declare <1 x i64> @llvm.x86.ssse3.psign.b(<1 x i64>, <1 x i64>) nounwind readnone define i64 @test10(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { ; X86-LABEL: test10: @@ -3418,16 +3418,16 @@ define i64 @test10(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { entry: %0 = bitcast <1 x i64> %b to <8 x i8> %1 = bitcast <1 x i64> %a to <8 x i8> - %2 = bitcast <8 x i8> %1 to x86_mmx - %3 = bitcast <8 x i8> %0 to x86_mmx - %4 = tail call x86_mmx @llvm.x86.ssse3.psign.b(x86_mmx %2, x86_mmx %3) nounwind readnone - %5 = bitcast x86_mmx %4 to <8 x i8> + %2 = bitcast <8 x i8> %1 to <1 x i64> + %3 = bitcast <8 x i8> %0 to <1 x i64> + %4 = tail call <1 x i64> @llvm.x86.ssse3.psign.b(<1 x i64> %2, <1 x i64> %3) nounwind readnone + %5 = bitcast <1 x i64> %4 to <8 x i8> %6 = bitcast <8 x i8> %5 to <1 x i64> %7 = extractelement <1 x i64> %6, i32 0 ret i64 %7 } -declare x86_mmx @llvm.x86.ssse3.pshuf.b(x86_mmx, x86_mmx) nounwind readnone +declare <1 x i64> @llvm.x86.ssse3.pshuf.b(<1 x i64>, <1 x i64>) nounwind readnone define i64 @test9(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { ; X86-LABEL: test9: @@ -3463,16 +3463,16 @@ define i64 @test9(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { entry: %0 = bitcast <1 x i64> %b to <8 x i8> %1 = bitcast <1 x i64> %a to <8 x i8> - %2 = bitcast <8 x i8> %1 to x86_mmx - %3 = bitcast <8 x i8> %0 to x86_mmx - %4 = tail call x86_mmx @llvm.x86.ssse3.pshuf.b(x86_mmx %2, x86_mmx %3) nounwind readnone - %5 = bitcast x86_mmx %4 to <8 x i8> + %2 = bitcast <8 x i8> %1 to <1 x i64> + %3 = bitcast <8 x i8> %0 to <1 x i64> + %4 = tail call <1 x i64> @llvm.x86.ssse3.pshuf.b(<1 x i64> %2, <1 x i64> %3) nounwind readnone + %5 = bitcast <1 x i64> %4 to <8 x i8> %6 = bitcast <8 x i8> %5 to <1 x i64> %7 = extractelement <1 x i64> %6, i32 0 ret i64 %7 } -declare x86_mmx @llvm.x86.ssse3.pmul.hr.sw(x86_mmx, x86_mmx) nounwind readnone +declare <1 x i64> @llvm.x86.ssse3.pmul.hr.sw(<1 x i64>, <1 x i64>) nounwind readnone define i64 @test8(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { ; X86-LABEL: test8: @@ -3508,16 +3508,16 @@ define i64 @test8(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { entry: %0 = bitcast <1 x i64> %b to <4 x i16> %1 = bitcast <1 x i64> %a to <4 x i16> - %2 = bitcast <4 x i16> %1 to x86_mmx - %3 = bitcast <4 x i16> %0 to x86_mmx - %4 = tail call x86_mmx @llvm.x86.ssse3.pmul.hr.sw(x86_mmx %2, x86_mmx %3) nounwind readnone - %5 = bitcast x86_mmx %4 to <4 x i16> + %2 = bitcast <4 x i16> %1 to <1 x i64> + %3 = bitcast <4 x i16> %0 to <1 x i64> + %4 = tail call <1 x i64> @llvm.x86.ssse3.pmul.hr.sw(<1 x i64> %2, <1 x i64> %3) nounwind readnone + %5 = bitcast <1 x i64> %4 to <4 x i16> %6 = bitcast <4 x i16> %5 to <1 x i64> %7 = extractelement <1 x i64> %6, i32 0 ret i64 %7 } -declare x86_mmx @llvm.x86.ssse3.pmadd.ub.sw(x86_mmx, x86_mmx) nounwind readnone +declare <1 x i64> @llvm.x86.ssse3.pmadd.ub.sw(<1 x i64>, <1 x i64>) nounwind readnone define i64 @test7(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { ; X86-LABEL: test7: @@ -3553,16 +3553,16 @@ define i64 @test7(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { entry: %0 = bitcast <1 x i64> %b to <8 x i8> %1 = bitcast 
<1 x i64> %a to <8 x i8> - %2 = bitcast <8 x i8> %1 to x86_mmx - %3 = bitcast <8 x i8> %0 to x86_mmx - %4 = tail call x86_mmx @llvm.x86.ssse3.pmadd.ub.sw(x86_mmx %2, x86_mmx %3) nounwind readnone - %5 = bitcast x86_mmx %4 to <8 x i8> + %2 = bitcast <8 x i8> %1 to <1 x i64> + %3 = bitcast <8 x i8> %0 to <1 x i64> + %4 = tail call <1 x i64> @llvm.x86.ssse3.pmadd.ub.sw(<1 x i64> %2, <1 x i64> %3) nounwind readnone + %5 = bitcast <1 x i64> %4 to <8 x i8> %6 = bitcast <8 x i8> %5 to <1 x i64> %7 = extractelement <1 x i64> %6, i32 0 ret i64 %7 } -declare x86_mmx @llvm.x86.ssse3.phsub.sw(x86_mmx, x86_mmx) nounwind readnone +declare <1 x i64> @llvm.x86.ssse3.phsub.sw(<1 x i64>, <1 x i64>) nounwind readnone define i64 @test6(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { ; X86-LABEL: test6: @@ -3598,16 +3598,16 @@ define i64 @test6(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { entry: %0 = bitcast <1 x i64> %b to <4 x i16> %1 = bitcast <1 x i64> %a to <4 x i16> - %2 = bitcast <4 x i16> %1 to x86_mmx - %3 = bitcast <4 x i16> %0 to x86_mmx - %4 = tail call x86_mmx @llvm.x86.ssse3.phsub.sw(x86_mmx %2, x86_mmx %3) nounwind readnone - %5 = bitcast x86_mmx %4 to <4 x i16> + %2 = bitcast <4 x i16> %1 to <1 x i64> + %3 = bitcast <4 x i16> %0 to <1 x i64> + %4 = tail call <1 x i64> @llvm.x86.ssse3.phsub.sw(<1 x i64> %2, <1 x i64> %3) nounwind readnone + %5 = bitcast <1 x i64> %4 to <4 x i16> %6 = bitcast <4 x i16> %5 to <1 x i64> %7 = extractelement <1 x i64> %6, i32 0 ret i64 %7 } -declare x86_mmx @llvm.x86.ssse3.phsub.d(x86_mmx, x86_mmx) nounwind readnone +declare <1 x i64> @llvm.x86.ssse3.phsub.d(<1 x i64>, <1 x i64>) nounwind readnone define i64 @test5(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { ; X86-LABEL: test5: @@ -3643,16 +3643,16 @@ define i64 @test5(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { entry: %0 = bitcast <1 x i64> %b to <2 x i32> %1 = bitcast <1 x i64> %a to <2 x i32> - %2 = bitcast <2 x i32> %1 to x86_mmx - %3 = bitcast <2 x i32> %0 to x86_mmx - %4 = tail call x86_mmx @llvm.x86.ssse3.phsub.d(x86_mmx %2, x86_mmx %3) nounwind readnone - %5 = bitcast x86_mmx %4 to <2 x i32> + %2 = bitcast <2 x i32> %1 to <1 x i64> + %3 = bitcast <2 x i32> %0 to <1 x i64> + %4 = tail call <1 x i64> @llvm.x86.ssse3.phsub.d(<1 x i64> %2, <1 x i64> %3) nounwind readnone + %5 = bitcast <1 x i64> %4 to <2 x i32> %6 = bitcast <2 x i32> %5 to <1 x i64> %7 = extractelement <1 x i64> %6, i32 0 ret i64 %7 } -declare x86_mmx @llvm.x86.ssse3.phsub.w(x86_mmx, x86_mmx) nounwind readnone +declare <1 x i64> @llvm.x86.ssse3.phsub.w(<1 x i64>, <1 x i64>) nounwind readnone define i64 @test4(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { ; X86-LABEL: test4: @@ -3688,16 +3688,16 @@ define i64 @test4(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { entry: %0 = bitcast <1 x i64> %b to <4 x i16> %1 = bitcast <1 x i64> %a to <4 x i16> - %2 = bitcast <4 x i16> %1 to x86_mmx - %3 = bitcast <4 x i16> %0 to x86_mmx - %4 = tail call x86_mmx @llvm.x86.ssse3.phsub.w(x86_mmx %2, x86_mmx %3) nounwind readnone - %5 = bitcast x86_mmx %4 to <4 x i16> + %2 = bitcast <4 x i16> %1 to <1 x i64> + %3 = bitcast <4 x i16> %0 to <1 x i64> + %4 = tail call <1 x i64> @llvm.x86.ssse3.phsub.w(<1 x i64> %2, <1 x i64> %3) nounwind readnone + %5 = bitcast <1 x i64> %4 to <4 x i16> %6 = bitcast <4 x i16> %5 to <1 x i64> %7 = extractelement <1 x i64> %6, i32 0 ret i64 %7 } -declare x86_mmx @llvm.x86.ssse3.phadd.sw(x86_mmx, x86_mmx) nounwind readnone +declare <1 x i64> 
@llvm.x86.ssse3.phadd.sw(<1 x i64>, <1 x i64>) nounwind readnone define i64 @test3(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { ; X86-LABEL: test3: @@ -3733,16 +3733,16 @@ define i64 @test3(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { entry: %0 = bitcast <1 x i64> %b to <4 x i16> %1 = bitcast <1 x i64> %a to <4 x i16> - %2 = bitcast <4 x i16> %1 to x86_mmx - %3 = bitcast <4 x i16> %0 to x86_mmx - %4 = tail call x86_mmx @llvm.x86.ssse3.phadd.sw(x86_mmx %2, x86_mmx %3) nounwind readnone - %5 = bitcast x86_mmx %4 to <4 x i16> + %2 = bitcast <4 x i16> %1 to <1 x i64> + %3 = bitcast <4 x i16> %0 to <1 x i64> + %4 = tail call <1 x i64> @llvm.x86.ssse3.phadd.sw(<1 x i64> %2, <1 x i64> %3) nounwind readnone + %5 = bitcast <1 x i64> %4 to <4 x i16> %6 = bitcast <4 x i16> %5 to <1 x i64> %7 = extractelement <1 x i64> %6, i32 0 ret i64 %7 } -declare x86_mmx @llvm.x86.ssse3.phadd.d(x86_mmx, x86_mmx) nounwind readnone +declare <1 x i64> @llvm.x86.ssse3.phadd.d(<1 x i64>, <1 x i64>) nounwind readnone define i64 @test2(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { ; X86-LABEL: test2: @@ -3778,16 +3778,16 @@ define i64 @test2(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { entry: %0 = bitcast <1 x i64> %b to <2 x i32> %1 = bitcast <1 x i64> %a to <2 x i32> - %2 = bitcast <2 x i32> %1 to x86_mmx - %3 = bitcast <2 x i32> %0 to x86_mmx - %4 = tail call x86_mmx @llvm.x86.ssse3.phadd.d(x86_mmx %2, x86_mmx %3) nounwind readnone - %5 = bitcast x86_mmx %4 to <2 x i32> + %2 = bitcast <2 x i32> %1 to <1 x i64> + %3 = bitcast <2 x i32> %0 to <1 x i64> + %4 = tail call <1 x i64> @llvm.x86.ssse3.phadd.d(<1 x i64> %2, <1 x i64> %3) nounwind readnone + %5 = bitcast <1 x i64> %4 to <2 x i32> %6 = bitcast <2 x i32> %5 to <1 x i64> %7 = extractelement <1 x i64> %6, i32 0 ret i64 %7 } -define <4 x float> @test89(<4 x float> %a, x86_mmx %b) nounwind { +define <4 x float> @test89(<4 x float> %a, <1 x i64> %b) nounwind { ; X86-LABEL: test89: ; X86: # %bb.0: ; X86-NEXT: pushl %ebp @@ -3808,11 +3808,11 @@ define <4 x float> @test89(<4 x float> %a, x86_mmx %b) nounwind { ; X64-NEXT: movq %rdi, %mm0 ; X64-NEXT: cvtpi2ps %mm0, %xmm0 ; X64-NEXT: retq - %c = tail call <4 x float> @llvm.x86.sse.cvtpi2ps(<4 x float> %a, x86_mmx %b) + %c = tail call <4 x float> @llvm.x86.sse.cvtpi2ps(<4 x float> %a, <1 x i64> %b) ret <4 x float> %c } -declare <4 x float> @llvm.x86.sse.cvtpi2ps(<4 x float>, x86_mmx) nounwind readnone +declare <4 x float> @llvm.x86.sse.cvtpi2ps(<4 x float>, <1 x i64>) nounwind readnone define void @test90() { ; ALL-LABEL: test90: @@ -3852,13 +3852,11 @@ define <1 x i64> @test_mm_insert_pi16(<1 x i64> %a.coerce, i32 %d) nounwind { ; X64-NEXT: movq %mm0, %rax ; X64-NEXT: retq entry: - %0 = bitcast <1 x i64> %a.coerce to x86_mmx - %1 = tail call x86_mmx @llvm.x86.mmx.pinsr.w(x86_mmx %0, i32 %d, i32 2) - %2 = bitcast x86_mmx %1 to <1 x i64> - ret <1 x i64> %2 + %1 = tail call <1 x i64> @llvm.x86.mmx.pinsr.w(<1 x i64> %a.coerce, i32 %d, i32 2) + ret <1 x i64> %1 } -declare x86_mmx @llvm.x86.mmx.pinsr.w(x86_mmx, i32, i32 immarg) +declare <1 x i64> @llvm.x86.mmx.pinsr.w(<1 x i64>, i32, i32 immarg) define i32 @test_mm_extract_pi16(<1 x i64> %a.coerce) nounwind { ; X86-LABEL: test_mm_extract_pi16: @@ -3883,9 +3881,8 @@ define i32 @test_mm_extract_pi16(<1 x i64> %a.coerce) nounwind { ; X64-NEXT: pextrw $2, %mm0, %eax ; X64-NEXT: retq entry: - %0 = bitcast <1 x i64> %a.coerce to x86_mmx - %1 = tail call i32 @llvm.x86.mmx.pextr.w(x86_mmx %0, i32 2) + %1 = tail call i32 
@llvm.x86.mmx.pextr.w(<1 x i64> %a.coerce, i32 2) ret i32 %1 } -declare i32 @llvm.x86.mmx.pextr.w(x86_mmx, i32 immarg) +declare i32 @llvm.x86.mmx.pextr.w(<1 x i64>, i32 immarg) diff --git a/llvm/test/CodeGen/X86/mmx-only.ll b/llvm/test/CodeGen/X86/mmx-only.ll index eab67e08b95743..8a87350a794294 100644 --- a/llvm/test/CodeGen/X86/mmx-only.ll +++ b/llvm/test/CodeGen/X86/mmx-only.ll @@ -3,7 +3,7 @@ ; Test that turning off sse doesn't turn off mmx. -declare x86_mmx @llvm.x86.mmx.pcmpgt.d(x86_mmx, x86_mmx) nounwind readnone +declare <1 x i64> @llvm.x86.mmx.pcmpgt.d(<1 x i64>, <1 x i64>) nounwind readnone define i64 @test88(<1 x i64> %a, <1 x i64> %b) nounwind readnone { ; CHECK-LABEL: @test88 @@ -11,10 +11,10 @@ define i64 @test88(<1 x i64> %a, <1 x i64> %b) nounwind readnone { entry: %0 = bitcast <1 x i64> %b to <2 x i32> %1 = bitcast <1 x i64> %a to <2 x i32> - %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx - %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx - %2 = tail call x86_mmx @llvm.x86.mmx.pcmpgt.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind - %3 = bitcast x86_mmx %2 to <2 x i32> + %mmx_var.i = bitcast <2 x i32> %1 to <1 x i64> + %mmx_var1.i = bitcast <2 x i32> %0 to <1 x i64> + %2 = tail call <1 x i64> @llvm.x86.mmx.pcmpgt.d(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind + %3 = bitcast <1 x i64> %2 to <2 x i32> %4 = bitcast <2 x i32> %3 to <1 x i64> %5 = extractelement <1 x i64> %4, i32 0 ret i64 %5 diff --git a/llvm/test/CodeGen/X86/mxcsr-reg-usage.ll b/llvm/test/CodeGen/X86/mxcsr-reg-usage.ll index fd8bd1facaf6b2..6bb564c4b757e6 100644 --- a/llvm/test/CodeGen/X86/mxcsr-reg-usage.ll +++ b/llvm/test/CodeGen/X86/mxcsr-reg-usage.ll @@ -1,18 +1,18 @@ ; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+mmx,+fma,+f16c,+avx512f -stop-after finalize-isel -o - %s | FileCheck %s ; This test ensures that the MXCSR is implicitly used by MMX FP instructions. 
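; Of the five conversions checked below, only MMX_CVTPI2PDrr carries no
; implicit $mxcsr use: every i32 value is exactly representable as an f64,
; so cvtpi2pd can neither consult the rounding mode nor raise inexact, while
; the other four conversions can. A minimal sketch of the contrast, using the
; intrinsic signatures declared at the end of this file (the function name is
; a placeholder, not part of the upstream test):
;
;   define <2 x double> @round_then_exact(<4 x float> %v) {
;     %p = call <1 x i64> @llvm.x86.sse.cvtps2pi(<4 x float> %v)  ; rounds: reads $mxcsr
;     %d = call <2 x double> @llvm.x86.sse.cvtpi2pd(<1 x i64> %p) ; exact: no $mxcsr
;     ret <2 x double> %d
;   }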
-define x86_mmx @mxcsr_mmx(<4 x float> %a0) { +define <1 x i64> @mxcsr_mmx(<4 x float> %a0) { ; CHECK: MMX_CVTPS2PIrr %{{[0-9]}}, implicit $mxcsr ; CHECK: MMX_CVTPI2PSrr %{{[0-9]}}, killed %{{[0-9]}}, implicit $mxcsr ; CHECK: MMX_CVTTPS2PIrr killed %{{[0-9]}}, implicit $mxcsr ; CHECK: MMX_CVTPI2PDrr killed %{{[0-9]$}} ; CHECK: MMX_CVTPD2PIrr killed %{{[0-9]}}, implicit $mxcsr - %1 = call x86_mmx @llvm.x86.sse.cvtps2pi(<4 x float> %a0) - %2 = call <4 x float> @llvm.x86.sse.cvtpi2ps(<4 x float> %a0, x86_mmx %1) - %3 = call x86_mmx @llvm.x86.sse.cvttps2pi(<4 x float> %2) - %4 = call <2 x double> @llvm.x86.sse.cvtpi2pd(x86_mmx %3) - %5 = call x86_mmx @llvm.x86.sse.cvtpd2pi(<2 x double> %4) - ret x86_mmx %5 + %1 = call <1 x i64> @llvm.x86.sse.cvtps2pi(<4 x float> %a0) + %2 = call <4 x float> @llvm.x86.sse.cvtpi2ps(<4 x float> %a0, <1 x i64> %1) + %3 = call <1 x i64> @llvm.x86.sse.cvttps2pi(<4 x float> %2) + %4 = call <2 x double> @llvm.x86.sse.cvtpi2pd(<1 x i64> %3) + %5 = call <1 x i64> @llvm.x86.sse.cvtpd2pi(<2 x double> %4) + ret <1 x i64> %5 } define half @mxcsr_f16c(float %a) { @@ -41,11 +41,11 @@ define <8 x double> @mxcsr_fma_sae(<8 x double> %a, <8 x double> %b, <8 x double ret <8 x double> %res } -declare x86_mmx @llvm.x86.sse.cvtps2pi(<4 x float>) -declare<4 x float> @llvm.x86.sse.cvtpi2ps(<4 x float>, x86_mmx) -declare x86_mmx @llvm.x86.sse.cvttps2pi(<4 x float>) -declare <2 x double> @llvm.x86.sse.cvtpi2pd(x86_mmx) -declare x86_mmx @llvm.x86.sse.cvtpd2pi(<2 x double>) +declare <1 x i64> @llvm.x86.sse.cvtps2pi(<4 x float>) +declare<4 x float> @llvm.x86.sse.cvtpi2ps(<4 x float>, <1 x i64>) +declare <1 x i64> @llvm.x86.sse.cvttps2pi(<4 x float>) +declare <2 x double> @llvm.x86.sse.cvtpi2pd(<1 x i64>) +declare <1 x i64> @llvm.x86.sse.cvtpd2pi(<2 x double>) declare <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float>, <4 x float>, <4 x float>) declare <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float>, <4 x float>, <4 x float>) declare <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32) diff --git a/llvm/test/CodeGen/X86/nontemporal.ll b/llvm/test/CodeGen/X86/nontemporal.ll index 1f273eb43c6a60..3b6ffacb0b230e 100644 --- a/llvm/test/CodeGen/X86/nontemporal.ll +++ b/llvm/test/CodeGen/X86/nontemporal.ll @@ -193,11 +193,11 @@ define void @test_mmx(ptr nocapture %a0, ptr nocapture %a1) { ; X64-NEXT: movntq %mm0, (%rsi) ; X64-NEXT: retq entry: - %0 = load x86_mmx, ptr %a0 - %1 = call x86_mmx @llvm.x86.mmx.psrli.q(x86_mmx %0, i32 3) - store x86_mmx %1, ptr %a1, align 8, !nontemporal !0 + %0 = load <1 x i64>, ptr %a0 + %1 = call <1 x i64> @llvm.x86.mmx.psrli.q(<1 x i64> %0, i32 3) + store <1 x i64> %1, ptr %a1, align 8, !nontemporal !0 ret void } -declare x86_mmx @llvm.x86.mmx.psrli.q(x86_mmx, i32) nounwind readnone +declare <1 x i64> @llvm.x86.mmx.psrli.q(<1 x i64>, i32) nounwind readnone !0 = !{i32 1} diff --git a/llvm/test/CodeGen/X86/pr13859.ll b/llvm/test/CodeGen/X86/pr13859.ll index 9b290e6947d58d..35466478f289b1 100644 --- a/llvm/test/CodeGen/X86/pr13859.ll +++ b/llvm/test/CodeGen/X86/pr13859.ll @@ -13,8 +13,7 @@ entry: %a37 = insertelement <4 x i16> %a36, i16 %aconv, i32 1 %a38 = insertelement <4 x i16> %a37, i16 %aconv, i32 2 %a39 = insertelement <4 x i16> %a38, i16 %aconv, i32 3 - %a40 = bitcast <4 x i16> %a39 to x86_mmx - %a41 = bitcast x86_mmx %a40 to <1 x i64> + %a40 = bitcast <4 x i16> %a39 to <1 x i64> %a47 = trunc i32 %a32 to i1 br i1 %a47, label %a48, label %a49 @@ -23,6 +22,6 @@ a48: unreachable a49: - store <1 x i64> %a41, ptr %dest, 
align 8 ; !!! + store <1 x i64> %a40, ptr %dest, align 8 ; !!! ret void } diff --git a/llvm/test/CodeGen/X86/pr23246.ll b/llvm/test/CodeGen/X86/pr23246.ll index cd0ece12a19167..da3246a917ea3d 100644 --- a/llvm/test/CodeGen/X86/pr23246.ll +++ b/llvm/test/CodeGen/X86/pr23246.ll @@ -6,15 +6,14 @@ target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" ; PR23246 ; We're really only interested in doing something sane with the shuffle. -define <2 x i64> @test(x86_mmx %a) #0 { +define <2 x i64> @test(<1 x i64> %a) #0 { ; CHECK-LABEL: test: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: movq %rdi, %xmm0 ; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1] ; CHECK-NEXT: retq entry: - %b = bitcast x86_mmx %a to <1 x i64> - %s = shufflevector <1 x i64> %b, <1 x i64> undef, <2 x i32> zeroinitializer + %s = shufflevector <1 x i64> %a, <1 x i64> undef, <2 x i32> zeroinitializer ret <2 x i64> %s } diff --git a/llvm/test/CodeGen/X86/pr29222.ll b/llvm/test/CodeGen/X86/pr29222.ll index 1ddcb1fb56524c..6b8ac918386faf 100644 --- a/llvm/test/CodeGen/X86/pr29222.ll +++ b/llvm/test/CodeGen/X86/pr29222.ll @@ -62,9 +62,9 @@ define i32 @PR29222(i32) nounwind { ; X64-AVX-NEXT: retq %2 = insertelement <2 x i32> undef, i32 %0, i32 0 %3 = shufflevector <2 x i32> %2, <2 x i32> undef, <2 x i32> zeroinitializer - %4 = bitcast <2 x i32> %3 to x86_mmx - %5 = tail call x86_mmx @llvm.x86.mmx.packsswb(x86_mmx %4, x86_mmx %4) - %6 = bitcast x86_mmx %5 to i64 + %4 = bitcast <2 x i32> %3 to <1 x i64> + %5 = tail call <1 x i64> @llvm.x86.mmx.packsswb(<1 x i64> %4, <1 x i64> %4) + %6 = bitcast <1 x i64> %5 to i64 %7 = insertelement <2 x i64> undef, i64 %6, i32 0 %8 = bitcast <2 x i64> %7 to <8 x i16> %9 = tail call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %8, <8 x i16> undef) @@ -73,5 +73,5 @@ define i32 @PR29222(i32) nounwind { ret i32 %11 } -declare x86_mmx @llvm.x86.mmx.packsswb(x86_mmx, x86_mmx) +declare <1 x i64> @llvm.x86.mmx.packsswb(<1 x i64>, <1 x i64>) declare <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16>, <8 x i16>) diff --git a/llvm/test/CodeGen/X86/pr35982.ll b/llvm/test/CodeGen/X86/pr35982.ll index b6022698edaeb9..0ad35309b87bb4 100644 --- a/llvm/test/CodeGen/X86/pr35982.ll +++ b/llvm/test/CodeGen/X86/pr35982.ll @@ -35,9 +35,9 @@ define float @PR35982_emms(<1 x i64>) nounwind { %2 = bitcast <1 x i64> %0 to <2 x i32> %3 = extractelement <2 x i32> %2, i32 0 %4 = extractelement <1 x i64> %0, i32 0 - %5 = bitcast i64 %4 to x86_mmx - %6 = tail call x86_mmx @llvm.x86.mmx.punpckhdq(x86_mmx %5, x86_mmx %5) - %7 = bitcast x86_mmx %6 to <2 x i32> + %5 = bitcast i64 %4 to <1 x i64> + %6 = tail call <1 x i64> @llvm.x86.mmx.punpckhdq(<1 x i64> %5, <1 x i64> %5) + %7 = bitcast <1 x i64> %6 to <2 x i32> %8 = extractelement <2 x i32> %7, i32 0 tail call void @llvm.x86.mmx.emms() %9 = sitofp i32 %3 to float @@ -46,5 +46,5 @@ define float @PR35982_emms(<1 x i64>) nounwind { ret float %11 } -declare x86_mmx @llvm.x86.mmx.punpckhdq(x86_mmx, x86_mmx) +declare <1 x i64> @llvm.x86.mmx.punpckhdq(<1 x i64>, <1 x i64>) declare void @llvm.x86.mmx.emms() diff --git a/llvm/test/CodeGen/X86/select-mmx.ll b/llvm/test/CodeGen/X86/select-mmx.ll index 8339cb71d46718..8a4308a5af64b2 100644 --- a/llvm/test/CodeGen/X86/select-mmx.ll +++ b/llvm/test/CodeGen/X86/select-mmx.ll @@ -51,9 +51,9 @@ define i64 @test47(i64 %arg) { ; X86-NEXT: .cfi_def_cfa %esp, 4 ; X86-NEXT: retl %cond = icmp eq i64 %arg, 0 - %slct = select i1 %cond, x86_mmx bitcast (i64 7 to x86_mmx), x86_mmx bitcast (i64 0 to x86_mmx) - %psll = tail call x86_mmx @llvm.x86.mmx.psll.w(x86_mmx %slct, x86_mmx %slct) - %retc
= bitcast x86_mmx %psll to i64 + %slct = select i1 %cond, <1 x i64> bitcast (i64 7 to <1 x i64>), <1 x i64> bitcast (i64 0 to <1 x i64>) + %psll = tail call <1 x i64> @llvm.x86.mmx.psll.w(<1 x i64> %slct, <1 x i64> %slct) + %retc = bitcast <1 x i64> %psll to i64 ret i64 %retc } @@ -104,13 +104,13 @@ define i64 @test49(i64 %arg, i64 %x, i64 %y) { ; X86-NEXT: .cfi_def_cfa %esp, 4 ; X86-NEXT: retl %cond = icmp eq i64 %arg, 0 - %xmmx = bitcast i64 %x to x86_mmx - %ymmx = bitcast i64 %y to x86_mmx - %slct = select i1 %cond, x86_mmx %xmmx, x86_mmx %ymmx - %psll = tail call x86_mmx @llvm.x86.mmx.psll.w(x86_mmx %slct, x86_mmx %slct) - %retc = bitcast x86_mmx %psll to i64 + %xmmx = bitcast i64 %x to <1 x i64> + %ymmx = bitcast i64 %y to <1 x i64> + %slct = select i1 %cond, <1 x i64> %xmmx, <1 x i64> %ymmx + %psll = tail call <1 x i64> @llvm.x86.mmx.psll.w(<1 x i64> %slct, <1 x i64> %slct) + %retc = bitcast <1 x i64> %psll to i64 ret i64 %retc } -declare x86_mmx @llvm.x86.mmx.psll.w(x86_mmx, x86_mmx) +declare <1 x i64> @llvm.x86.mmx.psll.w(<1 x i64>, <1 x i64>) diff --git a/llvm/test/CodeGen/X86/stack-folding-mmx.ll b/llvm/test/CodeGen/X86/stack-folding-mmx.ll index 6652a8ca0dbd54..6eb99dd6c67582 100644 --- a/llvm/test/CodeGen/X86/stack-folding-mmx.ll +++ b/llvm/test/CodeGen/X86/stack-folding-mmx.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+mmx,+ssse3 | FileCheck %s -define x86_mmx @stack_fold_cvtpd2pi(<2 x double> %a0) { +define <1 x i64> @stack_fold_cvtpd2pi(<2 x double> %a0) { ; CHECK-LABEL: stack_fold_cvtpd2pi: ; CHECK: # %bb.0: ; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill @@ -12,12 +12,12 @@ define x86_mmx @stack_fold_cvtpd2pi(<2 x double> %a0) { ; CHECK-NEXT: movq %mm0, %rax ; CHECK-NEXT: retq %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() - %2 = call x86_mmx @llvm.x86.sse.cvtpd2pi(<2 x double> %a0) nounwind readnone - ret x86_mmx %2 + %2 = call <1 x i64> @llvm.x86.sse.cvtpd2pi(<2 x double> %a0) nounwind readnone + ret <1 x i64> %2 } -declare x86_mmx @llvm.x86.sse.cvtpd2pi(<2 x double>) nounwind readnone +declare <1 x i64> @llvm.x86.sse.cvtpd2pi(<2 x double>) nounwind readnone -define <2 x double> @stack_fold_cvtpi2pd(x86_mmx %a0) { +define <2 x double> @stack_fold_cvtpi2pd(<1 x i64> %a0) { ; CHECK-LABEL: stack_fold_cvtpi2pd: ; CHECK: # %bb.0: ; CHECK-NEXT: movq %rdi, %mm0 @@ -27,13 +27,13 @@ define <2 x double> @stack_fold_cvtpi2pd(x86_mmx %a0) { ; CHECK-NEXT: #NO_APP ; CHECK-NEXT: cvtpi2pd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Folded Reload ; CHECK-NEXT: retq - %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm1},~{mm1},~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() - %2 = call <2 x double> @llvm.x86.sse.cvtpi2pd(x86_mmx %a0) nounwind readnone + %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm1},~{mm1},~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() + %2 = call <2 x double> @llvm.x86.sse.cvtpi2pd(<1 x i64> %a0) nounwind readnone ret <2 x double> %2 } -declare <2 x double> @llvm.x86.sse.cvtpi2pd(x86_mmx) nounwind readnone +declare <2 x double> @llvm.x86.sse.cvtpi2pd(<1 x i64>) nounwind readnone -define <4 x float> @stack_fold_cvtpi2ps(<4 x float> %a0, x86_mmx %a1) { +define <4 x float> @stack_fold_cvtpi2ps(<4 x float> %a0, <1 x i64> %a1) { ; CHECK-LABEL: stack_fold_cvtpi2ps: ; CHECK: # %bb.0: ; CHECK-NEXT: 
movq %rdi, %mm0 @@ -43,13 +43,13 @@ define <4 x float> @stack_fold_cvtpi2ps(<4 x float> %a0, x86_mmx %a1) { ; CHECK-NEXT: #NO_APP ; CHECK-NEXT: cvtpi2ps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Folded Reload ; CHECK-NEXT: retq - %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm1},~{mm1},~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() - %2 = call <4 x float> @llvm.x86.sse.cvtpi2ps(<4 x float> %a0, x86_mmx %a1) nounwind readnone + %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm1},~{mm1},~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() + %2 = call <4 x float> @llvm.x86.sse.cvtpi2ps(<4 x float> %a0, <1 x i64> %a1) nounwind readnone ret <4 x float> %2 } -declare <4 x float> @llvm.x86.sse.cvtpi2ps(<4 x float>, x86_mmx) nounwind readnone +declare <4 x float> @llvm.x86.sse.cvtpi2ps(<4 x float>, <1 x i64>) nounwind readnone -define x86_mmx @stack_fold_cvtps2pi(<4 x float> %a0) { +define <1 x i64> @stack_fold_cvtps2pi(<4 x float> %a0) { ; CHECK-LABEL: stack_fold_cvtps2pi: ; CHECK: # %bb.0: ; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill @@ -60,12 +60,12 @@ define x86_mmx @stack_fold_cvtps2pi(<4 x float> %a0) { ; CHECK-NEXT: movq %mm0, %rax ; CHECK-NEXT: retq %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() - %2 = call x86_mmx @llvm.x86.sse.cvtps2pi(<4 x float> %a0) nounwind readnone - ret x86_mmx %2 + %2 = call <1 x i64> @llvm.x86.sse.cvtps2pi(<4 x float> %a0) nounwind readnone + ret <1 x i64> %2 } -declare x86_mmx @llvm.x86.sse.cvtps2pi(<4 x float>) nounwind readnone +declare <1 x i64> @llvm.x86.sse.cvtps2pi(<4 x float>) nounwind readnone -define x86_mmx @stack_fold_cvttpd2pi(<2 x double> %a0) { +define <1 x i64> @stack_fold_cvttpd2pi(<2 x double> %a0) { ; CHECK-LABEL: stack_fold_cvttpd2pi: ; CHECK: # %bb.0: ; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill @@ -76,12 +76,12 @@ define x86_mmx @stack_fold_cvttpd2pi(<2 x double> %a0) { ; CHECK-NEXT: movq %mm0, %rax ; CHECK-NEXT: retq %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() - %2 = call x86_mmx @llvm.x86.sse.cvttpd2pi(<2 x double> %a0) nounwind readnone - ret x86_mmx %2 + %2 = call <1 x i64> @llvm.x86.sse.cvttpd2pi(<2 x double> %a0) nounwind readnone + ret <1 x i64> %2 } -declare x86_mmx @llvm.x86.sse.cvttpd2pi(<2 x double>) nounwind readnone +declare <1 x i64> @llvm.x86.sse.cvttpd2pi(<2 x double>) nounwind readnone -define x86_mmx @stack_fold_cvttps2pi(<4 x float> %a0) { +define <1 x i64> @stack_fold_cvttps2pi(<4 x float> %a0) { ; CHECK-LABEL: stack_fold_cvttps2pi: ; CHECK: # %bb.0: ; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill @@ -92,15 +92,15 @@ define x86_mmx @stack_fold_cvttps2pi(<4 x float> %a0) { ; CHECK-NEXT: movq %mm0, %rax ; CHECK-NEXT: retq %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() - %2 = call x86_mmx @llvm.x86.sse.cvttps2pi(<4 x float> %a0) nounwind readnone - ret x86_mmx %2 + %2 = call <1 x i64> @llvm.x86.sse.cvttps2pi(<4 x float> %a0) nounwind readnone + ret <1 x i64> %2 } -declare x86_mmx @llvm.x86.sse.cvttps2pi(<4 x float>) nounwind readnone +declare <1 x i64> @llvm.x86.sse.cvttps2pi(<4 x float>) nounwind readnone ; 
TODO stack_fold_movd_load ; padd forces execution on mmx -define i32 @stack_fold_movd_store(x86_mmx %a0) nounwind { +define i32 @stack_fold_movd_store(<1 x i64> %a0) nounwind { ; CHECK-LABEL: stack_fold_movd_store: ; CHECK: # %bb.0: ; CHECK-NEXT: pushq %rbp @@ -123,8 +123,8 @@ define i32 @stack_fold_movd_store(x86_mmx %a0) nounwind { ; CHECK-NEXT: popq %r15 ; CHECK-NEXT: popq %rbp ; CHECK-NEXT: retq - %1 = call x86_mmx @llvm.x86.mmx.padd.b(x86_mmx %a0, x86_mmx %a0) - %2 = bitcast x86_mmx %1 to <2 x i32> + %1 = call <1 x i64> @llvm.x86.mmx.padd.b(<1 x i64> %a0, <1 x i64> %a0) + %2 = bitcast <1 x i64> %1 to <2 x i32> %3 = extractelement <2 x i32> %2, i32 0 %4 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"() ret i32 %3 @@ -133,7 +133,7 @@ define i32 @stack_fold_movd_store(x86_mmx %a0) nounwind { ; TODO stack_fold_movq_load ; padd forces execution on mmx -define i64 @stack_fold_movq_store(x86_mmx %a0) nounwind { +define i64 @stack_fold_movq_store(<1 x i64> %a0) nounwind { ; CHECK-LABEL: stack_fold_movq_store: ; CHECK: # %bb.0: ; CHECK-NEXT: pushq %rbp @@ -156,13 +156,13 @@ define i64 @stack_fold_movq_store(x86_mmx %a0) nounwind { ; CHECK-NEXT: popq %r15 ; CHECK-NEXT: popq %rbp ; CHECK-NEXT: retq - %1 = call x86_mmx @llvm.x86.mmx.padd.b(x86_mmx %a0, x86_mmx %a0) - %2 = bitcast x86_mmx %1 to i64 + %1 = call <1 x i64> @llvm.x86.mmx.padd.b(<1 x i64> %a0, <1 x i64> %a0) + %2 = bitcast <1 x i64> %1 to i64 %3 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"() ret i64 %2 } -define x86_mmx @stack_fold_pabsb(x86_mmx %a0) { +define <1 x i64> @stack_fold_pabsb(<1 x i64> %a0) { ; CHECK-LABEL: stack_fold_pabsb: ; CHECK: # %bb.0: ; CHECK-NEXT: movq %rdi, %mm0 @@ -173,13 +173,13 @@ define x86_mmx @stack_fold_pabsb(x86_mmx %a0) { ; CHECK-NEXT: pabsb {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload ; CHECK-NEXT: movq %mm0, %rax ; CHECK-NEXT: retq - %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm1},~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() - %2 = call x86_mmx @llvm.x86.ssse3.pabs.b(x86_mmx %a0) nounwind readnone - ret x86_mmx %2 + %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm1},~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() + %2 = call <1 x i64> @llvm.x86.ssse3.pabs.b(<1 x i64> %a0) nounwind readnone + ret <1 x i64> %2 } -declare x86_mmx @llvm.x86.ssse3.pabs.b(x86_mmx) nounwind readnone +declare <1 x i64> @llvm.x86.ssse3.pabs.b(<1 x i64>) nounwind readnone -define x86_mmx @stack_fold_pabsd(x86_mmx %a0) { +define <1 x i64> @stack_fold_pabsd(<1 x i64> %a0) { ; CHECK-LABEL: stack_fold_pabsd: ; CHECK: # %bb.0: ; CHECK-NEXT: movq %rdi, %mm0 @@ -190,13 +190,13 @@ define x86_mmx @stack_fold_pabsd(x86_mmx %a0) { ; CHECK-NEXT: pabsd {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload ; CHECK-NEXT: movq %mm0, %rax ; CHECK-NEXT: retq - %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm1},~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() - %2 = call x86_mmx @llvm.x86.ssse3.pabs.d(x86_mmx %a0) nounwind readnone - ret x86_mmx %2 + %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm1},~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() + %2 = call <1 x i64> @llvm.x86.ssse3.pabs.d(<1 x i64> %a0) nounwind readnone + ret <1 x i64> %2 } -declare x86_mmx @llvm.x86.ssse3.pabs.d(x86_mmx) nounwind readnone +declare <1 x i64> @llvm.x86.ssse3.pabs.d(<1 x i64>) nounwind readnone -define x86_mmx @stack_fold_pabsw(x86_mmx %a0) { 
+define <1 x i64> @stack_fold_pabsw(<1 x i64> %a0) { ; CHECK-LABEL: stack_fold_pabsw: ; CHECK: # %bb.0: ; CHECK-NEXT: movq %rdi, %mm0 @@ -207,13 +207,13 @@ define x86_mmx @stack_fold_pabsw(x86_mmx %a0) { ; CHECK-NEXT: pabsw {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload ; CHECK-NEXT: movq %mm0, %rax ; CHECK-NEXT: retq - %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm1},~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() - %2 = call x86_mmx @llvm.x86.ssse3.pabs.w(x86_mmx %a0) nounwind readnone - ret x86_mmx %2 + %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm1},~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() + %2 = call <1 x i64> @llvm.x86.ssse3.pabs.w(<1 x i64> %a0) nounwind readnone + ret <1 x i64> %2 } -declare x86_mmx @llvm.x86.ssse3.pabs.w(x86_mmx) nounwind readnone +declare <1 x i64> @llvm.x86.ssse3.pabs.w(<1 x i64>) nounwind readnone -define x86_mmx @stack_fold_packssdw(x86_mmx %a, x86_mmx %b) { +define <1 x i64> @stack_fold_packssdw(<1 x i64> %a, <1 x i64> %b) { ; CHECK-LABEL: stack_fold_packssdw: ; CHECK: # %bb.0: ; CHECK-NEXT: movq %rsi, %mm0 @@ -224,13 +224,13 @@ define x86_mmx @stack_fold_packssdw(x86_mmx %a, x86_mmx %b) { ; CHECK-NEXT: nop ; CHECK-NEXT: #NO_APP ; CHECK-NEXT: retq - %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() - %2 = call x86_mmx @llvm.x86.mmx.packssdw(x86_mmx %a, x86_mmx %b) nounwind readnone - ret x86_mmx %2 + %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() + %2 = call <1 x i64> @llvm.x86.mmx.packssdw(<1 x i64> %a, <1 x i64> %b) nounwind readnone + ret <1 x i64> %2 } -declare x86_mmx @llvm.x86.mmx.packssdw(x86_mmx, x86_mmx) nounwind readnone +declare <1 x i64> @llvm.x86.mmx.packssdw(<1 x i64>, <1 x i64>) nounwind readnone -define x86_mmx @stack_fold_packsswb(x86_mmx %a, x86_mmx %b) { +define <1 x i64> @stack_fold_packsswb(<1 x i64> %a, <1 x i64> %b) { ; CHECK-LABEL: stack_fold_packsswb: ; CHECK: # %bb.0: ; CHECK-NEXT: movq %rsi, %mm0 @@ -241,13 +241,13 @@ define x86_mmx @stack_fold_packsswb(x86_mmx %a, x86_mmx %b) { ; CHECK-NEXT: nop ; CHECK-NEXT: #NO_APP ; CHECK-NEXT: retq - %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() - %2 = call x86_mmx @llvm.x86.mmx.packsswb(x86_mmx %a, x86_mmx %b) nounwind readnone - ret x86_mmx %2 + %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() + %2 = call <1 x i64> @llvm.x86.mmx.packsswb(<1 x i64> %a, <1 x i64> %b) nounwind readnone + ret <1 x i64> %2 } -declare x86_mmx @llvm.x86.mmx.packsswb(x86_mmx, x86_mmx) nounwind readnone +declare <1 x i64> @llvm.x86.mmx.packsswb(<1 x i64>, <1 x i64>) nounwind readnone -define x86_mmx @stack_fold_packuswb(x86_mmx %a, x86_mmx %b) { +define <1 x i64> @stack_fold_packuswb(<1 x i64> %a, <1 x i64> %b) { ; CHECK-LABEL: stack_fold_packuswb: ; CHECK: # %bb.0: ; CHECK-NEXT: movq %rsi, %mm0 @@ -258,13 +258,13 @@ define x86_mmx @stack_fold_packuswb(x86_mmx %a, x86_mmx %b) { ; CHECK-NEXT: nop ; CHECK-NEXT: #NO_APP ; CHECK-NEXT: retq - %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() - %2 = call x86_mmx @llvm.x86.mmx.packuswb(x86_mmx %a, x86_mmx %b) nounwind readnone - ret x86_mmx %2 + %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() + %2 = call <1 x i64> @llvm.x86.mmx.packuswb(<1 x i64> %a, <1 x i64> %b) nounwind readnone + ret <1 x i64> %2 } -declare x86_mmx @llvm.x86.mmx.packuswb(x86_mmx, x86_mmx) nounwind 
readnone +declare <1 x i64> @llvm.x86.mmx.packuswb(<1 x i64>, <1 x i64>) nounwind readnone -define x86_mmx @stack_fold_paddb(x86_mmx %a, x86_mmx %b) { +define <1 x i64> @stack_fold_paddb(<1 x i64> %a, <1 x i64> %b) { ; CHECK-LABEL: stack_fold_paddb: ; CHECK: # %bb.0: ; CHECK-NEXT: movq %rsi, %mm0 @@ -275,13 +275,13 @@ define x86_mmx @stack_fold_paddb(x86_mmx %a, x86_mmx %b) { ; CHECK-NEXT: nop ; CHECK-NEXT: #NO_APP ; CHECK-NEXT: retq - %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() - %2 = call x86_mmx @llvm.x86.mmx.padd.b(x86_mmx %a, x86_mmx %b) nounwind readnone - ret x86_mmx %2 + %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() + %2 = call <1 x i64> @llvm.x86.mmx.padd.b(<1 x i64> %a, <1 x i64> %b) nounwind readnone + ret <1 x i64> %2 } -declare x86_mmx @llvm.x86.mmx.padd.b(x86_mmx, x86_mmx) nounwind readnone +declare <1 x i64> @llvm.x86.mmx.padd.b(<1 x i64>, <1 x i64>) nounwind readnone -define x86_mmx @stack_fold_paddd(x86_mmx %a, x86_mmx %b) { +define <1 x i64> @stack_fold_paddd(<1 x i64> %a, <1 x i64> %b) { ; CHECK-LABEL: stack_fold_paddd: ; CHECK: # %bb.0: ; CHECK-NEXT: movq %rsi, %mm0 @@ -292,13 +292,13 @@ define x86_mmx @stack_fold_paddd(x86_mmx %a, x86_mmx %b) { ; CHECK-NEXT: nop ; CHECK-NEXT: #NO_APP ; CHECK-NEXT: retq - %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() - %2 = call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %a, x86_mmx %b) nounwind readnone - ret x86_mmx %2 + %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() + %2 = call <1 x i64> @llvm.x86.mmx.padd.d(<1 x i64> %a, <1 x i64> %b) nounwind readnone + ret <1 x i64> %2 } -declare x86_mmx @llvm.x86.mmx.padd.d(x86_mmx, x86_mmx) nounwind readnone +declare <1 x i64> @llvm.x86.mmx.padd.d(<1 x i64>, <1 x i64>) nounwind readnone -define x86_mmx @stack_fold_paddq(x86_mmx %a, x86_mmx %b) { +define <1 x i64> @stack_fold_paddq(<1 x i64> %a, <1 x i64> %b) { ; CHECK-LABEL: stack_fold_paddq: ; CHECK: # %bb.0: ; CHECK-NEXT: movq %rsi, %mm0 @@ -309,13 +309,13 @@ define x86_mmx @stack_fold_paddq(x86_mmx %a, x86_mmx %b) { ; CHECK-NEXT: nop ; CHECK-NEXT: #NO_APP ; CHECK-NEXT: retq - %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() - %2 = call x86_mmx @llvm.x86.mmx.padd.q(x86_mmx %a, x86_mmx %b) nounwind readnone - ret x86_mmx %2 + %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() + %2 = call <1 x i64> @llvm.x86.mmx.padd.q(<1 x i64> %a, <1 x i64> %b) nounwind readnone + ret <1 x i64> %2 } -declare x86_mmx @llvm.x86.mmx.padd.q(x86_mmx, x86_mmx) nounwind readnone +declare <1 x i64> @llvm.x86.mmx.padd.q(<1 x i64>, <1 x i64>) nounwind readnone -define x86_mmx @stack_fold_paddsb(x86_mmx %a, x86_mmx %b) { +define <1 x i64> @stack_fold_paddsb(<1 x i64> %a, <1 x i64> %b) { ; CHECK-LABEL: stack_fold_paddsb: ; CHECK: # %bb.0: ; CHECK-NEXT: movq %rsi, %mm0 @@ -326,13 +326,13 @@ define x86_mmx @stack_fold_paddsb(x86_mmx %a, x86_mmx %b) { ; CHECK-NEXT: nop ; CHECK-NEXT: #NO_APP ; CHECK-NEXT: retq - %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() - %2 = call x86_mmx @llvm.x86.mmx.padds.b(x86_mmx %a, x86_mmx %b) nounwind readnone - ret x86_mmx %2 + %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() + %2 = call <1 x i64> @llvm.x86.mmx.padds.b(<1 x i64> %a, <1 x i64> %b) nounwind readnone + ret <1 x 
i64> %2 } -declare x86_mmx @llvm.x86.mmx.padds.b(x86_mmx, x86_mmx) nounwind readnone +declare <1 x i64> @llvm.x86.mmx.padds.b(<1 x i64>, <1 x i64>) nounwind readnone -define x86_mmx @stack_fold_paddsw(x86_mmx %a, x86_mmx %b) { +define <1 x i64> @stack_fold_paddsw(<1 x i64> %a, <1 x i64> %b) { ; CHECK-LABEL: stack_fold_paddsw: ; CHECK: # %bb.0: ; CHECK-NEXT: movq %rsi, %mm0 @@ -343,13 +343,13 @@ define x86_mmx @stack_fold_paddsw(x86_mmx %a, x86_mmx %b) { ; CHECK-NEXT: nop ; CHECK-NEXT: #NO_APP ; CHECK-NEXT: retq - %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() - %2 = call x86_mmx @llvm.x86.mmx.padds.w(x86_mmx %a, x86_mmx %b) nounwind readnone - ret x86_mmx %2 + %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() + %2 = call <1 x i64> @llvm.x86.mmx.padds.w(<1 x i64> %a, <1 x i64> %b) nounwind readnone + ret <1 x i64> %2 } -declare x86_mmx @llvm.x86.mmx.padds.w(x86_mmx, x86_mmx) nounwind readnone +declare <1 x i64> @llvm.x86.mmx.padds.w(<1 x i64>, <1 x i64>) nounwind readnone -define x86_mmx @stack_fold_paddusb(x86_mmx %a, x86_mmx %b) { +define <1 x i64> @stack_fold_paddusb(<1 x i64> %a, <1 x i64> %b) { ; CHECK-LABEL: stack_fold_paddusb: ; CHECK: # %bb.0: ; CHECK-NEXT: movq %rsi, %mm0 @@ -360,13 +360,13 @@ define x86_mmx @stack_fold_paddusb(x86_mmx %a, x86_mmx %b) { ; CHECK-NEXT: nop ; CHECK-NEXT: #NO_APP ; CHECK-NEXT: retq - %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() - %2 = call x86_mmx @llvm.x86.mmx.paddus.b(x86_mmx %a, x86_mmx %b) nounwind readnone - ret x86_mmx %2 + %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() + %2 = call <1 x i64> @llvm.x86.mmx.paddus.b(<1 x i64> %a, <1 x i64> %b) nounwind readnone + ret <1 x i64> %2 } -declare x86_mmx @llvm.x86.mmx.paddus.b(x86_mmx, x86_mmx) nounwind readnone +declare <1 x i64> @llvm.x86.mmx.paddus.b(<1 x i64>, <1 x i64>) nounwind readnone -define x86_mmx @stack_fold_paddusw(x86_mmx %a, x86_mmx %b) { +define <1 x i64> @stack_fold_paddusw(<1 x i64> %a, <1 x i64> %b) { ; CHECK-LABEL: stack_fold_paddusw: ; CHECK: # %bb.0: ; CHECK-NEXT: movq %rsi, %mm0 @@ -377,13 +377,13 @@ define x86_mmx @stack_fold_paddusw(x86_mmx %a, x86_mmx %b) { ; CHECK-NEXT: nop ; CHECK-NEXT: #NO_APP ; CHECK-NEXT: retq - %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() - %2 = call x86_mmx @llvm.x86.mmx.paddus.w(x86_mmx %a, x86_mmx %b) nounwind readnone - ret x86_mmx %2 + %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() + %2 = call <1 x i64> @llvm.x86.mmx.paddus.w(<1 x i64> %a, <1 x i64> %b) nounwind readnone + ret <1 x i64> %2 } -declare x86_mmx @llvm.x86.mmx.paddus.w(x86_mmx, x86_mmx) nounwind readnone +declare <1 x i64> @llvm.x86.mmx.paddus.w(<1 x i64>, <1 x i64>) nounwind readnone -define x86_mmx @stack_fold_paddw(x86_mmx %a, x86_mmx %b) { +define <1 x i64> @stack_fold_paddw(<1 x i64> %a, <1 x i64> %b) { ; CHECK-LABEL: stack_fold_paddw: ; CHECK: # %bb.0: ; CHECK-NEXT: movq %rsi, %mm0 @@ -394,13 +394,13 @@ define x86_mmx @stack_fold_paddw(x86_mmx %a, x86_mmx %b) { ; CHECK-NEXT: nop ; CHECK-NEXT: #NO_APP ; CHECK-NEXT: retq - %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() - %2 = call x86_mmx @llvm.x86.mmx.padd.w(x86_mmx %a, x86_mmx %b) nounwind readnone - ret x86_mmx %2 + %1 = tail call <1 x i64> asm sideeffect "nop", 
"=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() + %2 = call <1 x i64> @llvm.x86.mmx.padd.w(<1 x i64> %a, <1 x i64> %b) nounwind readnone + ret <1 x i64> %2 } -declare x86_mmx @llvm.x86.mmx.padd.w(x86_mmx, x86_mmx) nounwind readnone +declare <1 x i64> @llvm.x86.mmx.padd.w(<1 x i64>, <1 x i64>) nounwind readnone -define x86_mmx @stack_fold_palignr(x86_mmx %a, x86_mmx %b) { +define <1 x i64> @stack_fold_palignr(<1 x i64> %a, <1 x i64> %b) { ; CHECK-LABEL: stack_fold_palignr: ; CHECK: # %bb.0: ; CHECK-NEXT: movq %rsi, %mm0 @@ -411,13 +411,13 @@ define x86_mmx @stack_fold_palignr(x86_mmx %a, x86_mmx %b) { ; CHECK-NEXT: nop ; CHECK-NEXT: #NO_APP ; CHECK-NEXT: retq - %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() - %2 = call x86_mmx @llvm.x86.mmx.palignr.b(x86_mmx %a, x86_mmx %b, i8 1) nounwind readnone - ret x86_mmx %2 + %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() + %2 = call <1 x i64> @llvm.x86.mmx.palignr.b(<1 x i64> %a, <1 x i64> %b, i8 1) nounwind readnone + ret <1 x i64> %2 } -declare x86_mmx @llvm.x86.mmx.palignr.b(x86_mmx, x86_mmx, i8) nounwind readnone +declare <1 x i64> @llvm.x86.mmx.palignr.b(<1 x i64>, <1 x i64>, i8) nounwind readnone -define x86_mmx @stack_fold_pand(x86_mmx %a, x86_mmx %b) { +define <1 x i64> @stack_fold_pand(<1 x i64> %a, <1 x i64> %b) { ; CHECK-LABEL: stack_fold_pand: ; CHECK: # %bb.0: ; CHECK-NEXT: movq %rsi, %mm0 @@ -428,13 +428,13 @@ define x86_mmx @stack_fold_pand(x86_mmx %a, x86_mmx %b) { ; CHECK-NEXT: nop ; CHECK-NEXT: #NO_APP ; CHECK-NEXT: retq - %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() - %2 = call x86_mmx @llvm.x86.mmx.pand(x86_mmx %a, x86_mmx %b) nounwind readnone - ret x86_mmx %2 + %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() + %2 = call <1 x i64> @llvm.x86.mmx.pand(<1 x i64> %a, <1 x i64> %b) nounwind readnone + ret <1 x i64> %2 } -declare x86_mmx @llvm.x86.mmx.pand(x86_mmx, x86_mmx) nounwind readnone +declare <1 x i64> @llvm.x86.mmx.pand(<1 x i64>, <1 x i64>) nounwind readnone -define x86_mmx @stack_fold_pandn(x86_mmx %a, x86_mmx %b) { +define <1 x i64> @stack_fold_pandn(<1 x i64> %a, <1 x i64> %b) { ; CHECK-LABEL: stack_fold_pandn: ; CHECK: # %bb.0: ; CHECK-NEXT: movq %rsi, %mm0 @@ -445,13 +445,13 @@ define x86_mmx @stack_fold_pandn(x86_mmx %a, x86_mmx %b) { ; CHECK-NEXT: nop ; CHECK-NEXT: #NO_APP ; CHECK-NEXT: retq - %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() - %2 = call x86_mmx @llvm.x86.mmx.pandn(x86_mmx %a, x86_mmx %b) nounwind readnone - ret x86_mmx %2 + %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() + %2 = call <1 x i64> @llvm.x86.mmx.pandn(<1 x i64> %a, <1 x i64> %b) nounwind readnone + ret <1 x i64> %2 } -declare x86_mmx @llvm.x86.mmx.pandn(x86_mmx, x86_mmx) nounwind readnone +declare <1 x i64> @llvm.x86.mmx.pandn(<1 x i64>, <1 x i64>) nounwind readnone -define x86_mmx @stack_fold_pavgb(x86_mmx %a, x86_mmx %b) { +define <1 x i64> @stack_fold_pavgb(<1 x i64> %a, <1 x i64> %b) { ; CHECK-LABEL: stack_fold_pavgb: ; CHECK: # %bb.0: ; CHECK-NEXT: movq %rsi, %mm0 @@ -462,13 +462,13 @@ define x86_mmx @stack_fold_pavgb(x86_mmx %a, x86_mmx %b) { ; CHECK-NEXT: nop ; CHECK-NEXT: #NO_APP ; CHECK-NEXT: retq - %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() - %2 = call x86_mmx @llvm.x86.mmx.pavg.b(x86_mmx %a, 
x86_mmx %b) nounwind readnone - ret x86_mmx %2 + %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() + %2 = call <1 x i64> @llvm.x86.mmx.pavg.b(<1 x i64> %a, <1 x i64> %b) nounwind readnone + ret <1 x i64> %2 } -declare x86_mmx @llvm.x86.mmx.pavg.b(x86_mmx, x86_mmx) nounwind readnone +declare <1 x i64> @llvm.x86.mmx.pavg.b(<1 x i64>, <1 x i64>) nounwind readnone -define x86_mmx @stack_fold_pavgw(x86_mmx %a, x86_mmx %b) { +define <1 x i64> @stack_fold_pavgw(<1 x i64> %a, <1 x i64> %b) { ; CHECK-LABEL: stack_fold_pavgw: ; CHECK: # %bb.0: ; CHECK-NEXT: movq %rsi, %mm0 @@ -479,13 +479,13 @@ define x86_mmx @stack_fold_pavgw(x86_mmx %a, x86_mmx %b) { ; CHECK-NEXT: nop ; CHECK-NEXT: #NO_APP ; CHECK-NEXT: retq - %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() - %2 = call x86_mmx @llvm.x86.mmx.pavg.w(x86_mmx %a, x86_mmx %b) nounwind readnone - ret x86_mmx %2 + %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() + %2 = call <1 x i64> @llvm.x86.mmx.pavg.w(<1 x i64> %a, <1 x i64> %b) nounwind readnone + ret <1 x i64> %2 } -declare x86_mmx @llvm.x86.mmx.pavg.w(x86_mmx, x86_mmx) nounwind readnone +declare <1 x i64> @llvm.x86.mmx.pavg.w(<1 x i64>, <1 x i64>) nounwind readnone -define x86_mmx @stack_fold_pcmpeqb(x86_mmx %a, x86_mmx %b) { +define <1 x i64> @stack_fold_pcmpeqb(<1 x i64> %a, <1 x i64> %b) { ; CHECK-LABEL: stack_fold_pcmpeqb: ; CHECK: # %bb.0: ; CHECK-NEXT: movq %rsi, %mm0 @@ -496,13 +496,13 @@ define x86_mmx @stack_fold_pcmpeqb(x86_mmx %a, x86_mmx %b) { ; CHECK-NEXT: nop ; CHECK-NEXT: #NO_APP ; CHECK-NEXT: retq - %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() - %2 = call x86_mmx @llvm.x86.mmx.pcmpeq.b(x86_mmx %a, x86_mmx %b) nounwind readnone - ret x86_mmx %2 + %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() + %2 = call <1 x i64> @llvm.x86.mmx.pcmpeq.b(<1 x i64> %a, <1 x i64> %b) nounwind readnone + ret <1 x i64> %2 } -declare x86_mmx @llvm.x86.mmx.pcmpeq.b(x86_mmx, x86_mmx) nounwind readnone +declare <1 x i64> @llvm.x86.mmx.pcmpeq.b(<1 x i64>, <1 x i64>) nounwind readnone -define x86_mmx @stack_fold_pcmpeqd(x86_mmx %a, x86_mmx %b) { +define <1 x i64> @stack_fold_pcmpeqd(<1 x i64> %a, <1 x i64> %b) { ; CHECK-LABEL: stack_fold_pcmpeqd: ; CHECK: # %bb.0: ; CHECK-NEXT: movq %rsi, %mm0 @@ -513,13 +513,13 @@ define x86_mmx @stack_fold_pcmpeqd(x86_mmx %a, x86_mmx %b) { ; CHECK-NEXT: nop ; CHECK-NEXT: #NO_APP ; CHECK-NEXT: retq - %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() - %2 = call x86_mmx @llvm.x86.mmx.pcmpeq.d(x86_mmx %a, x86_mmx %b) nounwind readnone - ret x86_mmx %2 + %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() + %2 = call <1 x i64> @llvm.x86.mmx.pcmpeq.d(<1 x i64> %a, <1 x i64> %b) nounwind readnone + ret <1 x i64> %2 } -declare x86_mmx @llvm.x86.mmx.pcmpeq.d(x86_mmx, x86_mmx) nounwind readnone +declare <1 x i64> @llvm.x86.mmx.pcmpeq.d(<1 x i64>, <1 x i64>) nounwind readnone -define x86_mmx @stack_fold_pcmpeqw(x86_mmx %a, x86_mmx %b) { +define <1 x i64> @stack_fold_pcmpeqw(<1 x i64> %a, <1 x i64> %b) { ; CHECK-LABEL: stack_fold_pcmpeqw: ; CHECK: # %bb.0: ; CHECK-NEXT: movq %rsi, %mm0 @@ -530,13 +530,13 @@ define x86_mmx @stack_fold_pcmpeqw(x86_mmx %a, x86_mmx %b) { ; CHECK-NEXT: nop ; CHECK-NEXT: #NO_APP ; CHECK-NEXT: retq - %1 = tail call x86_mmx asm sideeffect 
"nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() - %2 = call x86_mmx @llvm.x86.mmx.pcmpeq.w(x86_mmx %a, x86_mmx %b) nounwind readnone - ret x86_mmx %2 + %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() + %2 = call <1 x i64> @llvm.x86.mmx.pcmpeq.w(<1 x i64> %a, <1 x i64> %b) nounwind readnone + ret <1 x i64> %2 } -declare x86_mmx @llvm.x86.mmx.pcmpeq.w(x86_mmx, x86_mmx) nounwind readnone +declare <1 x i64> @llvm.x86.mmx.pcmpeq.w(<1 x i64>, <1 x i64>) nounwind readnone -define x86_mmx @stack_fold_pcmpgtb(x86_mmx %a, x86_mmx %b) { +define <1 x i64> @stack_fold_pcmpgtb(<1 x i64> %a, <1 x i64> %b) { ; CHECK-LABEL: stack_fold_pcmpgtb: ; CHECK: # %bb.0: ; CHECK-NEXT: movq %rsi, %mm0 @@ -547,13 +547,13 @@ define x86_mmx @stack_fold_pcmpgtb(x86_mmx %a, x86_mmx %b) { ; CHECK-NEXT: nop ; CHECK-NEXT: #NO_APP ; CHECK-NEXT: retq - %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() - %2 = call x86_mmx @llvm.x86.mmx.pcmpgt.b(x86_mmx %a, x86_mmx %b) nounwind readnone - ret x86_mmx %2 + %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() + %2 = call <1 x i64> @llvm.x86.mmx.pcmpgt.b(<1 x i64> %a, <1 x i64> %b) nounwind readnone + ret <1 x i64> %2 } -declare x86_mmx @llvm.x86.mmx.pcmpgt.b(x86_mmx, x86_mmx) nounwind readnone +declare <1 x i64> @llvm.x86.mmx.pcmpgt.b(<1 x i64>, <1 x i64>) nounwind readnone -define x86_mmx @stack_fold_pcmpgtd(x86_mmx %a, x86_mmx %b) { +define <1 x i64> @stack_fold_pcmpgtd(<1 x i64> %a, <1 x i64> %b) { ; CHECK-LABEL: stack_fold_pcmpgtd: ; CHECK: # %bb.0: ; CHECK-NEXT: movq %rsi, %mm0 @@ -564,13 +564,13 @@ define x86_mmx @stack_fold_pcmpgtd(x86_mmx %a, x86_mmx %b) { ; CHECK-NEXT: nop ; CHECK-NEXT: #NO_APP ; CHECK-NEXT: retq - %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() - %2 = call x86_mmx @llvm.x86.mmx.pcmpgt.d(x86_mmx %a, x86_mmx %b) nounwind readnone - ret x86_mmx %2 + %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() + %2 = call <1 x i64> @llvm.x86.mmx.pcmpgt.d(<1 x i64> %a, <1 x i64> %b) nounwind readnone + ret <1 x i64> %2 } -declare x86_mmx @llvm.x86.mmx.pcmpgt.d(x86_mmx, x86_mmx) nounwind readnone +declare <1 x i64> @llvm.x86.mmx.pcmpgt.d(<1 x i64>, <1 x i64>) nounwind readnone -define x86_mmx @stack_fold_pcmpgtw(x86_mmx %a, x86_mmx %b) { +define <1 x i64> @stack_fold_pcmpgtw(<1 x i64> %a, <1 x i64> %b) { ; CHECK-LABEL: stack_fold_pcmpgtw: ; CHECK: # %bb.0: ; CHECK-NEXT: movq %rsi, %mm0 @@ -581,13 +581,13 @@ define x86_mmx @stack_fold_pcmpgtw(x86_mmx %a, x86_mmx %b) { ; CHECK-NEXT: nop ; CHECK-NEXT: #NO_APP ; CHECK-NEXT: retq - %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() - %2 = call x86_mmx @llvm.x86.mmx.pcmpgt.w(x86_mmx %a, x86_mmx %b) nounwind readnone - ret x86_mmx %2 + %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() + %2 = call <1 x i64> @llvm.x86.mmx.pcmpgt.w(<1 x i64> %a, <1 x i64> %b) nounwind readnone + ret <1 x i64> %2 } -declare x86_mmx @llvm.x86.mmx.pcmpgt.w(x86_mmx, x86_mmx) nounwind readnone +declare <1 x i64> @llvm.x86.mmx.pcmpgt.w(<1 x i64>, <1 x i64>) nounwind readnone -define x86_mmx @stack_fold_phaddd(x86_mmx %a, x86_mmx %b) { +define <1 x i64> @stack_fold_phaddd(<1 x i64> %a, <1 x i64> %b) { ; CHECK-LABEL: stack_fold_phaddd: ; CHECK: # %bb.0: ; CHECK-NEXT: movq %rsi, %mm0 @@ -598,13 +598,13 @@ define x86_mmx 
@stack_fold_phaddd(x86_mmx %a, x86_mmx %b) { ; CHECK-NEXT: nop ; CHECK-NEXT: #NO_APP ; CHECK-NEXT: retq - %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() - %2 = call x86_mmx @llvm.x86.ssse3.phadd.d(x86_mmx %a, x86_mmx %b) nounwind readnone - ret x86_mmx %2 + %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() + %2 = call <1 x i64> @llvm.x86.ssse3.phadd.d(<1 x i64> %a, <1 x i64> %b) nounwind readnone + ret <1 x i64> %2 } -declare x86_mmx @llvm.x86.ssse3.phadd.d(x86_mmx, x86_mmx) nounwind readnone +declare <1 x i64> @llvm.x86.ssse3.phadd.d(<1 x i64>, <1 x i64>) nounwind readnone -define x86_mmx @stack_fold_phaddsw(x86_mmx %a, x86_mmx %b) { +define <1 x i64> @stack_fold_phaddsw(<1 x i64> %a, <1 x i64> %b) { ; CHECK-LABEL: stack_fold_phaddsw: ; CHECK: # %bb.0: ; CHECK-NEXT: movq %rsi, %mm0 @@ -615,13 +615,13 @@ define x86_mmx @stack_fold_phaddsw(x86_mmx %a, x86_mmx %b) { ; CHECK-NEXT: nop ; CHECK-NEXT: #NO_APP ; CHECK-NEXT: retq - %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() - %2 = call x86_mmx @llvm.x86.ssse3.phadd.sw(x86_mmx %a, x86_mmx %b) nounwind readnone - ret x86_mmx %2 + %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() + %2 = call <1 x i64> @llvm.x86.ssse3.phadd.sw(<1 x i64> %a, <1 x i64> %b) nounwind readnone + ret <1 x i64> %2 } -declare x86_mmx @llvm.x86.ssse3.phadd.sw(x86_mmx, x86_mmx) nounwind readnone +declare <1 x i64> @llvm.x86.ssse3.phadd.sw(<1 x i64>, <1 x i64>) nounwind readnone -define x86_mmx @stack_fold_phaddw(x86_mmx %a, x86_mmx %b) { +define <1 x i64> @stack_fold_phaddw(<1 x i64> %a, <1 x i64> %b) { ; CHECK-LABEL: stack_fold_phaddw: ; CHECK: # %bb.0: ; CHECK-NEXT: movq %rsi, %mm0 @@ -632,13 +632,13 @@ define x86_mmx @stack_fold_phaddw(x86_mmx %a, x86_mmx %b) { ; CHECK-NEXT: nop ; CHECK-NEXT: #NO_APP ; CHECK-NEXT: retq - %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() - %2 = call x86_mmx @llvm.x86.ssse3.phadd.w(x86_mmx %a, x86_mmx %b) nounwind readnone - ret x86_mmx %2 + %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() + %2 = call <1 x i64> @llvm.x86.ssse3.phadd.w(<1 x i64> %a, <1 x i64> %b) nounwind readnone + ret <1 x i64> %2 } -declare x86_mmx @llvm.x86.ssse3.phadd.w(x86_mmx, x86_mmx) nounwind readnone +declare <1 x i64> @llvm.x86.ssse3.phadd.w(<1 x i64>, <1 x i64>) nounwind readnone -define x86_mmx @stack_fold_phsubd(x86_mmx %a, x86_mmx %b) { +define <1 x i64> @stack_fold_phsubd(<1 x i64> %a, <1 x i64> %b) { ; CHECK-LABEL: stack_fold_phsubd: ; CHECK: # %bb.0: ; CHECK-NEXT: movq %rsi, %mm0 @@ -649,13 +649,13 @@ define x86_mmx @stack_fold_phsubd(x86_mmx %a, x86_mmx %b) { ; CHECK-NEXT: nop ; CHECK-NEXT: #NO_APP ; CHECK-NEXT: retq - %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() - %2 = call x86_mmx @llvm.x86.ssse3.phsub.d(x86_mmx %a, x86_mmx %b) nounwind readnone - ret x86_mmx %2 + %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() + %2 = call <1 x i64> @llvm.x86.ssse3.phsub.d(<1 x i64> %a, <1 x i64> %b) nounwind readnone + ret <1 x i64> %2 } -declare x86_mmx @llvm.x86.ssse3.phsub.d(x86_mmx, x86_mmx) nounwind readnone +declare <1 x i64> @llvm.x86.ssse3.phsub.d(<1 x i64>, <1 x i64>) nounwind readnone -define x86_mmx @stack_fold_phsubsw(x86_mmx %a, x86_mmx %b) { +define <1 x i64> @stack_fold_phsubsw(<1 x i64> 
%a, <1 x i64> %b) { ; CHECK-LABEL: stack_fold_phsubsw: ; CHECK: # %bb.0: ; CHECK-NEXT: movq %rsi, %mm0 @@ -666,13 +666,13 @@ define x86_mmx @stack_fold_phsubsw(x86_mmx %a, x86_mmx %b) { ; CHECK-NEXT: nop ; CHECK-NEXT: #NO_APP ; CHECK-NEXT: retq - %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() - %2 = call x86_mmx @llvm.x86.ssse3.phsub.sw(x86_mmx %a, x86_mmx %b) nounwind readnone - ret x86_mmx %2 + %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() + %2 = call <1 x i64> @llvm.x86.ssse3.phsub.sw(<1 x i64> %a, <1 x i64> %b) nounwind readnone + ret <1 x i64> %2 } -declare x86_mmx @llvm.x86.ssse3.phsub.sw(x86_mmx, x86_mmx) nounwind readnone +declare <1 x i64> @llvm.x86.ssse3.phsub.sw(<1 x i64>, <1 x i64>) nounwind readnone -define x86_mmx @stack_fold_phsubw(x86_mmx %a, x86_mmx %b) { +define <1 x i64> @stack_fold_phsubw(<1 x i64> %a, <1 x i64> %b) { ; CHECK-LABEL: stack_fold_phsubw: ; CHECK: # %bb.0: ; CHECK-NEXT: movq %rsi, %mm0 @@ -683,15 +683,15 @@ define x86_mmx @stack_fold_phsubw(x86_mmx %a, x86_mmx %b) { ; CHECK-NEXT: nop ; CHECK-NEXT: #NO_APP ; CHECK-NEXT: retq - %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() - %2 = call x86_mmx @llvm.x86.ssse3.phsub.w(x86_mmx %a, x86_mmx %b) nounwind readnone - ret x86_mmx %2 + %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() + %2 = call <1 x i64> @llvm.x86.ssse3.phsub.w(<1 x i64> %a, <1 x i64> %b) nounwind readnone + ret <1 x i64> %2 } -declare x86_mmx @llvm.x86.ssse3.phsub.w(x86_mmx, x86_mmx) nounwind readnone +declare <1 x i64> @llvm.x86.ssse3.phsub.w(<1 x i64>, <1 x i64>) nounwind readnone ; TODO stack_fold_pinsrw -define x86_mmx @stack_fold_pmaddubsw(x86_mmx %a, x86_mmx %b) { +define <1 x i64> @stack_fold_pmaddubsw(<1 x i64> %a, <1 x i64> %b) { ; CHECK-LABEL: stack_fold_pmaddubsw: ; CHECK: # %bb.0: ; CHECK-NEXT: movq %rsi, %mm0 @@ -702,13 +702,13 @@ define x86_mmx @stack_fold_pmaddubsw(x86_mmx %a, x86_mmx %b) { ; CHECK-NEXT: nop ; CHECK-NEXT: #NO_APP ; CHECK-NEXT: retq - %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() - %2 = call x86_mmx @llvm.x86.ssse3.pmadd.ub.sw(x86_mmx %a, x86_mmx %b) nounwind readnone - ret x86_mmx %2 + %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() + %2 = call <1 x i64> @llvm.x86.ssse3.pmadd.ub.sw(<1 x i64> %a, <1 x i64> %b) nounwind readnone + ret <1 x i64> %2 } -declare x86_mmx @llvm.x86.ssse3.pmadd.ub.sw(x86_mmx, x86_mmx) nounwind readnone +declare <1 x i64> @llvm.x86.ssse3.pmadd.ub.sw(<1 x i64>, <1 x i64>) nounwind readnone -define x86_mmx @stack_fold_pmaddwd(x86_mmx %a, x86_mmx %b) { +define <1 x i64> @stack_fold_pmaddwd(<1 x i64> %a, <1 x i64> %b) { ; CHECK-LABEL: stack_fold_pmaddwd: ; CHECK: # %bb.0: ; CHECK-NEXT: movq %rsi, %mm0 @@ -719,13 +719,13 @@ define x86_mmx @stack_fold_pmaddwd(x86_mmx %a, x86_mmx %b) { ; CHECK-NEXT: nop ; CHECK-NEXT: #NO_APP ; CHECK-NEXT: retq - %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() - %2 = call x86_mmx @llvm.x86.mmx.pmadd.wd(x86_mmx %a, x86_mmx %b) nounwind readnone - ret x86_mmx %2 + %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() + %2 = call <1 x i64> @llvm.x86.mmx.pmadd.wd(<1 x i64> %a, <1 x i64> %b) nounwind readnone + ret <1 x i64> %2 } -declare x86_mmx @llvm.x86.mmx.pmadd.wd(x86_mmx, x86_mmx) nounwind readnone 
+declare <1 x i64> @llvm.x86.mmx.pmadd.wd(<1 x i64>, <1 x i64>) nounwind readnone -define x86_mmx @stack_fold_pmaxsw(x86_mmx %a, x86_mmx %b) { +define <1 x i64> @stack_fold_pmaxsw(<1 x i64> %a, <1 x i64> %b) { ; CHECK-LABEL: stack_fold_pmaxsw: ; CHECK: # %bb.0: ; CHECK-NEXT: movq %rsi, %mm0 @@ -736,13 +736,13 @@ define x86_mmx @stack_fold_pmaxsw(x86_mmx %a, x86_mmx %b) { ; CHECK-NEXT: nop ; CHECK-NEXT: #NO_APP ; CHECK-NEXT: retq - %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() - %2 = call x86_mmx @llvm.x86.mmx.pmaxs.w(x86_mmx %a, x86_mmx %b) nounwind readnone - ret x86_mmx %2 + %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() + %2 = call <1 x i64> @llvm.x86.mmx.pmaxs.w(<1 x i64> %a, <1 x i64> %b) nounwind readnone + ret <1 x i64> %2 } -declare x86_mmx @llvm.x86.mmx.pmaxs.w(x86_mmx, x86_mmx) nounwind readnone +declare <1 x i64> @llvm.x86.mmx.pmaxs.w(<1 x i64>, <1 x i64>) nounwind readnone -define x86_mmx @stack_fold_pmaxub(x86_mmx %a, x86_mmx %b) { +define <1 x i64> @stack_fold_pmaxub(<1 x i64> %a, <1 x i64> %b) { ; CHECK-LABEL: stack_fold_pmaxub: ; CHECK: # %bb.0: ; CHECK-NEXT: movq %rsi, %mm0 @@ -753,13 +753,13 @@ define x86_mmx @stack_fold_pmaxub(x86_mmx %a, x86_mmx %b) { ; CHECK-NEXT: nop ; CHECK-NEXT: #NO_APP ; CHECK-NEXT: retq - %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() - %2 = call x86_mmx @llvm.x86.mmx.pmaxu.b(x86_mmx %a, x86_mmx %b) nounwind readnone - ret x86_mmx %2 + %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() + %2 = call <1 x i64> @llvm.x86.mmx.pmaxu.b(<1 x i64> %a, <1 x i64> %b) nounwind readnone + ret <1 x i64> %2 } -declare x86_mmx @llvm.x86.mmx.pmaxu.b(x86_mmx, x86_mmx) nounwind readnone +declare <1 x i64> @llvm.x86.mmx.pmaxu.b(<1 x i64>, <1 x i64>) nounwind readnone -define x86_mmx @stack_fold_pminsw(x86_mmx %a, x86_mmx %b) { +define <1 x i64> @stack_fold_pminsw(<1 x i64> %a, <1 x i64> %b) { ; CHECK-LABEL: stack_fold_pminsw: ; CHECK: # %bb.0: ; CHECK-NEXT: movq %rsi, %mm0 @@ -770,13 +770,13 @@ define x86_mmx @stack_fold_pminsw(x86_mmx %a, x86_mmx %b) { ; CHECK-NEXT: nop ; CHECK-NEXT: #NO_APP ; CHECK-NEXT: retq - %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() - %2 = call x86_mmx @llvm.x86.mmx.pmins.w(x86_mmx %a, x86_mmx %b) nounwind readnone - ret x86_mmx %2 + %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() + %2 = call <1 x i64> @llvm.x86.mmx.pmins.w(<1 x i64> %a, <1 x i64> %b) nounwind readnone + ret <1 x i64> %2 } -declare x86_mmx @llvm.x86.mmx.pmins.w(x86_mmx, x86_mmx) nounwind readnone +declare <1 x i64> @llvm.x86.mmx.pmins.w(<1 x i64>, <1 x i64>) nounwind readnone -define x86_mmx @stack_fold_pminub(x86_mmx %a, x86_mmx %b) { +define <1 x i64> @stack_fold_pminub(<1 x i64> %a, <1 x i64> %b) { ; CHECK-LABEL: stack_fold_pminub: ; CHECK: # %bb.0: ; CHECK-NEXT: movq %rsi, %mm0 @@ -787,13 +787,13 @@ define x86_mmx @stack_fold_pminub(x86_mmx %a, x86_mmx %b) { ; CHECK-NEXT: nop ; CHECK-NEXT: #NO_APP ; CHECK-NEXT: retq - %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() - %2 = call x86_mmx @llvm.x86.mmx.pminu.b(x86_mmx %a, x86_mmx %b) nounwind readnone - ret x86_mmx %2 + %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() + %2 = call <1 x i64> @llvm.x86.mmx.pminu.b(<1 x i64> %a, <1 x i64> %b) nounwind 
readnone + ret <1 x i64> %2 } -declare x86_mmx @llvm.x86.mmx.pminu.b(x86_mmx, x86_mmx) nounwind readnone +declare <1 x i64> @llvm.x86.mmx.pminu.b(<1 x i64>, <1 x i64>) nounwind readnone -define x86_mmx @stack_fold_pmulhrsw(x86_mmx %a, x86_mmx %b) { +define <1 x i64> @stack_fold_pmulhrsw(<1 x i64> %a, <1 x i64> %b) { ; CHECK-LABEL: stack_fold_pmulhrsw: ; CHECK: # %bb.0: ; CHECK-NEXT: movq %rsi, %mm0 @@ -804,13 +804,13 @@ define x86_mmx @stack_fold_pmulhrsw(x86_mmx %a, x86_mmx %b) { ; CHECK-NEXT: nop ; CHECK-NEXT: #NO_APP ; CHECK-NEXT: retq - %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() - %2 = call x86_mmx @llvm.x86.ssse3.pmul.hr.sw(x86_mmx %a, x86_mmx %b) nounwind readnone - ret x86_mmx %2 + %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() + %2 = call <1 x i64> @llvm.x86.ssse3.pmul.hr.sw(<1 x i64> %a, <1 x i64> %b) nounwind readnone + ret <1 x i64> %2 } -declare x86_mmx @llvm.x86.ssse3.pmul.hr.sw(x86_mmx, x86_mmx) nounwind readnone +declare <1 x i64> @llvm.x86.ssse3.pmul.hr.sw(<1 x i64>, <1 x i64>) nounwind readnone -define x86_mmx @stack_fold_pmulhuw(x86_mmx %a, x86_mmx %b) { +define <1 x i64> @stack_fold_pmulhuw(<1 x i64> %a, <1 x i64> %b) { ; CHECK-LABEL: stack_fold_pmulhuw: ; CHECK: # %bb.0: ; CHECK-NEXT: movq %rsi, %mm0 @@ -821,13 +821,13 @@ define x86_mmx @stack_fold_pmulhuw(x86_mmx %a, x86_mmx %b) { ; CHECK-NEXT: nop ; CHECK-NEXT: #NO_APP ; CHECK-NEXT: retq - %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() - %2 = call x86_mmx @llvm.x86.mmx.pmulhu.w(x86_mmx %a, x86_mmx %b) nounwind readnone - ret x86_mmx %2 + %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() + %2 = call <1 x i64> @llvm.x86.mmx.pmulhu.w(<1 x i64> %a, <1 x i64> %b) nounwind readnone + ret <1 x i64> %2 } -declare x86_mmx @llvm.x86.mmx.pmulhu.w(x86_mmx, x86_mmx) nounwind readnone +declare <1 x i64> @llvm.x86.mmx.pmulhu.w(<1 x i64>, <1 x i64>) nounwind readnone -define x86_mmx @stack_fold_pmulhw(x86_mmx %a, x86_mmx %b) { +define <1 x i64> @stack_fold_pmulhw(<1 x i64> %a, <1 x i64> %b) { ; CHECK-LABEL: stack_fold_pmulhw: ; CHECK: # %bb.0: ; CHECK-NEXT: movq %rsi, %mm0 @@ -838,13 +838,13 @@ define x86_mmx @stack_fold_pmulhw(x86_mmx %a, x86_mmx %b) { ; CHECK-NEXT: nop ; CHECK-NEXT: #NO_APP ; CHECK-NEXT: retq - %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() - %2 = call x86_mmx @llvm.x86.mmx.pmulh.w(x86_mmx %a, x86_mmx %b) nounwind readnone - ret x86_mmx %2 + %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() + %2 = call <1 x i64> @llvm.x86.mmx.pmulh.w(<1 x i64> %a, <1 x i64> %b) nounwind readnone + ret <1 x i64> %2 } -declare x86_mmx @llvm.x86.mmx.pmulh.w(x86_mmx, x86_mmx) nounwind readnone +declare <1 x i64> @llvm.x86.mmx.pmulh.w(<1 x i64>, <1 x i64>) nounwind readnone -define x86_mmx @stack_fold_pmullw(x86_mmx %a, x86_mmx %b) { +define <1 x i64> @stack_fold_pmullw(<1 x i64> %a, <1 x i64> %b) { ; CHECK-LABEL: stack_fold_pmullw: ; CHECK: # %bb.0: ; CHECK-NEXT: movq %rsi, %mm0 @@ -855,13 +855,13 @@ define x86_mmx @stack_fold_pmullw(x86_mmx %a, x86_mmx %b) { ; CHECK-NEXT: nop ; CHECK-NEXT: #NO_APP ; CHECK-NEXT: retq - %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() - %2 = call x86_mmx @llvm.x86.mmx.pmull.w(x86_mmx %a, x86_mmx %b) nounwind readnone - ret x86_mmx %2 + %1 = tail call <1 x i64> asm sideeffect 
"nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() + %2 = call <1 x i64> @llvm.x86.mmx.pmull.w(<1 x i64> %a, <1 x i64> %b) nounwind readnone + ret <1 x i64> %2 } -declare x86_mmx @llvm.x86.mmx.pmull.w(x86_mmx, x86_mmx) nounwind readnone +declare <1 x i64> @llvm.x86.mmx.pmull.w(<1 x i64>, <1 x i64>) nounwind readnone -define x86_mmx @stack_fold_pmuludq(x86_mmx %a, x86_mmx %b) { +define <1 x i64> @stack_fold_pmuludq(<1 x i64> %a, <1 x i64> %b) { ; CHECK-LABEL: stack_fold_pmuludq: ; CHECK: # %bb.0: ; CHECK-NEXT: movq %rsi, %mm0 @@ -872,13 +872,13 @@ define x86_mmx @stack_fold_pmuludq(x86_mmx %a, x86_mmx %b) { ; CHECK-NEXT: nop ; CHECK-NEXT: #NO_APP ; CHECK-NEXT: retq - %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() - %2 = call x86_mmx @llvm.x86.mmx.pmulu.dq(x86_mmx %a, x86_mmx %b) nounwind readnone - ret x86_mmx %2 + %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() + %2 = call <1 x i64> @llvm.x86.mmx.pmulu.dq(<1 x i64> %a, <1 x i64> %b) nounwind readnone + ret <1 x i64> %2 } -declare x86_mmx @llvm.x86.mmx.pmulu.dq(x86_mmx, x86_mmx) nounwind readnone +declare <1 x i64> @llvm.x86.mmx.pmulu.dq(<1 x i64>, <1 x i64>) nounwind readnone -define x86_mmx @stack_fold_por(x86_mmx %a, x86_mmx %b) { +define <1 x i64> @stack_fold_por(<1 x i64> %a, <1 x i64> %b) { ; CHECK-LABEL: stack_fold_por: ; CHECK: # %bb.0: ; CHECK-NEXT: movq %rsi, %mm0 @@ -889,13 +889,13 @@ define x86_mmx @stack_fold_por(x86_mmx %a, x86_mmx %b) { ; CHECK-NEXT: nop ; CHECK-NEXT: #NO_APP ; CHECK-NEXT: retq - %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() - %2 = call x86_mmx @llvm.x86.mmx.por(x86_mmx %a, x86_mmx %b) nounwind readnone - ret x86_mmx %2 + %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() + %2 = call <1 x i64> @llvm.x86.mmx.por(<1 x i64> %a, <1 x i64> %b) nounwind readnone + ret <1 x i64> %2 } -declare x86_mmx @llvm.x86.mmx.por(x86_mmx, x86_mmx) nounwind readnone +declare <1 x i64> @llvm.x86.mmx.por(<1 x i64>, <1 x i64>) nounwind readnone -define x86_mmx @stack_fold_psadbw(x86_mmx %a, x86_mmx %b) { +define <1 x i64> @stack_fold_psadbw(<1 x i64> %a, <1 x i64> %b) { ; CHECK-LABEL: stack_fold_psadbw: ; CHECK: # %bb.0: ; CHECK-NEXT: movq %rsi, %mm0 @@ -906,13 +906,13 @@ define x86_mmx @stack_fold_psadbw(x86_mmx %a, x86_mmx %b) { ; CHECK-NEXT: nop ; CHECK-NEXT: #NO_APP ; CHECK-NEXT: retq - %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() - %2 = call x86_mmx @llvm.x86.mmx.psad.bw(x86_mmx %a, x86_mmx %b) nounwind readnone - ret x86_mmx %2 + %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() + %2 = call <1 x i64> @llvm.x86.mmx.psad.bw(<1 x i64> %a, <1 x i64> %b) nounwind readnone + ret <1 x i64> %2 } -declare x86_mmx @llvm.x86.mmx.psad.bw(x86_mmx, x86_mmx) nounwind readnone +declare <1 x i64> @llvm.x86.mmx.psad.bw(<1 x i64>, <1 x i64>) nounwind readnone -define x86_mmx @stack_fold_pshufb(x86_mmx %a, x86_mmx %b) { +define <1 x i64> @stack_fold_pshufb(<1 x i64> %a, <1 x i64> %b) { ; CHECK-LABEL: stack_fold_pshufb: ; CHECK: # %bb.0: ; CHECK-NEXT: movq %rsi, %mm0 @@ -923,13 +923,13 @@ define x86_mmx @stack_fold_pshufb(x86_mmx %a, x86_mmx %b) { ; CHECK-NEXT: nop ; CHECK-NEXT: #NO_APP ; CHECK-NEXT: retq - %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm1},~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() - %2 = call x86_mmx @llvm.x86.ssse3.pshuf.b(x86_mmx %a, 
x86_mmx %b) nounwind readnone - ret x86_mmx %2 + %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm1},~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() + %2 = call <1 x i64> @llvm.x86.ssse3.pshuf.b(<1 x i64> %a, <1 x i64> %b) nounwind readnone + ret <1 x i64> %2 } -declare x86_mmx @llvm.x86.ssse3.pshuf.b(x86_mmx, x86_mmx) nounwind readnone +declare <1 x i64> @llvm.x86.ssse3.pshuf.b(<1 x i64>, <1 x i64>) nounwind readnone -define x86_mmx @stack_fold_pshufw(x86_mmx %a) { +define <1 x i64> @stack_fold_pshufw(<1 x i64> %a) { ; CHECK-LABEL: stack_fold_pshufw: ; CHECK: # %bb.0: ; CHECK-NEXT: movq %rdi, %mm0 @@ -941,13 +941,13 @@ define x86_mmx @stack_fold_pshufw(x86_mmx %a) { ; CHECK-NEXT: # mm0 = mem[1,0,0,0] ; CHECK-NEXT: movq %mm0, %rax ; CHECK-NEXT: retq - %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm1},~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() - %2 = call x86_mmx @llvm.x86.sse.pshuf.w(x86_mmx %a, i8 1) nounwind readnone - ret x86_mmx %2 + %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm1},~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() + %2 = call <1 x i64> @llvm.x86.sse.pshuf.w(<1 x i64> %a, i8 1) nounwind readnone + ret <1 x i64> %2 } -declare x86_mmx @llvm.x86.sse.pshuf.w(x86_mmx, i8) nounwind readnone +declare <1 x i64> @llvm.x86.sse.pshuf.w(<1 x i64>, i8) nounwind readnone -define x86_mmx @stack_fold_psignb(x86_mmx %a0, x86_mmx %a1) { +define <1 x i64> @stack_fold_psignb(<1 x i64> %a0, <1 x i64> %a1) { ; CHECK-LABEL: stack_fold_psignb: ; CHECK: # %bb.0: ; CHECK-NEXT: movq %rsi, %mm0 @@ -958,13 +958,13 @@ define x86_mmx @stack_fold_psignb(x86_mmx %a0, x86_mmx %a1) { ; CHECK-NEXT: nop ; CHECK-NEXT: #NO_APP ; CHECK-NEXT: retq - %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() - %2 = call x86_mmx @llvm.x86.ssse3.psign.b(x86_mmx %a0, x86_mmx %a1) nounwind readnone - ret x86_mmx %2 + %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() + %2 = call <1 x i64> @llvm.x86.ssse3.psign.b(<1 x i64> %a0, <1 x i64> %a1) nounwind readnone + ret <1 x i64> %2 } -declare x86_mmx @llvm.x86.ssse3.psign.b(x86_mmx, x86_mmx) nounwind readnone +declare <1 x i64> @llvm.x86.ssse3.psign.b(<1 x i64>, <1 x i64>) nounwind readnone -define x86_mmx @stack_fold_psignd(x86_mmx %a0, x86_mmx %a1) { +define <1 x i64> @stack_fold_psignd(<1 x i64> %a0, <1 x i64> %a1) { ; CHECK-LABEL: stack_fold_psignd: ; CHECK: # %bb.0: ; CHECK-NEXT: movq %rsi, %mm0 @@ -975,13 +975,13 @@ define x86_mmx @stack_fold_psignd(x86_mmx %a0, x86_mmx %a1) { ; CHECK-NEXT: nop ; CHECK-NEXT: #NO_APP ; CHECK-NEXT: retq - %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() - %2 = call x86_mmx @llvm.x86.ssse3.psign.d(x86_mmx %a0, x86_mmx %a1) nounwind readnone - ret x86_mmx %2 + %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() + %2 = call <1 x i64> @llvm.x86.ssse3.psign.d(<1 x i64> %a0, <1 x i64> %a1) nounwind readnone + ret <1 x i64> %2 } -declare x86_mmx @llvm.x86.ssse3.psign.d(x86_mmx, x86_mmx) nounwind readnone +declare <1 x i64> @llvm.x86.ssse3.psign.d(<1 x i64>, <1 x i64>) nounwind readnone -define x86_mmx @stack_fold_psignw(x86_mmx %a0, x86_mmx %a1) { +define <1 x i64> @stack_fold_psignw(<1 x i64> %a0, <1 x i64> %a1) { ; CHECK-LABEL: stack_fold_psignw: ; CHECK: # %bb.0: ; CHECK-NEXT: movq %rsi, %mm0 @@ -992,13 +992,13 @@ define x86_mmx @stack_fold_psignw(x86_mmx %a0, x86_mmx %a1) { ; CHECK-NEXT: nop ; CHECK-NEXT: #NO_APP ; CHECK-NEXT: retq - %1 = tail call 
x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() - %2 = call x86_mmx @llvm.x86.ssse3.psign.w(x86_mmx %a0, x86_mmx %a1) nounwind readnone - ret x86_mmx %2 + %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() + %2 = call <1 x i64> @llvm.x86.ssse3.psign.w(<1 x i64> %a0, <1 x i64> %a1) nounwind readnone + ret <1 x i64> %2 } -declare x86_mmx @llvm.x86.ssse3.psign.w(x86_mmx, x86_mmx) nounwind readnone +declare <1 x i64> @llvm.x86.ssse3.psign.w(<1 x i64>, <1 x i64>) nounwind readnone -define x86_mmx @stack_fold_pslld(x86_mmx %a, x86_mmx %b) { +define <1 x i64> @stack_fold_pslld(<1 x i64> %a, <1 x i64> %b) { ; CHECK-LABEL: stack_fold_pslld: ; CHECK: # %bb.0: ; CHECK-NEXT: movq %rsi, %mm0 @@ -1009,13 +1009,13 @@ define x86_mmx @stack_fold_pslld(x86_mmx %a, x86_mmx %b) { ; CHECK-NEXT: nop ; CHECK-NEXT: #NO_APP ; CHECK-NEXT: retq - %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() - %2 = call x86_mmx @llvm.x86.mmx.psll.d(x86_mmx %a, x86_mmx %b) nounwind readnone - ret x86_mmx %2 + %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() + %2 = call <1 x i64> @llvm.x86.mmx.psll.d(<1 x i64> %a, <1 x i64> %b) nounwind readnone + ret <1 x i64> %2 } -declare x86_mmx @llvm.x86.mmx.psll.d(x86_mmx, x86_mmx) nounwind readnone +declare <1 x i64> @llvm.x86.mmx.psll.d(<1 x i64>, <1 x i64>) nounwind readnone -define x86_mmx @stack_fold_psllq(x86_mmx %a, x86_mmx %b) { +define <1 x i64> @stack_fold_psllq(<1 x i64> %a, <1 x i64> %b) { ; CHECK-LABEL: stack_fold_psllq: ; CHECK: # %bb.0: ; CHECK-NEXT: movq %rsi, %mm0 @@ -1026,13 +1026,13 @@ define x86_mmx @stack_fold_psllq(x86_mmx %a, x86_mmx %b) { ; CHECK-NEXT: nop ; CHECK-NEXT: #NO_APP ; CHECK-NEXT: retq - %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() - %2 = call x86_mmx @llvm.x86.mmx.psll.q(x86_mmx %a, x86_mmx %b) nounwind readnone - ret x86_mmx %2 + %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() + %2 = call <1 x i64> @llvm.x86.mmx.psll.q(<1 x i64> %a, <1 x i64> %b) nounwind readnone + ret <1 x i64> %2 } -declare x86_mmx @llvm.x86.mmx.psll.q(x86_mmx, x86_mmx) nounwind readnone +declare <1 x i64> @llvm.x86.mmx.psll.q(<1 x i64>, <1 x i64>) nounwind readnone -define x86_mmx @stack_fold_psllw(x86_mmx %a, x86_mmx %b) { +define <1 x i64> @stack_fold_psllw(<1 x i64> %a, <1 x i64> %b) { ; CHECK-LABEL: stack_fold_psllw: ; CHECK: # %bb.0: ; CHECK-NEXT: movq %rsi, %mm0 @@ -1043,13 +1043,13 @@ define x86_mmx @stack_fold_psllw(x86_mmx %a, x86_mmx %b) { ; CHECK-NEXT: nop ; CHECK-NEXT: #NO_APP ; CHECK-NEXT: retq - %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() - %2 = call x86_mmx @llvm.x86.mmx.psll.w(x86_mmx %a, x86_mmx %b) nounwind readnone - ret x86_mmx %2 + %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() + %2 = call <1 x i64> @llvm.x86.mmx.psll.w(<1 x i64> %a, <1 x i64> %b) nounwind readnone + ret <1 x i64> %2 } -declare x86_mmx @llvm.x86.mmx.psll.w(x86_mmx, x86_mmx) nounwind readnone +declare <1 x i64> @llvm.x86.mmx.psll.w(<1 x i64>, <1 x i64>) nounwind readnone -define x86_mmx @stack_fold_psrad(x86_mmx %a, x86_mmx %b) { +define <1 x i64> @stack_fold_psrad(<1 x i64> %a, <1 x i64> %b) { ; CHECK-LABEL: stack_fold_psrad: ; CHECK: # %bb.0: ; CHECK-NEXT: movq %rsi, %mm0 @@ -1060,13 +1060,13 @@ define x86_mmx @stack_fold_psrad(x86_mmx %a, 
x86_mmx %b) { ; CHECK-NEXT: nop ; CHECK-NEXT: #NO_APP ; CHECK-NEXT: retq - %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() - %2 = call x86_mmx @llvm.x86.mmx.psra.d(x86_mmx %a, x86_mmx %b) nounwind readnone - ret x86_mmx %2 + %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() + %2 = call <1 x i64> @llvm.x86.mmx.psra.d(<1 x i64> %a, <1 x i64> %b) nounwind readnone + ret <1 x i64> %2 } -declare x86_mmx @llvm.x86.mmx.psra.d(x86_mmx, x86_mmx) nounwind readnone +declare <1 x i64> @llvm.x86.mmx.psra.d(<1 x i64>, <1 x i64>) nounwind readnone -define x86_mmx @stack_fold_psraw(x86_mmx %a, x86_mmx %b) { +define <1 x i64> @stack_fold_psraw(<1 x i64> %a, <1 x i64> %b) { ; CHECK-LABEL: stack_fold_psraw: ; CHECK: # %bb.0: ; CHECK-NEXT: movq %rsi, %mm0 @@ -1077,13 +1077,13 @@ define x86_mmx @stack_fold_psraw(x86_mmx %a, x86_mmx %b) { ; CHECK-NEXT: nop ; CHECK-NEXT: #NO_APP ; CHECK-NEXT: retq - %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() - %2 = call x86_mmx @llvm.x86.mmx.psra.w(x86_mmx %a, x86_mmx %b) nounwind readnone - ret x86_mmx %2 + %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() + %2 = call <1 x i64> @llvm.x86.mmx.psra.w(<1 x i64> %a, <1 x i64> %b) nounwind readnone + ret <1 x i64> %2 } -declare x86_mmx @llvm.x86.mmx.psra.w(x86_mmx, x86_mmx) nounwind readnone +declare <1 x i64> @llvm.x86.mmx.psra.w(<1 x i64>, <1 x i64>) nounwind readnone -define x86_mmx @stack_fold_psrld(x86_mmx %a, x86_mmx %b) { +define <1 x i64> @stack_fold_psrld(<1 x i64> %a, <1 x i64> %b) { ; CHECK-LABEL: stack_fold_psrld: ; CHECK: # %bb.0: ; CHECK-NEXT: movq %rsi, %mm0 @@ -1094,13 +1094,13 @@ define x86_mmx @stack_fold_psrld(x86_mmx %a, x86_mmx %b) { ; CHECK-NEXT: nop ; CHECK-NEXT: #NO_APP ; CHECK-NEXT: retq - %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() - %2 = call x86_mmx @llvm.x86.mmx.psrl.d(x86_mmx %a, x86_mmx %b) nounwind readnone - ret x86_mmx %2 + %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() + %2 = call <1 x i64> @llvm.x86.mmx.psrl.d(<1 x i64> %a, <1 x i64> %b) nounwind readnone + ret <1 x i64> %2 } -declare x86_mmx @llvm.x86.mmx.psrl.d(x86_mmx, x86_mmx) nounwind readnone +declare <1 x i64> @llvm.x86.mmx.psrl.d(<1 x i64>, <1 x i64>) nounwind readnone -define x86_mmx @stack_fold_psrlq(x86_mmx %a, x86_mmx %b) { +define <1 x i64> @stack_fold_psrlq(<1 x i64> %a, <1 x i64> %b) { ; CHECK-LABEL: stack_fold_psrlq: ; CHECK: # %bb.0: ; CHECK-NEXT: movq %rsi, %mm0 @@ -1111,13 +1111,13 @@ define x86_mmx @stack_fold_psrlq(x86_mmx %a, x86_mmx %b) { ; CHECK-NEXT: nop ; CHECK-NEXT: #NO_APP ; CHECK-NEXT: retq - %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() - %2 = call x86_mmx @llvm.x86.mmx.psrl.q(x86_mmx %a, x86_mmx %b) nounwind readnone - ret x86_mmx %2 + %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() + %2 = call <1 x i64> @llvm.x86.mmx.psrl.q(<1 x i64> %a, <1 x i64> %b) nounwind readnone + ret <1 x i64> %2 } -declare x86_mmx @llvm.x86.mmx.psrl.q(x86_mmx, x86_mmx) nounwind readnone +declare <1 x i64> @llvm.x86.mmx.psrl.q(<1 x i64>, <1 x i64>) nounwind readnone -define x86_mmx @stack_fold_psrlw(x86_mmx %a, x86_mmx %b) { +define <1 x i64> @stack_fold_psrlw(<1 x i64> %a, <1 x i64> %b) { ; CHECK-LABEL: stack_fold_psrlw: ; CHECK: # %bb.0: ; CHECK-NEXT: movq %rsi, 
%mm0
@@ -1128,13 +1128,13 @@ define x86_mmx @stack_fold_psrlw(x86_mmx %a, x86_mmx %b) {
 ; CHECK-NEXT: nop
 ; CHECK-NEXT: #NO_APP
 ; CHECK-NEXT: retq
- %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
- %2 = call x86_mmx @llvm.x86.mmx.psrl.w(x86_mmx %a, x86_mmx %b) nounwind readnone
- ret x86_mmx %2
+ %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call <1 x i64> @llvm.x86.mmx.psrl.w(<1 x i64> %a, <1 x i64> %b) nounwind readnone
+ ret <1 x i64> %2
 }
-declare x86_mmx @llvm.x86.mmx.psrl.w(x86_mmx, x86_mmx) nounwind readnone
+declare <1 x i64> @llvm.x86.mmx.psrl.w(<1 x i64>, <1 x i64>) nounwind readnone

-define x86_mmx @stack_fold_psubb(x86_mmx %a, x86_mmx %b) {
+define <1 x i64> @stack_fold_psubb(<1 x i64> %a, <1 x i64> %b) {
 ; CHECK-LABEL: stack_fold_psubb:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: movq %rsi, %mm0
@@ -1145,13 +1145,13 @@ define x86_mmx @stack_fold_psubb(x86_mmx %a, x86_mmx %b) {
 ; CHECK-NEXT: nop
 ; CHECK-NEXT: #NO_APP
 ; CHECK-NEXT: retq
- %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
- %2 = call x86_mmx @llvm.x86.mmx.psub.b(x86_mmx %a, x86_mmx %b) nounwind readnone
- ret x86_mmx %2
+ %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call <1 x i64> @llvm.x86.mmx.psub.b(<1 x i64> %a, <1 x i64> %b) nounwind readnone
+ ret <1 x i64> %2
 }
-declare x86_mmx @llvm.x86.mmx.psub.b(x86_mmx, x86_mmx) nounwind readnone
+declare <1 x i64> @llvm.x86.mmx.psub.b(<1 x i64>, <1 x i64>) nounwind readnone

-define x86_mmx @stack_fold_psubd(x86_mmx %a, x86_mmx %b) {
+define <1 x i64> @stack_fold_psubd(<1 x i64> %a, <1 x i64> %b) {
 ; CHECK-LABEL: stack_fold_psubd:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: movq %rsi, %mm0
@@ -1162,13 +1162,13 @@ define x86_mmx @stack_fold_psubd(x86_mmx %a, x86_mmx %b) {
 ; CHECK-NEXT: nop
 ; CHECK-NEXT: #NO_APP
 ; CHECK-NEXT: retq
- %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
- %2 = call x86_mmx @llvm.x86.mmx.psub.d(x86_mmx %a, x86_mmx %b) nounwind readnone
- ret x86_mmx %2
+ %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call <1 x i64> @llvm.x86.mmx.psub.d(<1 x i64> %a, <1 x i64> %b) nounwind readnone
+ ret <1 x i64> %2
 }
-declare x86_mmx @llvm.x86.mmx.psub.d(x86_mmx, x86_mmx) nounwind readnone
+declare <1 x i64> @llvm.x86.mmx.psub.d(<1 x i64>, <1 x i64>) nounwind readnone

-define x86_mmx @stack_fold_psubq(x86_mmx %a, x86_mmx %b) {
+define <1 x i64> @stack_fold_psubq(<1 x i64> %a, <1 x i64> %b) {
 ; CHECK-LABEL: stack_fold_psubq:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: movq %rsi, %mm0
@@ -1179,13 +1179,13 @@ define x86_mmx @stack_fold_psubq(x86_mmx %a, x86_mmx %b) {
 ; CHECK-NEXT: nop
 ; CHECK-NEXT: #NO_APP
 ; CHECK-NEXT: retq
- %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
- %2 = call x86_mmx @llvm.x86.mmx.psub.q(x86_mmx %a, x86_mmx %b) nounwind readnone
- ret x86_mmx %2
+ %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call <1 x i64> @llvm.x86.mmx.psub.q(<1 x i64> %a, <1 x i64> %b) nounwind readnone
+ ret <1 x i64> %2
 }
-declare x86_mmx @llvm.x86.mmx.psub.q(x86_mmx, x86_mmx) nounwind readnone
+declare <1 x i64> @llvm.x86.mmx.psub.q(<1 x i64>, <1 x i64>) nounwind readnone

-define x86_mmx @stack_fold_psubsb(x86_mmx %a, x86_mmx %b) {
+define <1 x i64> @stack_fold_psubsb(<1 x i64> %a, <1 x i64> %b) {
 ; CHECK-LABEL: stack_fold_psubsb:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: movq %rsi, %mm0
@@ -1196,13 +1196,13 @@ define x86_mmx @stack_fold_psubsb(x86_mmx %a, x86_mmx %b) {
 ; CHECK-NEXT: nop
 ; CHECK-NEXT: #NO_APP
 ; CHECK-NEXT: retq
- %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
- %2 = call x86_mmx @llvm.x86.mmx.psubs.b(x86_mmx %a, x86_mmx %b) nounwind readnone
- ret x86_mmx %2
+ %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call <1 x i64> @llvm.x86.mmx.psubs.b(<1 x i64> %a, <1 x i64> %b) nounwind readnone
+ ret <1 x i64> %2
 }
-declare x86_mmx @llvm.x86.mmx.psubs.b(x86_mmx, x86_mmx) nounwind readnone
+declare <1 x i64> @llvm.x86.mmx.psubs.b(<1 x i64>, <1 x i64>) nounwind readnone

-define x86_mmx @stack_fold_psubsw(x86_mmx %a, x86_mmx %b) {
+define <1 x i64> @stack_fold_psubsw(<1 x i64> %a, <1 x i64> %b) {
 ; CHECK-LABEL: stack_fold_psubsw:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: movq %rsi, %mm0
@@ -1213,13 +1213,13 @@ define x86_mmx @stack_fold_psubsw(x86_mmx %a, x86_mmx %b) {
 ; CHECK-NEXT: nop
 ; CHECK-NEXT: #NO_APP
 ; CHECK-NEXT: retq
- %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
- %2 = call x86_mmx @llvm.x86.mmx.psubs.w(x86_mmx %a, x86_mmx %b) nounwind readnone
- ret x86_mmx %2
+ %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call <1 x i64> @llvm.x86.mmx.psubs.w(<1 x i64> %a, <1 x i64> %b) nounwind readnone
+ ret <1 x i64> %2
 }
-declare x86_mmx @llvm.x86.mmx.psubs.w(x86_mmx, x86_mmx) nounwind readnone
+declare <1 x i64> @llvm.x86.mmx.psubs.w(<1 x i64>, <1 x i64>) nounwind readnone

-define x86_mmx @stack_fold_psubusb(x86_mmx %a, x86_mmx %b) {
+define <1 x i64> @stack_fold_psubusb(<1 x i64> %a, <1 x i64> %b) {
 ; CHECK-LABEL: stack_fold_psubusb:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: movq %rsi, %mm0
@@ -1230,13 +1230,13 @@ define x86_mmx @stack_fold_psubusb(x86_mmx %a, x86_mmx %b) {
 ; CHECK-NEXT: nop
 ; CHECK-NEXT: #NO_APP
 ; CHECK-NEXT: retq
- %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
- %2 = call x86_mmx @llvm.x86.mmx.psubus.b(x86_mmx %a, x86_mmx %b) nounwind readnone
- ret x86_mmx %2
+ %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call <1 x i64> @llvm.x86.mmx.psubus.b(<1 x i64> %a, <1 x i64> %b) nounwind readnone
+ ret <1 x i64> %2
 }
-declare x86_mmx @llvm.x86.mmx.psubus.b(x86_mmx, x86_mmx) nounwind readnone
+declare <1 x i64> @llvm.x86.mmx.psubus.b(<1 x i64>, <1 x i64>) nounwind readnone

-define x86_mmx @stack_fold_psubusw(x86_mmx %a, x86_mmx %b) {
+define <1 x i64> @stack_fold_psubusw(<1 x i64> %a, <1 x i64> %b) {
 ; CHECK-LABEL: stack_fold_psubusw:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: movq %rsi, %mm0
@@ -1247,13 +1247,13 @@ define x86_mmx @stack_fold_psubusw(x86_mmx %a, x86_mmx %b) {
 ; CHECK-NEXT: nop
 ; CHECK-NEXT: #NO_APP
 ; CHECK-NEXT: retq
- %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
- %2 = call x86_mmx @llvm.x86.mmx.psubus.w(x86_mmx %a, x86_mmx %b) nounwind readnone
- ret x86_mmx %2
+ %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call <1 x i64> @llvm.x86.mmx.psubus.w(<1 x i64> %a, <1 x i64> %b) nounwind readnone
+ ret <1 x i64> %2
 }
-declare x86_mmx @llvm.x86.mmx.psubus.w(x86_mmx, x86_mmx) nounwind readnone
+declare <1 x i64> @llvm.x86.mmx.psubus.w(<1 x i64>, <1 x i64>) nounwind readnone

-define x86_mmx @stack_fold_psubw(x86_mmx %a, x86_mmx %b) {
+define <1 x i64> @stack_fold_psubw(<1 x i64> %a, <1 x i64> %b) {
 ; CHECK-LABEL: stack_fold_psubw:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: movq %rsi, %mm0
@@ -1264,13 +1264,13 @@ define x86_mmx @stack_fold_psubw(x86_mmx %a, x86_mmx %b) {
 ; CHECK-NEXT: nop
 ; CHECK-NEXT: #NO_APP
 ; CHECK-NEXT: retq
- %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
- %2 = call x86_mmx @llvm.x86.mmx.psub.w(x86_mmx %a, x86_mmx %b) nounwind readnone
- ret x86_mmx %2
+ %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call <1 x i64> @llvm.x86.mmx.psub.w(<1 x i64> %a, <1 x i64> %b) nounwind readnone
+ ret <1 x i64> %2
 }
-declare x86_mmx @llvm.x86.mmx.psub.w(x86_mmx, x86_mmx) nounwind readnone
+declare <1 x i64> @llvm.x86.mmx.psub.w(<1 x i64>, <1 x i64>) nounwind readnone

-define x86_mmx @stack_fold_punpckhbw(x86_mmx %a, x86_mmx %b) {
+define <1 x i64> @stack_fold_punpckhbw(<1 x i64> %a, <1 x i64> %b) {
 ; CHECK-LABEL: stack_fold_punpckhbw:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: movq %rsi, %mm0
@@ -1281,13 +1281,13 @@ define x86_mmx @stack_fold_punpckhbw(x86_mmx %a, x86_mmx %b) {
 ; CHECK-NEXT: nop
 ; CHECK-NEXT: #NO_APP
 ; CHECK-NEXT: retq
- %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
- %2 = call x86_mmx @llvm.x86.mmx.punpckhbw(x86_mmx %a, x86_mmx %b) nounwind readnone
- ret x86_mmx %2
+ %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call <1 x i64> @llvm.x86.mmx.punpckhbw(<1 x i64> %a, <1 x i64> %b) nounwind readnone
+ ret <1 x i64> %2
 }
-declare x86_mmx @llvm.x86.mmx.punpckhbw(x86_mmx, x86_mmx) nounwind readnone
+declare <1 x i64> @llvm.x86.mmx.punpckhbw(<1 x i64>, <1 x i64>) nounwind readnone

-define x86_mmx @stack_fold_punpckhdq(x86_mmx %a, x86_mmx %b) {
+define <1 x i64> @stack_fold_punpckhdq(<1 x i64> %a, <1 x i64> %b) {
 ; CHECK-LABEL: stack_fold_punpckhdq:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: movq %rsi, %mm0
@@ -1298,13 +1298,13 @@ define x86_mmx @stack_fold_punpckhdq(x86_mmx %a, x86_mmx %b) {
 ; CHECK-NEXT: nop
 ; CHECK-NEXT: #NO_APP
 ; CHECK-NEXT: retq
- %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
- %2 = call x86_mmx @llvm.x86.mmx.punpckhdq(x86_mmx %a, x86_mmx %b) nounwind readnone
- ret x86_mmx %2
+ %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call <1 x i64> @llvm.x86.mmx.punpckhdq(<1 x i64> %a, <1 x i64> %b) nounwind readnone
+ ret <1 x i64> %2
 }
-declare x86_mmx @llvm.x86.mmx.punpckhdq(x86_mmx, x86_mmx) nounwind readnone
+declare <1 x i64> @llvm.x86.mmx.punpckhdq(<1 x i64>, <1 x i64>) nounwind readnone

-define x86_mmx @stack_fold_punpckhwd(x86_mmx %a, x86_mmx %b) {
+define <1 x i64> @stack_fold_punpckhwd(<1 x i64> %a, <1 x i64> %b) {
 ; CHECK-LABEL: stack_fold_punpckhwd:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: movq %rsi, %mm0
@@ -1315,13 +1315,13 @@ define x86_mmx @stack_fold_punpckhwd(x86_mmx %a, x86_mmx %b) {
 ; CHECK-NEXT: nop
 ; CHECK-NEXT: #NO_APP
 ; CHECK-NEXT: retq
- %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
- %2 = call x86_mmx @llvm.x86.mmx.punpckhwd(x86_mmx %a, x86_mmx %b) nounwind readnone
- ret x86_mmx %2
+ %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call <1 x i64> @llvm.x86.mmx.punpckhwd(<1 x i64> %a, <1 x i64> %b) nounwind readnone
+ ret <1 x i64> %2
 }
-declare x86_mmx @llvm.x86.mmx.punpckhwd(x86_mmx, x86_mmx) nounwind readnone
+declare <1 x i64> @llvm.x86.mmx.punpckhwd(<1 x i64>, <1 x i64>) nounwind readnone

-define x86_mmx @stack_fold_punpcklbw(x86_mmx %a, x86_mmx %b) {
+define <1 x i64> @stack_fold_punpcklbw(<1 x i64> %a, <1 x i64> %b) {
 ; CHECK-LABEL: stack_fold_punpcklbw:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: movq %rsi, %mm0
@@ -1332,13 +1332,13 @@ define x86_mmx @stack_fold_punpcklbw(x86_mmx %a, x86_mmx %b) {
 ; CHECK-NEXT: nop
 ; CHECK-NEXT: #NO_APP
 ; CHECK-NEXT: retq
- %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
- %2 = call x86_mmx @llvm.x86.mmx.punpcklbw(x86_mmx %a, x86_mmx %b) nounwind readnone
- ret x86_mmx %2
+ %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call <1 x i64> @llvm.x86.mmx.punpcklbw(<1 x i64> %a, <1 x i64> %b) nounwind readnone
+ ret <1 x i64> %2
 }
-declare x86_mmx @llvm.x86.mmx.punpcklbw(x86_mmx, x86_mmx) nounwind readnone
+declare <1 x i64> @llvm.x86.mmx.punpcklbw(<1 x i64>, <1 x i64>) nounwind readnone

-define x86_mmx @stack_fold_punpckldq(x86_mmx %a, x86_mmx %b) {
+define <1 x i64> @stack_fold_punpckldq(<1 x i64> %a, <1 x i64> %b) {
 ; CHECK-LABEL: stack_fold_punpckldq:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: movq %rsi, %mm0
@@ -1349,13 +1349,13 @@ define x86_mmx @stack_fold_punpckldq(x86_mmx %a, x86_mmx %b) {
 ; CHECK-NEXT: nop
 ; CHECK-NEXT: #NO_APP
 ; CHECK-NEXT: retq
- %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
- %2 = call x86_mmx @llvm.x86.mmx.punpckldq(x86_mmx %a, x86_mmx %b) nounwind readnone
- ret x86_mmx %2
+ %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call <1 x i64> @llvm.x86.mmx.punpckldq(<1 x i64> %a, <1 x i64> %b) nounwind readnone
+ ret <1 x i64> %2
 }
-declare x86_mmx @llvm.x86.mmx.punpckldq(x86_mmx, x86_mmx) nounwind readnone
+declare <1 x i64> @llvm.x86.mmx.punpckldq(<1 x i64>, <1 x i64>) nounwind readnone

-define x86_mmx @stack_fold_punpcklwd(x86_mmx %a, x86_mmx %b) {
+define <1 x i64> @stack_fold_punpcklwd(<1 x i64> %a, <1 x i64> %b) {
 ; CHECK-LABEL: stack_fold_punpcklwd:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: movq %rsi, %mm0
@@ -1366,13 +1366,13 @@ define x86_mmx @stack_fold_punpcklwd(x86_mmx %a, x86_mmx %b) {
 ; CHECK-NEXT: nop
 ; CHECK-NEXT: #NO_APP
 ; CHECK-NEXT: retq
- %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
- %2 = call x86_mmx @llvm.x86.mmx.punpcklwd(x86_mmx %a, x86_mmx %b) nounwind readnone
- ret x86_mmx %2
+ %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call <1 x i64> @llvm.x86.mmx.punpcklwd(<1 x i64> %a, <1 x i64> %b) nounwind readnone
+ ret <1 x i64> %2
 }
-declare x86_mmx @llvm.x86.mmx.punpcklwd(x86_mmx, x86_mmx) nounwind readnone
+declare <1 x i64> @llvm.x86.mmx.punpcklwd(<1 x i64>, <1 x i64>) nounwind readnone

-define x86_mmx @stack_fold_pxor(x86_mmx %a, x86_mmx %b) {
+define <1 x i64> @stack_fold_pxor(<1 x i64> %a, <1 x i64> %b) {
 ; CHECK-LABEL: stack_fold_pxor:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: movq %rsi, %mm0
@@ -1383,8 +1383,8 @@ define x86_mmx @stack_fold_pxor(x86_mmx %a, x86_mmx %b) {
 ; CHECK-NEXT: nop
 ; CHECK-NEXT: #NO_APP
 ; CHECK-NEXT: retq
- %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
- %2 = call x86_mmx @llvm.x86.mmx.pxor(x86_mmx %a, x86_mmx %b) nounwind readnone
- ret x86_mmx %2
+ %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call <1 x i64> @llvm.x86.mmx.pxor(<1 x i64> %a, <1 x i64> %b) nounwind readnone
+ ret <1 x i64> %2
 }
-declare x86_mmx @llvm.x86.mmx.pxor(x86_mmx, x86_mmx) nounwind readnone
+declare <1 x i64> @llvm.x86.mmx.pxor(<1 x i64>, <1 x i64>) nounwind readnone
diff --git a/llvm/test/CodeGen/X86/vec_extract-mmx.ll b/llvm/test/CodeGen/X86/vec_extract-mmx.ll
index 6fd90243a93033..cd375c04168818 100644
--- a/llvm/test/CodeGen/X86/vec_extract-mmx.ll
+++ b/llvm/test/CodeGen/X86/vec_extract-mmx.ll
@@ -20,9 +20,9 @@ define i32 @test0(ptr %v4) nounwind {
 entry:
 %v5 = load <1 x i64>, ptr %v4, align 8
 %v12 = bitcast <1 x i64> %v5 to <4 x i16>
- %v13 = bitcast <4 x i16> %v12 to x86_mmx
- %v14 = tail call x86_mmx @llvm.x86.sse.pshuf.w(x86_mmx %v13, i8 -18)
- %v15 = bitcast x86_mmx %v14 to <4 x i16>
+ %v13 = bitcast <4 x i16> %v12 to <1 x i64>
+ %v14 = tail call <1 x i64> @llvm.x86.sse.pshuf.w(<1 x i64> %v13, i8 -18)
+ %v15 = bitcast <1 x i64> %v14 to <4 x i16>
 %v16 = bitcast <4 x i16> %v15 to <1 x i64>
 %v17 = extractelement <1 x i64> %v16, i32 0
 %v18 = bitcast i64 %v17 to <2 x i32>
@@ -52,12 +52,12 @@ entry:
 %0 = load i32, ptr %ptr, align 4
 %1 = insertelement <2 x i32> undef, i32 %0, i32 0
 %2 = insertelement <2 x i32> %1, i32 0, i32 1
- %3 = bitcast <2 x i32> %2 to x86_mmx
- %4 = bitcast x86_mmx %3 to i64
+ %3 = bitcast <2 x i32> %2 to <1 x i64>
+ %4 = bitcast <1 x i64> %3 to i64
 %5 = bitcast i64 %4 to <4 x i16>
- %6 = bitcast <4 x i16> %5 to x86_mmx
- %7 = tail call x86_mmx @llvm.x86.sse.pshuf.w(x86_mmx %6, i8 -24)
- %8 = bitcast x86_mmx %7 to <4 x i16>
+ %6 = bitcast <4 x i16> %5 to <1 x i64>
+ %7 = tail call <1 x i64> @llvm.x86.sse.pshuf.w(<1 x i64> %6, i8 -24)
+ %8 = bitcast <1 x i64> %7 to <4 x i16>
 %9 = bitcast <4 x i16> %8 to <1 x i64>
 %10 = extractelement <1 x i64> %9, i32 0
 %11 = bitcast i64 %10 to <2 x i32>
@@ -82,9 +82,9 @@ define i32 @test2(ptr nocapture readonly %ptr) nounwind {
 ; X64-NEXT: emms
 ; X64-NEXT: retq
 entry:
- %0 = load x86_mmx, ptr %ptr, align 8
- %1 = tail call x86_mmx @llvm.x86.sse.pshuf.w(x86_mmx %0, i8 -24)
- %2 = bitcast x86_mmx %1 to <4 x i16>
+ %0 = load <1 x i64>, ptr %ptr, align 8
+ %1 = tail call <1 x i64> @llvm.x86.sse.pshuf.w(<1 x i64> %0, i8 -24)
+ %2 = bitcast <1 x i64> %1 to <4 x i16>
 %3 = bitcast <4 x i16> %2 to <1 x i64>
 %4 = extractelement <1 x i64> %3, i32 0
 %5 = bitcast i64 %4 to <2 x i32>
@@ -93,7 +93,7 @@ entry:
 ret i32 %6
 }

-define i32 @test3(x86_mmx %a) nounwind {
+define i32 @test3(<1 x i64> %a) nounwind {
 ; X86-LABEL: test3:
 ; X86: # %bb.0:
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
@@ -104,13 +104,13 @@ define i32 @test3(x86_mmx %a) nounwind {
 ; X64-NEXT: movq %rdi, %rax
 ; X64-NEXT: # kill: def $eax killed $eax killed $rax
 ; X64-NEXT: retq
- %tmp0 = bitcast x86_mmx %a to <2 x i32>
+ %tmp0 = bitcast <1 x i64> %a to <2 x i32>
 %tmp1 = extractelement <2 x i32> %tmp0, i32 0
 ret i32 %tmp1
 }

 ; Verify we don't muck with extractelts from the upper lane.
-define i32 @test4(x86_mmx %a) nounwind {
+define i32 @test4(<1 x i64> %a) nounwind {
 ; X86-LABEL: test4:
 ; X86: # %bb.0:
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
@@ -122,10 +122,10 @@ define i32 @test4(x86_mmx %a) nounwind {
 ; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
 ; X64-NEXT: movd %xmm0, %eax
 ; X64-NEXT: retq
- %tmp0 = bitcast x86_mmx %a to <2 x i32>
+ %tmp0 = bitcast <1 x i64> %a to <2 x i32>
 %tmp1 = extractelement <2 x i32> %tmp0, i32 1
 ret i32 %tmp1
 }

-declare x86_mmx @llvm.x86.sse.pshuf.w(x86_mmx, i8)
+declare <1 x i64> @llvm.x86.sse.pshuf.w(<1 x i64>, i8)
 declare void @llvm.x86.mmx.emms()
diff --git a/llvm/test/CodeGen/X86/vec_insert-5.ll b/llvm/test/CodeGen/X86/vec_insert-5.ll
index 34280aa647aab7..176ae81e08a764 100644
--- a/llvm/test/CodeGen/X86/vec_insert-5.ll
+++ b/llvm/test/CodeGen/X86/vec_insert-5.ll
@@ -26,8 +26,8 @@ define void @t1(i32 %a, ptr %P) nounwind {
 %tmp12 = shl i32 %a, 12
 %tmp21 = insertelement <2 x i32> undef, i32 %tmp12, i32 1
 %tmp22 = insertelement <2 x i32> %tmp21, i32 0, i32 0
- %tmp23 = bitcast <2 x i32> %tmp22 to x86_mmx
- store x86_mmx %tmp23, ptr %P
+ %tmp23 = bitcast <2 x i32> %tmp22 to <1 x i64>
+ store <1 x i64> %tmp23, ptr %P
 ret void
 }

diff --git a/llvm/test/CodeGen/X86/vec_insert-7.ll b/llvm/test/CodeGen/X86/vec_insert-7.ll
index aed8782ba40aa7..67473febf28c77 100644
--- a/llvm/test/CodeGen/X86/vec_insert-7.ll
+++ b/llvm/test/CodeGen/X86/vec_insert-7.ll
@@ -5,7 +5,7 @@
 ; MMX insertelement is not available; these are promoted to xmm.
 ; (Without SSE they are split to two ints, and the code is much better.)

-define x86_mmx @mmx_movzl(x86_mmx %x) nounwind {
+define <1 x i64> @mmx_movzl(<1 x i64> %x) nounwind {
 ; X86-LABEL: mmx_movzl:
 ; X86: ## %bb.0:
 ; X86-NEXT: movl $32, %eax
@@ -16,9 +16,9 @@ define x86_mmx @mmx_movzl(x86_mmx %x) nounwind {
 ; X64: ## %bb.0:
 ; X64-NEXT: movl $32, %eax
 ; X64-NEXT: retq
- %tmp = bitcast x86_mmx %x to <2 x i32>
+ %tmp = bitcast <1 x i64> %x to <2 x i32>
 %tmp3 = insertelement <2 x i32> %tmp, i32 32, i32 0
 %tmp8 = insertelement <2 x i32> %tmp3, i32 0, i32 1
- %tmp9 = bitcast <2 x i32> %tmp8 to x86_mmx
- ret x86_mmx %tmp9
+ %tmp9 = bitcast <2 x i32> %tmp8 to <1 x i64>
+ ret <1 x i64> %tmp9
 }
diff --git a/llvm/test/CodeGen/X86/vec_insert-mmx.ll b/llvm/test/CodeGen/X86/vec_insert-mmx.ll
index c00417080fe361..f95b34685211d7 100644
--- a/llvm/test/CodeGen/X86/vec_insert-mmx.ll
+++ b/llvm/test/CodeGen/X86/vec_insert-mmx.ll
@@ -3,7 +3,7 @@
 ; RUN: llc < %s -mtriple=x86_64-darwin -mattr=+mmx,+sse4.1 | FileCheck %s --check-prefix=X64

 ; This is not an MMX operation; promoted to xmm.
-define x86_mmx @t0(i32 %A) nounwind {
+define <1 x i64> @t0(i32 %A) nounwind {
 ; X86-LABEL: t0:
 ; X86: ## %bb.0:
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
@@ -17,8 +17,8 @@ define x86_mmx @t0(i32 %A) nounwind {
 ; X64-NEXT: movq %xmm0, %rax
 ; X64-NEXT: retq
 %tmp3 = insertelement <2 x i32> < i32 0, i32 undef >, i32 %A, i32 1
- %tmp4 = bitcast <2 x i32> %tmp3 to x86_mmx
- ret x86_mmx %tmp4
+ %tmp4 = bitcast <2 x i32> %tmp3 to <1 x i64>
+ ret <1 x i64> %tmp4
 }

 define <8 x i8> @t1(i8 zeroext %x) nounwind {
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-mmx.ll b/llvm/test/CodeGen/X86/vector-shuffle-mmx.ll
index 709be6534d777d..60800673ed2dd4 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-mmx.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-mmx.ll
@@ -52,9 +52,9 @@ entry:
 %tmp542 = bitcast <2 x i32> %tmp529 to <4 x i16>
 %tmp543 = add <4 x i16> %tmp542, < i16 0, i16 16448, i16 24672, i16 28784 >
 %tmp555 = bitcast <4 x i16> %tmp543 to <8 x i8>
- %tmp556 = bitcast <8 x i8> %tmp555 to x86_mmx
- %tmp557 = bitcast <8 x i8> zeroinitializer to x86_mmx
- tail call void @llvm.x86.mmx.maskmovq( x86_mmx %tmp557, x86_mmx %tmp556, ptr null)
+ %tmp556 = bitcast <8 x i8> %tmp555 to <1 x i64>
+ %tmp557 = bitcast <8 x i8> zeroinitializer to <1 x i64>
+ tail call void @llvm.x86.mmx.maskmovq( <1 x i64> %tmp557, <1 x i64> %tmp556, ptr null)
 ret void
 }

@@ -115,19 +115,19 @@ define <4 x float> @pr35869() nounwind {
 ; X64-NEXT: punpcklwd %mm1, %mm0 ## mm0 = mm0[0],mm1[0],mm0[1],mm1[1]
 ; X64-NEXT: cvtpi2ps %mm0, %xmm0
 ; X64-NEXT: retq
- %1 = tail call x86_mmx @llvm.x86.mmx.punpcklbw(x86_mmx bitcast (<8 x i8> to x86_mmx), x86_mmx bitcast (<8 x i8> zeroinitializer to x86_mmx))
- %2 = tail call x86_mmx @llvm.x86.mmx.pcmpgt.w(x86_mmx bitcast (<4 x i16> zeroinitializer to x86_mmx), x86_mmx %1)
- %3 = tail call x86_mmx @llvm.x86.mmx.punpckhwd(x86_mmx %1, x86_mmx %2)
- %4 = tail call <4 x float> @llvm.x86.sse.cvtpi2ps(<4 x float> zeroinitializer, x86_mmx %3)
+ %1 = tail call <1 x i64> @llvm.x86.mmx.punpcklbw(<1 x i64> bitcast (<8 x i8> to <1 x i64>), <1 x i64> bitcast (<8 x i8> zeroinitializer to <1 x i64>))
+ %2 = tail call <1 x i64> @llvm.x86.mmx.pcmpgt.w(<1 x i64> bitcast (<4 x i16> zeroinitializer to <1 x i64>), <1 x i64> %1)
+ %3 = tail call <1 x i64> @llvm.x86.mmx.punpckhwd(<1 x i64> %1, <1 x i64> %2)
+ %4 = tail call <4 x float> @llvm.x86.sse.cvtpi2ps(<4 x float> zeroinitializer, <1 x i64> %3)
 %5 = shufflevector <4 x float> %4, <4 x float> undef, <4 x i32>
- %6 = tail call x86_mmx @llvm.x86.mmx.punpcklwd(x86_mmx %1, x86_mmx %2)
- %7 = tail call <4 x float> @llvm.x86.sse.cvtpi2ps(<4 x float> %5, x86_mmx %6)
+ %6 = tail call <1 x i64> @llvm.x86.mmx.punpcklwd(<1 x i64> %1, <1 x i64> %2)
+ %7 = tail call <4 x float> @llvm.x86.sse.cvtpi2ps(<4 x float> %5, <1 x i64> %6)
 ret <4 x float> %7
 }

-declare void @llvm.x86.mmx.maskmovq(x86_mmx, x86_mmx, ptr)
-declare x86_mmx @llvm.x86.mmx.pcmpgt.w(x86_mmx, x86_mmx)
-declare x86_mmx @llvm.x86.mmx.punpcklbw(x86_mmx, x86_mmx)
-declare x86_mmx @llvm.x86.mmx.punpcklwd(x86_mmx, x86_mmx)
-declare x86_mmx @llvm.x86.mmx.punpckhwd(x86_mmx, x86_mmx)
-declare <4 x float> @llvm.x86.sse.cvtpi2ps(<4 x float>, x86_mmx)
+declare void @llvm.x86.mmx.maskmovq(<1 x i64>, <1 x i64>, ptr)
+declare <1 x i64> @llvm.x86.mmx.pcmpgt.w(<1 x i64>, <1 x i64>)
+declare <1 x i64> @llvm.x86.mmx.punpcklbw(<1 x i64>, <1 x i64>)
+declare <1 x i64> @llvm.x86.mmx.punpcklwd(<1 x i64>, <1 x i64>)
+declare <1 x i64> @llvm.x86.mmx.punpckhwd(<1 x i64>, <1 x i64>)
+declare <4 x float> @llvm.x86.sse.cvtpi2ps(<4 x float>, <1 x i64>)
diff --git a/llvm/test/CodeGen/X86/x86-64-psub.ll b/llvm/test/CodeGen/X86/x86-64-psub.ll
index 9817d798fd4bf8..4c11464075ec92 100644
--- a/llvm/test/CodeGen/X86/x86-64-psub.ll
+++ b/llvm/test/CodeGen/X86/x86-64-psub.ll
@@ -32,11 +32,11 @@ entry:
 %__m1.0.insert.i = insertelement <1 x i64> undef, i64 %0, i32 0
 %__m2.0.insert.i = insertelement <1 x i64> undef, i64 %1, i32 0
 %2 = bitcast <1 x i64> %__m1.0.insert.i to <8 x i8>
- %3 = bitcast <8 x i8> %2 to x86_mmx
+ %3 = bitcast <8 x i8> %2 to <1 x i64>
 %4 = bitcast <1 x i64> %__m2.0.insert.i to <8 x i8>
- %5 = bitcast <8 x i8> %4 to x86_mmx
- %6 = tail call x86_mmx @llvm.x86.mmx.psub.b(x86_mmx %3, x86_mmx %5) nounwind
- %7 = bitcast x86_mmx %6 to <8 x i8>
+ %5 = bitcast <8 x i8> %4 to <1 x i64>
+ %6 = tail call <1 x i64> @llvm.x86.mmx.psub.b(<1 x i64> %3, <1 x i64> %5) nounwind
+ %7 = bitcast <1 x i64> %6 to <8 x i8>
 %8 = bitcast <8 x i8> %7 to <1 x i64>
 %retval.0.extract.i15 = extractelement <1 x i64> %8, i32 0
 ret i64 %retval.0.extract.i15
@@ -66,11 +66,11 @@ entry:
 %__m1.0.insert.i = insertelement <1 x i64> undef, i64 %0, i32 0
 %__m2.0.insert.i = insertelement <1 x i64> undef, i64 %1, i32 0
 %2 = bitcast <1 x i64> %__m1.0.insert.i to <4 x i16>
- %3 = bitcast <4 x i16> %2 to x86_mmx
+ %3 = bitcast <4 x i16> %2 to <1 x i64>
 %4 = bitcast <1 x i64> %__m2.0.insert.i to <4 x i16>
- %5 = bitcast <4 x i16> %4 to x86_mmx
- %6 = tail call x86_mmx @llvm.x86.mmx.psub.w(x86_mmx %3, x86_mmx %5) nounwind
- %7 = bitcast x86_mmx %6 to <4 x i16>
+ %5 = bitcast <4 x i16> %4 to <1 x i64>
+ %6 = tail call <1 x i64> @llvm.x86.mmx.psub.w(<1 x i64> %3, <1 x i64> %5) nounwind
+ %7 = bitcast <1 x i64> %6 to <4 x i16>
 %8 = bitcast <4 x i16> %7 to <1 x i64>
 %retval.0.extract.i15 = extractelement <1 x i64> %8, i32 0
 ret i64 %retval.0.extract.i15
@@ -100,11 +100,11 @@ entry:
 %__m1.0.insert.i = insertelement <1 x i64> undef, i64 %0, i32 0
 %__m2.0.insert.i = insertelement <1 x i64> undef, i64 %1, i32 0
 %2 = bitcast <1 x i64> %__m1.0.insert.i to <2 x i32>
- %3 = bitcast <2 x i32> %2 to x86_mmx
+ %3 = bitcast <2 x i32> %2 to <1 x i64>
 %4 = bitcast <1 x i64> %__m2.0.insert.i to <2 x i32>
- %5 = bitcast <2 x i32> %4 to x86_mmx
- %6 = tail call x86_mmx @llvm.x86.mmx.psub.d(x86_mmx %3, x86_mmx %5) nounwind
- %7 = bitcast x86_mmx %6 to <2 x i32>
+ %5 = bitcast <2 x i32> %4 to <1 x i64>
+ %6 = tail call <1 x i64> @llvm.x86.mmx.psub.d(<1 x i64> %3, <1 x i64> %5) nounwind
+ %7 = bitcast <1 x i64> %6 to <2 x i32>
 %8 = bitcast <2 x i32> %7 to <1 x i64>
 %retval.0.extract.i15 = extractelement <1 x i64> %8, i32 0
 ret i64 %retval.0.extract.i15
@@ -134,11 +134,11 @@ entry:
 %__m1.0.insert.i = insertelement <1 x i64> undef, i64 %0, i32 0
 %__m2.0.insert.i = insertelement <1 x i64> undef, i64 %1, i32 0
 %2 = bitcast <1 x i64> %__m1.0.insert.i to <8 x i8>
- %3 = bitcast <8 x i8> %2 to x86_mmx
+ %3 = bitcast <8 x i8> %2 to <1 x i64>
 %4 = bitcast <1 x i64> %__m2.0.insert.i to <8 x i8>
- %5 = bitcast <8 x i8> %4 to x86_mmx
- %6 = tail call x86_mmx @llvm.x86.mmx.psubs.b(x86_mmx %3, x86_mmx %5) nounwind
- %7 = bitcast x86_mmx %6 to <8 x i8>
+ %5 = bitcast <8 x i8> %4 to <1 x i64>
+ %6 = tail call <1 x i64> @llvm.x86.mmx.psubs.b(<1 x i64> %3, <1 x i64> %5) nounwind
+ %7 = bitcast <1 x i64> %6 to <8 x i8>
 %8 = bitcast <8 x i8> %7 to <1 x i64>
 %retval.0.extract.i15 = extractelement <1 x i64> %8, i32 0
 ret i64 %retval.0.extract.i15
@@ -168,11 +168,11 @@ entry:
 %__m1.0.insert.i = insertelement <1 x i64> undef, i64 %0, i32 0
 %__m2.0.insert.i = insertelement <1 x i64> undef, i64 %1, i32 0
 %2 = bitcast <1 x i64> %__m1.0.insert.i to <4 x i16>
- %3 = bitcast <4 x i16> %2 to x86_mmx
+ %3 = bitcast <4 x i16> %2 to <1 x i64>
 %4 = bitcast <1 x i64> %__m2.0.insert.i to <4 x i16>
- %5 = bitcast <4 x i16> %4 to x86_mmx
- %6 = tail call x86_mmx @llvm.x86.mmx.psubs.w(x86_mmx %3, x86_mmx %5) nounwind
- %7 = bitcast x86_mmx %6 to <4 x i16>
+ %5 = bitcast <4 x i16> %4 to <1 x i64>
+ %6 = tail call <1 x i64> @llvm.x86.mmx.psubs.w(<1 x i64> %3, <1 x i64> %5) nounwind
+ %7 = bitcast <1 x i64> %6 to <4 x i16>
 %8 = bitcast <4 x i16> %7 to <1 x i64>
 %retval.0.extract.i15 = extractelement <1 x i64> %8, i32 0
 ret i64 %retval.0.extract.i15
@@ -202,11 +202,11 @@ entry:
 %__m1.0.insert.i = insertelement <1 x i64> undef, i64 %0, i32 0
 %__m2.0.insert.i = insertelement <1 x i64> undef, i64 %1, i32 0
 %2 = bitcast <1 x i64> %__m1.0.insert.i to <8 x i8>
- %3 = bitcast <8 x i8> %2 to x86_mmx
+ %3 = bitcast <8 x i8> %2 to <1 x i64>
 %4 = bitcast <1 x i64> %__m2.0.insert.i to <8 x i8>
- %5 = bitcast <8 x i8> %4 to x86_mmx
- %6 = tail call x86_mmx @llvm.x86.mmx.psubus.b(x86_mmx %3, x86_mmx %5) nounwind
- %7 = bitcast x86_mmx %6 to <8 x i8>
+ %5 = bitcast <8 x i8> %4 to <1 x i64>
+ %6 = tail call <1 x i64> @llvm.x86.mmx.psubus.b(<1 x i64> %3, <1 x i64> %5) nounwind
+ %7 = bitcast <1 x i64> %6 to <8 x i8>
 %8 = bitcast <8 x i8> %7 to <1 x i64>
 %retval.0.extract.i15 = extractelement <1 x i64> %8, i32 0
 ret i64 %retval.0.extract.i15
@@ -236,26 +236,26 @@ entry:
 %__m1.0.insert.i = insertelement <1 x i64> undef, i64 %0, i32 0
 %__m2.0.insert.i = insertelement <1 x i64> undef, i64 %1, i32 0
 %2 = bitcast <1 x i64> %__m1.0.insert.i to <4 x i16>
- %3 = bitcast <4 x i16> %2 to x86_mmx
+ %3 = bitcast <4 x i16> %2 to <1 x i64>
 %4 = bitcast <1 x i64> %__m2.0.insert.i to <4 x i16>
- %5 = bitcast <4 x i16> %4 to x86_mmx
- %6 = tail call x86_mmx @llvm.x86.mmx.psubus.w(x86_mmx %3, x86_mmx %5) nounwind
- %7 = bitcast x86_mmx %6 to <4 x i16>
+ %5 = bitcast <4 x i16> %4 to <1 x i64>
+ %6 = tail call <1 x i64> @llvm.x86.mmx.psubus.w(<1 x i64> %3, <1 x i64> %5) nounwind
+ %7 = bitcast <1 x i64> %6 to <4 x i16>
 %8 = bitcast <4 x i16> %7 to <1 x i64>
 %retval.0.extract.i15 = extractelement <1 x i64> %8, i32 0
 ret i64 %retval.0.extract.i15
 }

-declare x86_mmx @llvm.x86.mmx.psubus.w(x86_mmx, x86_mmx) nounwind readnone
+declare <1 x i64> @llvm.x86.mmx.psubus.w(<1 x i64>, <1 x i64>) nounwind readnone

-declare x86_mmx @llvm.x86.mmx.psubus.b(x86_mmx, x86_mmx) nounwind readnone
+declare <1 x i64> @llvm.x86.mmx.psubus.b(<1 x i64>, <1 x i64>) nounwind readnone

-declare x86_mmx @llvm.x86.mmx.psubs.w(x86_mmx, x86_mmx) nounwind readnone
+declare <1 x i64> @llvm.x86.mmx.psubs.w(<1 x i64>, <1 x i64>) nounwind readnone

-declare x86_mmx @llvm.x86.mmx.psubs.b(x86_mmx, x86_mmx) nounwind readnone
+declare <1 x i64> @llvm.x86.mmx.psubs.b(<1 x i64>, <1 x i64>) nounwind readnone

-declare x86_mmx @llvm.x86.mmx.psub.d(x86_mmx, x86_mmx) nounwind readnone
+declare <1 x i64> @llvm.x86.mmx.psub.d(<1 x i64>, <1 x i64>) nounwind readnone

-declare x86_mmx @llvm.x86.mmx.psub.w(x86_mmx, x86_mmx) nounwind readnone
+declare <1 x i64> @llvm.x86.mmx.psub.w(<1 x i64>, <1 x i64>) nounwind readnone

-declare x86_mmx @llvm.x86.mmx.psub.b(x86_mmx, x86_mmx) nounwind readnone
+declare <1 x i64> @llvm.x86.mmx.psub.b(<1 x i64>, <1 x i64>) nounwind readnone
diff --git a/llvm/test/Instrumentation/MemorySanitizer/X86/mmx-intrinsics.ll b/llvm/test/Instrumentation/MemorySanitizer/X86/mmx-intrinsics.ll
index dce1aa2bcd1d41..1d2e38eb5e63d8 100644
--- a/llvm/test/Instrumentation/MemorySanitizer/X86/mmx-intrinsics.ll
+++ b/llvm/test/Instrumentation/MemorySanitizer/X86/mmx-intrinsics.ll
@@ -4,7 +4,7 @@
 target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"

-declare x86_mmx @llvm.x86.ssse3.phadd.w(x86_mmx, x86_mmx) nounwind readnone
+declare <1 x i64> @llvm.x86.ssse3.phadd.w(<1 x i64>, <1 x i64>) nounwind readnone

 define i64 @test1(<1 x i64> %a, <1 x i64> %b) #0 {
 ; CHECK-LABEL: define i64 @test1(
@@ -35,16 +35,16 @@ define i64 @test1(<1 x i64> %a, <1 x i64> %b) #0 {
 entry:
 %0 = bitcast <1 x i64> %b to <4 x i16>
 %1 = bitcast <1 x i64> %a to <4 x i16>
- %2 = bitcast <4 x i16> %1 to x86_mmx
- %3 = bitcast <4 x i16> %0 to x86_mmx
- %4 = tail call x86_mmx @llvm.x86.ssse3.phadd.w(x86_mmx %2, x86_mmx %3) nounwind readnone
- %5 = bitcast x86_mmx %4 to <4 x i16>
+ %2 = bitcast <4 x i16> %1 to <1 x i64>
+ %3 = bitcast <4 x i16> %0 to <1 x i64>
+ %4 = tail call <1 x i64> @llvm.x86.ssse3.phadd.w(<1 x i64> %2, <1 x i64> %3) nounwind readnone
+ %5 = bitcast <1 x i64> %4 to <4 x i16>
 %6 = bitcast <4 x i16> %5 to <1 x i64>
 %7 = extractelement <1 x i64> %6, i32 0
 ret i64 %7
 }

-declare x86_mmx @llvm.x86.mmx.pcmpgt.d(x86_mmx, x86_mmx) nounwind readnone
+declare <1 x i64> @llvm.x86.mmx.pcmpgt.d(<1 x i64>, <1 x i64>) nounwind readnone

 define i64 @test88(<1 x i64> %a, <1 x i64> %b) #0 {
 ; CHECK-LABEL: define i64 @test88(
@@ -75,16 +75,16 @@ define i64 @test88(<1 x i64> %a, <1 x i64> %b) #0 {
 entry:
 %0 = bitcast <1 x i64> %b to <2 x i32>
 %1 = bitcast <1 x i64> %a to <2 x i32>
- %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
- %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
- %2 = tail call x86_mmx @llvm.x86.mmx.pcmpgt.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
- %3 = bitcast x86_mmx %2 to <2 x i32>
+ %mmx_var.i = bitcast <2 x i32> %1 to <1 x i64>
+ %mmx_var1.i = bitcast <2 x i32> %0 to <1 x i64>
+ %2 = tail call <1 x i64> @llvm.x86.mmx.pcmpgt.d(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
+ %3 = bitcast <1 x i64> %2 to <2 x i32>
 %4 = bitcast <2 x i32> %3 to <1 x i64>
 %5 = extractelement <1 x i64> %4, i32 0
 ret i64 %5
 }

-declare x86_mmx @llvm.x86.mmx.pcmpgt.w(x86_mmx, x86_mmx) nounwind readnone
+declare <1 x i64> @llvm.x86.mmx.pcmpgt.w(<1 x i64>, <1 x i64>) nounwind readnone

 define i64 @test87(<1 x i64> %a, <1 x i64> %b) #0 {
 ; CHECK-LABEL: define i64 @test87(
@@ -115,16 +115,16 @@ define i64 @test87(<1 x i64> %a, <1 x i64> %b) #0 {
 entry:
 %0 = bitcast <1 x i64> %b to <4 x i16>
 %1 = bitcast <1 x i64> %a to <4 x i16>
- %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
- %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
- %2 = tail call x86_mmx @llvm.x86.mmx.pcmpgt.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
- %3 = bitcast x86_mmx %2 to <4 x i16>
+ %mmx_var.i = bitcast <4 x i16> %1 to <1 x i64>
+ %mmx_var1.i = bitcast <4 x i16> %0 to <1 x i64>
+ %2 = tail call <1 x i64> @llvm.x86.mmx.pcmpgt.w(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
+ %3 = bitcast <1 x i64> %2 to <4 x i16>
 %4 = bitcast <4 x i16> %3 to <1 x i64>
 %5 = extractelement <1 x i64> %4, i32 0
 ret i64 %5
 }

-declare x86_mmx @llvm.x86.mmx.pcmpgt.b(x86_mmx, x86_mmx) nounwind readnone
+declare <1 x i64> @llvm.x86.mmx.pcmpgt.b(<1 x i64>, <1 x i64>) nounwind readnone

 define i64 @test86(<1 x i64> %a, <1 x i64> %b) #0 {
 ; CHECK-LABEL: define i64 @test86(
@@ -155,16 +155,16 @@ define i64 @test86(<1 x i64> %a, <1 x i64> %b) #0 {
 entry:
 %0 = bitcast <1 x i64> %b to <8 x i8>
 %1 = bitcast <1 x i64> %a to <8 x i8>
- %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
- %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
- %2 = tail call x86_mmx @llvm.x86.mmx.pcmpgt.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
- %3 = bitcast x86_mmx %2 to <8 x i8>
+ %mmx_var.i = bitcast <8 x i8> %1 to <1 x i64>
+ %mmx_var1.i = bitcast <8 x i8> %0 to <1 x i64>
+ %2 = tail call <1 x i64> @llvm.x86.mmx.pcmpgt.b(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
+ %3 = bitcast <1 x i64> %2 to <8 x i8>
 %4 = bitcast <8 x i8> %3 to <1 x i64>
 %5 = extractelement <1 x i64> %4, i32 0
 ret i64 %5
 }

-declare x86_mmx @llvm.x86.mmx.pcmpeq.d(x86_mmx, x86_mmx) nounwind readnone
+declare <1 x i64> @llvm.x86.mmx.pcmpeq.d(<1 x i64>, <1 x i64>) nounwind readnone

 define i64 @test85(<1 x i64> %a, <1 x i64> %b) #0 {
 ; CHECK-LABEL: define i64 @test85(
@@ -195,16 +195,16 @@ define i64 @test85(<1 x i64> %a, <1 x i64> %b) #0 {
 entry:
 %0 = bitcast <1 x i64> %b to <2 x i32>
 %1 = bitcast <1 x i64> %a to <2 x i32>
- %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
- %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
- %2 = tail call x86_mmx @llvm.x86.mmx.pcmpeq.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
- %3 = bitcast x86_mmx %2 to <2 x i32>
+ %mmx_var.i = bitcast <2 x i32> %1 to <1 x i64>
+ %mmx_var1.i = bitcast <2 x i32> %0 to <1 x i64>
+ %2 = tail call <1 x i64> @llvm.x86.mmx.pcmpeq.d(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
+ %3 = bitcast <1 x i64> %2 to <2 x i32>
 %4 = bitcast <2 x i32> %3 to <1 x i64>
 %5 = extractelement <1 x i64> %4, i32 0
 ret i64 %5
 }

-declare x86_mmx @llvm.x86.mmx.pcmpeq.w(x86_mmx, x86_mmx) nounwind readnone
+declare <1 x i64> @llvm.x86.mmx.pcmpeq.w(<1 x i64>, <1 x i64>) nounwind readnone

 define i64 @test84(<1 x i64> %a, <1 x i64> %b) #0 {
 ; CHECK-LABEL: define i64 @test84(
@@ -235,16 +235,16 @@ define i64 @test84(<1 x i64> %a, <1 x i64> %b) #0 {
 entry:
 %0 = bitcast <1 x i64> %b to <4 x i16>
 %1 = bitcast <1 x i64> %a to <4 x i16>
- %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
- %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
- %2 = tail call x86_mmx @llvm.x86.mmx.pcmpeq.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
- %3 = bitcast x86_mmx %2 to <4 x i16>
+ %mmx_var.i = bitcast <4 x i16> %1 to <1 x i64>
+ %mmx_var1.i = bitcast <4 x i16> %0 to <1 x i64>
+ %2 = tail call <1 x i64> @llvm.x86.mmx.pcmpeq.w(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
+ %3 = bitcast <1 x i64> %2 to <4 x i16>
 %4 = bitcast <4 x i16> %3 to <1 x i64>
 %5 = extractelement <1 x i64> %4, i32 0
 ret i64 %5
 }

-declare x86_mmx @llvm.x86.mmx.pcmpeq.b(x86_mmx, x86_mmx) nounwind readnone
+declare <1 x i64> @llvm.x86.mmx.pcmpeq.b(<1 x i64>, <1 x i64>) nounwind readnone

 define i64 @test83(<1 x i64> %a, <1 x i64> %b) #0 {
 ; CHECK-LABEL: define i64 @test83(
@@ -275,16 +275,16 @@ define i64 @test83(<1 x i64> %a, <1 x i64> %b) #0 {
 entry:
 %0 = bitcast <1 x i64> %b to <8 x i8>
 %1 = bitcast <1 x i64> %a to <8 x i8>
- %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
- %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
- %2 = tail call x86_mmx @llvm.x86.mmx.pcmpeq.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
- %3 = bitcast x86_mmx %2 to <8 x i8>
+ %mmx_var.i = bitcast <8 x i8> %1 to <1 x i64>
+ %mmx_var1.i = bitcast <8 x i8> %0 to <1 x i64>
+ %2 = tail call <1 x i64> @llvm.x86.mmx.pcmpeq.b(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
+ %3 = bitcast <1 x i64> %2 to <8 x i8>
 %4 = bitcast <8 x i8> %3 to <1 x i64>
 %5 = extractelement <1 x i64> %4, i32 0
 ret i64 %5
 }

-declare x86_mmx @llvm.x86.mmx.punpckldq(x86_mmx, x86_mmx) nounwind readnone
+declare <1 x i64> @llvm.x86.mmx.punpckldq(<1 x i64>, <1 x i64>) nounwind readnone

 define i64 @test82(<1 x i64> %a, <1 x i64> %b) #0 {
 ; CHECK-LABEL: define i64 @test82(
@@ -315,16 +315,16 @@ define i64 @test82(<1 x i64> %a, <1 x i64> %b) #0 {
 entry:
 %0 = bitcast <1 x i64> %b to <2 x i32>
 %1 = bitcast <1 x i64> %a to <2 x i32>
- %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
- %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
- %2 = tail call x86_mmx @llvm.x86.mmx.punpckldq(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
- %3 = bitcast x86_mmx %2 to <2 x i32>
+ %mmx_var.i = bitcast <2 x i32> %1 to <1 x i64>
+ %mmx_var1.i = bitcast <2 x i32> %0 to <1 x i64>
+ %2 = tail call <1 x i64> @llvm.x86.mmx.punpckldq(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
+ %3 = bitcast <1 x i64> %2 to <2 x i32>
 %4 = bitcast <2 x i32> %3 to <1 x i64>
 %5 = extractelement <1 x i64> %4, i32 0
 ret i64 %5
 }

-declare x86_mmx @llvm.x86.mmx.punpcklwd(x86_mmx, x86_mmx) nounwind readnone
+declare <1 x i64> @llvm.x86.mmx.punpcklwd(<1 x i64>, <1 x i64>) nounwind readnone

 define i64 @test81(<1 x i64> %a, <1 x i64> %b) #0 {
 ; CHECK-LABEL: define i64 @test81(
@@ -355,16 +355,16 @@ define i64 @test81(<1 x i64> %a, <1 x i64> %b) #0 {
 entry:
 %0 = bitcast <1 x i64> %b to <4 x i16>
 %1 = bitcast <1 x i64> %a to <4 x i16>
- %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
- %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
- %2 = tail call x86_mmx @llvm.x86.mmx.punpcklwd(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
- %3 = bitcast x86_mmx %2 to <4 x i16>
+ %mmx_var.i = bitcast <4 x i16> %1 to <1 x i64>
+ %mmx_var1.i = bitcast <4 x i16> %0 to <1 x i64>
+ %2 = tail call <1 x i64> @llvm.x86.mmx.punpcklwd(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
+ %3 = bitcast <1 x i64> %2 to <4 x i16>
 %4 = bitcast <4 x i16> %3 to <1 x i64>
 %5 = extractelement <1 x i64> %4, i32 0
 ret i64 %5
 }

-declare x86_mmx @llvm.x86.mmx.punpcklbw(x86_mmx, x86_mmx) nounwind readnone
+declare <1 x i64> @llvm.x86.mmx.punpcklbw(<1 x i64>, <1 x i64>) nounwind readnone

 define i64 @test80(<1 x i64> %a, <1 x i64> %b) #0 {
 ; CHECK-LABEL: define i64 @test80(
@@ -395,16 +395,16 @@ define i64 @test80(<1 x i64> %a, <1 x i64> %b) #0 {
 entry:
 %0 = bitcast <1 x i64> %b to <8 x i8>
 %1 = bitcast <1 x i64> %a to <8 x i8>
- %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
- %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
- %2 = tail call x86_mmx @llvm.x86.mmx.punpcklbw(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
- %3 = bitcast x86_mmx %2 to <8 x i8>
+ %mmx_var.i = bitcast <8 x i8> %1 to <1 x i64>
+ %mmx_var1.i = bitcast <8 x i8> %0 to <1 x i64>
+ %2 = tail call <1 x i64> @llvm.x86.mmx.punpcklbw(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
+ %3 = bitcast <1 x i64> %2 to <8 x i8>
 %4 = bitcast <8 x i8> %3 to <1 x i64>
 %5 = extractelement <1 x i64> %4, i32 0
 ret i64 %5
 }

-declare x86_mmx @llvm.x86.mmx.punpckhdq(x86_mmx, x86_mmx) nounwind readnone
+declare <1 x i64> @llvm.x86.mmx.punpckhdq(<1 x i64>, <1 x i64>) nounwind readnone

 define i64 @test79(<1 x i64> %a, <1 x i64> %b) #0 {
 ; CHECK-LABEL: define i64 @test79(
@@ -435,16 +435,16 @@ define i64 @test79(<1 x i64> %a, <1 x i64> %b) #0 {
 entry:
 %0 = bitcast <1 x i64> %b to <2 x i32>
 %1 = bitcast <1 x i64> %a to <2 x i32>
- %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
- %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
- %2 = tail call x86_mmx @llvm.x86.mmx.punpckhdq(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
- %3 = bitcast x86_mmx %2 to <2 x i32>
+ %mmx_var.i = bitcast <2 x i32> %1 to <1 x i64>
+ %mmx_var1.i = bitcast <2 x i32> %0 to <1 x i64>
+ %2 = tail call <1 x i64> @llvm.x86.mmx.punpckhdq(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
+ %3 = bitcast <1 x i64> %2 to <2 x i32>
 %4 = bitcast <2 x i32> %3 to <1 x i64>
 %5 = extractelement <1 x i64> %4, i32 0
 ret i64 %5
 }

-declare x86_mmx @llvm.x86.mmx.punpckhwd(x86_mmx, x86_mmx) nounwind readnone
+declare <1 x i64> @llvm.x86.mmx.punpckhwd(<1 x i64>, <1 x i64>) nounwind readnone

 define i64 @test78(<1 x i64> %a, <1 x i64> %b) #0 {
 ; CHECK-LABEL: define i64 @test78(
@@ -475,16 +475,16 @@ define i64 @test78(<1 x i64> %a, <1 x i64> %b) #0 {
 entry:
 %0 = bitcast <1 x i64> %b to <4 x i16>
 %1 = bitcast <1 x i64> %a to <4 x i16>
- %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
- %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
- %2 = tail call x86_mmx @llvm.x86.mmx.punpckhwd(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
- %3 = bitcast x86_mmx %2 to <4 x i16>
+ %mmx_var.i = bitcast <4 x i16> %1 to <1 x i64>
+ %mmx_var1.i = bitcast <4 x i16> %0 to <1 x i64>
+ %2 = tail call <1 x i64> @llvm.x86.mmx.punpckhwd(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
+ %3 = bitcast <1 x i64> %2 to <4 x i16>
 %4 = bitcast <4 x i16> %3 to <1 x i64>
 %5 = extractelement <1 x i64> %4, i32 0
 ret i64 %5
 }

-declare x86_mmx @llvm.x86.mmx.punpckhbw(x86_mmx, x86_mmx) nounwind readnone
+declare <1 x i64> @llvm.x86.mmx.punpckhbw(<1 x i64>, <1 x i64>) nounwind readnone

 define i64 @test77(<1 x i64> %a, <1 x i64> %b) #0 {
 ; CHECK-LABEL: define i64 @test77(
@@ -515,16 +515,16 @@ define i64 @test77(<1 x i64> %a, <1 x i64> %b) #0 {
 entry:
 %0 = bitcast <1 x i64> %b to <8 x i8>
 %1 = bitcast <1 x i64> %a to <8 x i8>
- %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
- %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
- %2 = tail call x86_mmx @llvm.x86.mmx.punpckhbw(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
- %3 = bitcast x86_mmx %2 to <8 x i8>
+ %mmx_var.i = bitcast <8 x i8> %1 to <1 x i64>
+ %mmx_var1.i = bitcast <8 x i8> %0 to <1 x i64>
+ %2 = tail call <1 x i64> @llvm.x86.mmx.punpckhbw(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
+ %3 = bitcast <1 x i64> %2 to <8 x i8>
 %4 = bitcast <8 x i8> %3 to <1 x i64>
 %5 = extractelement <1 x i64> %4, i32 0
 ret i64 %5
 }

-declare x86_mmx @llvm.x86.mmx.packuswb(x86_mmx, x86_mmx) nounwind readnone
+declare <1 x i64> @llvm.x86.mmx.packuswb(<1 x i64>, <1 x i64>) nounwind readnone

 define i64 @test76(<1 x i64> %a, <1 x i64> %b) #0 {
 ; CHECK-LABEL: define i64 @test76(
@@ -563,16 +563,16 @@ define i64 @test76(<1 x i64> %a, <1 x i64> %b) #0 {
 entry:
 %0 = bitcast <1 x i64> %b to <4 x i16>
 %1 = bitcast <1 x i64> %a to <4 x i16>
- %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
- %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
- %2 = tail call x86_mmx @llvm.x86.mmx.packuswb(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
- %3 = bitcast x86_mmx %2 to <8 x i8>
+ %mmx_var.i = bitcast <4 x i16> %1 to <1 x i64>
+ %mmx_var1.i = bitcast <4 x i16> %0 to <1 x i64>
+ %2 = tail call <1 x i64> @llvm.x86.mmx.packuswb(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
+ %3 = bitcast <1 x i64> %2 to <8 x i8>
 %4 = bitcast <8 x i8> %3 to <1 x i64>
 %5 = extractelement <1 x i64> %4, i32 0
 ret i64 %5
 }

-declare x86_mmx @llvm.x86.mmx.packssdw(x86_mmx, x86_mmx) nounwind readnone
+declare <1 x i64> @llvm.x86.mmx.packssdw(<1 x i64>, <1 x i64>) nounwind readnone

 define i64 @test75(<1 x i64> %a, <1 x i64> %b) #0 {
 ; CHECK-LABEL: define i64 @test75(
@@ -611,16 +611,16 @@ define i64 @test75(<1 x i64> %a, <1 x i64> %b) #0 {
 entry:
 %0 = bitcast <1 x i64> %b to <2 x i32>
 %1 = bitcast <1 x i64> %a to <2 x i32>
- %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
- %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
- %2 = tail call x86_mmx @llvm.x86.mmx.packssdw(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
- %3 = bitcast x86_mmx %2 to <4 x i16>
+ %mmx_var.i = bitcast <2 x i32> %1 to <1 x i64>
+ %mmx_var1.i = bitcast <2 x i32> %0 to <1 x i64>
+ %2 = tail call <1 x i64> @llvm.x86.mmx.packssdw(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
+ %3 = bitcast <1 x i64> %2 to <4 x i16>
 %4 = bitcast <4 x i16> %3 to <1 x i64>
 %5 = extractelement <1 x i64> %4, i32 0
 ret i64 %5
 }

-declare x86_mmx @llvm.x86.mmx.packsswb(x86_mmx, x86_mmx) nounwind readnone
+declare <1 x i64> @llvm.x86.mmx.packsswb(<1 x i64>, <1 x i64>) nounwind readnone

 define i64 @test74(<1 x i64> %a, <1 x i64> %b) #0 {
 ; CHECK-LABEL: define i64 @test74(
@@ -659,16 +659,16 @@ define i64 @test74(<1 x i64> %a, <1 x i64> %b) #0 {
 entry:
 %0 = bitcast <1 x i64> %b to <4 x i16>
 %1 = bitcast <1 x i64> %a to <4 x i16>
- %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
- %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
- %2 = tail call x86_mmx @llvm.x86.mmx.packsswb(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
- %3 = bitcast x86_mmx %2 to <8 x i8>
+ %mmx_var.i = bitcast <4 x i16> %1 to <1 x i64>
+ %mmx_var1.i = bitcast <4 x i16> %0 to <1 x i64>
+ %2 = tail call <1 x i64> @llvm.x86.mmx.packsswb(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
+ %3 = bitcast <1 x i64> %2 to <8 x i8>
 %4 = bitcast <8 x i8> %3 to <1 x i64>
 %5 = extractelement <1 x i64> %4, i32 0
 ret i64 %5
 }

-declare x86_mmx @llvm.x86.mmx.psrai.d(x86_mmx, i32) nounwind readnone
+declare <1 x i64> @llvm.x86.mmx.psrai.d(<1 x i64>, i32) nounwind readnone

 define i64 @test73(<1 x i64> %a) #0 {
 ; CHECK-LABEL: define i64 @test73(
@@ -694,15 +694,15 @@ define i64 @test73(<1 x i64> %a) #0 {
 ;
 entry:
 %0 = bitcast <1 x i64> %a to <2 x i32>
- %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx
- %1 = tail call x86_mmx @llvm.x86.mmx.psrai.d(x86_mmx %mmx_var.i, i32 3) nounwind
- %2 = bitcast x86_mmx %1 to <2 x i32>
+ %mmx_var.i = bitcast <2 x i32> %0 to <1 x i64>
+ %1 = tail call <1 x i64> @llvm.x86.mmx.psrai.d(<1 x i64> %mmx_var.i, i32 3) nounwind
+ %2 = bitcast <1 x i64> %1 to <2 x i32>
 %3 = bitcast <2 x i32> %2 to <1 x i64>
 %4 = extractelement <1 x i64> %3, i32 0
 ret i64 %4
 }

-declare x86_mmx @llvm.x86.mmx.psrai.w(x86_mmx, i32) nounwind readnone
+declare <1 x i64> @llvm.x86.mmx.psrai.w(<1 x i64>, i32) nounwind readnone

 define i64 @test72(<1 x i64> %a) #0 {
 ; CHECK-LABEL: define i64 @test72(
@@ -728,9 +728,9 @@ define i64 @test72(<1 x i64> %a) #0 {
 ;
 entry:
 %0 = bitcast <1 x i64> %a to <4 x i16>
- %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx
- %1 = tail call x86_mmx @llvm.x86.mmx.psrai.w(x86_mmx %mmx_var.i, i32 3) nounwind
- %2 = bitcast x86_mmx %1 to <4 x i16>
+ %mmx_var.i = bitcast <4 x i16> %0 to <1 x i64>
+ %1 = tail call <1 x i64> @llvm.x86.mmx.psrai.w(<1 x i64> %mmx_var.i, i32 3) nounwind
+ %2 = bitcast <1 x i64> %1 to <4 x i16>
 %3 = bitcast <4 x i16> %2 to <1 x i64>
 %4 = extractelement <1 x i64> %3, i32 0
 ret i64 %4
@@ -760,15 +760,15 @@ define i64 @test72_2(<1 x i64> %a) #0 {
 ;
 entry:
 %0 = bitcast <1 x i64> %a to <4 x i16>
- %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx
- %1 = tail call x86_mmx @llvm.x86.mmx.psrai.w(x86_mmx %mmx_var.i, i32 0) nounwind
- %2 = bitcast x86_mmx %1 to <4 x i16>
+ %mmx_var.i = bitcast <4 x i16> %0 to <1 x i64>
+ %1 = tail call <1 x i64> @llvm.x86.mmx.psrai.w(<1 x i64> %mmx_var.i, i32 0) nounwind
+ %2 = bitcast <1 x i64> %1 to <4 x i16>
 %3 = bitcast <4 x i16> %2 to <1 x i64>
 %4 = extractelement <1 x i64> %3, i32 0
 ret i64 %4
 }

-declare x86_mmx @llvm.x86.mmx.psrli.q(x86_mmx, i32) nounwind readnone
+declare <1 x i64> @llvm.x86.mmx.psrli.q(<1 x i64>, i32) nounwind readnone

 define i64 @test71(<1 x i64> %a) #0 {
 ; CHECK-LABEL: define i64 @test71(
@@ -790,13 +790,13 @@ define i64 @test71(<1 x i64> %a) #0 {
 ;
 entry:
 %0 = extractelement <1 x i64> %a, i32 0
- %mmx_var.i = bitcast i64 %0 to x86_mmx
- %1 = tail call x86_mmx @llvm.x86.mmx.psrli.q(x86_mmx %mmx_var.i, i32 3) nounwind
- %2 = bitcast x86_mmx %1 to i64
+ %mmx_var.i = bitcast i64 %0 to <1 x i64>
+ %1 = tail call <1 x i64> @llvm.x86.mmx.psrli.q(<1 x i64> %mmx_var.i, i32 3) nounwind
+ %2 = bitcast <1 x i64> %1 to i64
 ret i64 %2
 }

-declare x86_mmx @llvm.x86.mmx.psrli.d(x86_mmx, i32) nounwind readnone
+declare <1 x i64> @llvm.x86.mmx.psrli.d(<1 x i64>, i32) nounwind readnone

 define i64 @test70(<1 x i64> %a) #0 {
 ; CHECK-LABEL: define i64 @test70(
@@ -822,9 +822,9 @@ define i64 @test70(<1 x i64> %a) #0 {
 ;
 entry:
 %0 = bitcast <1 x i64> %a to <2 x i32>
- %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx
- %1 = tail call x86_mmx @llvm.x86.mmx.psrli.d(x86_mmx %mmx_var.i, i32 3) nounwind
- %2 = bitcast x86_mmx %1 to <2 x i32>
+ %mmx_var.i = bitcast <2 x i32> %0 to <1 x i64>
+ %1 = tail call <1 x i64> @llvm.x86.mmx.psrli.d(<1 x i64> %mmx_var.i, i32 3) nounwind
+ %2 = bitcast <1 x i64> %1 to <2 x i32>
 %3 = bitcast <2 x i32> %2 to <1 x i64>
 %4 = extractelement <1 x i64> %3, i32 0
 ret i64 %4
@@ -854,15 +854,15 @@ define i64 @test70_2(<1 x i64> %a) #0 {
 ;
 entry:
 %0 = bitcast <1 x i64> %a to <2 x i32>
- %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx
- %1 = tail call x86_mmx @llvm.x86.mmx.psrli.d(x86_mmx %mmx_var.i, i32 0) nounwind
- %2 = bitcast x86_mmx %1 to <2 x i32>
+ %mmx_var.i = bitcast <2 x i32> %0 to <1 x i64>
+ %1 = tail call <1 x i64> @llvm.x86.mmx.psrli.d(<1 x i64> %mmx_var.i, i32 0) nounwind
+ %2 = bitcast <1 x i64> %1 to <2 x i32>
 %3 = bitcast <2 x i32> %2 to <1 x i64>
 %4 = extractelement <1 x i64> %3, i32 0
 ret i64 %4
 }

-declare x86_mmx @llvm.x86.mmx.psrli.w(x86_mmx, i32) nounwind readnone
+declare <1 x i64> @llvm.x86.mmx.psrli.w(<1 x i64>, i32) nounwind readnone

 define i64 @test69(<1 x i64> %a) #0 {
 ; CHECK-LABEL: define i64 @test69(
@@ -888,15 +888,15 @@ define i64 @test69(<1 x i64> %a) #0 {
 ;
 entry:
 %0 = bitcast <1 x i64> %a to <4 x i16>
- %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx
- %1 = tail call x86_mmx @llvm.x86.mmx.psrli.w(x86_mmx %mmx_var.i, i32 3) nounwind
- %2 = bitcast x86_mmx %1 to <4 x i16>
+ %mmx_var.i = bitcast <4 x i16> %0 to <1 x i64>
+ %1 = tail call <1 x i64> @llvm.x86.mmx.psrli.w(<1 x i64> %mmx_var.i, i32 3) nounwind
+ %2 = bitcast <1 x i64> %1 to <4 x i16>
 %3 = bitcast <4 x i16> %2 to <1 x i64>
 %4 = extractelement <1 x i64> %3, i32 0
 ret i64 %4
 }

-declare x86_mmx @llvm.x86.mmx.pslli.q(x86_mmx, i32) nounwind readnone
+declare <1 x i64> @llvm.x86.mmx.pslli.q(<1 x i64>, i32) nounwind readnone

 define i64 @test68(<1 x i64> %a) #0 {
 ; CHECK-LABEL: define i64 @test68(
@@ -918,13 +918,13 @@ define i64 @test68(<1 x i64> %a) #0 {
 ;
 entry:
 %0 = extractelement <1 x i64> %a, i32 0
- %mmx_var.i = bitcast i64 %0 to x86_mmx
- %1 = tail call x86_mmx @llvm.x86.mmx.pslli.q(x86_mmx %mmx_var.i, i32 3) nounwind
- %2 = bitcast x86_mmx %1 to i64
+ %mmx_var.i = bitcast i64 %0 to <1 x i64>
+ %1 = tail call <1 x i64> @llvm.x86.mmx.pslli.q(<1 x i64> %mmx_var.i, i32 3) nounwind
+ %2 = bitcast <1 x i64> %1 to i64
 ret i64 %2
 }

-declare x86_mmx @llvm.x86.mmx.pslli.d(x86_mmx, i32) nounwind readnone
+declare <1 x i64> @llvm.x86.mmx.pslli.d(<1 x i64>, i32) nounwind readnone

 define i64 @test67(<1 x i64> %a) #0 {
 ; CHECK-LABEL: define i64 @test67(
@@ -950,15 +950,15 @@ define i64 @test67(<1 x i64> %a) #0 {
 ;
 entry:
 %0 = bitcast <1 x i64> %a to <2 x i32>
- %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx
- %1 = tail call x86_mmx @llvm.x86.mmx.pslli.d(x86_mmx %mmx_var.i, i32 3) nounwind
- %2 = bitcast x86_mmx %1 to <2 x i32>
+ %mmx_var.i = bitcast <2 x i32> %0 to <1 x i64>
+ %1 = tail call <1 x i64> @llvm.x86.mmx.pslli.d(<1 x i64> %mmx_var.i, i32 3) nounwind
+ %2 = bitcast <1 x i64> %1 to <2 x i32>
 %3 = bitcast <2 x i32> %2 to <1 x i64>
 %4 = extractelement <1 x i64> %3, i32 0
 ret i64 %4
 }

-declare x86_mmx @llvm.x86.mmx.pslli.w(x86_mmx, i32) nounwind readnone
+declare <1 x i64> @llvm.x86.mmx.pslli.w(<1 x i64>, i32) nounwind readnone

 define i64 @test66(<1 x i64> %a) #0 {
 ; CHECK-LABEL: define i64 @test66(
@@ -984,9 +984,9 @@ define i64 @test66(<1 x i64> %a) #0 {
 ;
 entry:
 %0 = bitcast <1 x i64> %a to <4 x i16>
- %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx
- %1 = tail call x86_mmx @llvm.x86.mmx.pslli.w(x86_mmx %mmx_var.i, i32 3) nounwind
- %2 = bitcast x86_mmx %1 to <4 x i16>
+ %mmx_var.i = bitcast <4 x i16> %0 to <1 x i64>
+ %1 = tail call <1 x i64> @llvm.x86.mmx.pslli.w(<1 x i64> %mmx_var.i, i32 3) nounwind
+ %2 = bitcast <1 x i64> %1 to <4 x i16>
 %3 = bitcast <4 x i16> %2 to <1 x i64>
 %4 = extractelement <1 x i64> %3, i32 0
 ret i64 %4
@@ -1016,15 +1016,15 @@ define i64 @test66_2(<1 x i64> %a) #0 {
 ;
 entry:
 %0 = bitcast <1 x i64> %a to <4 x i16>
- %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx
- %1 = tail call x86_mmx @llvm.x86.mmx.pslli.w(x86_mmx %mmx_var.i, i32 0) nounwind
- %2 = bitcast x86_mmx %1 to <4 x i16>
+ %mmx_var.i = bitcast <4 x i16> %0 to <1 x i64>
+ %1 = tail call <1 x i64> @llvm.x86.mmx.pslli.w(<1 x i64> %mmx_var.i, i32 0) nounwind
+ %2 = bitcast <1 x i64> %1 to <4 x i16>
 %3 = bitcast <4 x i16> %2 to <1 x i64>
 %4 = extractelement <1 x i64> %3, i32 0
 ret i64 %4
 }

-declare x86_mmx @llvm.x86.mmx.psra.d(x86_mmx, x86_mmx) nounwind readnone
+declare <1 x i64> @llvm.x86.mmx.psra.d(<1 x i64>, <1 x i64>) nounwind readnone

 define i64 @test65(<1 x i64> %a, <1 x i64> %b) #0 {
 ; CHECK-LABEL: define i64 @test65(
@@ -1059,17 +1059,17 @@ define i64 @test65(<1 x i64> %a, <1 x i64> %b) #0 {
 ;
 entry:
 %0 = bitcast <1 x i64> %a to <2 x i32>
- %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx
+ %mmx_var.i = bitcast <2 x i32> %0 to <1 x i64>
 %1 = extractelement <1 x i64> %b, i32 0
- %mmx_var1.i = bitcast i64 %1 to x86_mmx
- %2 = tail call x86_mmx @llvm.x86.mmx.psra.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
- %3 = bitcast x86_mmx %2 to <2 x i32>
+ %mmx_var1.i = bitcast i64 %1 to <1 x i64>
+ %2 = tail call <1 x i64> @llvm.x86.mmx.psra.d(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
+ %3 = bitcast <1 x i64> %2 to <2 x i32>
 %4 = bitcast <2 x i32> %3 to <1 x i64>
 %5 = extractelement <1 x i64> %4, i32 0
 ret i64 %5
 }

-declare x86_mmx @llvm.x86.mmx.psra.w(x86_mmx, x86_mmx) nounwind readnone
+declare <1 x i64> @llvm.x86.mmx.psra.w(<1 x i64>, <1 x i64>) nounwind readnone

 define i64 @test64(<1 x i64> %a, <1 x i64> %b) #0 {
 ; CHECK-LABEL: define i64 @test64(
@@ -1104,17 +1104,17 @@ define i64 @test64(<1 x i64> %a, <1 x i64> %b) #0 {
 ;
 entry:
 %0 = bitcast <1 x i64> %a to <4 x i16>
- %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx
+ %mmx_var.i = bitcast <4 x i16> %0 to <1 x i64>
 %1 = extractelement <1 x i64> %b, i32 0
- %mmx_var1.i = bitcast i64 %1 to x86_mmx
- %2 = tail call x86_mmx @llvm.x86.mmx.psra.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
- %3 = bitcast x86_mmx %2 to <4 x i16>
+ %mmx_var1.i = bitcast i64 %1 to <1 x i64>
+ %2 = tail call <1 x i64> @llvm.x86.mmx.psra.w(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
+ %3 = bitcast <1 x i64> %2 to <4 x i16>
 %4 = bitcast <4 x i16> %3 to <1 x i64>
 %5 = extractelement <1 x i64> %4, i32 0
 ret i64 %5
 }

-declare x86_mmx @llvm.x86.mmx.psrl.q(x86_mmx, x86_mmx) nounwind readnone
+declare <1 x i64> @llvm.x86.mmx.psrl.q(<1 x i64>, <1 x i64>) nounwind readnone

 define i64 @test63(<1 x i64> %a, <1 x i64> %b) #0 {
 ; CHECK-LABEL: define i64 @test63(
@@ -1145,15 +1145,15 @@ define i64 @test63(<1 x i64> %a, <1 x i64> %b) #0 {
 ;
 entry:
 %0 = extractelement <1 x i64> %a, i32 0
- %mmx_var.i = bitcast i64 %0 to x86_mmx
+ %mmx_var.i = bitcast i64 %0 to <1 x i64>
 %1 = extractelement <1 x i64> %b, i32 0
- %mmx_var1.i = bitcast i64 %1 to x86_mmx
- %2 = tail call x86_mmx @llvm.x86.mmx.psrl.q(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
- %3 = bitcast x86_mmx %2 to i64
+ %mmx_var1.i = bitcast i64 %1 to <1 x i64>
+ %2 = tail call <1 x i64> @llvm.x86.mmx.psrl.q(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
+ %3 = bitcast <1 x i64> %2 to i64
 ret i64 %3
 }

-declare x86_mmx @llvm.x86.mmx.psrl.d(x86_mmx, x86_mmx) nounwind readnone
+declare <1 x i64> @llvm.x86.mmx.psrl.d(<1 x i64>, <1 x i64>) nounwind readnone

 define i64 @test62(<1 x i64> %a, <1 x i64> %b) #0 {
 ; CHECK-LABEL: define i64 @test62(
@@ -1188,17 +1188,17 @@ define i64 @test62(<1 x i64> %a, <1 x i64> %b) #0 {
 ;
 entry:
 %0 = bitcast <1 x i64> %a to <2 x i32>
- %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx
+ %mmx_var.i = bitcast <2 x i32> %0 to <1 x i64>
 %1 = extractelement <1 x i64> %b, i32 0
- %mmx_var1.i = bitcast i64 %1 to x86_mmx
- %2 = tail call x86_mmx @llvm.x86.mmx.psrl.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
- %3 = bitcast x86_mmx %2 to <2 x i32>
+ %mmx_var1.i = bitcast i64 %1 to <1 x i64>
+ %2 = tail call <1 x i64> @llvm.x86.mmx.psrl.d(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
+ %3 = bitcast <1 x i64> %2 to <2 x i32>
 %4 = bitcast <2 x i32> %3 to <1 x i64>
 %5 = extractelement <1 x i64> %4, i32 0
 ret i64 %5
 }

-declare x86_mmx @llvm.x86.mmx.psrl.w(x86_mmx, x86_mmx) nounwind readnone
+declare <1 x i64> @llvm.x86.mmx.psrl.w(<1 x i64>, <1 x i64>) nounwind readnone

 define i64 @test61(<1 x i64> %a, <1 x i64> %b) #0 {
 ; CHECK-LABEL: define i64 @test61(
@@ -1233,17 +1233,17 @@ define i64 @test61(<1 x i64> %a, <1 x i64> %b) #0 {
 ;
 entry:
 %0 = bitcast <1 x i64> %a to <4 x i16>
- %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx
+ %mmx_var.i = bitcast <4 x i16> %0 to <1 x i64>
 %1 = extractelement <1 x i64> %b, i32 0
- %mmx_var1.i = bitcast i64 %1 to x86_mmx
- %2 = tail call x86_mmx @llvm.x86.mmx.psrl.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
- %3 = bitcast x86_mmx %2 to <4 x i16>
+ %mmx_var1.i = bitcast i64 %1 to <1 x i64>
+ %2 = tail call <1 x i64> @llvm.x86.mmx.psrl.w(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
+ %3 = bitcast <1 x i64> %2 to <4 x i16>
 %4 = bitcast <4 x i16> %3 to <1 x i64>
 %5 = extractelement <1 x i64> %4, i32 0
 ret i64 %5
 }

-declare x86_mmx @llvm.x86.mmx.psll.q(x86_mmx, x86_mmx) nounwind readnone
+declare <1 x i64> @llvm.x86.mmx.psll.q(<1 x i64>, <1 x i64>) nounwind readnone

 define i64 @test60(<1 x i64> %a, <1 x i64> %b) #0 {
 ; CHECK-LABEL: define i64 @test60(
@@ -1274,15 +1274,15 @@ define i64 @test60(<1 x i64> %a, <1 x i64> %b) #0 {
 ;
 entry:
 %0 = extractelement <1 x i64> %a, i32 0
- %mmx_var.i = bitcast i64 %0 to x86_mmx
+ %mmx_var.i = bitcast i64 %0 to <1 x i64>
 %1 = extractelement <1 x i64> %b, i32 0
- %mmx_var1.i = bitcast i64 %1 to x86_mmx
- %2 = tail call x86_mmx @llvm.x86.mmx.psll.q(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
- %3 = bitcast x86_mmx %2 to i64
+ %mmx_var1.i = bitcast i64 %1 to <1 x i64>
+ %2 = tail call <1 x i64> @llvm.x86.mmx.psll.q(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
+ %3 = bitcast <1 x i64> %2 to i64
 ret i64 %3
 }

-declare x86_mmx @llvm.x86.mmx.psll.d(x86_mmx, x86_mmx) nounwind readnone
+declare <1 x i64> @llvm.x86.mmx.psll.d(<1 x i64>, <1 x i64>) nounwind readnone

 define i64 @test59(<1 x i64> %a, <1 x i64> %b) #0 {
 ; CHECK-LABEL: define i64 @test59(
@@ -1317,17 +1317,17 @@ define i64 @test59(<1 x i64> %a, <1 x i64> %b) #0 {
 ;
 entry:
 %0 = bitcast <1 x i64> %a to <2 x i32>
- %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx
+ %mmx_var.i = bitcast <2 x i32> %0 to <1 x i64>
 %1 = extractelement <1 x i64> %b, i32 0
- %mmx_var1.i = bitcast i64 %1 to x86_mmx
- %2 = tail call x86_mmx @llvm.x86.mmx.psll.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
- %3 = bitcast x86_mmx %2 to <2 x i32>
+ %mmx_var1.i = bitcast i64 %1 to <1 x i64>
+ %2 = tail call <1 x i64> @llvm.x86.mmx.psll.d(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
+ %3 = bitcast <1 x i64> %2 to <2 x i32>
 %4 = bitcast <2 x i32> %3 to <1 x i64>
 %5 = extractelement <1 x i64> %4, i32 0
 ret i64 %5
 }

-declare x86_mmx @llvm.x86.mmx.psll.w(x86_mmx, x86_mmx) nounwind readnone
+declare <1 x i64> @llvm.x86.mmx.psll.w(<1 x i64>, <1 x i64>) nounwind readnone

 define i64 @test58(<1 x i64> %a, <1 x i64> %b) #0 {
 ; CHECK-LABEL: define i64 @test58(
@@ -1362,17 +1362,17 @@ define i64 @test58(<1 x i64> %a, <1 x i64> %b) #0 {
 ;
 entry:
 %0 = bitcast <1 x i64> %a to <4 x i16>
- %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx
+ %mmx_var.i = bitcast <4 x i16> %0 to <1 x i64>
 %1 = extractelement <1 x i64> %b, i32 0
- %mmx_var1.i = bitcast i64 %1 to x86_mmx
- %2 = tail call x86_mmx @llvm.x86.mmx.psll.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
- %3 = bitcast x86_mmx %2 to <4 x i16>
+ %mmx_var1.i = bitcast i64 %1 to <1 x i64>
+ %2 = tail call <1 x i64> @llvm.x86.mmx.psll.w(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
+ %3 = bitcast <1 x i64> %2 to <4 x i16>
 %4 = bitcast <4 x i16> %3 to <1 x i64>
 %5 = extractelement <1 x i64> %4, i32 0
 ret i64 %5
 }

-declare x86_mmx @llvm.x86.mmx.pxor(x86_mmx, x86_mmx) nounwind readnone
+declare <1 x i64> @llvm.x86.mmx.pxor(<1 x i64>, <1 x i64>) nounwind readnone

 define i64 @test56(<1 x i64> %a, <1 x i64> %b) #0 {
 ; CHECK-LABEL: define i64 @test56(
@@ -1403,16 +1403,16 @@ define i64 @test56(<1 x i64> %a, <1 x i64> %b) #0 {
 entry:
 %0 = bitcast <1 x i64> %b to <2 x i32>
 %1 = bitcast <1 x i64> %a to <2 x i32>
- %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
- %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
- %2 = tail call x86_mmx @llvm.x86.mmx.pxor(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
- %3 = bitcast x86_mmx %2 to <2 x i32>
+ %mmx_var.i = bitcast <2 x i32> %1 to <1 x i64>
+ %mmx_var1.i = bitcast <2 x i32> %0 to <1 x i64>
+ %2 = tail call <1 x i64> @llvm.x86.mmx.pxor(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
+ %3 = bitcast <1 x i64> %2 to <2 x i32>
 %4 = bitcast <2 x i32> %3 to <1 x i64>
 %5 = extractelement <1 x i64> %4, i32 0
 ret i64 %5
 }

-declare x86_mmx @llvm.x86.mmx.por(x86_mmx, x86_mmx) nounwind readnone
+declare <1 x i64> @llvm.x86.mmx.por(<1 x i64>, <1 x i64>) nounwind readnone

 define i64 @test55(<1 x i64> %a, <1 x i64> %b) #0 {
 ; CHECK-LABEL: define i64 @test55(
@@ -1443,16 +1443,16 @@ define i64 @test55(<1 x i64> %a, <1 x i64> %b) #0 {
 entry:
 %0 = bitcast <1 x i64> %b to <2 x i32>
 %1 = bitcast <1 x i64> %a to <2 x i32>
- %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
- %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
- %2 = tail call x86_mmx @llvm.x86.mmx.por(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
- %3 = bitcast x86_mmx %2 to <2 x i32>
+ %mmx_var.i = bitcast <2 x i32> %1 to <1 x i64>
+ %mmx_var1.i = bitcast <2 x i32> %0 to <1 x i64>
+ %2 = tail call <1 x i64> @llvm.x86.mmx.por(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
+ %3 = bitcast <1 x i64> %2 to <2 x i32>
 %4 = bitcast <2 x i32> %3 to <1 x i64>
 %5 = extractelement <1 x i64> %4, i32 0
 ret i64 %5
 }

-declare x86_mmx @llvm.x86.mmx.pandn(x86_mmx, x86_mmx) nounwind readnone
+declare <1 x i64> @llvm.x86.mmx.pandn(<1 x i64>, <1 x i64>) nounwind readnone

 define i64 @test54(<1 x i64> %a, <1 x i64> %b) #0 {
 ; CHECK-LABEL: define i64 @test54(
@@ -1483,16 +1483,16 @@ define i64 @test54(<1 x i64> %a, <1 x i64> %b) #0 {
 entry:
 %0 = bitcast <1 x i64> %b to <2 x i32>
 %1 = bitcast <1 x i64> %a to <2 x i32>
- %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
- %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
- %2 = tail call x86_mmx @llvm.x86.mmx.pandn(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
- %3 = bitcast x86_mmx %2 to <2 x i32>
+ %mmx_var.i = bitcast <2 x i32> %1 to <1 x i64>
+ %mmx_var1.i = bitcast <2 x i32> %0 to <1 x i64>
+ %2 = tail call <1 x i64> @llvm.x86.mmx.pandn(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
+ %3 = bitcast <1 x i64> %2 to <2 x i32>
 %4 = bitcast <2 x i32> %3 to <1 x i64>
 %5 = extractelement <1 x i64> %4, i32 0
 ret i64 %5
 }

-declare x86_mmx @llvm.x86.mmx.pand(x86_mmx, x86_mmx) nounwind readnone
+declare <1 x i64> @llvm.x86.mmx.pand(<1 x i64>, <1 x i64>) nounwind readnone

 define i64 @test53(<1 x i64> %a, <1 x i64> %b) #0 {
 ; CHECK-LABEL: define i64 @test53(
@@ -1523,16 +1523,16 @@ define i64 @test53(<1 x i64> %a, <1 x i64> %b) #0 {
 entry:
 %0 = bitcast <1 x i64> %b to <2 x i32>
 %1 = bitcast <1 x i64> %a to <2 x i32>
- %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
- %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
- %2 = tail call x86_mmx @llvm.x86.mmx.pand(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
- %3 = bitcast x86_mmx %2 to <2 x i32>
+ %mmx_var.i = bitcast <2 x i32> %1 to <1 x i64>
+ %mmx_var1.i = bitcast <2 x i32> %0 to <1 x i64>
+ %2 = tail call <1 x i64> @llvm.x86.mmx.pand(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
+ %3 = bitcast <1 x i64> %2 to <2 x i32>
 %4 = bitcast <2 x i32> %3 to <1 x i64>
 %5 = extractelement <1 x i64> %4, i32 0
 ret i64 %5
 }

-declare x86_mmx @llvm.x86.mmx.pmull.w(x86_mmx, x86_mmx) nounwind readnone
+declare <1 x i64> @llvm.x86.mmx.pmull.w(<1 x i64>, <1 x i64>) nounwind readnone

 define i64 @test52(<1 x i64> %a, <1 x i64> %b) #0 {
 ; CHECK-LABEL: define i64 @test52(
@@ -1563,10 +1563,10 @@ define i64 @test52(<1 x i64> %a, <1 x i64> %b) #0 {
 entry:
 %0 = bitcast <1 x i64> %b to <4 x i16>
 %1 = bitcast <1 x i64> %a to <4 x i16>
- %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
- %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
- %2 = tail call x86_mmx @llvm.x86.mmx.pmull.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
- %3 = bitcast x86_mmx %2 to <4 x i16>
+ %mmx_var.i = bitcast <4 x i16> %1 to <1 x i64>
+ %mmx_var1.i = bitcast <4 x i16> %0 to <1 x i64>
+ %2 = tail call <1 x i64> @llvm.x86.mmx.pmull.w(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
+ %3 = bitcast <1 x i64> %2 to <4 x i16>
 %4 = bitcast <4 x i16> %3 to <1 x i64>
 %5 = extractelement <1 x i64> %4, i32 0
 ret i64 %5
@@ -1601,16 +1601,16 @@ define i64 @test51(<1 x i64> %a, <1 x i64> %b) #0 {
 entry:
 %0 = bitcast <1 x i64> %b to <4 x i16>
 %1 = bitcast <1 x i64> %a to <4 x i16>
- %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
- %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
- %2 = tail call x86_mmx @llvm.x86.mmx.pmull.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
- %3 = bitcast x86_mmx %2 to <4 x i16>
+ %mmx_var.i = bitcast <4 x i16> %1 to <1 x i64>
+ %mmx_var1.i = bitcast <4 x i16> %0 to <1 x i64>
+ %2 = tail call <1 x i64> @llvm.x86.mmx.pmull.w(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
+ %3 = bitcast <1 x i64> %2 to <4 x i16>
 %4 = bitcast <4 x i16> %3 to <1 x i64>
 %5 = extractelement <1 x i64> %4, i32 0
 ret i64 %5
 }

-declare x86_mmx @llvm.x86.mmx.pmulh.w(x86_mmx, x86_mmx) nounwind readnone
+declare <1 x i64> @llvm.x86.mmx.pmulh.w(<1 x i64>, <1 x i64>) nounwind readnone

 define i64 @test50(<1 x i64> %a, <1 x i64> %b) #0 {
 ; CHECK-LABEL: define i64 @test50(
@@ -1641,16 +1641,16 @@ define i64 @test50(<1 x i64> %a, <1 x i64> %b) #0 {
 entry:
 %0 = bitcast <1 x i64> %b to <4 x i16>
 %1 = bitcast <1 x i64> %a to <4 x i16>
- %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
- %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
- %2 = tail call x86_mmx @llvm.x86.mmx.pmulh.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
- %3 = bitcast x86_mmx %2 to <4 x i16>
+ %mmx_var.i = bitcast <4 x i16> %1 to <1 x i64>
+ %mmx_var1.i = bitcast <4 x i16> %0 to <1 x i64>
+ %2 = tail call <1 x i64> @llvm.x86.mmx.pmulh.w(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
+ %3 = bitcast <1 x i64> %2 to <4 x i16>
 %4 = bitcast <4 x i16> %3 to <1 x i64>
 %5 = extractelement <1 x i64> %4, i32 0
 ret i64 %5
 }

-declare x86_mmx @llvm.x86.mmx.pmadd.wd(x86_mmx, x86_mmx) nounwind readnone
+declare <1 x i64> @llvm.x86.mmx.pmadd.wd(<1 x i64>, <1 x i64>) nounwind readnone

 define i64 @test49(<1 x i64> %a, <1 x i64> %b) #0 {
 ; CHECK-LABEL: define i64 @test49(
@@ -1685,16 +1685,16 @@ define i64 @test49(<1 x i64> %a, <1 x i64> %b) #0 {
 entry:
 %0 = bitcast <1 x i64> %b to <4 x i16>
 %1 = bitcast <1 x i64> %a to <4 x i16>
- %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
- %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
- %2 = tail call x86_mmx @llvm.x86.mmx.pmadd.wd(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
- %3 = bitcast x86_mmx %2 to <2 x i32>
+ %mmx_var.i = bitcast <4 x i16> %1 to <1 x i64>
+ %mmx_var1.i = bitcast <4 x i16> %0 to <1 x i64>
+ %2 = tail call <1 x i64> @llvm.x86.mmx.pmadd.wd(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
+ %3 = bitcast <1 x i64> %2 to <2 x i32>
 %4 = bitcast <2 x i32> %3 to <1 x i64>
 %5 = extractelement <1 x i64> %4, i32 0
 ret i64 %5
 }

-declare x86_mmx @llvm.x86.mmx.psubus.w(x86_mmx, x86_mmx) nounwind readnone
+declare <1 x i64> @llvm.x86.mmx.psubus.w(<1 x i64>, <1 x i64>) nounwind readnone

 define i64 @test48(<1 x i64> %a, <1 x i64> %b) #0 {
 ; CHECK-LABEL: define i64 @test48(
@@ -1725,16 +1725,16 @@ define i64 @test48(<1 x i64> %a, <1 x i64> %b) #0 {
 entry:
 %0 = bitcast <1 x i64> %b to <4 x i16>
 %1 = bitcast <1 x i64> %a to <4 x i16>
- %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
- %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
- %2 = tail call x86_mmx @llvm.x86.mmx.psubus.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
- %3 = bitcast x86_mmx %2 to <4 x i16>
+ %mmx_var.i = bitcast <4 x i16> %1 to <1 x i64>
+ %mmx_var1.i = bitcast <4 x i16> %0 to <1 x i64>
+ %2 = tail call <1 x i64> @llvm.x86.mmx.psubus.w(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i)
nounwind + %3 = bitcast <1 x i64> %2 to <4 x i16> %4 = bitcast <4 x i16> %3 to <1 x i64> %5 = extractelement <1 x i64> %4, i32 0 ret i64 %5 } -declare x86_mmx @llvm.x86.mmx.psubus.b(x86_mmx, x86_mmx) nounwind readnone +declare <1 x i64> @llvm.x86.mmx.psubus.b(<1 x i64>, <1 x i64>) nounwind readnone define i64 @test47(<1 x i64> %a, <1 x i64> %b) #0 { ; CHECK-LABEL: define i64 @test47( @@ -1765,16 +1765,16 @@ define i64 @test47(<1 x i64> %a, <1 x i64> %b) #0 { entry: %0 = bitcast <1 x i64> %b to <8 x i8> %1 = bitcast <1 x i64> %a to <8 x i8> - %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx - %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx - %2 = tail call x86_mmx @llvm.x86.mmx.psubus.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind - %3 = bitcast x86_mmx %2 to <8 x i8> + %mmx_var.i = bitcast <8 x i8> %1 to <1 x i64> + %mmx_var1.i = bitcast <8 x i8> %0 to <1 x i64> + %2 = tail call <1 x i64> @llvm.x86.mmx.psubus.b(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind + %3 = bitcast <1 x i64> %2 to <8 x i8> %4 = bitcast <8 x i8> %3 to <1 x i64> %5 = extractelement <1 x i64> %4, i32 0 ret i64 %5 } -declare x86_mmx @llvm.x86.mmx.psubs.w(x86_mmx, x86_mmx) nounwind readnone +declare <1 x i64> @llvm.x86.mmx.psubs.w(<1 x i64>, <1 x i64>) nounwind readnone define i64 @test46(<1 x i64> %a, <1 x i64> %b) #0 { ; CHECK-LABEL: define i64 @test46( @@ -1805,16 +1805,16 @@ define i64 @test46(<1 x i64> %a, <1 x i64> %b) #0 { entry: %0 = bitcast <1 x i64> %b to <4 x i16> %1 = bitcast <1 x i64> %a to <4 x i16> - %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx - %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx - %2 = tail call x86_mmx @llvm.x86.mmx.psubs.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind - %3 = bitcast x86_mmx %2 to <4 x i16> + %mmx_var.i = bitcast <4 x i16> %1 to <1 x i64> + %mmx_var1.i = bitcast <4 x i16> %0 to <1 x i64> + %2 = tail call <1 x i64> @llvm.x86.mmx.psubs.w(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind + %3 = bitcast <1 x i64> %2 to <4 x i16> %4 = bitcast <4 x i16> %3 to <1 x i64> %5 = extractelement <1 x i64> %4, i32 0 ret i64 %5 } -declare x86_mmx @llvm.x86.mmx.psubs.b(x86_mmx, x86_mmx) nounwind readnone +declare <1 x i64> @llvm.x86.mmx.psubs.b(<1 x i64>, <1 x i64>) nounwind readnone define i64 @test45(<1 x i64> %a, <1 x i64> %b) #0 { ; CHECK-LABEL: define i64 @test45( @@ -1845,10 +1845,10 @@ define i64 @test45(<1 x i64> %a, <1 x i64> %b) #0 { entry: %0 = bitcast <1 x i64> %b to <8 x i8> %1 = bitcast <1 x i64> %a to <8 x i8> - %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx - %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx - %2 = tail call x86_mmx @llvm.x86.mmx.psubs.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind - %3 = bitcast x86_mmx %2 to <8 x i8> + %mmx_var.i = bitcast <8 x i8> %1 to <1 x i64> + %mmx_var1.i = bitcast <8 x i8> %0 to <1 x i64> + %2 = tail call <1 x i64> @llvm.x86.mmx.psubs.b(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind + %3 = bitcast <1 x i64> %2 to <8 x i8> %4 = bitcast <8 x i8> %3 to <1 x i64> %5 = extractelement <1 x i64> %4, i32 0 ret i64 %5 @@ -1878,17 +1878,17 @@ define i64 @test44(<1 x i64> %a, <1 x i64> %b) #0 { ; entry: %0 = extractelement <1 x i64> %a, i32 0 - %mmx_var = bitcast i64 %0 to x86_mmx + %mmx_var = bitcast i64 %0 to <1 x i64> %1 = extractelement <1 x i64> %b, i32 0 - %mmx_var1 = bitcast i64 %1 to x86_mmx - %2 = tail call x86_mmx @llvm.x86.mmx.psub.q(x86_mmx %mmx_var, x86_mmx %mmx_var1) - %3 = bitcast x86_mmx %2 to i64 + %mmx_var1 = bitcast i64 %1 to <1 x i64> + %2 = tail call <1 x i64> @llvm.x86.mmx.psub.q(<1 x i64> %mmx_var, <1 x 
i64> %mmx_var1) + %3 = bitcast <1 x i64> %2 to i64 ret i64 %3 } -declare x86_mmx @llvm.x86.mmx.psub.q(x86_mmx, x86_mmx) nounwind readnone +declare <1 x i64> @llvm.x86.mmx.psub.q(<1 x i64>, <1 x i64>) nounwind readnone -declare x86_mmx @llvm.x86.mmx.psub.d(x86_mmx, x86_mmx) nounwind readnone +declare <1 x i64> @llvm.x86.mmx.psub.d(<1 x i64>, <1 x i64>) nounwind readnone define i64 @test43(<1 x i64> %a, <1 x i64> %b) #0 { ; CHECK-LABEL: define i64 @test43( @@ -1919,16 +1919,16 @@ define i64 @test43(<1 x i64> %a, <1 x i64> %b) #0 { entry: %0 = bitcast <1 x i64> %b to <2 x i32> %1 = bitcast <1 x i64> %a to <2 x i32> - %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx - %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx - %2 = tail call x86_mmx @llvm.x86.mmx.psub.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind - %3 = bitcast x86_mmx %2 to <2 x i32> + %mmx_var.i = bitcast <2 x i32> %1 to <1 x i64> + %mmx_var1.i = bitcast <2 x i32> %0 to <1 x i64> + %2 = tail call <1 x i64> @llvm.x86.mmx.psub.d(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind + %3 = bitcast <1 x i64> %2 to <2 x i32> %4 = bitcast <2 x i32> %3 to <1 x i64> %5 = extractelement <1 x i64> %4, i32 0 ret i64 %5 } -declare x86_mmx @llvm.x86.mmx.psub.w(x86_mmx, x86_mmx) nounwind readnone +declare <1 x i64> @llvm.x86.mmx.psub.w(<1 x i64>, <1 x i64>) nounwind readnone define i64 @test42(<1 x i64> %a, <1 x i64> %b) #0 { ; CHECK-LABEL: define i64 @test42( @@ -1959,16 +1959,16 @@ define i64 @test42(<1 x i64> %a, <1 x i64> %b) #0 { entry: %0 = bitcast <1 x i64> %b to <4 x i16> %1 = bitcast <1 x i64> %a to <4 x i16> - %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx - %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx - %2 = tail call x86_mmx @llvm.x86.mmx.psub.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind - %3 = bitcast x86_mmx %2 to <4 x i16> + %mmx_var.i = bitcast <4 x i16> %1 to <1 x i64> + %mmx_var1.i = bitcast <4 x i16> %0 to <1 x i64> + %2 = tail call <1 x i64> @llvm.x86.mmx.psub.w(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind + %3 = bitcast <1 x i64> %2 to <4 x i16> %4 = bitcast <4 x i16> %3 to <1 x i64> %5 = extractelement <1 x i64> %4, i32 0 ret i64 %5 } -declare x86_mmx @llvm.x86.mmx.psub.b(x86_mmx, x86_mmx) nounwind readnone +declare <1 x i64> @llvm.x86.mmx.psub.b(<1 x i64>, <1 x i64>) nounwind readnone define i64 @test41(<1 x i64> %a, <1 x i64> %b) #0 { ; CHECK-LABEL: define i64 @test41( @@ -1999,16 +1999,16 @@ define i64 @test41(<1 x i64> %a, <1 x i64> %b) #0 { entry: %0 = bitcast <1 x i64> %b to <8 x i8> %1 = bitcast <1 x i64> %a to <8 x i8> - %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx - %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx - %2 = tail call x86_mmx @llvm.x86.mmx.psub.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind - %3 = bitcast x86_mmx %2 to <8 x i8> + %mmx_var.i = bitcast <8 x i8> %1 to <1 x i64> + %mmx_var1.i = bitcast <8 x i8> %0 to <1 x i64> + %2 = tail call <1 x i64> @llvm.x86.mmx.psub.b(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind + %3 = bitcast <1 x i64> %2 to <8 x i8> %4 = bitcast <8 x i8> %3 to <1 x i64> %5 = extractelement <1 x i64> %4, i32 0 ret i64 %5 } -declare x86_mmx @llvm.x86.mmx.paddus.w(x86_mmx, x86_mmx) nounwind readnone +declare <1 x i64> @llvm.x86.mmx.paddus.w(<1 x i64>, <1 x i64>) nounwind readnone define i64 @test40(<1 x i64> %a, <1 x i64> %b) #0 { ; CHECK-LABEL: define i64 @test40( @@ -2039,16 +2039,16 @@ define i64 @test40(<1 x i64> %a, <1 x i64> %b) #0 { entry: %0 = bitcast <1 x i64> %b to <4 x i16> %1 = bitcast <1 x i64> %a to <4 x i16> - %mmx_var.i = bitcast <4 x i16> %1 to 
x86_mmx - %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx - %2 = tail call x86_mmx @llvm.x86.mmx.paddus.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind - %3 = bitcast x86_mmx %2 to <4 x i16> + %mmx_var.i = bitcast <4 x i16> %1 to <1 x i64> + %mmx_var1.i = bitcast <4 x i16> %0 to <1 x i64> + %2 = tail call <1 x i64> @llvm.x86.mmx.paddus.w(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind + %3 = bitcast <1 x i64> %2 to <4 x i16> %4 = bitcast <4 x i16> %3 to <1 x i64> %5 = extractelement <1 x i64> %4, i32 0 ret i64 %5 } -declare x86_mmx @llvm.x86.mmx.paddus.b(x86_mmx, x86_mmx) nounwind readnone +declare <1 x i64> @llvm.x86.mmx.paddus.b(<1 x i64>, <1 x i64>) nounwind readnone define i64 @test39(<1 x i64> %a, <1 x i64> %b) #0 { ; CHECK-LABEL: define i64 @test39( @@ -2079,16 +2079,16 @@ define i64 @test39(<1 x i64> %a, <1 x i64> %b) #0 { entry: %0 = bitcast <1 x i64> %b to <8 x i8> %1 = bitcast <1 x i64> %a to <8 x i8> - %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx - %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx - %2 = tail call x86_mmx @llvm.x86.mmx.paddus.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind - %3 = bitcast x86_mmx %2 to <8 x i8> + %mmx_var.i = bitcast <8 x i8> %1 to <1 x i64> + %mmx_var1.i = bitcast <8 x i8> %0 to <1 x i64> + %2 = tail call <1 x i64> @llvm.x86.mmx.paddus.b(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind + %3 = bitcast <1 x i64> %2 to <8 x i8> %4 = bitcast <8 x i8> %3 to <1 x i64> %5 = extractelement <1 x i64> %4, i32 0 ret i64 %5 } -declare x86_mmx @llvm.x86.mmx.padds.w(x86_mmx, x86_mmx) nounwind readnone +declare <1 x i64> @llvm.x86.mmx.padds.w(<1 x i64>, <1 x i64>) nounwind readnone define i64 @test38(<1 x i64> %a, <1 x i64> %b) #0 { ; CHECK-LABEL: define i64 @test38( @@ -2119,16 +2119,16 @@ define i64 @test38(<1 x i64> %a, <1 x i64> %b) #0 { entry: %0 = bitcast <1 x i64> %b to <4 x i16> %1 = bitcast <1 x i64> %a to <4 x i16> - %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx - %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx - %2 = tail call x86_mmx @llvm.x86.mmx.padds.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind - %3 = bitcast x86_mmx %2 to <4 x i16> + %mmx_var.i = bitcast <4 x i16> %1 to <1 x i64> + %mmx_var1.i = bitcast <4 x i16> %0 to <1 x i64> + %2 = tail call <1 x i64> @llvm.x86.mmx.padds.w(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind + %3 = bitcast <1 x i64> %2 to <4 x i16> %4 = bitcast <4 x i16> %3 to <1 x i64> %5 = extractelement <1 x i64> %4, i32 0 ret i64 %5 } -declare x86_mmx @llvm.x86.mmx.padds.b(x86_mmx, x86_mmx) nounwind readnone +declare <1 x i64> @llvm.x86.mmx.padds.b(<1 x i64>, <1 x i64>) nounwind readnone define i64 @test37(<1 x i64> %a, <1 x i64> %b) #0 { ; CHECK-LABEL: define i64 @test37( @@ -2159,16 +2159,16 @@ define i64 @test37(<1 x i64> %a, <1 x i64> %b) #0 { entry: %0 = bitcast <1 x i64> %b to <8 x i8> %1 = bitcast <1 x i64> %a to <8 x i8> - %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx - %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx - %2 = tail call x86_mmx @llvm.x86.mmx.padds.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind - %3 = bitcast x86_mmx %2 to <8 x i8> + %mmx_var.i = bitcast <8 x i8> %1 to <1 x i64> + %mmx_var1.i = bitcast <8 x i8> %0 to <1 x i64> + %2 = tail call <1 x i64> @llvm.x86.mmx.padds.b(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind + %3 = bitcast <1 x i64> %2 to <8 x i8> %4 = bitcast <8 x i8> %3 to <1 x i64> %5 = extractelement <1 x i64> %4, i32 0 ret i64 %5 } -declare x86_mmx @llvm.x86.mmx.padd.q(x86_mmx, x86_mmx) nounwind readnone +declare <1 x i64> @llvm.x86.mmx.padd.q(<1 x i64>, <1 x 
i64>) nounwind readnone define i64 @test36(<1 x i64> %a, <1 x i64> %b) #0 { ; CHECK-LABEL: define i64 @test36( @@ -2194,15 +2194,15 @@ define i64 @test36(<1 x i64> %a, <1 x i64> %b) #0 { ; entry: %0 = extractelement <1 x i64> %a, i32 0 - %mmx_var = bitcast i64 %0 to x86_mmx + %mmx_var = bitcast i64 %0 to <1 x i64> %1 = extractelement <1 x i64> %b, i32 0 - %mmx_var1 = bitcast i64 %1 to x86_mmx - %2 = tail call x86_mmx @llvm.x86.mmx.padd.q(x86_mmx %mmx_var, x86_mmx %mmx_var1) - %3 = bitcast x86_mmx %2 to i64 + %mmx_var1 = bitcast i64 %1 to <1 x i64> + %2 = tail call <1 x i64> @llvm.x86.mmx.padd.q(<1 x i64> %mmx_var, <1 x i64> %mmx_var1) + %3 = bitcast <1 x i64> %2 to i64 ret i64 %3 } -declare x86_mmx @llvm.x86.mmx.padd.d(x86_mmx, x86_mmx) nounwind readnone +declare <1 x i64> @llvm.x86.mmx.padd.d(<1 x i64>, <1 x i64>) nounwind readnone define i64 @test35(<1 x i64> %a, <1 x i64> %b) #0 { ; CHECK-LABEL: define i64 @test35( @@ -2233,16 +2233,16 @@ define i64 @test35(<1 x i64> %a, <1 x i64> %b) #0 { entry: %0 = bitcast <1 x i64> %b to <2 x i32> %1 = bitcast <1 x i64> %a to <2 x i32> - %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx - %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx - %2 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind - %3 = bitcast x86_mmx %2 to <2 x i32> + %mmx_var.i = bitcast <2 x i32> %1 to <1 x i64> + %mmx_var1.i = bitcast <2 x i32> %0 to <1 x i64> + %2 = tail call <1 x i64> @llvm.x86.mmx.padd.d(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind + %3 = bitcast <1 x i64> %2 to <2 x i32> %4 = bitcast <2 x i32> %3 to <1 x i64> %5 = extractelement <1 x i64> %4, i32 0 ret i64 %5 } -declare x86_mmx @llvm.x86.mmx.padd.w(x86_mmx, x86_mmx) nounwind readnone +declare <1 x i64> @llvm.x86.mmx.padd.w(<1 x i64>, <1 x i64>) nounwind readnone define i64 @test34(<1 x i64> %a, <1 x i64> %b) #0 { ; CHECK-LABEL: define i64 @test34( @@ -2273,16 +2273,16 @@ define i64 @test34(<1 x i64> %a, <1 x i64> %b) #0 { entry: %0 = bitcast <1 x i64> %b to <4 x i16> %1 = bitcast <1 x i64> %a to <4 x i16> - %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx - %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx - %2 = tail call x86_mmx @llvm.x86.mmx.padd.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind - %3 = bitcast x86_mmx %2 to <4 x i16> + %mmx_var.i = bitcast <4 x i16> %1 to <1 x i64> + %mmx_var1.i = bitcast <4 x i16> %0 to <1 x i64> + %2 = tail call <1 x i64> @llvm.x86.mmx.padd.w(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind + %3 = bitcast <1 x i64> %2 to <4 x i16> %4 = bitcast <4 x i16> %3 to <1 x i64> %5 = extractelement <1 x i64> %4, i32 0 ret i64 %5 } -declare x86_mmx @llvm.x86.mmx.padd.b(x86_mmx, x86_mmx) nounwind readnone +declare <1 x i64> @llvm.x86.mmx.padd.b(<1 x i64>, <1 x i64>) nounwind readnone define i64 @test33(<1 x i64> %a, <1 x i64> %b) #0 { ; CHECK-LABEL: define i64 @test33( @@ -2313,16 +2313,16 @@ define i64 @test33(<1 x i64> %a, <1 x i64> %b) #0 { entry: %0 = bitcast <1 x i64> %b to <8 x i8> %1 = bitcast <1 x i64> %a to <8 x i8> - %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx - %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx - %2 = tail call x86_mmx @llvm.x86.mmx.padd.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind - %3 = bitcast x86_mmx %2 to <8 x i8> + %mmx_var.i = bitcast <8 x i8> %1 to <1 x i64> + %mmx_var1.i = bitcast <8 x i8> %0 to <1 x i64> + %2 = tail call <1 x i64> @llvm.x86.mmx.padd.b(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind + %3 = bitcast <1 x i64> %2 to <8 x i8> %4 = bitcast <8 x i8> %3 to <1 x i64> %5 = extractelement <1 x 
i64> %4, i32 0 ret i64 %5 } -declare x86_mmx @llvm.x86.mmx.psad.bw(x86_mmx, x86_mmx) nounwind readnone +declare <1 x i64> @llvm.x86.mmx.psad.bw(<1 x i64>, <1 x i64>) nounwind readnone define i64 @test32(<1 x i64> %a, <1 x i64> %b) #0 { ; CHECK-LABEL: define i64 @test32( @@ -2354,14 +2354,14 @@ define i64 @test32(<1 x i64> %a, <1 x i64> %b) #0 { entry: %0 = bitcast <1 x i64> %b to <8 x i8> %1 = bitcast <1 x i64> %a to <8 x i8> - %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx - %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx - %2 = tail call x86_mmx @llvm.x86.mmx.psad.bw(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind - %3 = bitcast x86_mmx %2 to i64 + %mmx_var.i = bitcast <8 x i8> %1 to <1 x i64> + %mmx_var1.i = bitcast <8 x i8> %0 to <1 x i64> + %2 = tail call <1 x i64> @llvm.x86.mmx.psad.bw(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind + %3 = bitcast <1 x i64> %2 to i64 ret i64 %3 } -declare x86_mmx @llvm.x86.mmx.pmins.w(x86_mmx, x86_mmx) nounwind readnone +declare <1 x i64> @llvm.x86.mmx.pmins.w(<1 x i64>, <1 x i64>) nounwind readnone define i64 @test31(<1 x i64> %a, <1 x i64> %b) #0 { ; CHECK-LABEL: define i64 @test31( @@ -2392,16 +2392,16 @@ define i64 @test31(<1 x i64> %a, <1 x i64> %b) #0 { entry: %0 = bitcast <1 x i64> %b to <4 x i16> %1 = bitcast <1 x i64> %a to <4 x i16> - %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx - %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx - %2 = tail call x86_mmx @llvm.x86.mmx.pmins.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind - %3 = bitcast x86_mmx %2 to <4 x i16> + %mmx_var.i = bitcast <4 x i16> %1 to <1 x i64> + %mmx_var1.i = bitcast <4 x i16> %0 to <1 x i64> + %2 = tail call <1 x i64> @llvm.x86.mmx.pmins.w(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind + %3 = bitcast <1 x i64> %2 to <4 x i16> %4 = bitcast <4 x i16> %3 to <1 x i64> %5 = extractelement <1 x i64> %4, i32 0 ret i64 %5 } -declare x86_mmx @llvm.x86.mmx.pminu.b(x86_mmx, x86_mmx) nounwind readnone +declare <1 x i64> @llvm.x86.mmx.pminu.b(<1 x i64>, <1 x i64>) nounwind readnone define i64 @test30(<1 x i64> %a, <1 x i64> %b) #0 { ; CHECK-LABEL: define i64 @test30( @@ -2432,16 +2432,16 @@ define i64 @test30(<1 x i64> %a, <1 x i64> %b) #0 { entry: %0 = bitcast <1 x i64> %b to <8 x i8> %1 = bitcast <1 x i64> %a to <8 x i8> - %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx - %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx - %2 = tail call x86_mmx @llvm.x86.mmx.pminu.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind - %3 = bitcast x86_mmx %2 to <8 x i8> + %mmx_var.i = bitcast <8 x i8> %1 to <1 x i64> + %mmx_var1.i = bitcast <8 x i8> %0 to <1 x i64> + %2 = tail call <1 x i64> @llvm.x86.mmx.pminu.b(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind + %3 = bitcast <1 x i64> %2 to <8 x i8> %4 = bitcast <8 x i8> %3 to <1 x i64> %5 = extractelement <1 x i64> %4, i32 0 ret i64 %5 } -declare x86_mmx @llvm.x86.mmx.pmaxs.w(x86_mmx, x86_mmx) nounwind readnone +declare <1 x i64> @llvm.x86.mmx.pmaxs.w(<1 x i64>, <1 x i64>) nounwind readnone define i64 @test29(<1 x i64> %a, <1 x i64> %b) #0 { ; CHECK-LABEL: define i64 @test29( @@ -2472,16 +2472,16 @@ define i64 @test29(<1 x i64> %a, <1 x i64> %b) #0 { entry: %0 = bitcast <1 x i64> %b to <4 x i16> %1 = bitcast <1 x i64> %a to <4 x i16> - %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx - %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx - %2 = tail call x86_mmx @llvm.x86.mmx.pmaxs.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind - %3 = bitcast x86_mmx %2 to <4 x i16> + %mmx_var.i = bitcast <4 x i16> %1 to <1 x i64> + %mmx_var1.i = bitcast <4 x i16> %0 
to <1 x i64> + %2 = tail call <1 x i64> @llvm.x86.mmx.pmaxs.w(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind + %3 = bitcast <1 x i64> %2 to <4 x i16> %4 = bitcast <4 x i16> %3 to <1 x i64> %5 = extractelement <1 x i64> %4, i32 0 ret i64 %5 } -declare x86_mmx @llvm.x86.mmx.pmaxu.b(x86_mmx, x86_mmx) nounwind readnone +declare <1 x i64> @llvm.x86.mmx.pmaxu.b(<1 x i64>, <1 x i64>) nounwind readnone define i64 @test28(<1 x i64> %a, <1 x i64> %b) #0 { ; CHECK-LABEL: define i64 @test28( @@ -2512,16 +2512,16 @@ define i64 @test28(<1 x i64> %a, <1 x i64> %b) #0 { entry: %0 = bitcast <1 x i64> %b to <8 x i8> %1 = bitcast <1 x i64> %a to <8 x i8> - %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx - %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx - %2 = tail call x86_mmx @llvm.x86.mmx.pmaxu.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind - %3 = bitcast x86_mmx %2 to <8 x i8> + %mmx_var.i = bitcast <8 x i8> %1 to <1 x i64> + %mmx_var1.i = bitcast <8 x i8> %0 to <1 x i64> + %2 = tail call <1 x i64> @llvm.x86.mmx.pmaxu.b(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind + %3 = bitcast <1 x i64> %2 to <8 x i8> %4 = bitcast <8 x i8> %3 to <1 x i64> %5 = extractelement <1 x i64> %4, i32 0 ret i64 %5 } -declare x86_mmx @llvm.x86.mmx.pavg.w(x86_mmx, x86_mmx) nounwind readnone +declare <1 x i64> @llvm.x86.mmx.pavg.w(<1 x i64>, <1 x i64>) nounwind readnone define i64 @test27(<1 x i64> %a, <1 x i64> %b) #0 { ; CHECK-LABEL: define i64 @test27( @@ -2552,16 +2552,16 @@ define i64 @test27(<1 x i64> %a, <1 x i64> %b) #0 { entry: %0 = bitcast <1 x i64> %b to <4 x i16> %1 = bitcast <1 x i64> %a to <4 x i16> - %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx - %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx - %2 = tail call x86_mmx @llvm.x86.mmx.pavg.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind - %3 = bitcast x86_mmx %2 to <4 x i16> + %mmx_var.i = bitcast <4 x i16> %1 to <1 x i64> + %mmx_var1.i = bitcast <4 x i16> %0 to <1 x i64> + %2 = tail call <1 x i64> @llvm.x86.mmx.pavg.w(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind + %3 = bitcast <1 x i64> %2 to <4 x i16> %4 = bitcast <4 x i16> %3 to <1 x i64> %5 = extractelement <1 x i64> %4, i32 0 ret i64 %5 } -declare x86_mmx @llvm.x86.mmx.pavg.b(x86_mmx, x86_mmx) nounwind readnone +declare <1 x i64> @llvm.x86.mmx.pavg.b(<1 x i64>, <1 x i64>) nounwind readnone define i64 @test26(<1 x i64> %a, <1 x i64> %b) #0 { ; CHECK-LABEL: define i64 @test26( @@ -2592,16 +2592,16 @@ define i64 @test26(<1 x i64> %a, <1 x i64> %b) #0 { entry: %0 = bitcast <1 x i64> %b to <8 x i8> %1 = bitcast <1 x i64> %a to <8 x i8> - %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx - %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx - %2 = tail call x86_mmx @llvm.x86.mmx.pavg.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind - %3 = bitcast x86_mmx %2 to <8 x i8> + %mmx_var.i = bitcast <8 x i8> %1 to <1 x i64> + %mmx_var1.i = bitcast <8 x i8> %0 to <1 x i64> + %2 = tail call <1 x i64> @llvm.x86.mmx.pavg.b(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind + %3 = bitcast <1 x i64> %2 to <8 x i8> %4 = bitcast <8 x i8> %3 to <1 x i64> %5 = extractelement <1 x i64> %4, i32 0 ret i64 %5 } -declare void @llvm.x86.mmx.movnt.dq(ptr, x86_mmx) nounwind +declare void @llvm.x86.mmx.movnt.dq(ptr, <1 x i64>) nounwind define void @test25(ptr %p, <1 x i64> %a) nounwind optsize ssp #0 { ; CHECK-LABEL: define void @test25( @@ -2629,12 +2629,12 @@ define void @test25(ptr %p, <1 x i64> %a) nounwind optsize ssp #0 { ; entry: %0 = extractelement <1 x i64> %a, i32 0 - %mmx_var.i = bitcast i64 %0 to x86_mmx - tail call 
void @llvm.x86.mmx.movnt.dq(ptr %p, x86_mmx %mmx_var.i) nounwind + %mmx_var.i = bitcast i64 %0 to <1 x i64> + tail call void @llvm.x86.mmx.movnt.dq(ptr %p, <1 x i64> %mmx_var.i) nounwind ret void } -declare i32 @llvm.x86.mmx.pmovmskb(x86_mmx) nounwind readnone +declare i32 @llvm.x86.mmx.pmovmskb(<1 x i64>) nounwind readnone define i32 @test24(<1 x i64> %a) #0 { ; CHECK-LABEL: define i32 @test24( @@ -2659,12 +2659,12 @@ define i32 @test24(<1 x i64> %a) #0 { ; entry: %0 = bitcast <1 x i64> %a to <8 x i8> - %mmx_var.i = bitcast <8 x i8> %0 to x86_mmx - %1 = tail call i32 @llvm.x86.mmx.pmovmskb(x86_mmx %mmx_var.i) nounwind + %mmx_var.i = bitcast <8 x i8> %0 to <1 x i64> + %1 = tail call i32 @llvm.x86.mmx.pmovmskb(<1 x i64> %mmx_var.i) nounwind ret i32 %1 } -declare void @llvm.x86.mmx.maskmovq(x86_mmx, x86_mmx, ptr) nounwind +declare void @llvm.x86.mmx.maskmovq(<1 x i64>, <1 x i64>, ptr) nounwind define void @test23(<1 x i64> %d, <1 x i64> %n, ptr %p) nounwind optsize ssp #0 { ; CHECK-LABEL: define void @test23( @@ -2700,13 +2700,13 @@ define void @test23(<1 x i64> %d, <1 x i64> %n, ptr %p) nounwind optsize ssp #0 entry: %0 = bitcast <1 x i64> %n to <8 x i8> %1 = bitcast <1 x i64> %d to <8 x i8> - %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx - %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx - tail call void @llvm.x86.mmx.maskmovq(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i, ptr %p) nounwind + %mmx_var.i = bitcast <8 x i8> %1 to <1 x i64> + %mmx_var1.i = bitcast <8 x i8> %0 to <1 x i64> + tail call void @llvm.x86.mmx.maskmovq(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i, ptr %p) nounwind ret void } -declare x86_mmx @llvm.x86.mmx.pmulhu.w(x86_mmx, x86_mmx) nounwind readnone +declare <1 x i64> @llvm.x86.mmx.pmulhu.w(<1 x i64>, <1 x i64>) nounwind readnone define i64 @test22(<1 x i64> %a, <1 x i64> %b) #0 { ; CHECK-LABEL: define i64 @test22( @@ -2737,16 +2737,16 @@ define i64 @test22(<1 x i64> %a, <1 x i64> %b) #0 { entry: %0 = bitcast <1 x i64> %b to <4 x i16> %1 = bitcast <1 x i64> %a to <4 x i16> - %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx - %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx - %2 = tail call x86_mmx @llvm.x86.mmx.pmulhu.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind - %3 = bitcast x86_mmx %2 to <4 x i16> + %mmx_var.i = bitcast <4 x i16> %1 to <1 x i64> + %mmx_var1.i = bitcast <4 x i16> %0 to <1 x i64> + %2 = tail call <1 x i64> @llvm.x86.mmx.pmulhu.w(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind + %3 = bitcast <1 x i64> %2 to <4 x i16> %4 = bitcast <4 x i16> %3 to <1 x i64> %5 = extractelement <1 x i64> %4, i32 0 ret i64 %5 } -declare x86_mmx @llvm.x86.sse.pshuf.w(x86_mmx, i8) nounwind readnone +declare <1 x i64> @llvm.x86.sse.pshuf.w(<1 x i64>, i8) nounwind readnone define i64 @test21(<1 x i64> %a) #0 { ; CHECK-LABEL: define i64 @test21( @@ -2774,9 +2774,9 @@ define i64 @test21(<1 x i64> %a) #0 { ; entry: %0 = bitcast <1 x i64> %a to <4 x i16> - %1 = bitcast <4 x i16> %0 to x86_mmx - %2 = tail call x86_mmx @llvm.x86.sse.pshuf.w(x86_mmx %1, i8 3) nounwind readnone - %3 = bitcast x86_mmx %2 to <4 x i16> + %1 = bitcast <4 x i16> %0 to <1 x i64> + %2 = tail call <1 x i64> @llvm.x86.sse.pshuf.w(<1 x i64> %1, i8 3) nounwind readnone + %3 = bitcast <1 x i64> %2 to <4 x i16> %4 = bitcast <4 x i16> %3 to <1 x i64> %5 = extractelement <1 x i64> %4, i32 0 ret i64 %5 @@ -2808,15 +2808,15 @@ define i32 @test21_2(<1 x i64> %a) #0 { ; entry: %0 = bitcast <1 x i64> %a to <4 x i16> - %1 = bitcast <4 x i16> %0 to x86_mmx - %2 = tail call x86_mmx @llvm.x86.sse.pshuf.w(x86_mmx %1, i8 3) nounwind 
readnone - %3 = bitcast x86_mmx %2 to <4 x i16> + %1 = bitcast <4 x i16> %0 to <1 x i64> + %2 = tail call <1 x i64> @llvm.x86.sse.pshuf.w(<1 x i64> %1, i8 3) nounwind readnone + %3 = bitcast <1 x i64> %2 to <4 x i16> %4 = bitcast <4 x i16> %3 to <2 x i32> %5 = extractelement <2 x i32> %4, i32 0 ret i32 %5 } -declare x86_mmx @llvm.x86.mmx.pmulu.dq(x86_mmx, x86_mmx) nounwind readnone +declare <1 x i64> @llvm.x86.mmx.pmulu.dq(<1 x i64>, <1 x i64>) nounwind readnone define i64 @test20(<1 x i64> %a, <1 x i64> %b) #0 { ; CHECK-LABEL: define i64 @test20( @@ -2843,14 +2843,14 @@ define i64 @test20(<1 x i64> %a, <1 x i64> %b) #0 { entry: %0 = bitcast <1 x i64> %b to <2 x i32> %1 = bitcast <1 x i64> %a to <2 x i32> - %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx - %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx - %2 = tail call x86_mmx @llvm.x86.mmx.pmulu.dq(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind - %3 = bitcast x86_mmx %2 to i64 + %mmx_var.i = bitcast <2 x i32> %1 to <1 x i64> + %mmx_var1.i = bitcast <2 x i32> %0 to <1 x i64> + %2 = tail call <1 x i64> @llvm.x86.mmx.pmulu.dq(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind + %3 = bitcast <1 x i64> %2 to i64 ret i64 %3 } -declare <2 x double> @llvm.x86.sse.cvtpi2pd(x86_mmx) nounwind readnone +declare <2 x double> @llvm.x86.sse.cvtpi2pd(<1 x i64>) nounwind readnone define <2 x double> @test19(<1 x i64> %a) #0 { ; CHECK-LABEL: define <2 x double> @test19( @@ -2875,12 +2875,12 @@ define <2 x double> @test19(<1 x i64> %a) #0 { ; entry: %0 = bitcast <1 x i64> %a to <2 x i32> - %1 = bitcast <2 x i32> %0 to x86_mmx - %2 = tail call <2 x double> @llvm.x86.sse.cvtpi2pd(x86_mmx %1) nounwind readnone + %1 = bitcast <2 x i32> %0 to <1 x i64> + %2 = tail call <2 x double> @llvm.x86.sse.cvtpi2pd(<1 x i64> %1) nounwind readnone ret <2 x double> %2 } -declare x86_mmx @llvm.x86.sse.cvttpd2pi(<2 x double>) nounwind readnone +declare <1 x i64> @llvm.x86.sse.cvttpd2pi(<2 x double>) nounwind readnone define i64 @test18(<2 x double> %a) #0 { ; CHECK-LABEL: define i64 @test18( @@ -2903,14 +2903,14 @@ define i64 @test18(<2 x double> %a) #0 { ; CHECK-NEXT: ret i64 [[TMP3]] ; entry: - %0 = tail call x86_mmx @llvm.x86.sse.cvttpd2pi(<2 x double> %a) nounwind readnone - %1 = bitcast x86_mmx %0 to <2 x i32> + %0 = tail call <1 x i64> @llvm.x86.sse.cvttpd2pi(<2 x double> %a) nounwind readnone + %1 = bitcast <1 x i64> %0 to <2 x i32> %2 = bitcast <2 x i32> %1 to <1 x i64> %3 = extractelement <1 x i64> %2, i32 0 ret i64 %3 } -declare x86_mmx @llvm.x86.sse.cvtpd2pi(<2 x double>) nounwind readnone +declare <1 x i64> @llvm.x86.sse.cvtpd2pi(<2 x double>) nounwind readnone define i64 @test17(<2 x double> %a) #0 { ; CHECK-LABEL: define i64 @test17( @@ -2933,14 +2933,14 @@ define i64 @test17(<2 x double> %a) #0 { ; CHECK-NEXT: ret i64 [[TMP3]] ; entry: - %0 = tail call x86_mmx @llvm.x86.sse.cvtpd2pi(<2 x double> %a) nounwind readnone - %1 = bitcast x86_mmx %0 to <2 x i32> + %0 = tail call <1 x i64> @llvm.x86.sse.cvtpd2pi(<2 x double> %a) nounwind readnone + %1 = bitcast <1 x i64> %0 to <2 x i32> %2 = bitcast <2 x i32> %1 to <1 x i64> %3 = extractelement <1 x i64> %2, i32 0 ret i64 %3 } -declare x86_mmx @llvm.x86.mmx.palignr.b(x86_mmx, x86_mmx, i8) nounwind readnone +declare <1 x i64> @llvm.x86.mmx.palignr.b(<1 x i64>, <1 x i64>, i8) nounwind readnone define i64 @test16(<1 x i64> %a, <1 x i64> %b) #0 { ; CHECK-LABEL: define i64 @test16( @@ -2974,15 +2974,15 @@ define i64 @test16(<1 x i64> %a, <1 x i64> %b) #0 { ; entry: %0 = extractelement <1 x i64> %a, i32 0 - %mmx_var = 
bitcast i64 %0 to x86_mmx + %mmx_var = bitcast i64 %0 to <1 x i64> %1 = extractelement <1 x i64> %b, i32 0 - %mmx_var1 = bitcast i64 %1 to x86_mmx - %2 = tail call x86_mmx @llvm.x86.mmx.palignr.b(x86_mmx %mmx_var, x86_mmx %mmx_var1, i8 16) - %3 = bitcast x86_mmx %2 to i64 + %mmx_var1 = bitcast i64 %1 to <1 x i64> + %2 = tail call <1 x i64> @llvm.x86.mmx.palignr.b(<1 x i64> %mmx_var, <1 x i64> %mmx_var1, i8 16) + %3 = bitcast <1 x i64> %2 to i64 ret i64 %3 } -declare x86_mmx @llvm.x86.ssse3.pabs.d(x86_mmx) nounwind readnone +declare <1 x i64> @llvm.x86.ssse3.pabs.d(<1 x i64>) nounwind readnone define i64 @test15(<1 x i64> %a) #0 { ; CHECK-LABEL: define i64 @test15( @@ -3006,15 +3006,15 @@ define i64 @test15(<1 x i64> %a) #0 { ; entry: %0 = bitcast <1 x i64> %a to <2 x i32> - %1 = bitcast <2 x i32> %0 to x86_mmx - %2 = tail call x86_mmx @llvm.x86.ssse3.pabs.d(x86_mmx %1) nounwind readnone - %3 = bitcast x86_mmx %2 to <2 x i32> + %1 = bitcast <2 x i32> %0 to <1 x i64> + %2 = tail call <1 x i64> @llvm.x86.ssse3.pabs.d(<1 x i64> %1) nounwind readnone + %3 = bitcast <1 x i64> %2 to <2 x i32> %4 = bitcast <2 x i32> %3 to <1 x i64> %5 = extractelement <1 x i64> %4, i32 0 ret i64 %5 } -declare x86_mmx @llvm.x86.ssse3.pabs.w(x86_mmx) nounwind readnone +declare <1 x i64> @llvm.x86.ssse3.pabs.w(<1 x i64>) nounwind readnone define i64 @test14(<1 x i64> %a) #0 { ; CHECK-LABEL: define i64 @test14( @@ -3038,15 +3038,15 @@ define i64 @test14(<1 x i64> %a) #0 { ; entry: %0 = bitcast <1 x i64> %a to <4 x i16> - %1 = bitcast <4 x i16> %0 to x86_mmx - %2 = tail call x86_mmx @llvm.x86.ssse3.pabs.w(x86_mmx %1) nounwind readnone - %3 = bitcast x86_mmx %2 to <4 x i16> + %1 = bitcast <4 x i16> %0 to <1 x i64> + %2 = tail call <1 x i64> @llvm.x86.ssse3.pabs.w(<1 x i64> %1) nounwind readnone + %3 = bitcast <1 x i64> %2 to <4 x i16> %4 = bitcast <4 x i16> %3 to <1 x i64> %5 = extractelement <1 x i64> %4, i32 0 ret i64 %5 } -declare x86_mmx @llvm.x86.ssse3.pabs.b(x86_mmx) nounwind readnone +declare <1 x i64> @llvm.x86.ssse3.pabs.b(<1 x i64>) nounwind readnone define i64 @test13(<1 x i64> %a) #0 { ; CHECK-LABEL: define i64 @test13( @@ -3070,15 +3070,15 @@ define i64 @test13(<1 x i64> %a) #0 { ; entry: %0 = bitcast <1 x i64> %a to <8 x i8> - %1 = bitcast <8 x i8> %0 to x86_mmx - %2 = tail call x86_mmx @llvm.x86.ssse3.pabs.b(x86_mmx %1) nounwind readnone - %3 = bitcast x86_mmx %2 to <8 x i8> + %1 = bitcast <8 x i8> %0 to <1 x i64> + %2 = tail call <1 x i64> @llvm.x86.ssse3.pabs.b(<1 x i64> %1) nounwind readnone + %3 = bitcast <1 x i64> %2 to <8 x i8> %4 = bitcast <8 x i8> %3 to <1 x i64> %5 = extractelement <1 x i64> %4, i32 0 ret i64 %5 } -declare x86_mmx @llvm.x86.ssse3.psign.d(x86_mmx, x86_mmx) nounwind readnone +declare <1 x i64> @llvm.x86.ssse3.psign.d(<1 x i64>, <1 x i64>) nounwind readnone define i64 @test12(<1 x i64> %a, <1 x i64> %b) #0 { ; CHECK-LABEL: define i64 @test12( @@ -3109,16 +3109,16 @@ define i64 @test12(<1 x i64> %a, <1 x i64> %b) #0 { entry: %0 = bitcast <1 x i64> %b to <2 x i32> %1 = bitcast <1 x i64> %a to <2 x i32> - %2 = bitcast <2 x i32> %1 to x86_mmx - %3 = bitcast <2 x i32> %0 to x86_mmx - %4 = tail call x86_mmx @llvm.x86.ssse3.psign.d(x86_mmx %2, x86_mmx %3) nounwind readnone - %5 = bitcast x86_mmx %4 to <2 x i32> + %2 = bitcast <2 x i32> %1 to <1 x i64> + %3 = bitcast <2 x i32> %0 to <1 x i64> + %4 = tail call <1 x i64> @llvm.x86.ssse3.psign.d(<1 x i64> %2, <1 x i64> %3) nounwind readnone + %5 = bitcast <1 x i64> %4 to <2 x i32> %6 = bitcast <2 x i32> %5 to <1 x i64> %7 = extractelement <1 x 
i64> %6, i32 0 ret i64 %7 } -declare x86_mmx @llvm.x86.ssse3.psign.w(x86_mmx, x86_mmx) nounwind readnone +declare <1 x i64> @llvm.x86.ssse3.psign.w(<1 x i64>, <1 x i64>) nounwind readnone define i64 @test11(<1 x i64> %a, <1 x i64> %b) #0 { ; CHECK-LABEL: define i64 @test11( @@ -3149,16 +3149,16 @@ define i64 @test11(<1 x i64> %a, <1 x i64> %b) #0 { entry: %0 = bitcast <1 x i64> %b to <4 x i16> %1 = bitcast <1 x i64> %a to <4 x i16> - %2 = bitcast <4 x i16> %1 to x86_mmx - %3 = bitcast <4 x i16> %0 to x86_mmx - %4 = tail call x86_mmx @llvm.x86.ssse3.psign.w(x86_mmx %2, x86_mmx %3) nounwind readnone - %5 = bitcast x86_mmx %4 to <4 x i16> + %2 = bitcast <4 x i16> %1 to <1 x i64> + %3 = bitcast <4 x i16> %0 to <1 x i64> + %4 = tail call <1 x i64> @llvm.x86.ssse3.psign.w(<1 x i64> %2, <1 x i64> %3) nounwind readnone + %5 = bitcast <1 x i64> %4 to <4 x i16> %6 = bitcast <4 x i16> %5 to <1 x i64> %7 = extractelement <1 x i64> %6, i32 0 ret i64 %7 } -declare x86_mmx @llvm.x86.ssse3.psign.b(x86_mmx, x86_mmx) nounwind readnone +declare <1 x i64> @llvm.x86.ssse3.psign.b(<1 x i64>, <1 x i64>) nounwind readnone define i64 @test10(<1 x i64> %a, <1 x i64> %b) #0 { ; CHECK-LABEL: define i64 @test10( @@ -3189,16 +3189,16 @@ define i64 @test10(<1 x i64> %a, <1 x i64> %b) #0 { entry: %0 = bitcast <1 x i64> %b to <8 x i8> %1 = bitcast <1 x i64> %a to <8 x i8> - %2 = bitcast <8 x i8> %1 to x86_mmx - %3 = bitcast <8 x i8> %0 to x86_mmx - %4 = tail call x86_mmx @llvm.x86.ssse3.psign.b(x86_mmx %2, x86_mmx %3) nounwind readnone - %5 = bitcast x86_mmx %4 to <8 x i8> + %2 = bitcast <8 x i8> %1 to <1 x i64> + %3 = bitcast <8 x i8> %0 to <1 x i64> + %4 = tail call <1 x i64> @llvm.x86.ssse3.psign.b(<1 x i64> %2, <1 x i64> %3) nounwind readnone + %5 = bitcast <1 x i64> %4 to <8 x i8> %6 = bitcast <8 x i8> %5 to <1 x i64> %7 = extractelement <1 x i64> %6, i32 0 ret i64 %7 } -declare x86_mmx @llvm.x86.ssse3.pshuf.b(x86_mmx, x86_mmx) nounwind readnone +declare <1 x i64> @llvm.x86.ssse3.pshuf.b(<1 x i64>, <1 x i64>) nounwind readnone define i64 @test9(<1 x i64> %a, <1 x i64> %b) #0 { ; CHECK-LABEL: define i64 @test9( @@ -3229,16 +3229,16 @@ define i64 @test9(<1 x i64> %a, <1 x i64> %b) #0 { entry: %0 = bitcast <1 x i64> %b to <8 x i8> %1 = bitcast <1 x i64> %a to <8 x i8> - %2 = bitcast <8 x i8> %1 to x86_mmx - %3 = bitcast <8 x i8> %0 to x86_mmx - %4 = tail call x86_mmx @llvm.x86.ssse3.pshuf.b(x86_mmx %2, x86_mmx %3) nounwind readnone - %5 = bitcast x86_mmx %4 to <8 x i8> + %2 = bitcast <8 x i8> %1 to <1 x i64> + %3 = bitcast <8 x i8> %0 to <1 x i64> + %4 = tail call <1 x i64> @llvm.x86.ssse3.pshuf.b(<1 x i64> %2, <1 x i64> %3) nounwind readnone + %5 = bitcast <1 x i64> %4 to <8 x i8> %6 = bitcast <8 x i8> %5 to <1 x i64> %7 = extractelement <1 x i64> %6, i32 0 ret i64 %7 } -declare x86_mmx @llvm.x86.ssse3.pmul.hr.sw(x86_mmx, x86_mmx) nounwind readnone +declare <1 x i64> @llvm.x86.ssse3.pmul.hr.sw(<1 x i64>, <1 x i64>) nounwind readnone define i64 @test8(<1 x i64> %a, <1 x i64> %b) #0 { ; CHECK-LABEL: define i64 @test8( @@ -3269,16 +3269,16 @@ define i64 @test8(<1 x i64> %a, <1 x i64> %b) #0 { entry: %0 = bitcast <1 x i64> %b to <4 x i16> %1 = bitcast <1 x i64> %a to <4 x i16> - %2 = bitcast <4 x i16> %1 to x86_mmx - %3 = bitcast <4 x i16> %0 to x86_mmx - %4 = tail call x86_mmx @llvm.x86.ssse3.pmul.hr.sw(x86_mmx %2, x86_mmx %3) nounwind readnone - %5 = bitcast x86_mmx %4 to <4 x i16> + %2 = bitcast <4 x i16> %1 to <1 x i64> + %3 = bitcast <4 x i16> %0 to <1 x i64> + %4 = tail call <1 x i64> @llvm.x86.ssse3.pmul.hr.sw(<1 x 
i64> %2, <1 x i64> %3) nounwind readnone + %5 = bitcast <1 x i64> %4 to <4 x i16> %6 = bitcast <4 x i16> %5 to <1 x i64> %7 = extractelement <1 x i64> %6, i32 0 ret i64 %7 } -declare x86_mmx @llvm.x86.ssse3.pmadd.ub.sw(x86_mmx, x86_mmx) nounwind readnone +declare <1 x i64> @llvm.x86.ssse3.pmadd.ub.sw(<1 x i64>, <1 x i64>) nounwind readnone define i64 @test7(<1 x i64> %a, <1 x i64> %b) #0 { ; CHECK-LABEL: define i64 @test7( @@ -3313,16 +3313,16 @@ define i64 @test7(<1 x i64> %a, <1 x i64> %b) #0 { entry: %0 = bitcast <1 x i64> %b to <8 x i8> %1 = bitcast <1 x i64> %a to <8 x i8> - %2 = bitcast <8 x i8> %1 to x86_mmx - %3 = bitcast <8 x i8> %0 to x86_mmx - %4 = tail call x86_mmx @llvm.x86.ssse3.pmadd.ub.sw(x86_mmx %2, x86_mmx %3) nounwind readnone - %5 = bitcast x86_mmx %4 to <8 x i8> + %2 = bitcast <8 x i8> %1 to <1 x i64> + %3 = bitcast <8 x i8> %0 to <1 x i64> + %4 = tail call <1 x i64> @llvm.x86.ssse3.pmadd.ub.sw(<1 x i64> %2, <1 x i64> %3) nounwind readnone + %5 = bitcast <1 x i64> %4 to <8 x i8> %6 = bitcast <8 x i8> %5 to <1 x i64> %7 = extractelement <1 x i64> %6, i32 0 ret i64 %7 } -declare x86_mmx @llvm.x86.ssse3.phsub.sw(x86_mmx, x86_mmx) nounwind readnone +declare <1 x i64> @llvm.x86.ssse3.phsub.sw(<1 x i64>, <1 x i64>) nounwind readnone define i64 @test6(<1 x i64> %a, <1 x i64> %b) #0 { ; CHECK-LABEL: define i64 @test6( @@ -3353,16 +3353,16 @@ define i64 @test6(<1 x i64> %a, <1 x i64> %b) #0 { entry: %0 = bitcast <1 x i64> %b to <4 x i16> %1 = bitcast <1 x i64> %a to <4 x i16> - %2 = bitcast <4 x i16> %1 to x86_mmx - %3 = bitcast <4 x i16> %0 to x86_mmx - %4 = tail call x86_mmx @llvm.x86.ssse3.phsub.sw(x86_mmx %2, x86_mmx %3) nounwind readnone - %5 = bitcast x86_mmx %4 to <4 x i16> + %2 = bitcast <4 x i16> %1 to <1 x i64> + %3 = bitcast <4 x i16> %0 to <1 x i64> + %4 = tail call <1 x i64> @llvm.x86.ssse3.phsub.sw(<1 x i64> %2, <1 x i64> %3) nounwind readnone + %5 = bitcast <1 x i64> %4 to <4 x i16> %6 = bitcast <4 x i16> %5 to <1 x i64> %7 = extractelement <1 x i64> %6, i32 0 ret i64 %7 } -declare x86_mmx @llvm.x86.ssse3.phsub.d(x86_mmx, x86_mmx) nounwind readnone +declare <1 x i64> @llvm.x86.ssse3.phsub.d(<1 x i64>, <1 x i64>) nounwind readnone define i64 @test5(<1 x i64> %a, <1 x i64> %b) #0 { ; CHECK-LABEL: define i64 @test5( @@ -3393,16 +3393,16 @@ define i64 @test5(<1 x i64> %a, <1 x i64> %b) #0 { entry: %0 = bitcast <1 x i64> %b to <2 x i32> %1 = bitcast <1 x i64> %a to <2 x i32> - %2 = bitcast <2 x i32> %1 to x86_mmx - %3 = bitcast <2 x i32> %0 to x86_mmx - %4 = tail call x86_mmx @llvm.x86.ssse3.phsub.d(x86_mmx %2, x86_mmx %3) nounwind readnone - %5 = bitcast x86_mmx %4 to <2 x i32> + %2 = bitcast <2 x i32> %1 to <1 x i64> + %3 = bitcast <2 x i32> %0 to <1 x i64> + %4 = tail call <1 x i64> @llvm.x86.ssse3.phsub.d(<1 x i64> %2, <1 x i64> %3) nounwind readnone + %5 = bitcast <1 x i64> %4 to <2 x i32> %6 = bitcast <2 x i32> %5 to <1 x i64> %7 = extractelement <1 x i64> %6, i32 0 ret i64 %7 } -declare x86_mmx @llvm.x86.ssse3.phsub.w(x86_mmx, x86_mmx) nounwind readnone +declare <1 x i64> @llvm.x86.ssse3.phsub.w(<1 x i64>, <1 x i64>) nounwind readnone define i64 @test4(<1 x i64> %a, <1 x i64> %b) #0 { ; CHECK-LABEL: define i64 @test4( @@ -3433,16 +3433,16 @@ define i64 @test4(<1 x i64> %a, <1 x i64> %b) #0 { entry: %0 = bitcast <1 x i64> %b to <4 x i16> %1 = bitcast <1 x i64> %a to <4 x i16> - %2 = bitcast <4 x i16> %1 to x86_mmx - %3 = bitcast <4 x i16> %0 to x86_mmx - %4 = tail call x86_mmx @llvm.x86.ssse3.phsub.w(x86_mmx %2, x86_mmx %3) nounwind readnone - %5 = bitcast 
x86_mmx %4 to <4 x i16> + %2 = bitcast <4 x i16> %1 to <1 x i64> + %3 = bitcast <4 x i16> %0 to <1 x i64> + %4 = tail call <1 x i64> @llvm.x86.ssse3.phsub.w(<1 x i64> %2, <1 x i64> %3) nounwind readnone + %5 = bitcast <1 x i64> %4 to <4 x i16> %6 = bitcast <4 x i16> %5 to <1 x i64> %7 = extractelement <1 x i64> %6, i32 0 ret i64 %7 } -declare x86_mmx @llvm.x86.ssse3.phadd.sw(x86_mmx, x86_mmx) nounwind readnone +declare <1 x i64> @llvm.x86.ssse3.phadd.sw(<1 x i64>, <1 x i64>) nounwind readnone define i64 @test3(<1 x i64> %a, <1 x i64> %b) #0 { ; CHECK-LABEL: define i64 @test3( @@ -3473,16 +3473,16 @@ define i64 @test3(<1 x i64> %a, <1 x i64> %b) #0 { entry: %0 = bitcast <1 x i64> %b to <4 x i16> %1 = bitcast <1 x i64> %a to <4 x i16> - %2 = bitcast <4 x i16> %1 to x86_mmx - %3 = bitcast <4 x i16> %0 to x86_mmx - %4 = tail call x86_mmx @llvm.x86.ssse3.phadd.sw(x86_mmx %2, x86_mmx %3) nounwind readnone - %5 = bitcast x86_mmx %4 to <4 x i16> + %2 = bitcast <4 x i16> %1 to <1 x i64> + %3 = bitcast <4 x i16> %0 to <1 x i64> + %4 = tail call <1 x i64> @llvm.x86.ssse3.phadd.sw(<1 x i64> %2, <1 x i64> %3) nounwind readnone + %5 = bitcast <1 x i64> %4 to <4 x i16> %6 = bitcast <4 x i16> %5 to <1 x i64> %7 = extractelement <1 x i64> %6, i32 0 ret i64 %7 } -declare x86_mmx @llvm.x86.ssse3.phadd.d(x86_mmx, x86_mmx) nounwind readnone +declare <1 x i64> @llvm.x86.ssse3.phadd.d(<1 x i64>, <1 x i64>) nounwind readnone define i64 @test2(<1 x i64> %a, <1 x i64> %b) #0 { ; CHECK-LABEL: define i64 @test2( @@ -3513,16 +3513,16 @@ define i64 @test2(<1 x i64> %a, <1 x i64> %b) #0 { entry: %0 = bitcast <1 x i64> %b to <2 x i32> %1 = bitcast <1 x i64> %a to <2 x i32> - %2 = bitcast <2 x i32> %1 to x86_mmx - %3 = bitcast <2 x i32> %0 to x86_mmx - %4 = tail call x86_mmx @llvm.x86.ssse3.phadd.d(x86_mmx %2, x86_mmx %3) nounwind readnone - %5 = bitcast x86_mmx %4 to <2 x i32> + %2 = bitcast <2 x i32> %1 to <1 x i64> + %3 = bitcast <2 x i32> %0 to <1 x i64> + %4 = tail call <1 x i64> @llvm.x86.ssse3.phadd.d(<1 x i64> %2, <1 x i64> %3) nounwind readnone + %5 = bitcast <1 x i64> %4 to <2 x i32> %6 = bitcast <2 x i32> %5 to <1 x i64> %7 = extractelement <1 x i64> %6, i32 0 ret i64 %7 } -define <4 x float> @test89(<4 x float> %a, x86_mmx %b) nounwind #0 { +define <4 x float> @test89(<4 x float> %a, <1 x i64> %b) nounwind #0 { ; ALL-LABEL: test89: ; ALL: # %bb.0: ; ALL-NEXT: cvtpi2ps %mm0, %xmm0 @@ -3546,11 +3546,11 @@ define <4 x float> @test89(<4 x float> %a, x86_mmx %b) nounwind #0 { ; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <4 x float> [[C]] ; - %c = tail call <4 x float> @llvm.x86.sse.cvtpi2ps(<4 x float> %a, x86_mmx %b) + %c = tail call <4 x float> @llvm.x86.sse.cvtpi2ps(<4 x float> %a, <1 x i64> %b) ret <4 x float> %c } -declare <4 x float> @llvm.x86.sse.cvtpi2ps(<4 x float>, x86_mmx) nounwind readnone +declare <4 x float> @llvm.x86.sse.cvtpi2ps(<4 x float>, <1 x i64>) nounwind readnone define void @test90() #0 { ; ALL-LABEL: test90: @@ -3576,29 +3576,25 @@ define <1 x i64> @test_mm_insert_pi16(<1 x i64> %a.coerce, i32 %d) nounwind #0 { ; CHECK-NEXT: [[TMP3:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP8:%.*]] = bitcast <1 x i64> [[A_COERCE]] to <1 x i64> ; CHECK-NEXT: [[TMP7:%.*]] = bitcast <1 x i64> [[TMP3]] to i64 ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 
[[TMP7]], 0 ; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i32 [[TMP6]], 0 ; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF0]] -; CHECK: 4: +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] ; CHECK-NEXT: unreachable -; CHECK: 5: -; CHECK-NEXT: [[TMP9:%.*]] = tail call <1 x i64> @llvm.x86.mmx.pinsr.w(<1 x i64> [[TMP8]], i32 [[D]], i32 2) -; CHECK-NEXT: [[TMP2:%.*]] = bitcast <1 x i64> [[TMP9]] to <1 x i64> +; CHECK: 4: +; CHECK-NEXT: [[TMP9:%.*]] = tail call <1 x i64> @llvm.x86.mmx.pinsr.w(<1 x i64> [[A_COERCE]], i32 [[D]], i32 2) ; CHECK-NEXT: store <1 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8 -; CHECK-NEXT: ret <1 x i64> [[TMP2]] +; CHECK-NEXT: ret <1 x i64> [[TMP9]] ; entry: - %0 = bitcast <1 x i64> %a.coerce to x86_mmx - %1 = tail call x86_mmx @llvm.x86.mmx.pinsr.w(x86_mmx %0, i32 %d, i32 2) - %2 = bitcast x86_mmx %1 to <1 x i64> - ret <1 x i64> %2 + %1 = tail call <1 x i64> @llvm.x86.mmx.pinsr.w(<1 x i64> %a.coerce, i32 %d, i32 2) + ret <1 x i64> %1 } -declare x86_mmx @llvm.x86.mmx.pinsr.w(x86_mmx, i32, i32 immarg) +declare <1 x i64> @llvm.x86.mmx.pinsr.w(<1 x i64>, i32, i32 immarg) define i32 @test_mm_extract_pi16(<1 x i64> %a.coerce) nounwind #0 { ; CHECK-LABEL: define i32 @test_mm_extract_pi16( @@ -3606,25 +3602,23 @@ define i32 @test_mm_extract_pi16(<1 x i64> %a.coerce) nounwind #0 { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP2:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP6:%.*]] = bitcast <1 x i64> [[A_COERCE]] to <1 x i64> ; CHECK-NEXT: [[TMP5:%.*]] = bitcast <1 x i64> [[TMP2]] to i64 ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP5]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] -; CHECK: 3: +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] ; CHECK-NEXT: unreachable -; CHECK: 4: -; CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.x86.mmx.pextr.w(<1 x i64> [[TMP6]], i32 2) +; CHECK: 3: +; CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.x86.mmx.pextr.w(<1 x i64> [[A_COERCE]], i32 2) ; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret i32 [[TMP1]] ; entry: - %0 = bitcast <1 x i64> %a.coerce to x86_mmx - %1 = tail call i32 @llvm.x86.mmx.pextr.w(x86_mmx %0, i32 2) + %1 = tail call i32 @llvm.x86.mmx.pextr.w(<1 x i64> %a.coerce, i32 2) ret i32 %1 } -declare i32 @llvm.x86.mmx.pextr.w(x86_mmx, i32 immarg) +declare i32 @llvm.x86.mmx.pextr.w(<1 x i64>, i32 immarg) attributes #0 = { sanitize_memory } ;. 
diff --git a/llvm/test/Instrumentation/MemorySanitizer/vector_arith.ll b/llvm/test/Instrumentation/MemorySanitizer/vector_arith.ll index 5197f3277ed80a..fe5cf9dcc65b20 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/vector_arith.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/vector_arith.ll @@ -6,9 +6,9 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3 target triple = "x86_64-unknown-linux-gnu" declare <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16>, <8 x i16>) nounwind readnone -declare x86_mmx @llvm.x86.ssse3.pmadd.ub.sw(x86_mmx, x86_mmx) nounwind readnone +declare <1 x i64> @llvm.x86.ssse3.pmadd.ub.sw(<1 x i64>, <1 x i64>) nounwind readnone declare <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8>, <16 x i8>) nounwind readnone -declare x86_mmx @llvm.x86.mmx.psad.bw(x86_mmx, x86_mmx) nounwind readnone +declare <1 x i64> @llvm.x86.mmx.psad.bw(<1 x i64>, <1 x i64>) nounwind readnone define <4 x i32> @Test_sse2_pmadd_wd(<8 x i16> %a, <8 x i16> %b) sanitize_memory { entry: @@ -24,10 +24,10 @@ entry: ; CHECK: ret <4 x i32> -define x86_mmx @Test_ssse3_pmadd_ub_sw(x86_mmx %a, x86_mmx %b) sanitize_memory { +define <1 x i64> @Test_ssse3_pmadd_ub_sw(<1 x i64> %a, <1 x i64> %b) sanitize_memory { entry: - %c = tail call x86_mmx @llvm.x86.ssse3.pmadd.ub.sw(x86_mmx %a, x86_mmx %b) nounwind - ret x86_mmx %c + %c = tail call <1 x i64> @llvm.x86.ssse3.pmadd.ub.sw(<1 x i64> %a, <1 x i64> %b) nounwind + ret <1 x i64> %c } ; CHECK-LABEL: @Test_ssse3_pmadd_ub_sw( @@ -53,10 +53,10 @@ define <2 x i64> @Test_x86_sse2_psad_bw(<16 x i8> %a, <16 x i8> %b) sanitize_mem ; CHECK: ret <2 x i64> -define x86_mmx @Test_x86_mmx_psad_bw(x86_mmx %a, x86_mmx %b) sanitize_memory { +define <1 x i64> @Test_x86_mmx_psad_bw(<1 x i64> %a, <1 x i64> %b) sanitize_memory { entry: - %c = tail call x86_mmx @llvm.x86.mmx.psad.bw(x86_mmx %a, x86_mmx %b) nounwind - ret x86_mmx %c + %c = tail call <1 x i64> @llvm.x86.mmx.psad.bw(<1 x i64> %a, <1 x i64> %b) nounwind + ret <1 x i64> %c } ; CHECK-LABEL: @Test_x86_mmx_psad_bw( diff --git a/llvm/test/Instrumentation/MemorySanitizer/vector_cvt.ll b/llvm/test/Instrumentation/MemorySanitizer/vector_cvt.ll index 6ae03f288e2c0e..e9202700b1df74 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/vector_cvt.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/vector_cvt.ll @@ -7,7 +7,7 @@ target triple = "x86_64-unknown-linux-gnu" declare i32 @llvm.x86.sse2.cvtsd2si(<2 x double>) nounwind readnone declare <2 x double> @llvm.x86.sse2.cvtsi2sd(<2 x double>, i32) nounwind readnone -declare x86_mmx @llvm.x86.sse.cvtps2pi(<4 x float>) nounwind readnone +declare <1 x i64> @llvm.x86.sse.cvtps2pi(<4 x float>) nounwind readnone declare i32 @llvm.x86.avx512.vcvtss2usi32(<4 x float>, i32) nounwind readnone ; Single argument vector conversion. @@ -27,12 +27,12 @@ entry: ; CHECK: store i32 0, {{.*}} @__msan_retval_tls ; CHECK: ret i32 -; x86_mmx packed vector conversion. +; <1 x i64> packed vector conversion. 
-define x86_mmx @test_cvtps2pi(<4 x float> %value) sanitize_memory { +define <1 x i64> @test_cvtps2pi(<4 x float> %value) sanitize_memory { entry: - %0 = tail call x86_mmx @llvm.x86.sse.cvtps2pi(<4 x float> %value) - ret x86_mmx %0 + %0 = tail call <1 x i64> @llvm.x86.sse.cvtps2pi(<4 x float> %value) + ret <1 x i64> %0 } ; CHECK-LABEL: @test_cvtps2pi diff --git a/llvm/test/Instrumentation/MemorySanitizer/vector_pack.ll b/llvm/test/Instrumentation/MemorySanitizer/vector_pack.ll index 1289abd63667ee..13f7a1612de943 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/vector_pack.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/vector_pack.ll @@ -7,7 +7,7 @@ target triple = "x86_64-unknown-linux-gnu" declare <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32>, <4 x i32>) nounwind readnone declare <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16> %a, <16 x i16> %b) nounwind readnone -declare x86_mmx @llvm.x86.mmx.packuswb(x86_mmx, x86_mmx) nounwind readnone +declare <1 x i64> @llvm.x86.mmx.packuswb(<1 x i64>, <1 x i64>) nounwind readnone define <8 x i16> @Test_packssdw_128(<4 x i32> %a, <4 x i32> %b) sanitize_memory { entry: @@ -41,10 +41,10 @@ entry: ; CHECK: ret <32 x i8> -define x86_mmx @Test_mmx_packuswb(x86_mmx %a, x86_mmx %b) sanitize_memory { +define <1 x i64> @Test_mmx_packuswb(<1 x i64> %a, <1 x i64> %b) sanitize_memory { entry: - %c = tail call x86_mmx @llvm.x86.mmx.packuswb(x86_mmx %a, x86_mmx %b) nounwind - ret x86_mmx %c + %c = tail call <1 x i64> @llvm.x86.mmx.packuswb(<1 x i64> %a, <1 x i64> %b) nounwind + ret <1 x i64> %c } ; CHECK-LABEL: @Test_mmx_packuswb( diff --git a/llvm/test/Instrumentation/MemorySanitizer/vector_shift.ll b/llvm/test/Instrumentation/MemorySanitizer/vector_shift.ll index 3c6c44194e3ac5..441dd8f64e2842 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/vector_shift.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/vector_shift.ll @@ -7,7 +7,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" -declare x86_mmx @llvm.x86.mmx.psll.d(x86_mmx, x86_mmx) +declare <1 x i64> @llvm.x86.mmx.psll.d(<1 x i64>, <1 x i64>) declare <16 x i32> @llvm.x86.avx512.psllv.d.512(<16 x i32>, <16 x i32>) declare <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32>, <8 x i32>) declare <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32>, <4 x i32>) @@ -19,10 +19,10 @@ declare <32 x i16> @llvm.x86.avx512.pslli.w.512(<32 x i16>, i32) define i64 @test_mmx(i64 %x.coerce, i64 %y.coerce) sanitize_memory { entry: %0 = bitcast i64 %x.coerce to <2 x i32> - %1 = bitcast <2 x i32> %0 to x86_mmx - %2 = bitcast i64 %y.coerce to x86_mmx - %3 = tail call x86_mmx @llvm.x86.mmx.psll.d(x86_mmx %1, x86_mmx %2) - %4 = bitcast x86_mmx %3 to <2 x i32> + %1 = bitcast <2 x i32> %0 to <1 x i64> + %2 = bitcast i64 %y.coerce to <1 x i64> + %3 = tail call <1 x i64> @llvm.x86.mmx.psll.d(<1 x i64> %1, <1 x i64> %2) + %4 = bitcast <1 x i64> %3 to <2 x i32> %5 = bitcast <2 x i32> %4 to <1 x i64> %6 = extractelement <1 x i64> %5, i32 0 ret i64 %6 diff --git a/llvm/test/MC/X86/x86-GCC-inline-asm-Y-constraints.ll b/llvm/test/MC/X86/x86-GCC-inline-asm-Y-constraints.ll index 990e2810d851e4..2e9da9f2b5b779 100644 --- a/llvm/test/MC/X86/x86-GCC-inline-asm-Y-constraints.ll +++ b/llvm/test/MC/X86/x86-GCC-inline-asm-Y-constraints.ll @@ -11,7 +11,7 @@ define void @f_Ym(i64 %m.coerce) { ; CHECK: ## InlineAsm End entry: - %0 = tail call x86_mmx asm sideeffect "movq $0, %mm1\0A\09", 
"=^Ym,~{dirflag},~{fpsr},~{flags}"() + %0 = tail call <1 x i64> asm sideeffect "movq $0, %mm1\0A\09", "=^Ym,~{dirflag},~{fpsr},~{flags}"() ret void } diff --git a/llvm/test/Transforms/GlobalOpt/x86_mmx_load.ll b/llvm/test/Transforms/GlobalOpt/x86_mmx_load.ll deleted file mode 100644 index e352900e2a458e..00000000000000 --- a/llvm/test/Transforms/GlobalOpt/x86_mmx_load.ll +++ /dev/null @@ -1,12 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S -passes=globalopt < %s | FileCheck %s - -@m64 = internal global <1 x i64> zeroinitializer - -define i32 @load_mmx() { -; CHECK-LABEL: @load_mmx( -; CHECK-NEXT: ret i32 0 -; - %temp = load x86_mmx, ptr @m64 - ret i32 0 -} diff --git a/llvm/test/Transforms/InstCombine/X86/x86-movmsk.ll b/llvm/test/Transforms/InstCombine/X86/x86-movmsk.ll index 9fbc39241d8e98..04bba79aada0ab 100644 --- a/llvm/test/Transforms/InstCombine/X86/x86-movmsk.ll +++ b/llvm/test/Transforms/InstCombine/X86/x86-movmsk.ll @@ -7,12 +7,12 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" ; DemandedBits - MOVMSK zeros the upper bits of the result. ; -define i32 @test_upper_x86_mmx_pmovmskb(x86_mmx %a0) { +define i32 @test_upper_x86_mmx_pmovmskb(<1 x i64> %a0) { ; CHECK-LABEL: @test_upper_x86_mmx_pmovmskb( ; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.x86.mmx.pmovmskb(<1 x i64> [[A0:%.*]]) ; CHECK-NEXT: ret i32 [[TMP1]] ; - %1 = call i32 @llvm.x86.mmx.pmovmskb(x86_mmx %a0) + %1 = call i32 @llvm.x86.mmx.pmovmskb(<1 x i64> %a0) %2 = and i32 %1, 255 ret i32 %2 } @@ -87,11 +87,11 @@ define i32 @test_upper_x86_avx_movmsk_pd_256(<4 x double> %a0) { ; DemandedBits - If we don't use the lower bits then we just return zero. ; -define i32 @test_lower_x86_mmx_pmovmskb(x86_mmx %a0) { +define i32 @test_lower_x86_mmx_pmovmskb(<1 x i64> %a0) { ; CHECK-LABEL: @test_lower_x86_mmx_pmovmskb( ; CHECK-NEXT: ret i32 0 ; - %1 = call i32 @llvm.x86.mmx.pmovmskb(x86_mmx %a0) + %1 = call i32 @llvm.x86.mmx.pmovmskb(<1 x i64> %a0) %2 = and i32 %1, -256 ret i32 %2 } @@ -151,7 +151,7 @@ define i32 @undef_x86_mmx_pmovmskb() { ; CHECK-LABEL: @undef_x86_mmx_pmovmskb( ; CHECK-NEXT: ret i32 0 ; - %1 = call i32 @llvm.x86.mmx.pmovmskb(x86_mmx undef) + %1 = call i32 @llvm.x86.mmx.pmovmskb(<1 x i64> undef) ret i32 %1 } @@ -264,8 +264,8 @@ define i32 @fold_x86_mmx_pmovmskb() { ; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.x86.mmx.pmovmskb(<1 x i64> ) ; CHECK-NEXT: ret i32 [[TMP1]] ; - %1 = bitcast <8 x i8> to x86_mmx - %2 = call i32 @llvm.x86.mmx.pmovmskb(x86_mmx %1) + %1 = bitcast <8 x i8> to <1 x i64> + %2 = call i32 @llvm.x86.mmx.pmovmskb(<1 x i64> %1) ret i32 %2 } @@ -437,7 +437,7 @@ define i32 @sext_sse_movmsk_ps_must_replicate_bits(<2 x i1> %x) { ret i32 %r } -declare i32 @llvm.x86.mmx.pmovmskb(x86_mmx) +declare i32 @llvm.x86.mmx.pmovmskb(<1 x i64>) declare i32 @llvm.x86.sse.movmsk.ps(<4 x float>) declare i32 @llvm.x86.sse2.movmsk.pd(<2 x double>) diff --git a/llvm/test/Transforms/InstCombine/cast.ll b/llvm/test/Transforms/InstCombine/cast.ll index 6cee7bb650fd31..564e82945c13f0 100644 --- a/llvm/test/Transforms/InstCombine/cast.ll +++ b/llvm/test/Transforms/InstCombine/cast.ll @@ -937,27 +937,6 @@ define float @test2c() { ret float extractelement (<2 x float> bitcast (double bitcast (<2 x float> to double) to <2 x float>), i32 0) } -define i64 @test_mmx(<2 x i32> %x) { -; ALL-LABEL: @test_mmx( -; ALL-NEXT: [[C:%.*]] = bitcast <2 x i32> [[X:%.*]] to i64 -; ALL-NEXT: ret i64 [[C]] -; - %A = bitcast <2 x i32> %x to x86_mmx - %B = bitcast x86_mmx %A to <2 x i32> - %C 
-  ret i64 %C
-}
-
-define i64 @test_mmx_const(<2 x i32> %c) {
-; ALL-LABEL: @test_mmx_const(
-; ALL-NEXT:    ret i64 0
-;
-  %A = bitcast <2 x i32> zeroinitializer to x86_mmx
-  %B = bitcast x86_mmx %A to <2 x i32>
-  %C = bitcast <2 x i32> %B to i64
-  ret i64 %C
-}
-
 ; PR12514
 define i1 @test67(i1 %a, i32 %b) {
 ; ALL-LABEL: @test67(
diff --git a/llvm/test/Transforms/InstSimplify/ConstProp/gep-zeroinit-vector.ll b/llvm/test/Transforms/InstSimplify/ConstProp/gep-zeroinit-vector.ll
index bce07b07756209..c383ff7a90ded2 100644
--- a/llvm/test/Transforms/InstSimplify/ConstProp/gep-zeroinit-vector.ll
+++ b/llvm/test/Transforms/InstSimplify/ConstProp/gep-zeroinit-vector.ll
@@ -12,18 +12,5 @@ define <2 x ptr> @test_gep() {
 ; CHECK-NEXT:    ret <2 x ptr> 
 ;
   %A = getelementptr [1 x %rec8], ptr @a, <2 x i16> zeroinitializer, <2 x i64> zeroinitializer
-  %B = bitcast <2 x ptr> %A to <2 x ptr>
-  ret <2 x ptr> %B
-}
-
-; Testcase that verify the cast-of-cast when the outer/second cast is to a
-; vector type.
-
-define <4 x i16> @test_mmx_const() {
-; CHECK-LABEL: @test_mmx_const(
-; CHECK-NEXT:    ret <4 x i16> zeroinitializer
-;
-  %A = bitcast <2 x i32> zeroinitializer to x86_mmx
-  %B = bitcast x86_mmx %A to <4 x i16>
-  ret <4 x i16> %B
+  ret <2 x ptr> %A
 }
diff --git a/llvm/test/Transforms/SCCP/crash.ll b/llvm/test/Transforms/SCCP/crash.ll
index 8f8ad44db437b0..47d9329f6f03da 100644
--- a/llvm/test/Transforms/SCCP/crash.ll
+++ b/llvm/test/Transforms/SCCP/crash.ll
@@ -28,7 +28,7 @@ define i32 @test2([4 x i32] %A) {
   ret i32 %B
 }
 
-define x86_mmx @test3() {
-  %load = load x86_mmx, ptr null
-  ret x86_mmx %load
+define <1 x i64> @test3() {
+  %load = load <1 x i64>, ptr null
+  ret <1 x i64> %load
 }
diff --git a/llvm/test/Transforms/SROA/pr57796.ll b/llvm/test/Transforms/SROA/pr57796.ll
index dbcb6d07849717..4eb6a7107dad30 100644
--- a/llvm/test/Transforms/SROA/pr57796.ll
+++ b/llvm/test/Transforms/SROA/pr57796.ll
@@ -29,13 +29,13 @@ entry:
   %call.i = call align 32 ptr @value_set_type(ptr align 32 %ref.tmp.i)
   %0 = load <32 x i8>, ptr %call.i, align 32
   store <32 x i8> %0, ptr %ref.tmp, align 32
-  %1 = load x86_mmx, ptr %ref.tmp, align 32
-  %2 = call x86_mmx @llvm.x86.sse.pshuf.w(x86_mmx %1, i8 0)
-  store x86_mmx %2, ptr @A, align 8
+  %1 = load <1 x i64>, ptr %ref.tmp, align 32
+  %2 = call <1 x i64> @llvm.x86.sse.pshuf.w(<1 x i64> %1, i8 0)
+  store <1 x i64> %2, ptr @A, align 8
   ret void
 }
 
-declare x86_mmx @llvm.x86.sse.pshuf.w(x86_mmx, i8 immarg)
+declare <1 x i64> @llvm.x86.sse.pshuf.w(<1 x i64>, i8 immarg)
 
 declare dso_local void @value_create(ptr sret(%struct.Value) align 32)
diff --git a/llvm/test/Verifier/atomics.ll b/llvm/test/Verifier/atomics.ll
index fe70ba082cb4c1..f835b98b243456 100644
--- a/llvm/test/Verifier/atomics.ll
+++ b/llvm/test/Verifier/atomics.ll
@@ -3,12 +3,12 @@
 ; CHECK: atomic store operand must have integer, pointer, or floating point type!
 ; CHECK: atomic load operand must have integer, pointer, or floating point type!
 
-define void @foo(ptr %P, x86_mmx %v) {
-  store atomic x86_mmx %v, ptr %P unordered, align 8
+define void @foo(ptr %P, <1 x i64> %v) {
+  store atomic <1 x i64> %v, ptr %P unordered, align 8
   ret void
 }
 
-define x86_mmx @bar(ptr %P) {
-  %v = load atomic x86_mmx, ptr %P unordered, align 8
-  ret x86_mmx %v
+define <1 x i64> @bar(ptr %P) {
+  %v = load atomic <1 x i64>, ptr %P unordered, align 8
+  ret <1 x i64> %v
 }