forked from GPUOpen-Drivers/llvm-project
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merged master:4a2673d79fd into amd-gfx:e87ce7d5df8
Local branch amd-gfx e87ce7d Merged master:45ebe38ffc4 into amd-gfx:86103e348b6 Remote branch master 4a2673d [X86][AVX] Add SimplifyMultipleUseDemandedBits VBROADCAST handling to SimplifyDemandedVectorElts.
- Loading branch information
Showing
4 changed files
with
801 additions
and
14 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,78 @@ | ||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py | ||
; RUN: opt -O3 -S < %s | FileCheck %s | ||
; RUN: opt -passes='default<O3>' -S < %s | FileCheck %s | ||
|
||
; Target: x86-64, with the vendor and OS fields of the triple left empty. | ||
target triple = "x86_64--" | ||
; Data layout: little-endian (e), ELF name mangling (m:e), 64-bit aligned i64, | ||
; 128-bit aligned x86_fp80 (f80:128), native integer widths 8/16/32/64, and | ||
; 128-bit natural stack alignment (S128). | ||
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" | ||
|
||
; PR42174 - https://bugs.llvm.org/show_bug.cgi?id=42174 | ||
; This test should match the IR produced by clang after running -mem2reg. | ||
; All math before the final 'add' should be scalarized. | ||
|
||
; Adds eight splatted scalar terms (%k/2, %p*6234, 75*%h, %j/3452, 53*%w, | ||
; %x/820, 4*%u, %y) and three constant splats (1, 25, 317400) to %num. | ||
; The autogenerated assertions below record that the scalar terms are summed in | ||
; lane 0 of <4 x i32> values (remaining lanes undef) and broadcast by a single | ||
; shufflevector before the final add with %num; the constants show up as 1 and | ||
; 317425. | ||
define <4 x i32> @square(<4 x i32> %num, i32 %y, i32 %x, i32 %h, i32 %k, i32 %w, i32 %p, i32 %j, i32 %u) { | ||
; CHECK-LABEL: @square( | ||
; CHECK-NEXT: [[DIV:%.*]] = sdiv i32 [[K:%.*]], 2 | ||
; CHECK-NEXT: [[SPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[DIV]], i32 0 | ||
; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[P:%.*]], 6234 | ||
; CHECK-NEXT: [[SPLATINSERT2:%.*]] = insertelement <4 x i32> undef, i32 [[MUL]], i32 0 | ||
; CHECK-NEXT: [[MUL5:%.*]] = mul nsw i32 [[H:%.*]], 75 | ||
; CHECK-NEXT: [[SPLATINSERT6:%.*]] = insertelement <4 x i32> undef, i32 [[MUL5]], i32 0 | ||
; CHECK-NEXT: [[DIV9:%.*]] = sdiv i32 [[J:%.*]], 3452 | ||
; CHECK-NEXT: [[SPLATINSERT10:%.*]] = insertelement <4 x i32> undef, i32 [[DIV9]], i32 0 | ||
; CHECK-NEXT: [[MUL13:%.*]] = mul nsw i32 [[W:%.*]], 53 | ||
; CHECK-NEXT: [[SPLATINSERT14:%.*]] = insertelement <4 x i32> undef, i32 [[MUL13]], i32 0 | ||
; CHECK-NEXT: [[DIV17:%.*]] = sdiv i32 [[X:%.*]], 820 | ||
; CHECK-NEXT: [[SPLATINSERT18:%.*]] = insertelement <4 x i32> undef, i32 [[DIV17]], i32 0 | ||
; CHECK-NEXT: [[MUL21:%.*]] = shl nsw i32 [[U:%.*]], 2 | ||
; CHECK-NEXT: [[SPLATINSERT22:%.*]] = insertelement <4 x i32> undef, i32 [[MUL21]], i32 0 | ||
; CHECK-NEXT: [[SPLATINSERT25:%.*]] = insertelement <4 x i32> undef, i32 [[Y:%.*]], i32 0 | ||
; CHECK-NEXT: [[TMP1:%.*]] = add <4 x i32> [[SPLATINSERT25]], <i32 1, i32 undef, i32 undef, i32 undef> | ||
; CHECK-NEXT: [[TMP2:%.*]] = add <4 x i32> [[TMP1]], [[SPLATINSERT18]] | ||
; CHECK-NEXT: [[TMP3:%.*]] = add <4 x i32> [[TMP2]], [[SPLATINSERT6]] | ||
; CHECK-NEXT: [[TMP4:%.*]] = add <4 x i32> [[TMP3]], [[SPLATINSERT]] | ||
; CHECK-NEXT: [[TMP5:%.*]] = add <4 x i32> [[TMP4]], [[SPLATINSERT14]] | ||
; CHECK-NEXT: [[TMP6:%.*]] = add <4 x i32> [[TMP5]], [[SPLATINSERT2]] | ||
; CHECK-NEXT: [[TMP7:%.*]] = add <4 x i32> [[TMP6]], [[SPLATINSERT10]] | ||
; CHECK-NEXT: [[TMP8:%.*]] = add <4 x i32> [[TMP7]], [[SPLATINSERT22]] | ||
; CHECK-NEXT: [[TMP9:%.*]] = add <4 x i32> [[TMP8]], <i32 317425, i32 undef, i32 undef, i32 undef> | ||
; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <4 x i32> [[TMP9]], <4 x i32> undef, <4 x i32> zeroinitializer | ||
; CHECK-NEXT: [[ADD29:%.*]] = add <4 x i32> [[TMP10]], [[NUM:%.*]] | ||
; CHECK-NEXT: ret <4 x i32> [[ADD29]] | ||
; | ||
; Input IR. The running sum starts as %num + splat(1). | ||
%add = add <4 x i32> %num, <i32 1, i32 1, i32 1, i32 1> | ||
; Every term below repeats one pattern: compute a scalar, insert it into lane 0 | ||
; of an undef vector, splat it with an all-zero shuffle mask, and add the splat | ||
; to the running sum. | ||
%div = sdiv i32 %k, 2 | ||
%splatinsert = insertelement <4 x i32> undef, i32 %div, i32 0 | ||
%splat = shufflevector <4 x i32> %splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer | ||
%add1 = add <4 x i32> %add, %splat | ||
%mul = mul nsw i32 %p, 6234 | ||
%splatinsert2 = insertelement <4 x i32> undef, i32 %mul, i32 0 | ||
%splat3 = shufflevector <4 x i32> %splatinsert2, <4 x i32> undef, <4 x i32> zeroinitializer | ||
%add4 = add <4 x i32> %add1, %splat3 | ||
%mul5 = mul nsw i32 75, %h | ||
%splatinsert6 = insertelement <4 x i32> undef, i32 %mul5, i32 0 | ||
%splat7 = shufflevector <4 x i32> %splatinsert6, <4 x i32> undef, <4 x i32> zeroinitializer | ||
%add8 = add <4 x i32> %add4, %splat7 | ||
%div9 = sdiv i32 %j, 3452 | ||
%splatinsert10 = insertelement <4 x i32> undef, i32 %div9, i32 0 | ||
%splat11 = shufflevector <4 x i32> %splatinsert10, <4 x i32> undef, <4 x i32> zeroinitializer | ||
%add12 = add <4 x i32> %add8, %splat11 | ||
%mul13 = mul nsw i32 53, %w | ||
%splatinsert14 = insertelement <4 x i32> undef, i32 %mul13, i32 0 | ||
%splat15 = shufflevector <4 x i32> %splatinsert14, <4 x i32> undef, <4 x i32> zeroinitializer | ||
%add16 = add <4 x i32> %add12, %splat15 | ||
%div17 = sdiv i32 %x, 820 | ||
%splatinsert18 = insertelement <4 x i32> undef, i32 %div17, i32 0 | ||
%splat19 = shufflevector <4 x i32> %splatinsert18, <4 x i32> undef, <4 x i32> zeroinitializer | ||
%add20 = add <4 x i32> %add16, %splat19 | ||
; Multiply by 4; the expected output above shows this as a 'shl nsw' by 2. | ||
%mul21 = mul nsw i32 4, %u | ||
%splatinsert22 = insertelement <4 x i32> undef, i32 %mul21, i32 0 | ||
%splat23 = shufflevector <4 x i32> %splatinsert22, <4 x i32> undef, <4 x i32> zeroinitializer | ||
%add24 = add <4 x i32> %add20, %splat23 | ||
%splatinsert25 = insertelement <4 x i32> undef, i32 %y, i32 0 | ||
%splat26 = shufflevector <4 x i32> %splatinsert25, <4 x i32> undef, <4 x i32> zeroinitializer | ||
%add27 = add <4 x i32> %add24, %splat26 | ||
; Two separate constant splats; the expected output shows them combined into a | ||
; single constant (25 + 317400 = 317425). | ||
%add28 = add <4 x i32> %add27, <i32 25, i32 25, i32 25, i32 25> | ||
%add29 = add <4 x i32> %add28, <i32 317400, i32 317400, i32 317400, i32 317400> | ||
ret <4 x i32> %add29 | ||
} | ||
|
Oops, something went wrong.