forked from llvm/llvm-project
[Transforms][SLP] Added bytecast support
This commit adds bytecast support to the SLP vectorizer. Previously, a sequence of bytecasts could not be combined into a single SIMD bundle, which led to poor vectorization, if any vectorization happened at all. Now, bytecasts can also be combined together. Currently, we model the cost of vectorizing a bytecast as 0, since a byte-to-integer conversion is nothing but a no-op. However, for some cases (e.g. conversion to a pointer) we may want something better.

Note: the cost of a bytecast is encoded directly in SLPVectorizer. The main reason for this is that bytecast is not a member of ISD, and hence cannot be used in TLI.

Moreover, this commit adds a pattern match for bytecasts. It is needed for load combining, where we look for the

```
zext(bytecast(load))
```

sequence.
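To illustrate the kind of bundle this enables, here is a minimal sketch of the `zext(bytecast(load))` chain in scalar and vectorized form. The `b8` type and the `bytecast` instruction come from this patch series; the value names and the 2-wide vector factor are chosen purely for the example.

```
; Scalar form: adjacent byte loads, each bytecast to i8 and then widened.
%p1 = getelementptr inbounds b8, b8* %p0, i64 1
%b0 = load b8, b8* %p0, align 1
%b1 = load b8, b8* %p1, align 1
%i0 = bytecast b8 %b0 to i8
%i1 = bytecast b8 %b1 to i8
%x0 = zext i8 %i0 to i16
%x1 = zext i8 %i1 to i16

; Vectorized form: the bytecasts are bundled like any other cast (modelled at
; cost 0), so the whole chain stays a single vector sequence.
%vp = bitcast b8* %p0 to <2 x b8>*
%vb = load <2 x b8>, <2 x b8>* %vp, align 1
%vi = bytecast <2 x b8> %vb to <2 x i8>
%vx = zext <2 x i8> %vi to <2 x i16>
```

The X86 tests below exercise this shape directly, while the AArch64 test covers the case where the same chain feeds stores and must not be treated as a load combining candidate.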
1 parent c1a3c4d · commit 165cc34
Showing 4 changed files with 301 additions and 1 deletion.
170 changes: 170 additions & 0 deletions in llvm/test/Transforms/SLPVectorizer/AArch64/widen-bytecast.ll
@@ -0,0 +1,170 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -slp-vectorizer -S < %s | FileCheck %s

target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
target triple = "aarch64"

; This should not be matched as a load combining candidate.
; There are no 'or' operations, so it can't be a bswap or
; other pattern that we are expecting the backend to handle.

define void @PR50256_with_byte(b8* %a, i16* %c, i32 %n) {
; CHECK-LABEL: @PR50256_with_byte(
; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds b8, b8* [[A:%.*]], i64 1
; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds b8, b8* [[A]], i64 2
; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds b8, b8* [[A]], i64 3
; CHECK-NEXT: [[ARRAYIDX_4:%.*]] = getelementptr inbounds b8, b8* [[A]], i64 4
; CHECK-NEXT: [[ARRAYIDX_5:%.*]] = getelementptr inbounds b8, b8* [[A]], i64 5
; CHECK-NEXT: [[ARRAYIDX_7:%.*]] = getelementptr inbounds b8, b8* [[A]], i64 7
; CHECK-NEXT: [[ARRAYIDX_6:%.*]] = getelementptr inbounds b8, b8* [[A]], i64 6
; CHECK-NEXT: [[ARRAYIDX_8:%.*]] = getelementptr inbounds b8, b8* [[A]], i64 8
; CHECK-NEXT: [[ARRAYIDX_9:%.*]] = getelementptr inbounds b8, b8* [[A]], i64 9
; CHECK-NEXT: [[ARRAYIDX_10:%.*]] = getelementptr inbounds b8, b8* [[A]], i64 10
; CHECK-NEXT: [[ARRAYIDX_11:%.*]] = getelementptr inbounds b8, b8* [[A]], i64 11
; CHECK-NEXT: [[ARRAYIDX_12:%.*]] = getelementptr inbounds b8, b8* [[A]], i64 12
; CHECK-NEXT: [[ARRAYIDX_13:%.*]] = getelementptr inbounds b8, b8* [[A]], i64 13
; CHECK-NEXT: [[ARRAYIDX_14:%.*]] = getelementptr inbounds b8, b8* [[A]], i64 14
; CHECK-NEXT: [[ARRAYIDX_15:%.*]] = getelementptr inbounds b8, b8* [[A]], i64 15
; CHECK-NEXT: [[TMP1:%.*]] = bitcast b8* [[A]] to <8 x b8>*
; CHECK-NEXT: [[TMP2:%.*]] = load <8 x b8>, <8 x b8>* [[TMP1]], align 1
; CHECK-NEXT: [[TMP3:%.*]] = bitcast b8* [[ARRAYIDX_8]] to <8 x b8>*
; CHECK-NEXT: [[TMP4:%.*]] = load <8 x b8>, <8 x b8>* [[TMP3]], align 1
; CHECK-NEXT: [[TMP5:%.*]] = bytecast <8 x b8> [[TMP2]] to <8 x i8>
; CHECK-NEXT: [[TMP6:%.*]] = bytecast <8 x b8> [[TMP4]] to <8 x i8>
; CHECK-NEXT: [[TMP7:%.*]] = zext <8 x i8> [[TMP5]] to <8 x i16>
; CHECK-NEXT: [[TMP8:%.*]] = zext <8 x i8> [[TMP6]] to <8 x i16>
; CHECK-NEXT: [[TMP9:%.*]] = shl nuw <8 x i16> [[TMP7]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
; CHECK-NEXT: [[TMP10:%.*]] = shl nuw <8 x i16> [[TMP8]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
; CHECK-NEXT: [[ARRAYIDX3_1:%.*]] = getelementptr inbounds i16, i16* [[C:%.*]], i64 1
; CHECK-NEXT: [[ARRAYIDX3_2:%.*]] = getelementptr inbounds i16, i16* [[C]], i64 2
; CHECK-NEXT: [[ARRAYIDX3_3:%.*]] = getelementptr inbounds i16, i16* [[C]], i64 3
; CHECK-NEXT: [[ARRAYIDX3_4:%.*]] = getelementptr inbounds i16, i16* [[C]], i64 4
; CHECK-NEXT: [[ARRAYIDX3_5:%.*]] = getelementptr inbounds i16, i16* [[C]], i64 5
; CHECK-NEXT: [[ARRAYIDX3_6:%.*]] = getelementptr inbounds i16, i16* [[C]], i64 6
; CHECK-NEXT: [[ARRAYIDX3_7:%.*]] = getelementptr inbounds i16, i16* [[C]], i64 7
; CHECK-NEXT: [[ARRAYIDX3_8:%.*]] = getelementptr inbounds i16, i16* [[C]], i64 8
; CHECK-NEXT: [[ARRAYIDX3_9:%.*]] = getelementptr inbounds i16, i16* [[C]], i64 9
; CHECK-NEXT: [[ARRAYIDX3_10:%.*]] = getelementptr inbounds i16, i16* [[C]], i64 10
; CHECK-NEXT: [[ARRAYIDX3_11:%.*]] = getelementptr inbounds i16, i16* [[C]], i64 11
; CHECK-NEXT: [[ARRAYIDX3_12:%.*]] = getelementptr inbounds i16, i16* [[C]], i64 12
; CHECK-NEXT: [[ARRAYIDX3_13:%.*]] = getelementptr inbounds i16, i16* [[C]], i64 13
; CHECK-NEXT: [[ARRAYIDX3_14:%.*]] = getelementptr inbounds i16, i16* [[C]], i64 14
; CHECK-NEXT: [[ARRAYIDX3_15:%.*]] = getelementptr inbounds i16, i16* [[C]], i64 15
; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16* [[C]] to <8 x i16>*
; CHECK-NEXT: store <8 x i16> [[TMP9]], <8 x i16>* [[TMP11]], align 2
; CHECK-NEXT: [[TMP12:%.*]] = bitcast i16* [[ARRAYIDX3_8]] to <8 x i16>*
; CHECK-NEXT: store <8 x i16> [[TMP10]], <8 x i16>* [[TMP12]], align 2
; CHECK-NEXT: ret void
;
  %arrayidx.1 = getelementptr inbounds b8, b8* %a, i64 1
  %arrayidx.2 = getelementptr inbounds b8, b8* %a, i64 2
  %arrayidx.3 = getelementptr inbounds b8, b8* %a, i64 3
  %arrayidx.4 = getelementptr inbounds b8, b8* %a, i64 4
  %arrayidx.5 = getelementptr inbounds b8, b8* %a, i64 5
  %arrayidx.7 = getelementptr inbounds b8, b8* %a, i64 7
  %arrayidx.6 = getelementptr inbounds b8, b8* %a, i64 6
  %arrayidx.8 = getelementptr inbounds b8, b8* %a, i64 8
  %arrayidx.9 = getelementptr inbounds b8, b8* %a, i64 9
  %arrayidx.10 = getelementptr inbounds b8, b8* %a, i64 10
  %arrayidx.11 = getelementptr inbounds b8, b8* %a, i64 11
  %arrayidx.12 = getelementptr inbounds b8, b8* %a, i64 12
  %arrayidx.13 = getelementptr inbounds b8, b8* %a, i64 13
  %arrayidx.14 = getelementptr inbounds b8, b8* %a, i64 14
  %arrayidx.15 = getelementptr inbounds b8, b8* %a, i64 15
  %b = load b8, b8* %a, align 1
  %b1 = load b8, b8* %arrayidx.1, align 1
  %b2 = load b8, b8* %arrayidx.2, align 1
  %b3 = load b8, b8* %arrayidx.3, align 1
  %b4 = load b8, b8* %arrayidx.4, align 1
  %b5 = load b8, b8* %arrayidx.5, align 1
  %b6 = load b8, b8* %arrayidx.6, align 1
  %b7 = load b8, b8* %arrayidx.7, align 1
  %b8 = load b8, b8* %arrayidx.8, align 1
  %b9 = load b8, b8* %arrayidx.9, align 1
  %b10 = load b8, b8* %arrayidx.10, align 1
  %b11 = load b8, b8* %arrayidx.11, align 1
  %b12 = load b8, b8* %arrayidx.12, align 1
  %b13 = load b8, b8* %arrayidx.13, align 1
  %b14 = load b8, b8* %arrayidx.14, align 1
  %b15 = load b8, b8* %arrayidx.15, align 1
  %i = bytecast b8 %b to i8
  %i1 = bytecast b8 %b1 to i8
  %i2 = bytecast b8 %b2 to i8
  %i3 = bytecast b8 %b3 to i8
  %i4 = bytecast b8 %b4 to i8
  %i5 = bytecast b8 %b5 to i8
  %i6 = bytecast b8 %b6 to i8
  %i7 = bytecast b8 %b7 to i8
  %i8 = bytecast b8 %b8 to i8
  %i9 = bytecast b8 %b9 to i8
  %i10 = bytecast b8 %b10 to i8
  %i11 = bytecast b8 %b11 to i8
  %i12 = bytecast b8 %b12 to i8
  %i13 = bytecast b8 %b13 to i8
  %i14 = bytecast b8 %b14 to i8
  %i15 = bytecast b8 %b15 to i8
  %conv5 = zext i8 %i to i16
  %conv5.1 = zext i8 %i1 to i16
  %conv5.2 = zext i8 %i2 to i16
  %conv5.3 = zext i8 %i3 to i16
  %conv5.4 = zext i8 %i4 to i16
  %conv5.5 = zext i8 %i5 to i16
  %conv5.6 = zext i8 %i6 to i16
  %conv5.7 = zext i8 %i7 to i16
  %conv5.8 = zext i8 %i8 to i16
  %conv5.9 = zext i8 %i9 to i16
  %conv5.10 = zext i8 %i10 to i16
  %conv5.11 = zext i8 %i11 to i16
  %conv5.12 = zext i8 %i12 to i16
  %conv5.13 = zext i8 %i13 to i16
  %conv5.14 = zext i8 %i14 to i16
  %conv5.15 = zext i8 %i15 to i16
  %shl = shl nuw i16 %conv5, 8
  %shl.1 = shl nuw i16 %conv5.1, 8
  %shl.2 = shl nuw i16 %conv5.2, 8
  %shl.3 = shl nuw i16 %conv5.3, 8
  %shl.4 = shl nuw i16 %conv5.4, 8
  %shl.5 = shl nuw i16 %conv5.5, 8
  %shl.6 = shl nuw i16 %conv5.6, 8
  %shl.7 = shl nuw i16 %conv5.7, 8
  %shl.8 = shl nuw i16 %conv5.8, 8
  %shl.9 = shl nuw i16 %conv5.9, 8
  %shl.10 = shl nuw i16 %conv5.10, 8
  %shl.11 = shl nuw i16 %conv5.11, 8
  %shl.12 = shl nuw i16 %conv5.12, 8
  %shl.13 = shl nuw i16 %conv5.13, 8
  %shl.14 = shl nuw i16 %conv5.14, 8
  %shl.15 = shl nuw i16 %conv5.15, 8
  %arrayidx3.1 = getelementptr inbounds i16, i16* %c, i64 1
  %arrayidx3.2 = getelementptr inbounds i16, i16* %c, i64 2
  %arrayidx3.3 = getelementptr inbounds i16, i16* %c, i64 3
  %arrayidx3.4 = getelementptr inbounds i16, i16* %c, i64 4
  %arrayidx3.5 = getelementptr inbounds i16, i16* %c, i64 5
  %arrayidx3.6 = getelementptr inbounds i16, i16* %c, i64 6
  %arrayidx3.7 = getelementptr inbounds i16, i16* %c, i64 7
  %arrayidx3.8 = getelementptr inbounds i16, i16* %c, i64 8
  %arrayidx3.9 = getelementptr inbounds i16, i16* %c, i64 9
  %arrayidx3.10 = getelementptr inbounds i16, i16* %c, i64 10
  %arrayidx3.11 = getelementptr inbounds i16, i16* %c, i64 11
  %arrayidx3.12 = getelementptr inbounds i16, i16* %c, i64 12
  %arrayidx3.13 = getelementptr inbounds i16, i16* %c, i64 13
  %arrayidx3.14 = getelementptr inbounds i16, i16* %c, i64 14
  %arrayidx3.15 = getelementptr inbounds i16, i16* %c, i64 15
  store i16 %shl, i16* %c, align 2
  store i16 %shl.1, i16* %arrayidx3.1, align 2
  store i16 %shl.2, i16* %arrayidx3.2, align 2
  store i16 %shl.3, i16* %arrayidx3.3, align 2
  store i16 %shl.4, i16* %arrayidx3.4, align 2
  store i16 %shl.5, i16* %arrayidx3.5, align 2
  store i16 %shl.6, i16* %arrayidx3.6, align 2
  store i16 %shl.7, i16* %arrayidx3.7, align 2
  store i16 %shl.8, i16* %arrayidx3.8, align 2
  store i16 %shl.9, i16* %arrayidx3.9, align 2
  store i16 %shl.10, i16* %arrayidx3.10, align 2
  store i16 %shl.11, i16* %arrayidx3.11, align 2
  store i16 %shl.12, i16* %arrayidx3.12, align 2
  store i16 %shl.13, i16* %arrayidx3.13, align 2
  store i16 %shl.14, i16* %arrayidx3.14, align 2
  store i16 %shl.15, i16* %arrayidx3.15, align 2
  ret void
}
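For contrast with the comment in the AArch64 test above: a sequence that would qualify for load combining merges the widened bytes with 'or' operations, which is the shape the backend is expected to turn into a wide load or a bswap. A minimal sketch, with purely illustrative value names:

```
%z0 = zext i8 %lo to i16
%z1 = zext i8 %hi to i16
%s1 = shl nuw i16 %z1, 8
%or = or i16 %s1, %z0    ; the 'or' is what makes this a load combining candidate
```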
@@ -0,0 +1,118 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -mtriple=x86_64-unknown -basic-aa -slp-vectorizer -S | FileCheck %s --check-prefixes=SSE2
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=slm -basic-aa -slp-vectorizer -S | FileCheck %s --check-prefixes=SLM
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -basic-aa -slp-vectorizer -S | FileCheck %s --check-prefixes=AVX
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -basic-aa -slp-vectorizer -S | FileCheck %s --check-prefixes=AVX
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=knl -basic-aa -slp-vectorizer -S | FileCheck %s --check-prefixes=AVX
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -mattr=+avx512bw -basic-aa -slp-vectorizer -S | FileCheck %s --check-prefixes=AVX

define <2 x i64> @load_bytecast_ext(b8* %p0) {
; SSE2-LABEL: @load_bytecast_ext(
; SSE2-NEXT: [[P1:%.*]] = getelementptr inbounds b8, b8* [[P0:%.*]], i64 1
; SSE2-NEXT: [[TMP1:%.*]] = bitcast b8* [[P0]] to <2 x b8>*
; SSE2-NEXT: [[TMP2:%.*]] = load <2 x b8>, <2 x b8>* [[TMP1]], align 1
; SSE2-NEXT: [[TMP3:%.*]] = bytecast <2 x b8> [[TMP2]] to <2 x i8>
; SSE2-NEXT: [[TMP4:%.*]] = zext <2 x i8> [[TMP3]] to <2 x i64>
; SSE2-NEXT: ret <2 x i64> [[TMP4]]
;
; SLM-LABEL: @load_bytecast_ext(
; SLM-NEXT: [[P1:%.*]] = getelementptr inbounds b8, b8* [[P0:%.*]], i64 1
; SLM-NEXT: [[TMP1:%.*]] = bitcast b8* [[P0]] to <2 x b8>*
; SLM-NEXT: [[TMP2:%.*]] = load <2 x b8>, <2 x b8>* [[TMP1]], align 1
; SLM-NEXT: [[TMP3:%.*]] = bytecast <2 x b8> [[TMP2]] to <2 x i8>
; SLM-NEXT: [[TMP4:%.*]] = zext <2 x i8> [[TMP3]] to <2 x i64>
; SLM-NEXT: ret <2 x i64> [[TMP4]]
;
; AVX-LABEL: @load_bytecast_ext(
; AVX-NEXT: [[P1:%.*]] = getelementptr inbounds b8, b8* [[P0:%.*]], i64 1
; AVX-NEXT: [[TMP1:%.*]] = bitcast b8* [[P0]] to <2 x b8>*
; AVX-NEXT: [[TMP2:%.*]] = load <2 x b8>, <2 x b8>* [[TMP1]], align 1
; AVX-NEXT: [[TMP3:%.*]] = bytecast <2 x b8> [[TMP2]] to <2 x i8>
; AVX-NEXT: [[TMP4:%.*]] = zext <2 x i8> [[TMP3]] to <2 x i64>
; AVX-NEXT: ret <2 x i64> [[TMP4]]
;
  %p1 = getelementptr inbounds b8, b8* %p0, i64 1
  %b0 = load b8, b8* %p0, align 1
  %b1 = load b8, b8* %p1, align 1
  %i0 = bytecast b8 %b0 to i8
  %i1 = bytecast b8 %b1 to i8
  %x0 = zext i8 %i0 to i64
  %x1 = zext i8 %i1 to i64
  %v0 = insertelement <2 x i64> poison, i64 %x0, i32 0
  %v1 = insertelement <2 x i64> %v0, i64 %x1, i32 1
  ret <2 x i64> %v1
}

define <8 x i8> @load_bytecast(b8* %p0) {
; SSE2-LABEL: @load_bytecast(
; SSE2-NEXT: [[P1:%.*]] = getelementptr inbounds b8, b8* [[P0:%.*]], i64 1
; SSE2-NEXT: [[P2:%.*]] = getelementptr inbounds b8, b8* [[P0]], i64 2
; SSE2-NEXT: [[P3:%.*]] = getelementptr inbounds b8, b8* [[P0]], i64 3
; SSE2-NEXT: [[P4:%.*]] = getelementptr inbounds b8, b8* [[P0]], i64 4
; SSE2-NEXT: [[P5:%.*]] = getelementptr inbounds b8, b8* [[P0]], i64 5
; SSE2-NEXT: [[P6:%.*]] = getelementptr inbounds b8, b8* [[P0]], i64 6
; SSE2-NEXT: [[P7:%.*]] = getelementptr inbounds b8, b8* [[P0]], i64 7
; SSE2-NEXT: [[TMP1:%.*]] = bitcast b8* [[P0]] to <8 x b8>*
; SSE2-NEXT: [[TMP2:%.*]] = load <8 x b8>, <8 x b8>* [[TMP1]], align 1
; SSE2-NEXT: [[TMP3:%.*]] = bytecast <8 x b8> [[TMP2]] to <8 x i8>
; SSE2-NEXT: ret <8 x i8> [[TMP3]]
;
; SLM-LABEL: @load_bytecast(
; SLM-NEXT: [[P1:%.*]] = getelementptr inbounds b8, b8* [[P0:%.*]], i64 1
; SLM-NEXT: [[P2:%.*]] = getelementptr inbounds b8, b8* [[P0]], i64 2
; SLM-NEXT: [[P3:%.*]] = getelementptr inbounds b8, b8* [[P0]], i64 3
; SLM-NEXT: [[P4:%.*]] = getelementptr inbounds b8, b8* [[P0]], i64 4
; SLM-NEXT: [[P5:%.*]] = getelementptr inbounds b8, b8* [[P0]], i64 5
; SLM-NEXT: [[P6:%.*]] = getelementptr inbounds b8, b8* [[P0]], i64 6
; SLM-NEXT: [[P7:%.*]] = getelementptr inbounds b8, b8* [[P0]], i64 7
; SLM-NEXT: [[TMP1:%.*]] = bitcast b8* [[P0]] to <8 x b8>*
; SLM-NEXT: [[TMP2:%.*]] = load <8 x b8>, <8 x b8>* [[TMP1]], align 1
; SLM-NEXT: [[TMP3:%.*]] = bytecast <8 x b8> [[TMP2]] to <8 x i8>
; SLM-NEXT: ret <8 x i8> [[TMP3]]
;
; AVX-LABEL: @load_bytecast(
; AVX-NEXT: [[P1:%.*]] = getelementptr inbounds b8, b8* [[P0:%.*]], i64 1
; AVX-NEXT: [[P2:%.*]] = getelementptr inbounds b8, b8* [[P0]], i64 2
; AVX-NEXT: [[P3:%.*]] = getelementptr inbounds b8, b8* [[P0]], i64 3
; AVX-NEXT: [[P4:%.*]] = getelementptr inbounds b8, b8* [[P0]], i64 4
; AVX-NEXT: [[P5:%.*]] = getelementptr inbounds b8, b8* [[P0]], i64 5
; AVX-NEXT: [[P6:%.*]] = getelementptr inbounds b8, b8* [[P0]], i64 6
; AVX-NEXT: [[P7:%.*]] = getelementptr inbounds b8, b8* [[P0]], i64 7
; AVX-NEXT: [[TMP1:%.*]] = bitcast b8* [[P0]] to <8 x b8>*
; AVX-NEXT: [[TMP2:%.*]] = load <8 x b8>, <8 x b8>* [[TMP1]], align 1
; AVX-NEXT: [[TMP3:%.*]] = bytecast <8 x b8> [[TMP2]] to <8 x i8>
; AVX-NEXT: ret <8 x i8> [[TMP3]]
;
  %p1 = getelementptr inbounds b8, b8* %p0, i64 1
  %p2 = getelementptr inbounds b8, b8* %p0, i64 2
  %p3 = getelementptr inbounds b8, b8* %p0, i64 3
  %p4 = getelementptr inbounds b8, b8* %p0, i64 4
  %p5 = getelementptr inbounds b8, b8* %p0, i64 5
  %p6 = getelementptr inbounds b8, b8* %p0, i64 6
  %p7 = getelementptr inbounds b8, b8* %p0, i64 7
  %b0 = load b8, b8* %p0, align 1
  %b1 = load b8, b8* %p1, align 1
  %b2 = load b8, b8* %p2, align 1
  %b3 = load b8, b8* %p3, align 1
  %b4 = load b8, b8* %p4, align 1
  %b5 = load b8, b8* %p5, align 1
  %b6 = load b8, b8* %p6, align 1
  %b7 = load b8, b8* %p7, align 1
  %x0 = bytecast b8 %b0 to i8
  %x1 = bytecast b8 %b1 to i8
  %x2 = bytecast b8 %b2 to i8
  %x3 = bytecast b8 %b3 to i8
  %x4 = bytecast b8 %b4 to i8
  %x5 = bytecast b8 %b5 to i8
  %x6 = bytecast b8 %b6 to i8
  %x7 = bytecast b8 %b7 to i8
  %v0 = insertelement <8 x i8> poison, i8 %x0, i32 0
  %v1 = insertelement <8 x i8> %v0, i8 %x1, i32 1
  %v2 = insertelement <8 x i8> %v1, i8 %x2, i32 2
  %v3 = insertelement <8 x i8> %v2, i8 %x3, i32 3
  %v4 = insertelement <8 x i8> %v3, i8 %x4, i32 4
  %v5 = insertelement <8 x i8> %v4, i8 %x5, i32 5
  %v6 = insertelement <8 x i8> %v5, i8 %x6, i32 6
  %v7 = insertelement <8 x i8> %v6, i8 %x7, i32 7
  ret <8 x i8> %v7
}