Skip to content


[Polly] Fix prevectorization of fused loops.
Browse files Browse the repository at this point in the history
The schedule of a fused loop has one isl_space per statement, so
converting it to an isl_map fails. However, the prevectorization is
interested in the schedule space only: converting to the non-union
representation only after extracting the schedule range fixes the problem.

This fixes
  • Loading branch information
Meinersbur committed Jul 10, 2020
1 parent 32bf468 commit c0bc995
Show file tree
Hide file tree
Showing 2 changed files with 96 additions and 2 deletions.
4 changes: 2 additions & 2 deletions polly/lib/Transform/ScheduleOptimizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -385,8 +385,8 @@ ScheduleTreeOptimizer::isolateFullPartialTiles(isl::schedule_node Node,
assert(isl_schedule_node_get_type(Node.get()) == isl_schedule_node_band);
Node = Node.child(0).child(0);
isl::union_map SchedRelUMap = Node.get_prefix_schedule_relation();
isl::map ScheduleRelation = isl::map::from_union_map(SchedRelUMap);
isl::set ScheduleRange = ScheduleRelation.range();
isl::union_set ScheduleRangeUSet = SchedRelUMap.range();
isl::set ScheduleRange{ScheduleRangeUSet};
isl::set IsolateDomain = getPartialTilePrefixes(ScheduleRange, VectorWidth);
auto AtomicOption = getDimOptions(IsolateDomain.get_ctx(), "atomic");
isl::union_set IsolateOption = getIsolateOptions(IsolateDomain, 1);
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
; RUN: opt %loadPolly -polly-opt-isl -polly-opt-fusion=max -polly-vectorizer=stripmine -polly-invariant-load-hoisting -polly-optimized-scops -analyze < %s | FileCheck %s
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"

%struct.stCfg_Incell_DetailThreshold. = type { [60 x i8]*, [60 x i32]*, [60 x i32]*, [60 x i32]*, [60 x i32]*, [60 x i32]*, [60 x i32]* }
@ft8006m_g_stCfg_Incell_DetailThreshold = external dso_local local_unnamed_addr global %struct.stCfg_Incell_DetailThreshold., align 8
declare dso_local i32 @ft8006m_atoi() local_unnamed_addr #0

; Reduced test input: an outer loop (for.cond) that runs while its induction
; variable is not equal to 60, containing two consecutive inner loops
; (for.cond21 and for.cond28) with the same exit bound of 60. Each inner loop
; body stores into a [60 x i32] array whose base pointer is loaded from field
; index 2 of @ft8006m_g_stCfg_Incell_DetailThreshold.
; NOTE(review): this copy appears to have lost the `entry:` label (the preds
; comments below reference %entry), the closing `]` of the switch case list,
; and the function's closing `}` - confirm against the original test file.
define void @func() {
; Cases 10, 14 and 16 all branch to %if.end; every other value goes to %cleanup.
switch i32 undef, label %cleanup [
i32 10, label %if.end
i32 14, label %if.end
i32 16, label %if.end

if.end: ; preds = %entry, %entry, %entry
; The call result is zero-extended to i64 and later used as the row index in
; for.body30, so it is not tied to any loop induction variable.
%call15 = call i32 @ft8006m_atoi() #1
%0 = zext i32 %call15 to i64
br label %for.cond

; Outer loop header: %indvars.iv302 starts at 0 and exits to %cleanup at 60.
for.cond: ; preds = %for.inc39, %if.end
%indvars.iv302 = phi i64 [ %indvars.iv.next303, %for.inc39 ], [ 0, %if.end ]
%exitcond304 = icmp eq i64 %indvars.iv302, 60
br i1 %exitcond304, label %cleanup, label %for.cond21

; First inner loop header: %indvars.iv296 starts at 0; at 60 control moves on
; to the second inner loop.
for.cond21: ; preds = %for.body23, %for.cond
%indvars.iv296 = phi i64 [ %indvars.iv.next297, %for.body23 ], [ 0, %for.cond ]
%exitcond298 = icmp eq i64 %indvars.iv296, 60
br i1 %exitcond298, label %for.cond28, label %for.body23

; First inner loop body: stores to element [outer iv][inner iv].
for.body23: ; preds = %for.cond21
%1 = load [60 x i32]*, [60 x i32]** getelementptr inbounds (%struct.stCfg_Incell_DetailThreshold., %struct.stCfg_Incell_DetailThreshold.* @ft8006m_g_stCfg_Incell_DetailThreshold, i64 0, i32 2), align 8
%arrayidx25 = getelementptr [60 x i32], [60 x i32]* %1, i64 %indvars.iv302, i64 %indvars.iv296
store i32 undef, i32* %arrayidx25, align 4
%indvars.iv.next297 = add nuw nsw i64 %indvars.iv296, 1
br label %for.cond21

; Second inner loop header: %indvars.iv299 starts at 0; at 60 control moves to
; the outer-loop increment block.
for.cond28: ; preds = %for.body30, %for.cond21
%indvars.iv299 = phi i64 [ %indvars.iv.next300, %for.body30 ], [ 0, %for.cond21 ]
%exitcond301 = icmp eq i64 %indvars.iv299, 60
br i1 %exitcond301, label %for.inc39, label %for.body30

; Second inner loop body: stores to element [%0][inner iv], where %0 is the
; zero-extended call result from if.end rather than the outer induction variable.
for.body30: ; preds = %for.cond28
%2 = load [60 x i32]*, [60 x i32]** getelementptr inbounds (%struct.stCfg_Incell_DetailThreshold., %struct.stCfg_Incell_DetailThreshold.* @ft8006m_g_stCfg_Incell_DetailThreshold, i64 0, i32 2), align 8
%arrayidx34 = getelementptr [60 x i32], [60 x i32]* %2, i64 %0, i64 %indvars.iv299
store i32 undef, i32* %arrayidx34, align 4
%indvars.iv.next300 = add nuw nsw i64 %indvars.iv299, 1
br label %for.cond28

; Outer loop latch: increments the outer induction variable.
for.inc39: ; preds = %for.cond28
%indvars.iv.next303 = add nuw nsw i64 %indvars.iv302, 1
br label %for.cond

; Common exit, reached from the switch default and from the outer loop exit.
cleanup: ; preds = %for.cond, %entry
ret void

; CHECK-LABEL: Printing analysis 'Polly - Optimize schedule of SCoP' for region: 'for.cond => cleanup' in function 'func':
; CHECK: Calculated schedule:
; CHECK: domain: "[call15] -> { Stmt_for_body23[i0, i1] : 0 <= i0 <= 59 and 0 <= i1 <= 59; Stmt_for_body30[i0, i1] : 0 <= i0 <= 59 and 0 <= i1 <= 59 }"
; CHECK: child:
; CHECK: mark: "1st level tiling - Tiles"
; CHECK: child:
; CHECK: schedule: "[call15] -> [{ Stmt_for_body23[i0, i1] -> [(floor((i0 + i1)/32))]; Stmt_for_body30[i0, i1] -> [(floor((call15 + i1)/32))] }, { Stmt_for_body23[i0, i1] -> [(floor((i0)/32))]; Stmt_for_body30[i0, i1] -> [(floor((i0)/32))] }]"
; CHECK: permutable: 1
; CHECK: coincident: [ 1, 0 ]
; CHECK: child:
; CHECK: mark: "1st level tiling - Points"
; CHECK: child:
; CHECK: schedule: "[call15] -> [{ Stmt_for_body23[i0, i1] -> [(floor((i0 + i1)/4) - 8*floor((i0 + i1)/32))]; Stmt_for_body30[i0, i1] -> [(floor((call15 + i1)/4) - 8*floor((call15 + i1)/32))] }]"
; CHECK: permutable: 1
; CHECK: coincident: [ 1 ]
; CHECK: options: "[call15] -> { atomic[0]; isolate{{\[\[}}i0, i1] -> [i2]] : 0 <= i1 <= 1 and 0 <= i2 <= 7 and call15 - 32i0 <= 4i2 <= 56 + call15 - 32i0 and (call15 >= 120 or i2 < -8i0 + 8i1); isolate{{\[\[}}i0, 0] -> [i2]] : 0 <= i2 <= 7 and ((call15 >= 120 and -8i0 <= i2 <= 21 - 8i0) or (call15 >= 92 and i2 <= 28 - 8i0 and 4i2 >= call15 - 32i0) or (call15 <= 91 and -8i0 <= i2 <= 21 - 8i0) or (92 <= call15 <= 119 and -8i0 <= i2 <= 21 - 8i0) or (call15 <= 91 and 22 - 8i0 <= i2 <= 28 - 8i0 and 4i2 <= 56 + call15 - 32i0) or (call15 <= 119 and i2 >= 29 - 8i0 and call15 - 32i0 <= 4i2 <= 56 + call15 - 32i0)); isolate{{\[\[}}i0, 1] -> [i2]] : i2 >= 0 and 8 - 8i0 <= i2 <= 7 and ((call15 >= 120 and i2 <= 28 - 8i0) or (call15 <= 119 and i2 >= 29 - 8i0 and 4i2 <= 56 + call15 - 32i0) or (call15 <= 119 and i2 <= 28 - 8i0)) }"
; CHECK: child:
; CHECK: schedule: "[call15] -> [{ Stmt_for_body23[i0, i1] -> [((i0) mod 32)]; Stmt_for_body30[i0, i1] -> [((i0) mod 32)] }]"
; CHECK: permutable: 1
; CHECK: child:
; CHECK: mark: "SIMD"
; CHECK: child:
; CHECK: sequence:
; CHECK: - filter: "[call15] -> { Stmt_for_body23[i0, i1] }"
; CHECK: child:
; CHECK: schedule: "[call15] -> [{ Stmt_for_body23[i0, i1] -> [((i0 + i1) mod 4)]; Stmt_for_body30[i0, i1] -> [((call15 + i1) mod 4)] }]"
; CHECK: permutable: 1
; CHECK: coincident: [ 1 ]
; CHECK: - filter: "[call15] -> { Stmt_for_body30[i0, i1] }"
; CHECK: child:
; CHECK: schedule: "[call15] -> [{ Stmt_for_body23[i0, i1] -> [((i0 + i1) mod 4)]; Stmt_for_body30[i0, i1] -> [((call15 + i1) mod 4)] }]"
; CHECK: permutable: 1
; CHECK: coincident: [ 1 ]

0 comments on commit c0bc995

Please sign in to comment.