Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 11 additions & 1 deletion llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1386,7 +1386,8 @@ static void narrowToSingleScalarRecipes(VPlan &Plan) {
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
vp_depth_first_shallow(Plan.getVectorLoopRegion()->getEntry()))) {
for (VPRecipeBase &R : make_early_inc_range(reverse(*VPBB))) {
if (!isa<VPWidenRecipe, VPWidenSelectRecipe, VPReplicateRecipe>(&R))
if (!isa<VPWidenRecipe, VPWidenCastRecipe, VPWidenSelectRecipe,
VPReplicateRecipe>(&R))
continue;
auto *RepR = dyn_cast<VPReplicateRecipe>(&R);
if (RepR && (RepR->isSingleScalar() || RepR->isPredicated()))
Expand Down Expand Up @@ -1422,6 +1423,15 @@ static void narrowToSingleScalarRecipes(VPlan &Plan) {
}))
continue;

if (auto *CastR = dyn_cast<VPWidenCastRecipe>(RepOrWidenR)) {
VPBuilder Builder(CastR);
auto *Clone = Builder.createScalarCast(
CastR->getOpcode(), CastR->getOperand(0), CastR->getResultType(),
CastR->getDebugLoc());
CastR->replaceAllUsesWith(Clone);
CastR->eraseFromParent();
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think we need this: remove deadRecipes should automatically take care of this.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

simplifyRecipes runs after this before removeDeadRecipes, which is sensitive to the number of users. So we should remove the dead recipes if we can

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We can then guard it with isDeadRecipe as it is done for the replicate below?

continue;
}
Comment on lines +1426 to +1434
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think vputils::getSingleScalarClone would be cleaner, but I'm happy to land this first. #161667.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think you dropped the flags and metadata.

auto *Clone = new VPReplicateRecipe(RepOrWidenR->getUnderlyingInstr(),
RepOrWidenR->operands(),
true /*IsSingleScalar*/);
Expand Down
4 changes: 4 additions & 0 deletions llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,10 @@ bool vputils::isSingleScalar(const VPValue *VPV) {
return PreservesUniformity(WidenR->getOpcode()) &&
all_of(WidenR->operands(), isSingleScalar);
}
if (auto *CastR = dyn_cast<VPWidenCastRecipe>(VPV)) {
return PreservesUniformity(CastR->getOpcode()) &&
all_of(CastR->operands(), isSingleScalar);
}
if (auto *VPI = dyn_cast<VPInstruction>(VPV))
return VPI->isSingleScalar() || VPI->isVectorToScalar() ||
(PreservesUniformity(VPI->getOpcode()) &&
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -492,18 +492,18 @@ define i64 @partial_reduction_mul_two_users(i64 %n, ptr %a, i16 %b, i32 %c) {
; CHECK-NEXT: [[TMP3:%.*]] = zext <8 x i32> [[TMP2]] to <8 x i64>
; CHECK-NEXT: [[PARTIAL_REDUCE]] = call <4 x i64> @llvm.vector.partial.reduce.add.v4i64.v8i64(<4 x i64> [[VEC_PHI]], <8 x i64> [[TMP3]])
; CHECK-NEXT: [[TMP5:%.*]] = sext <8 x i16> [[BROADCAST_SPLAT2]] to <8 x i32>
; CHECK-NEXT: [[TMP6:%.*]] = sext <8 x i32> [[TMP5]] to <8 x i64>
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <8 x i32> [[TMP5]], i32 0
; CHECK-NEXT: [[TMP9:%.*]] = sext i32 [[TMP6]] to i64
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> [[PARTIAL_REDUCE]])
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <8 x i64> [[TMP6]], i32 7
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; CHECK: [[SCALAR_PH]]:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ [[VECTOR_RECUR_EXTRACT]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ [[TMP9]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[TMP8]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
Expand All @@ -520,7 +520,7 @@ define i64 @partial_reduction_mul_two_users(i64 %n, ptr %a, i16 %b, i32 %c) {
; CHECK-NEXT: [[LOAD_EXT:%.*]] = sext i16 [[LOAD]] to i32
; CHECK-NEXT: [[LOAD_EXT_EXT]] = sext i32 [[LOAD_EXT]] to i64
; CHECK-NEXT: [[EXITCOND740_NOT:%.*]] = icmp eq i64 [[IV]], [[N]]
; CHECK-NEXT: br i1 [[EXITCOND740_NOT]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP19:![0-9]+]]
; CHECK-NEXT: br i1 [[EXITCOND740_NOT]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP17:![0-9]+]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i64 [ [[ADD]], %[[LOOP]] ], [ [[TMP8]], %[[MIDDLE_BLOCK]] ]
; CHECK-NEXT: ret i64 [[ADD_LCSSA]]
Expand Down
161 changes: 124 additions & 37 deletions llvm/test/Transforms/LoopVectorize/AArch64/predicated-costs.ll
Original file line number Diff line number Diff line change
Expand Up @@ -66,63 +66,150 @@ define void @test_predicated_load_cast_hint(ptr %dst.1, ptr %dst.2, ptr %src, i8
; CHECK-NEXT: [[CONFLICT_RDX15:%.*]] = or i1 [[CONFLICT_RDX]], [[FOUND_CONFLICT14]]
; CHECK-NEXT: br i1 [[CONFLICT_RDX15]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 0, i32 [[TMP2]])
; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 0, i32 [[TMP2]])
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE22:.*]] ]
; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <4 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], %[[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], %[[PRED_STORE_CONTINUE22]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i8> [ <i8 0, i8 4, i8 8, i8 12>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE22]] ]
; CHECK-NEXT: [[TMP28:%.*]] = load i8, ptr [[SRC]], align 1, !alias.scope [[META0:![0-9]+]], !noalias [[META3:![0-9]+]]
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i8> poison, i8 [[TMP28]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i8> [[BROADCAST_SPLATINSERT]], <4 x i8> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP25:%.*]] = zext <4 x i8> [[BROADCAST_SPLAT]] to <4 x i64>
; CHECK-NEXT: [[TMP26:%.*]] = zext <4 x i8> [[VEC_IND]] to <4 x i64>
; CHECK-NEXT: [[TMP27:%.*]] = extractelement <4 x i1> [[ACTIVE_LANE_MASK]], i32 0
; CHECK-NEXT: br i1 [[TMP27]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE46:.*]] ]
; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <16 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], %[[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], %[[PRED_STORE_CONTINUE46]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <16 x i8> [ <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 32, i8 36, i8 40, i8 44, i8 48, i8 52, i8 56, i8 60>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE46]] ]
; CHECK-NEXT: [[TMP25:%.*]] = load i8, ptr [[SRC]], align 1, !alias.scope [[META0:![0-9]+]], !noalias [[META3:![0-9]+]]
; CHECK-NEXT: [[TMP29:%.*]] = zext i8 [[TMP25]] to i64
; CHECK-NEXT: [[TMP30:%.*]] = or i64 [[TMP29]], 1
; CHECK-NEXT: [[TMP27:%.*]] = zext <16 x i8> [[VEC_IND]] to <16 x i64>
; CHECK-NEXT: [[TMP28:%.*]] = extractelement <16 x i1> [[ACTIVE_LANE_MASK]], i32 0
; CHECK-NEXT: br i1 [[TMP28]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
; CHECK: [[PRED_STORE_IF]]:
; CHECK-NEXT: [[TMP102:%.*]] = extractelement <4 x i64> [[TMP26]], i32 0
; CHECK-NEXT: [[TMP102:%.*]] = extractelement <16 x i64> [[TMP27]], i32 0
; CHECK-NEXT: [[TMP103:%.*]] = getelementptr [16 x i64], ptr [[DST_1]], i64 [[TMP102]], i64 [[OFF]]
; CHECK-NEXT: [[TMP104:%.*]] = extractelement <4 x i64> [[TMP25]], i32 0
; CHECK-NEXT: [[TMP105:%.*]] = or i64 [[TMP104]], 1
; CHECK-NEXT: store i64 [[TMP105]], ptr [[TMP103]], align 8, !alias.scope [[META3]]
; CHECK-NEXT: store i64 [[TMP30]], ptr [[TMP103]], align 8, !alias.scope [[META3]]
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]]
; CHECK: [[PRED_STORE_CONTINUE]]:
; CHECK-NEXT: [[TMP32:%.*]] = extractelement <4 x i1> [[ACTIVE_LANE_MASK]], i32 1
; CHECK-NEXT: br i1 [[TMP32]], label %[[PRED_STORE_IF17:.*]], label %[[PRED_STORE_CONTINUE18:.*]]
; CHECK-NEXT: [[TMP31:%.*]] = extractelement <16 x i1> [[ACTIVE_LANE_MASK]], i32 1
; CHECK-NEXT: br i1 [[TMP31]], label %[[PRED_STORE_IF17:.*]], label %[[PRED_STORE_CONTINUE18:.*]]
; CHECK: [[PRED_STORE_IF17]]:
; CHECK-NEXT: [[TMP108:%.*]] = extractelement <4 x i64> [[TMP26]], i32 1
; CHECK-NEXT: [[TMP108:%.*]] = extractelement <16 x i64> [[TMP27]], i32 1
; CHECK-NEXT: [[TMP109:%.*]] = getelementptr [16 x i64], ptr [[DST_1]], i64 [[TMP108]], i64 [[OFF]]
; CHECK-NEXT: [[TMP110:%.*]] = extractelement <4 x i64> [[TMP25]], i32 1
; CHECK-NEXT: [[TMP111:%.*]] = or i64 [[TMP110]], 1
; CHECK-NEXT: store i64 [[TMP111]], ptr [[TMP109]], align 8, !alias.scope [[META3]]
; CHECK-NEXT: store i64 [[TMP30]], ptr [[TMP109]], align 8, !alias.scope [[META3]]
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE18]]
; CHECK: [[PRED_STORE_CONTINUE18]]:
; CHECK-NEXT: [[TMP37:%.*]] = extractelement <4 x i1> [[ACTIVE_LANE_MASK]], i32 2
; CHECK-NEXT: br i1 [[TMP37]], label %[[PRED_STORE_IF19:.*]], label %[[PRED_STORE_CONTINUE20:.*]]
; CHECK-NEXT: [[TMP34:%.*]] = extractelement <16 x i1> [[ACTIVE_LANE_MASK]], i32 2
; CHECK-NEXT: br i1 [[TMP34]], label %[[PRED_STORE_IF19:.*]], label %[[PRED_STORE_CONTINUE20:.*]]
; CHECK: [[PRED_STORE_IF19]]:
; CHECK-NEXT: [[TMP114:%.*]] = extractelement <4 x i64> [[TMP26]], i32 2
; CHECK-NEXT: [[TMP114:%.*]] = extractelement <16 x i64> [[TMP27]], i32 2
; CHECK-NEXT: [[TMP115:%.*]] = getelementptr [16 x i64], ptr [[DST_1]], i64 [[TMP114]], i64 [[OFF]]
; CHECK-NEXT: [[TMP116:%.*]] = extractelement <4 x i64> [[TMP25]], i32 2
; CHECK-NEXT: [[TMP117:%.*]] = or i64 [[TMP116]], 1
; CHECK-NEXT: store i64 [[TMP117]], ptr [[TMP115]], align 8, !alias.scope [[META3]]
; CHECK-NEXT: store i64 [[TMP30]], ptr [[TMP115]], align 8, !alias.scope [[META3]]
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE20]]
; CHECK: [[PRED_STORE_CONTINUE20]]:
; CHECK-NEXT: [[TMP42:%.*]] = extractelement <4 x i1> [[ACTIVE_LANE_MASK]], i32 3
; CHECK-NEXT: br i1 [[TMP42]], label %[[PRED_STORE_IF21:.*]], label %[[PRED_STORE_CONTINUE22]]
; CHECK-NEXT: [[TMP37:%.*]] = extractelement <16 x i1> [[ACTIVE_LANE_MASK]], i32 3
; CHECK-NEXT: br i1 [[TMP37]], label %[[PRED_STORE_IF21:.*]], label %[[PRED_STORE_CONTINUE22:.*]]
; CHECK: [[PRED_STORE_IF21]]:
; CHECK-NEXT: [[TMP120:%.*]] = extractelement <4 x i64> [[TMP26]], i32 3
; CHECK-NEXT: [[TMP120:%.*]] = extractelement <16 x i64> [[TMP27]], i32 3
; CHECK-NEXT: [[TMP121:%.*]] = getelementptr [16 x i64], ptr [[DST_1]], i64 [[TMP120]], i64 [[OFF]]
; CHECK-NEXT: [[TMP122:%.*]] = extractelement <4 x i64> [[TMP25]], i32 3
; CHECK-NEXT: [[TMP123:%.*]] = or i64 [[TMP122]], 1
; CHECK-NEXT: store i64 [[TMP123]], ptr [[TMP121]], align 8, !alias.scope [[META3]]
; CHECK-NEXT: store i64 [[TMP30]], ptr [[TMP121]], align 8, !alias.scope [[META3]]
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE22]]
; CHECK: [[PRED_STORE_CONTINUE22]]:
; CHECK-NEXT: [[TMP40:%.*]] = extractelement <16 x i1> [[ACTIVE_LANE_MASK]], i32 4
; CHECK-NEXT: br i1 [[TMP40]], label %[[PRED_STORE_IF23:.*]], label %[[PRED_STORE_CONTINUE24:.*]]
; CHECK: [[PRED_STORE_IF23]]:
; CHECK-NEXT: [[TMP41:%.*]] = extractelement <16 x i64> [[TMP27]], i32 4
; CHECK-NEXT: [[TMP42:%.*]] = getelementptr [16 x i64], ptr [[DST_1]], i64 [[TMP41]], i64 [[OFF]]
; CHECK-NEXT: store i64 [[TMP30]], ptr [[TMP42]], align 8, !alias.scope [[META3]]
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE24]]
; CHECK: [[PRED_STORE_CONTINUE24]]:
; CHECK-NEXT: [[TMP43:%.*]] = extractelement <16 x i1> [[ACTIVE_LANE_MASK]], i32 5
; CHECK-NEXT: br i1 [[TMP43]], label %[[PRED_STORE_IF25:.*]], label %[[PRED_STORE_CONTINUE26:.*]]
; CHECK: [[PRED_STORE_IF25]]:
; CHECK-NEXT: [[TMP44:%.*]] = extractelement <16 x i64> [[TMP27]], i32 5
; CHECK-NEXT: [[TMP45:%.*]] = getelementptr [16 x i64], ptr [[DST_1]], i64 [[TMP44]], i64 [[OFF]]
; CHECK-NEXT: store i64 [[TMP30]], ptr [[TMP45]], align 8, !alias.scope [[META3]]
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE26]]
; CHECK: [[PRED_STORE_CONTINUE26]]:
; CHECK-NEXT: [[TMP46:%.*]] = extractelement <16 x i1> [[ACTIVE_LANE_MASK]], i32 6
; CHECK-NEXT: br i1 [[TMP46]], label %[[PRED_STORE_IF27:.*]], label %[[PRED_STORE_CONTINUE28:.*]]
; CHECK: [[PRED_STORE_IF27]]:
; CHECK-NEXT: [[TMP76:%.*]] = extractelement <16 x i64> [[TMP27]], i32 6
; CHECK-NEXT: [[TMP77:%.*]] = getelementptr [16 x i64], ptr [[DST_1]], i64 [[TMP76]], i64 [[OFF]]
; CHECK-NEXT: store i64 [[TMP30]], ptr [[TMP77]], align 8, !alias.scope [[META3]]
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE28]]
; CHECK: [[PRED_STORE_CONTINUE28]]:
; CHECK-NEXT: [[TMP49:%.*]] = extractelement <16 x i1> [[ACTIVE_LANE_MASK]], i32 7
; CHECK-NEXT: br i1 [[TMP49]], label %[[PRED_STORE_IF29:.*]], label %[[PRED_STORE_CONTINUE30:.*]]
; CHECK: [[PRED_STORE_IF29]]:
; CHECK-NEXT: [[TMP50:%.*]] = extractelement <16 x i64> [[TMP27]], i32 7
; CHECK-NEXT: [[TMP51:%.*]] = getelementptr [16 x i64], ptr [[DST_1]], i64 [[TMP50]], i64 [[OFF]]
; CHECK-NEXT: store i64 [[TMP30]], ptr [[TMP51]], align 8, !alias.scope [[META3]]
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE30]]
; CHECK: [[PRED_STORE_CONTINUE30]]:
; CHECK-NEXT: [[TMP52:%.*]] = extractelement <16 x i1> [[ACTIVE_LANE_MASK]], i32 8
; CHECK-NEXT: br i1 [[TMP52]], label %[[PRED_STORE_IF31:.*]], label %[[PRED_STORE_CONTINUE32:.*]]
; CHECK: [[PRED_STORE_IF31]]:
; CHECK-NEXT: [[TMP53:%.*]] = extractelement <16 x i64> [[TMP27]], i32 8
; CHECK-NEXT: [[TMP54:%.*]] = getelementptr [16 x i64], ptr [[DST_1]], i64 [[TMP53]], i64 [[OFF]]
; CHECK-NEXT: store i64 [[TMP30]], ptr [[TMP54]], align 8, !alias.scope [[META3]]
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE32]]
; CHECK: [[PRED_STORE_CONTINUE32]]:
; CHECK-NEXT: [[TMP55:%.*]] = extractelement <16 x i1> [[ACTIVE_LANE_MASK]], i32 9
; CHECK-NEXT: br i1 [[TMP55]], label %[[PRED_STORE_IF33:.*]], label %[[PRED_STORE_CONTINUE34:.*]]
; CHECK: [[PRED_STORE_IF33]]:
; CHECK-NEXT: [[TMP56:%.*]] = extractelement <16 x i64> [[TMP27]], i32 9
; CHECK-NEXT: [[TMP57:%.*]] = getelementptr [16 x i64], ptr [[DST_1]], i64 [[TMP56]], i64 [[OFF]]
; CHECK-NEXT: store i64 [[TMP30]], ptr [[TMP57]], align 8, !alias.scope [[META3]]
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE34]]
; CHECK: [[PRED_STORE_CONTINUE34]]:
; CHECK-NEXT: [[TMP58:%.*]] = extractelement <16 x i1> [[ACTIVE_LANE_MASK]], i32 10
; CHECK-NEXT: br i1 [[TMP58]], label %[[PRED_STORE_IF35:.*]], label %[[PRED_STORE_CONTINUE36:.*]]
; CHECK: [[PRED_STORE_IF35]]:
; CHECK-NEXT: [[TMP59:%.*]] = extractelement <16 x i64> [[TMP27]], i32 10
; CHECK-NEXT: [[TMP60:%.*]] = getelementptr [16 x i64], ptr [[DST_1]], i64 [[TMP59]], i64 [[OFF]]
; CHECK-NEXT: store i64 [[TMP30]], ptr [[TMP60]], align 8, !alias.scope [[META3]]
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE36]]
; CHECK: [[PRED_STORE_CONTINUE36]]:
; CHECK-NEXT: [[TMP61:%.*]] = extractelement <16 x i1> [[ACTIVE_LANE_MASK]], i32 11
; CHECK-NEXT: br i1 [[TMP61]], label %[[PRED_STORE_IF37:.*]], label %[[PRED_STORE_CONTINUE38:.*]]
; CHECK: [[PRED_STORE_IF37]]:
; CHECK-NEXT: [[TMP62:%.*]] = extractelement <16 x i64> [[TMP27]], i32 11
; CHECK-NEXT: [[TMP63:%.*]] = getelementptr [16 x i64], ptr [[DST_1]], i64 [[TMP62]], i64 [[OFF]]
; CHECK-NEXT: store i64 [[TMP30]], ptr [[TMP63]], align 8, !alias.scope [[META3]]
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE38]]
; CHECK: [[PRED_STORE_CONTINUE38]]:
; CHECK-NEXT: [[TMP64:%.*]] = extractelement <16 x i1> [[ACTIVE_LANE_MASK]], i32 12
; CHECK-NEXT: br i1 [[TMP64]], label %[[PRED_STORE_IF39:.*]], label %[[PRED_STORE_CONTINUE40:.*]]
; CHECK: [[PRED_STORE_IF39]]:
; CHECK-NEXT: [[TMP65:%.*]] = extractelement <16 x i64> [[TMP27]], i32 12
; CHECK-NEXT: [[TMP66:%.*]] = getelementptr [16 x i64], ptr [[DST_1]], i64 [[TMP65]], i64 [[OFF]]
; CHECK-NEXT: store i64 [[TMP30]], ptr [[TMP66]], align 8, !alias.scope [[META3]]
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE40]]
; CHECK: [[PRED_STORE_CONTINUE40]]:
; CHECK-NEXT: [[TMP67:%.*]] = extractelement <16 x i1> [[ACTIVE_LANE_MASK]], i32 13
; CHECK-NEXT: br i1 [[TMP67]], label %[[PRED_STORE_IF41:.*]], label %[[PRED_STORE_CONTINUE42:.*]]
; CHECK: [[PRED_STORE_IF41]]:
; CHECK-NEXT: [[TMP68:%.*]] = extractelement <16 x i64> [[TMP27]], i32 13
; CHECK-NEXT: [[TMP69:%.*]] = getelementptr [16 x i64], ptr [[DST_1]], i64 [[TMP68]], i64 [[OFF]]
; CHECK-NEXT: store i64 [[TMP30]], ptr [[TMP69]], align 8, !alias.scope [[META3]]
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE42]]
; CHECK: [[PRED_STORE_CONTINUE42]]:
; CHECK-NEXT: [[TMP70:%.*]] = extractelement <16 x i1> [[ACTIVE_LANE_MASK]], i32 14
; CHECK-NEXT: br i1 [[TMP70]], label %[[PRED_STORE_IF43:.*]], label %[[PRED_STORE_CONTINUE44:.*]]
; CHECK: [[PRED_STORE_IF43]]:
; CHECK-NEXT: [[TMP71:%.*]] = extractelement <16 x i64> [[TMP27]], i32 14
; CHECK-NEXT: [[TMP72:%.*]] = getelementptr [16 x i64], ptr [[DST_1]], i64 [[TMP71]], i64 [[OFF]]
; CHECK-NEXT: store i64 [[TMP30]], ptr [[TMP72]], align 8, !alias.scope [[META3]]
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE44]]
; CHECK: [[PRED_STORE_CONTINUE44]]:
; CHECK-NEXT: [[TMP73:%.*]] = extractelement <16 x i1> [[ACTIVE_LANE_MASK]], i32 15
; CHECK-NEXT: br i1 [[TMP73]], label %[[PRED_STORE_IF45:.*]], label %[[PRED_STORE_CONTINUE46]]
; CHECK: [[PRED_STORE_IF45]]:
; CHECK-NEXT: [[TMP74:%.*]] = extractelement <16 x i64> [[TMP27]], i32 15
; CHECK-NEXT: [[TMP75:%.*]] = getelementptr [16 x i64], ptr [[DST_1]], i64 [[TMP74]], i64 [[OFF]]
; CHECK-NEXT: store i64 [[TMP30]], ptr [[TMP75]], align 8, !alias.scope [[META3]]
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE46]]
; CHECK: [[PRED_STORE_CONTINUE46]]:
; CHECK-NEXT: store i8 0, ptr [[DST_2]], align 1, !alias.scope [[META5:![0-9]+]], !noalias [[META7:![0-9]+]]
; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4
; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 [[INDEX_NEXT]], i32 [[TMP2]])
; CHECK-NEXT: [[TMP47:%.*]] = extractelement <4 x i1> [[ACTIVE_LANE_MASK_NEXT]], i32 0
; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 16
; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 [[INDEX_NEXT]], i32 [[TMP2]])
; CHECK-NEXT: [[TMP47:%.*]] = extractelement <16 x i1> [[ACTIVE_LANE_MASK_NEXT]], i32 0
; CHECK-NEXT: [[TMP48:%.*]] = xor i1 [[TMP47]], true
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i8> [[VEC_IND]], splat (i8 16)
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <16 x i8> [[VEC_IND]], splat (i8 64)
; CHECK-NEXT: br i1 [[TMP48]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: br label %[[EXIT:.*]]
Expand Down
Loading
Loading