@@ -1387,9 +1387,11 @@ class LoopVectorizationCostModel {
13871387 // If we might exit from anywhere but the latch, must run the exiting
13881388 // iteration in scalar form.
13891389 if (TheLoop->getExitingBlock () != TheLoop->getLoopLatch ()) {
1390- LLVM_DEBUG (
1391- dbgs () << " LV: Loop requires scalar epilogue: multiple exits\n " );
1392- return true ;
1390+ if (!Legal->canVectorizeMultiCond ()) {
1391+ LLVM_DEBUG (
1392+ dbgs () << " LV: Loop requires scalar epilogue: multiple exits\n " );
1393+ return true ;
1394+ }
13931395 }
13941396 if (IsVectorizing && InterleaveInfo.requiresScalarEpilogue ()) {
13951397 LLVM_DEBUG (dbgs () << " LV: Loop requires scalar epilogue: "
@@ -2571,8 +2573,17 @@ void InnerLoopVectorizer::createVectorLoopSkeleton(StringRef Prefix) {
25712573 LoopVectorPreHeader = OrigLoop->getLoopPreheader ();
25722574 assert (LoopVectorPreHeader && " Invalid loop structure" );
25732575 LoopExitBlock = OrigLoop->getUniqueExitBlock (); // may be nullptr
2574- assert ((LoopExitBlock || Cost->requiresScalarEpilogue (VF.isVector ())) &&
2575- " multiple exit loop without required epilogue?" );
2576+ if (Legal->canVectorizeMultiCond ()) {
2577+ BasicBlock *Latch = OrigLoop->getLoopLatch ();
2578+ BasicBlock *TrueSucc =
2579+ cast<BranchInst>(Latch->getTerminator ())->getSuccessor (0 );
2580+ BasicBlock *FalseSucc =
2581+ cast<BranchInst>(Latch->getTerminator ())->getSuccessor (1 );
2582+ LoopExitBlock = OrigLoop->contains (TrueSucc) ? FalseSucc : TrueSucc;
2583+ } else {
2584+ assert ((LoopExitBlock || Cost->requiresScalarEpilogue (VF.isVector ())) &&
2585+ " multiple exit loop without required epilogue?" );
2586+ }
25762587
25772588 LoopMiddleBlock =
25782589 SplitBlock (LoopVectorPreHeader, LoopVectorPreHeader->getTerminator (), DT,
@@ -2943,24 +2954,26 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State,
29432954 VPRegionBlock *VectorRegion = State.Plan ->getVectorLoopRegion ();
29442955 VPBasicBlock *LatchVPBB = VectorRegion->getExitingBasicBlock ();
29452956 Loop *VectorLoop = LI->getLoopFor (State.CFG .VPBB2IRBB [LatchVPBB]);
2946- if (Cost->requiresScalarEpilogue (VF.isVector ())) {
2947- // No edge from the middle block to the unique exit block has been inserted
2948- // and there is nothing to fix from vector loop; phis should have incoming
2949- // from scalar loop only.
2950- } else {
2951- // TODO: Check VPLiveOuts to see if IV users need fixing instead of checking
2952- // the cost model.
2953-
2954- // If we inserted an edge from the middle block to the unique exit block,
2955- // update uses outside the loop (phis) to account for the newly inserted
2956- // edge.
2957-
2958- // Fix-up external users of the induction variables.
2959- for (const auto &Entry : Legal->getInductionVars ())
2960- fixupIVUsers (Entry.first , Entry.second ,
2961- getOrCreateVectorTripCount (VectorLoop->getLoopPreheader ()),
2962- IVEndValues[Entry.first ], LoopMiddleBlock,
2963- VectorLoop->getHeader (), Plan, State);
2957+ if (OrigLoop->getUniqueExitBlock ()) {
2958+ if (Cost->requiresScalarEpilogue (VF.isVector ())) {
2959+ // No edge from the middle block to the unique exit block has been
2960+ // inserted and there is nothing to fix from vector loop; phis should have
2961+ // incoming from scalar loop only.
2962+ } else {
2963+ // TODO: Check VPLiveOuts to see if IV users need fixing instead of
2964+ // checking the cost model.
2965+
2966+ // If we inserted an edge from the middle block to the unique exit block,
2967+ // update uses outside the loop (phis) to account for the newly inserted
2968+ // edge.
2969+
2970+ // Fix-up external users of the induction variables.
2971+ for (const auto &Entry : Legal->getInductionVars ())
2972+ fixupIVUsers (Entry.first , Entry.second ,
2973+ getOrCreateVectorTripCount (VectorLoop->getLoopPreheader ()),
2974+ IVEndValues[Entry.first ], LoopMiddleBlock,
2975+ VectorLoop->getHeader (), Plan, State);
2976+ }
29642977 }
29652978
29662979 // Fix live-out phis not already fixed earlier.
@@ -3584,7 +3597,8 @@ void LoopVectorizationCostModel::collectLoopUniforms(ElementCount VF) {
35843597 TheLoop->getExitingBlocks (Exiting);
35853598 for (BasicBlock *E : Exiting) {
35863599 auto *Cmp = dyn_cast<Instruction>(E->getTerminator ()->getOperand (0 ));
3587- if (Cmp && TheLoop->contains (Cmp) && Cmp->hasOneUse ())
3600+ if (Cmp && TheLoop->contains (Cmp) && Cmp->hasOneUse () &&
3601+ (TheLoop->getLoopLatch () == E || !Legal->canVectorizeMultiCond ()))
35883602 AddToWorklistIfAllowed (Cmp);
35893603 }
35903604
@@ -7515,7 +7529,8 @@ LoopVectorizationPlanner::executePlan(
75157529 LLVM_DEBUG (BestVPlan.dump ());
75167530
75177531 // Perform the actual loop transformation.
7518- VPTransformState State (BestVF, BestUF, LI, DT, ILV.Builder , &ILV, &BestVPlan);
7532+ VPTransformState State (BestVF, BestUF, LI, DT, ILV.Builder , &ILV, &BestVPlan,
7533+ OrigLoop);
75197534
75207535 // 0. Generate SCEV-dependent code into the preheader, including TripCount,
75217536 // before making any changes to the CFG.
@@ -7577,12 +7592,15 @@ LoopVectorizationPlanner::executePlan(
75777592
75787593 // 2.5 Collect reduction resume values.
75797594 DenseMap<const RecurrenceDescriptor *, Value *> ReductionResumeValues;
7580- auto *ExitVPBB =
7581- cast<VPBasicBlock>(BestVPlan.getVectorLoopRegion ()->getSingleSuccessor ());
7582- for (VPRecipeBase &R : *ExitVPBB) {
7583- createAndCollectMergePhiForReduction (
7584- dyn_cast<VPInstruction>(&R), ReductionResumeValues, State, OrigLoop,
7585- State.CFG .VPBB2IRBB [ExitVPBB], ExpandedSCEVs);
7595+ VPBasicBlock *ExitVPBB = nullptr ;
7596+ if (BestVPlan.getVectorLoopRegion ()->getSingleSuccessor ()) {
7597+ ExitVPBB = cast<VPBasicBlock>(
7598+ BestVPlan.getVectorLoopRegion ()->getSingleSuccessor ());
7599+ for (VPRecipeBase &R : *ExitVPBB) {
7600+ createAndCollectMergePhiForReduction (
7601+ dyn_cast<VPInstruction>(&R), ReductionResumeValues, State, OrigLoop,
7602+ State.CFG .VPBB2IRBB [ExitVPBB], ExpandedSCEVs);
7603+ }
75867604 }
75877605
75887606 // 2.6. Maintain Loop Hints
@@ -7608,6 +7626,7 @@ LoopVectorizationPlanner::executePlan(
76087626 LoopVectorizeHints Hints (L, true , *ORE);
76097627 Hints.setAlreadyVectorized ();
76107628 }
7629+
76117630 TargetTransformInfo::UnrollingPreferences UP;
76127631 TTI.getUnrollingPreferences (L, *PSE.getSE (), UP, ORE);
76137632 if (!UP.UnrollVectorizedLoop || CanonicalIVStartValue)
@@ -7620,15 +7639,17 @@ LoopVectorizationPlanner::executePlan(
76207639 ILV.printDebugTracesAtEnd ();
76217640
76227641 // 4. Adjust branch weight of the branch in the middle block.
7623- auto *MiddleTerm =
7624- cast<BranchInst>(State.CFG .VPBB2IRBB [ExitVPBB]->getTerminator ());
7625- if (MiddleTerm->isConditional () &&
7626- hasBranchWeightMD (*OrigLoop->getLoopLatch ()->getTerminator ())) {
7627- // Assume that `Count % VectorTripCount` is equally distributed.
7628- unsigned TripCount = State.UF * State.VF .getKnownMinValue ();
7629- assert (TripCount > 0 && " trip count should not be zero" );
7630- const uint32_t Weights[] = {1 , TripCount - 1 };
7631- setBranchWeights (*MiddleTerm, Weights, /* IsExpected=*/ false );
7642+ if (ExitVPBB) {
7643+ auto *MiddleTerm =
7644+ cast<BranchInst>(State.CFG .VPBB2IRBB [ExitVPBB]->getTerminator ());
7645+ if (MiddleTerm->isConditional () &&
7646+ hasBranchWeightMD (*OrigLoop->getLoopLatch ()->getTerminator ())) {
7647+ // Assume that `Count % VectorTripCount` is equally distributed.
7648+ unsigned TripCount = State.UF * State.VF .getKnownMinValue ();
7649+ assert (TripCount > 0 && " trip count should not be zero" );
7650+ const uint32_t Weights[] = {1 , TripCount - 1 };
7651+ setBranchWeights (*MiddleTerm, Weights, /* IsExpected=*/ false );
7652+ }
76327653 }
76337654
76347655 return {State.ExpandedSCEVs , ReductionResumeValues};
@@ -8013,7 +8034,7 @@ VPValue *VPRecipeBuilder::createEdgeMask(BasicBlock *Src, BasicBlock *Dst) {
80138034 // If source is an exiting block, we know the exit edge is dynamically dead
80148035 // in the vector loop, and thus we don't need to restrict the mask. Avoid
80158036 // adding uses of an otherwise potentially dead instruction.
8016- if (OrigLoop->isLoopExiting (Src))
8037+ if (!Legal-> canVectorizeMultiCond () && OrigLoop->isLoopExiting (Src))
80178038 return EdgeMaskCache[Edge] = SrcMask;
80188039
80198040 VPValue *EdgeMask = getVPValueOrAddLiveIn (BI->getCondition ());
@@ -8630,6 +8651,8 @@ static void addCanonicalIVRecipes(VPlan &Plan, Type *IdxTy, bool HasNUW,
86308651static SetVector<VPIRInstruction *> collectUsersInExitBlock (
86318652 Loop *OrigLoop, VPRecipeBuilder &Builder, VPlan &Plan,
86328653 const MapVector<PHINode *, InductionDescriptor> &Inductions) {
8654+ if (!Plan.getVectorLoopRegion ()->getSingleSuccessor ())
8655+ return {};
86338656 auto *MiddleVPBB =
86348657 cast<VPBasicBlock>(Plan.getVectorLoopRegion ()->getSingleSuccessor ());
86358658 // No edge from the middle block to the unique exit block has been inserted
@@ -8717,6 +8740,8 @@ static void addLiveOutsForFirstOrderRecurrences(
87178740 // TODO: Should be replaced by
87188741 // Plan->getScalarLoopRegion()->getSinglePredecessor() in the future once the
87198742 // scalar region is modeled as well.
8743+ if (!VectorRegion->getSingleSuccessor ())
8744+ return ;
87208745 auto *MiddleVPBB = cast<VPBasicBlock>(VectorRegion->getSingleSuccessor ());
87218746 VPBasicBlock *ScalarPHVPBB = nullptr ;
87228747 if (MiddleVPBB->getNumSuccessors () == 2 ) {
@@ -8991,6 +9016,67 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
89919016 " VPBasicBlock" );
89929017 RecipeBuilder.fixHeaderPhis ();
89939018
9019+ SmallVector<BasicBlock *> Exiting;
9020+ OrigLoop->getExitingBlocks (Exiting);
9021+
9022+ if (Legal->canVectorizeMultiCond ()) {
9023+ auto *LatchVPBB =
9024+ cast<VPBasicBlock>(Plan->getVectorLoopRegion ()->getExiting ());
9025+ VPBuilder::InsertPointGuard Guard (Builder);
9026+ Builder.setInsertPoint (LatchVPBB->getTerminator ());
9027+ auto *MiddleVPBB =
9028+ cast<VPBasicBlock>(Plan->getVectorLoopRegion ()->getSingleSuccessor ());
9029+
9030+ VPValue *EarlyExitTaken = nullptr ;
9031+ SmallVector<VPValue *> ExitTaken;
9032+ SmallVector<PHINode *> ExitPhis;
9033+ SmallVector<Value *> ExitValues;
9034+ BasicBlock *ExitBlock;
9035+ for (BasicBlock *E : Exiting) {
9036+ if (E == OrigLoop->getLoopLatch ()) {
9037+ BasicBlock *TrueSucc =
9038+ cast<BranchInst>(E->getTerminator ())->getSuccessor (0 );
9039+ BasicBlock *FalseSucc =
9040+ cast<BranchInst>(E->getTerminator ())->getSuccessor (1 );
9041+ auto EB = !OrigLoop->contains (TrueSucc) ? TrueSucc : FalseSucc;
9042+
9043+ auto *VPExitBlock = new VPIRBasicBlock (EB);
9044+ VPBasicBlock *ScalarPH = new VPBasicBlock (" scalar.ph" );
9045+ VPBlockUtils::connectBlocks (MiddleVPBB, VPExitBlock);
9046+ VPBlockUtils::connectBlocks (MiddleVPBB, ScalarPH);
9047+ continue ;
9048+ }
9049+ BasicBlock *TrueSucc =
9050+ cast<BranchInst>(E->getTerminator ())->getSuccessor (0 );
9051+ BasicBlock *FalseSucc =
9052+ cast<BranchInst>(E->getTerminator ())->getSuccessor (1 );
9053+ VPValue *M = RecipeBuilder.getBlockInMask (
9054+ OrigLoop->contains (TrueSucc) ? TrueSucc : FalseSucc);
9055+
9056+ auto *N = Builder.createNot (M);
9057+ auto *EC = Builder.createNaryOp (VPInstruction::AnyOf, {N});
9058+ ExitTaken.push_back (EC);
9059+ if (EarlyExitTaken)
9060+ EarlyExitTaken = Builder.createOr (EarlyExitTaken, EC);
9061+ else
9062+ EarlyExitTaken = EC;
9063+ ExitBlock = !OrigLoop->contains (TrueSucc) ? TrueSucc : FalseSucc;
9064+ }
9065+
9066+ auto *Term = dyn_cast<VPInstruction>(LatchVPBB->getTerminator ());
9067+ auto *IsLatchExiting = Builder.createICmp (
9068+ CmpInst::ICMP_EQ, Term->getOperand (0 ), Term->getOperand (1 ));
9069+ Builder.createNaryOp (VPInstruction::BranchMultipleConds,
9070+ {EarlyExitTaken, IsLatchExiting});
9071+ Term->eraseFromParent ();
9072+
9073+ auto *EA = new VPIRBasicBlock (ExitBlock);
9074+ VPRegionBlock *LoopRegion = Plan->getVectorLoopRegion ();
9075+ VPBlockUtils::disconnectBlocks (LoopRegion, MiddleVPBB);
9076+ VPBlockUtils::connectBlocks (LoopRegion, EA);
9077+ VPBlockUtils::connectBlocks (LoopRegion, MiddleVPBB);
9078+ }
9079+
89949080 SetVector<VPIRInstruction *> ExitUsersToFix = collectUsersInExitBlock (
89959081 OrigLoop, RecipeBuilder, *Plan, Legal->getInductionVars ());
89969082 addLiveOutsForFirstOrderRecurrences (*Plan, ExitUsersToFix);
@@ -9062,6 +9148,7 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
90629148 VPlanTransforms::addActiveLaneMask (*Plan, ForControlFlow,
90639149 WithoutRuntimeCheck);
90649150 }
9151+
90659152 return Plan;
90669153}
90679154
@@ -9286,6 +9373,8 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
92869373 }
92879374 VPBasicBlock *LatchVPBB = VectorLoopRegion->getExitingBasicBlock ();
92889375 Builder.setInsertPoint (&*LatchVPBB->begin ());
9376+ if (!VectorLoopRegion->getSingleSuccessor ())
9377+ return ;
92899378 VPBasicBlock *MiddleVPBB =
92909379 cast<VPBasicBlock>(VectorLoopRegion->getSingleSuccessor ());
92919380 VPBasicBlock::iterator IP = MiddleVPBB->getFirstNonPhi ();
0 commit comments