@@ -2426,6 +2426,26 @@ InnerLoopVectorizer::getOrCreateVectorTripCount(BasicBlock *InsertBlock) {
24262426 return VectorTripCount;
24272427}
24282428
2429+ static void connectScalarPreheaderInVPlan (VPlan &Plan) {
2430+ VPBlockBase *VectorPH = Plan.getVectorPreheader ();
2431+ VPBlockBase *ScalarPH = Plan.getScalarPreheader ();
2432+ VPBlockBase *PredVPB = VectorPH->getSinglePredecessor ();
2433+ VPBlockUtils::disconnectBlocks (Plan.getEntry (), VectorPH);
2434+ VPBlockUtils::connectBlocks (PredVPB, ScalarPH);
2435+ VPBlockUtils::connectBlocks (PredVPB, VectorPH);
2436+ }
2437+
2438+ static void connectCheckBlockInVPlan (VPlan &Plan, BasicBlock *CheckIRBB) {
2439+ VPBlockBase *ScalarPH = Plan.getScalarPreheader ();
2440+ VPBlockBase *VectorPH = Plan.getVectorPreheader ();
2441+ VPBlockBase *PredVPB = VectorPH->getSinglePredecessor ();
2442+ VPBlockUtils::disconnectBlocks (PredVPB, VectorPH);
2443+ VPIRBasicBlock *CheckVPIRBB = VPIRBasicBlock::fromBasicBlock (CheckIRBB);
2444+ VPBlockUtils::connectBlocks (PredVPB, CheckVPIRBB);
2445+ VPBlockUtils::connectBlocks (CheckVPIRBB, ScalarPH);
2446+ VPBlockUtils::connectBlocks (CheckVPIRBB, VectorPH);
2447+ }
2448+
24292449void InnerLoopVectorizer::emitIterationCountCheck (BasicBlock *Bypass) {
24302450 Value *Count = getTripCount ();
24312451 // Reuse existing vector loop preheader for TC checks.
@@ -2511,13 +2531,14 @@ void InnerLoopVectorizer::emitIterationCountCheck(BasicBlock *Bypass) {
25112531 " TC check is expected to dominate Bypass" );
25122532
25132533 // Update dominator for Bypass & LoopExit (if needed).
2514- DT->changeImmediateDominator (Bypass, TCCheckBlock);
25152534 BranchInst &BI =
25162535 *BranchInst::Create (Bypass, LoopVectorPreHeader, CheckMinIters);
25172536 if (hasBranchWeightMD (*OrigLoop->getLoopLatch ()->getTerminator ()))
25182537 setBranchWeights (BI, MinItersBypassWeights, /* IsExpected=*/ false );
25192538 ReplaceInstWithInst (TCCheckBlock->getTerminator (), &BI);
25202539 LoopBypassBlocks.push_back (TCCheckBlock);
2540+
2541+ connectScalarPreheaderInVPlan (Plan);
25212542}
25222543
25232544BasicBlock *InnerLoopVectorizer::emitSCEVChecks (BasicBlock *Bypass) {
@@ -2534,6 +2555,8 @@ BasicBlock *InnerLoopVectorizer::emitSCEVChecks(BasicBlock *Bypass) {
25342555 " Should already be a bypass block due to iteration count check" );
25352556 LoopBypassBlocks.push_back (SCEVCheckBlock);
25362557 AddedSafetyChecks = true ;
2558+
2559+ connectCheckBlockInVPlan (Plan, SCEVCheckBlock);
25372560 return SCEVCheckBlock;
25382561}
25392562
@@ -2570,6 +2593,7 @@ BasicBlock *InnerLoopVectorizer::emitMemRuntimeChecks(BasicBlock *Bypass) {
25702593
25712594 AddedSafetyChecks = true ;
25722595
2596+ connectCheckBlockInVPlan (Plan, MemCheckBlock);
25732597 return MemCheckBlock;
25742598}
25752599
@@ -7649,10 +7673,10 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
76497673
76507674 // 0. Generate SCEV-dependent code into the preheader, including TripCount,
76517675 // before making any changes to the CFG.
7652- if (!BestVPlan.getPreheader ()->empty ()) {
7676+ if (!BestVPlan.getEntry ()->empty ()) {
76537677 State.CFG .PrevBB = OrigLoop->getLoopPreheader ();
76547678 State.Builder .SetInsertPoint (OrigLoop->getLoopPreheader ()->getTerminator ());
7655- BestVPlan.getPreheader ()->execute (&State);
7679+ BestVPlan.getEntry ()->execute (&State);
76567680 }
76577681 if (!ILV.getTripCount ())
76587682 ILV.setTripCount (State.get (BestVPlan.getTripCount (), VPLane (0 )));
@@ -7861,8 +7885,6 @@ EpilogueVectorizerMainLoop::emitIterationCountCheck(BasicBlock *Bypass,
78617885 DT->getNode (Bypass)->getIDom ()) &&
78627886 " TC check is expected to dominate Bypass" );
78637887
7864- // Update dominator for Bypass.
7865- DT->changeImmediateDominator (Bypass, TCCheckBlock);
78667888 LoopBypassBlocks.push_back (TCCheckBlock);
78677889
78687890 // Save the trip count so we don't have to regenerate it in the
@@ -7877,6 +7899,12 @@ EpilogueVectorizerMainLoop::emitIterationCountCheck(BasicBlock *Bypass,
78777899 setBranchWeights (BI, MinItersBypassWeights, /* IsExpected=*/ false );
78787900 ReplaceInstWithInst (TCCheckBlock->getTerminator (), &BI);
78797901
7902+ VPBlockBase *VectorPH = Plan.getVectorPreheader ();
7903+ VPBlockBase *PredVPB = VectorPH->getSinglePredecessor ();
7904+ if (PredVPB->getNumSuccessors () == 1 )
7905+ connectScalarPreheaderInVPlan (Plan);
7906+ else
7907+ connectCheckBlockInVPlan (Plan, TCCheckBlock);
78807908 return TCCheckBlock;
78817909}
78827910
@@ -7907,32 +7935,19 @@ EpilogueVectorizerEpilogueLoop::createEpilogueVectorizedLoopSkeleton(
79077935 EPI.MainLoopIterationCountCheck ->getTerminator ()->replaceUsesOfWith (
79087936 VecEpilogueIterationCountCheck, LoopVectorPreHeader);
79097937
7910- DT->changeImmediateDominator (LoopVectorPreHeader,
7911- EPI.MainLoopIterationCountCheck );
7912-
79137938 EPI.EpilogueIterationCountCheck ->getTerminator ()->replaceUsesOfWith (
79147939 VecEpilogueIterationCountCheck, LoopScalarPreHeader);
79157940
79167941 if (EPI.SCEVSafetyCheck )
79177942 EPI.SCEVSafetyCheck ->getTerminator ()->replaceUsesOfWith (
79187943 VecEpilogueIterationCountCheck, LoopScalarPreHeader);
7919- if (EPI.MemSafetyCheck )
7944+ if (EPI.MemSafetyCheck ) {
79207945 EPI.MemSafetyCheck ->getTerminator ()->replaceUsesOfWith (
79217946 VecEpilogueIterationCountCheck, LoopScalarPreHeader);
7922-
7923- DT->changeImmediateDominator (
7924- VecEpilogueIterationCountCheck,
7925- VecEpilogueIterationCountCheck->getSinglePredecessor ());
7947+ }
79267948
79277949 DT->changeImmediateDominator (LoopScalarPreHeader,
79287950 EPI.EpilogueIterationCountCheck );
7929- if (!Cost->requiresScalarEpilogue (EPI.EpilogueVF .isVector ()))
7930- // If there is an epilogue which must run, there's no edge from the
7931- // middle block to exit blocks and thus no need to update the immediate
7932- // dominator of the exit blocks.
7933- DT->changeImmediateDominator (LoopExitBlock,
7934- EPI.EpilogueIterationCountCheck );
7935-
79367951 // Keep track of bypass blocks, as they feed start values to the induction and
79377952 // reduction phis in the scalar loop preheader.
79387953 if (EPI.SCEVSafetyCheck )
@@ -8035,6 +8050,20 @@ EpilogueVectorizerEpilogueLoop::emitMinimumVectorEpilogueIterCountCheck(
80358050 }
80368051 ReplaceInstWithInst (Insert->getTerminator (), &BI);
80378052 LoopBypassBlocks.push_back (Insert);
8053+
8054+ // A new entry block has been created for the epilogue VPlan. Hook it in.
8055+ VPIRBasicBlock *NewEntry = VPIRBasicBlock::fromBasicBlock (Insert);
8056+ VPBasicBlock *OldEntry = Plan.getEntry ();
8057+ VPBlockUtils::reassociateBlocks (OldEntry, NewEntry);
8058+ Plan.setEntry (NewEntry);
8059+ for (auto &R : make_early_inc_range (*NewEntry)) {
8060+ auto *VPIR = dyn_cast<VPIRInstruction>(&R);
8061+ if (!VPIR || !isa<PHINode>(VPIR->getInstruction ()))
8062+ break ;
8063+ VPIR->eraseFromParent ();
8064+ }
8065+
8066+ connectScalarPreheaderInVPlan (Plan);
80388067 return Insert;
80398068}
80408069
@@ -10270,7 +10299,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
1027010299 // should be removed once induction resume value creation is done
1027110300 // directly in VPlan.
1027210301 EpilogILV.setTripCount (MainILV.getTripCount ());
10273- for (auto &R : make_early_inc_range (*BestEpiPlan.getPreheader ())) {
10302+ for (auto &R : make_early_inc_range (*BestEpiPlan.getEntry ())) {
1027410303 auto *ExpandR = dyn_cast<VPExpandSCEVRecipe>(&R);
1027510304 if (!ExpandR)
1027610305 continue ;
@@ -10330,8 +10359,6 @@ bool LoopVectorizePass::processLoop(Loop *L) {
1033010359 cast<VPHeaderPHIRecipe>(&R)->setStartValue (StartVal);
1033110360 }
1033210361
10333- assert (DT->verify (DominatorTree::VerificationLevel::Fast) &&
10334- " DT not preserved correctly" );
1033510362 LVP.executePlan (EPI.EpilogueVF , EPI.EpilogueUF , BestEpiPlan, EpilogILV,
1033610363 DT, true , &ExpandedSCEVs);
1033710364 ++LoopsEpilogueVectorized;
@@ -10359,6 +10386,9 @@ bool LoopVectorizePass::processLoop(Loop *L) {
1035910386 checkMixedPrecision (L, ORE);
1036010387 }
1036110388
10389+ assert (DT->verify (DominatorTree::VerificationLevel::Fast) &&
10390+ " DT not preserved correctly" );
10391+
1036210392 std::optional<MDNode *> RemainderLoopID =
1036310393 makeFollowupLoopID (OrigLoopID, {LLVMLoopVectorizeFollowupAll,
1036410394 LLVMLoopVectorizeFollowupEpilogue});
0 commit comments