@@ -2426,6 +2426,26 @@ InnerLoopVectorizer::getOrCreateVectorTripCount(BasicBlock *InsertBlock) {
24262426 return VectorTripCount;
24272427}
24282428
2429+ static void connectScalarPreheaderInVPlan (VPlan &Plan) {
2430+ VPBlockBase *VectorPH = Plan.getVectorPreheader ();
2431+ VPBlockBase *ScalarPH = Plan.getScalarPreheader ();
2432+ VPBlockBase *PredVPB = VectorPH->getSinglePredecessor ();
2433+ VPBlockUtils::disconnectBlocks (Plan.getEntry (), VectorPH);
2434+ VPBlockUtils::connectBlocks (PredVPB, ScalarPH);
2435+ VPBlockUtils::connectBlocks (PredVPB, VectorPH);
2436+ }
2437+
2438+ static void connectCheckBlockInVPlan (VPlan &Plan, BasicBlock *CheckIRBB) {
2439+ VPBlockBase *ScalarPH = Plan.getScalarPreheader ();
2440+ VPBlockBase *VectorPH = Plan.getVectorPreheader ();
2441+ VPBlockBase *PredVPB = VectorPH->getSinglePredecessor ();
2442+ VPBlockUtils::disconnectBlocks (PredVPB, VectorPH);
2443+ VPIRBasicBlock *CheckVPIRBB = VPIRBasicBlock::fromBasicBlock (CheckIRBB);
2444+ VPBlockUtils::connectBlocks (PredVPB, CheckVPIRBB);
2445+ VPBlockUtils::connectBlocks (CheckVPIRBB, ScalarPH);
2446+ VPBlockUtils::connectBlocks (CheckVPIRBB, VectorPH);
2447+ }
2448+
24292449void InnerLoopVectorizer::emitIterationCountCheck (BasicBlock *Bypass) {
24302450 Value *Count = getTripCount ();
24312451 // Reuse existing vector loop preheader for TC checks.
@@ -2511,13 +2531,14 @@ void InnerLoopVectorizer::emitIterationCountCheck(BasicBlock *Bypass) {
25112531 " TC check is expected to dominate Bypass" );
25122532
25132533 // Update dominator for Bypass & LoopExit (if needed).
2514- DT->changeImmediateDominator (Bypass, TCCheckBlock);
25152534 BranchInst &BI =
25162535 *BranchInst::Create (Bypass, LoopVectorPreHeader, CheckMinIters);
25172536 if (hasBranchWeightMD (*OrigLoop->getLoopLatch ()->getTerminator ()))
25182537 setBranchWeights (BI, MinItersBypassWeights, /* IsExpected=*/ false );
25192538 ReplaceInstWithInst (TCCheckBlock->getTerminator (), &BI);
25202539 LoopBypassBlocks.push_back (TCCheckBlock);
2540+
2541+ connectScalarPreheaderInVPlan (Plan);
25212542}
25222543
25232544BasicBlock *InnerLoopVectorizer::emitSCEVChecks (BasicBlock *Bypass) {
@@ -2534,6 +2555,8 @@ BasicBlock *InnerLoopVectorizer::emitSCEVChecks(BasicBlock *Bypass) {
25342555 " Should already be a bypass block due to iteration count check" );
25352556 LoopBypassBlocks.push_back (SCEVCheckBlock);
25362557 AddedSafetyChecks = true ;
2558+
2559+ connectCheckBlockInVPlan (Plan, SCEVCheckBlock);
25372560 return SCEVCheckBlock;
25382561}
25392562
@@ -2570,6 +2593,7 @@ BasicBlock *InnerLoopVectorizer::emitMemRuntimeChecks(BasicBlock *Bypass) {
25702593
25712594 AddedSafetyChecks = true ;
25722595
2596+ connectCheckBlockInVPlan (Plan, MemCheckBlock);
25732597 return MemCheckBlock;
25742598}
25752599
@@ -7648,10 +7672,10 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
76487672
76497673 // 0. Generate SCEV-dependent code into the preheader, including TripCount,
76507674 // before making any changes to the CFG.
7651- if (!BestVPlan.getPreheader ()->empty ()) {
7675+ if (!BestVPlan.getEntry ()->empty ()) {
76527676 State.CFG .PrevBB = OrigLoop->getLoopPreheader ();
76537677 State.Builder .SetInsertPoint (OrigLoop->getLoopPreheader ()->getTerminator ());
7654- BestVPlan.getPreheader ()->execute (&State);
7678+ BestVPlan.getEntry ()->execute (&State);
76557679 }
76567680 if (!ILV.getTripCount ())
76577681 ILV.setTripCount (State.get (BestVPlan.getTripCount (), VPLane (0 )));
@@ -7859,8 +7883,6 @@ EpilogueVectorizerMainLoop::emitIterationCountCheck(BasicBlock *Bypass,
78597883 DT->getNode (Bypass)->getIDom ()) &&
78607884 " TC check is expected to dominate Bypass" );
78617885
7862- // Update dominator for Bypass.
7863- DT->changeImmediateDominator (Bypass, TCCheckBlock);
78647886 LoopBypassBlocks.push_back (TCCheckBlock);
78657887
78667888 // Save the trip count so we don't have to regenerate it in the
@@ -7875,6 +7897,12 @@ EpilogueVectorizerMainLoop::emitIterationCountCheck(BasicBlock *Bypass,
78757897 setBranchWeights (BI, MinItersBypassWeights, /* IsExpected=*/ false );
78767898 ReplaceInstWithInst (TCCheckBlock->getTerminator (), &BI);
78777899
7900+ VPBlockBase *VectorPH = Plan.getVectorPreheader ();
7901+ VPBlockBase *PredVPB = VectorPH->getSinglePredecessor ();
7902+ if (PredVPB->getNumSuccessors () == 1 )
7903+ connectScalarPreheaderInVPlan (Plan);
7904+ else
7905+ connectCheckBlockInVPlan (Plan, TCCheckBlock);
78787906 return TCCheckBlock;
78797907}
78807908
@@ -7905,32 +7933,19 @@ EpilogueVectorizerEpilogueLoop::createEpilogueVectorizedLoopSkeleton(
79057933 EPI.MainLoopIterationCountCheck ->getTerminator ()->replaceUsesOfWith (
79067934 VecEpilogueIterationCountCheck, LoopVectorPreHeader);
79077935
7908- DT->changeImmediateDominator (LoopVectorPreHeader,
7909- EPI.MainLoopIterationCountCheck );
7910-
79117936 EPI.EpilogueIterationCountCheck ->getTerminator ()->replaceUsesOfWith (
79127937 VecEpilogueIterationCountCheck, LoopScalarPreHeader);
79137938
79147939 if (EPI.SCEVSafetyCheck )
79157940 EPI.SCEVSafetyCheck ->getTerminator ()->replaceUsesOfWith (
79167941 VecEpilogueIterationCountCheck, LoopScalarPreHeader);
7917- if (EPI.MemSafetyCheck )
7942+ if (EPI.MemSafetyCheck ) {
79187943 EPI.MemSafetyCheck ->getTerminator ()->replaceUsesOfWith (
79197944 VecEpilogueIterationCountCheck, LoopScalarPreHeader);
7920-
7921- DT->changeImmediateDominator (
7922- VecEpilogueIterationCountCheck,
7923- VecEpilogueIterationCountCheck->getSinglePredecessor ());
7945+ }
79247946
79257947 DT->changeImmediateDominator (LoopScalarPreHeader,
79267948 EPI.EpilogueIterationCountCheck );
7927- if (!Cost->requiresScalarEpilogue (EPI.EpilogueVF .isVector ()))
7928- // If there is an epilogue which must run, there's no edge from the
7929- // middle block to exit blocks and thus no need to update the immediate
7930- // dominator of the exit blocks.
7931- DT->changeImmediateDominator (LoopExitBlock,
7932- EPI.EpilogueIterationCountCheck );
7933-
79347949 // Keep track of bypass blocks, as they feed start values to the induction and
79357950 // reduction phis in the scalar loop preheader.
79367951 if (EPI.SCEVSafetyCheck )
@@ -8033,6 +8048,20 @@ EpilogueVectorizerEpilogueLoop::emitMinimumVectorEpilogueIterCountCheck(
80338048 }
80348049 ReplaceInstWithInst (Insert->getTerminator (), &BI);
80358050 LoopBypassBlocks.push_back (Insert);
8051+
8052+ // A new entry block has been created for the epilogue VPlan. Hook it in.
8053+ VPIRBasicBlock *NewEntry = VPIRBasicBlock::fromBasicBlock (Insert);
8054+ VPBasicBlock *OldEntry = Plan.getEntry ();
8055+ VPBlockUtils::reassociateBlocks (OldEntry, NewEntry);
8056+ Plan.setEntry (NewEntry);
8057+ for (auto &R : make_early_inc_range (*NewEntry)) {
8058+ auto *VPIR = dyn_cast<VPIRInstruction>(&R);
8059+ if (!VPIR || !isa<PHINode>(VPIR->getInstruction ()))
8060+ break ;
8061+ VPIR->eraseFromParent ();
8062+ }
8063+
8064+ connectScalarPreheaderInVPlan (Plan);
80368065 return Insert;
80378066}
80388067
@@ -10256,7 +10285,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
1025610285 // should be removed once induction resume value creation is done
1025710286 // directly in VPlan.
1025810287 EpilogILV.setTripCount (MainILV.getTripCount ());
10259- for (auto &R : make_early_inc_range (*BestEpiPlan.getPreheader ())) {
10288+ for (auto &R : make_early_inc_range (*BestEpiPlan.getEntry ())) {
1026010289 auto *ExpandR = dyn_cast<VPExpandSCEVRecipe>(&R);
1026110290 if (!ExpandR)
1026210291 continue ;
@@ -10316,8 +10345,6 @@ bool LoopVectorizePass::processLoop(Loop *L) {
1031610345 cast<VPHeaderPHIRecipe>(&R)->setStartValue (StartVal);
1031710346 }
1031810347
10319- assert (DT->verify (DominatorTree::VerificationLevel::Fast) &&
10320- " DT not preserved correctly" );
1032110348 LVP.executePlan (EPI.EpilogueVF , EPI.EpilogueUF , BestEpiPlan, EpilogILV,
1032210349 DT, true , &ExpandedSCEVs);
1032310350 ++LoopsEpilogueVectorized;
@@ -10345,6 +10372,9 @@ bool LoopVectorizePass::processLoop(Loop *L) {
1034510372 checkMixedPrecision (L, ORE);
1034610373 }
1034710374
10375+ assert (DT->verify (DominatorTree::VerificationLevel::Fast) &&
10376+ " DT not preserved correctly" );
10377+
1034810378 std::optional<MDNode *> RemainderLoopID =
1034910379 makeFollowupLoopID (OrigLoopID, {LLVMLoopVectorizeFollowupAll,
1035010380 LLVMLoopVectorizeFollowupEpilogue});
0 commit comments