2323#include " llvm/ADT/SmallVector.h"
2424#include " llvm/Analysis/AliasAnalysis.h"
2525#include " llvm/Analysis/AliasSetTracker.h"
26+ #include " llvm/Analysis/AssumeBundleQueries.h"
27+ #include " llvm/Analysis/AssumptionCache.h"
2628#include " llvm/Analysis/LoopAnalysisManager.h"
2729#include " llvm/Analysis/LoopInfo.h"
2830#include " llvm/Analysis/LoopIterator.h"
@@ -208,28 +210,46 @@ static const SCEV *mulSCEVOverflow(const SCEV *A, const SCEV *B,
208210
209211/// Return true if evaluating \p AR at \p MaxBTC cannot wrap, because \p AR at
210212/// \p MaxBTC is guaranteed inbounds of the accessed object.
211- static bool evaluatePtrAddRecAtMaxBTCWillNotWrap ( const SCEVAddRecExpr *AR,
212- const SCEV *MaxBTC ,
213- const SCEV *EltSize,
214- ScalarEvolution &SE ,
215- const DataLayout &DL ) {
213+ static bool
214+ evaluatePtrAddRecAtMaxBTCWillNotWrap ( const SCEVAddRecExpr *AR ,
215+ const SCEV *MaxBTC, const SCEV *EltSize,
216+ ScalarEvolution &SE, const DataLayout &DL ,
217+ DominatorTree *DT, AssumptionCache *AC ) {
216218 auto *PointerBase = SE.getPointerBase (AR->getStart ());
217219 auto *StartPtr = dyn_cast<SCEVUnknown>(PointerBase);
218220 if (!StartPtr)
219221 return false ;
222+ const Loop *L = AR->getLoop ();
220223 bool CheckForNonNull, CheckForFreed;
221- uint64_t DerefBytes = StartPtr->getValue ()->getPointerDereferenceableBytes (
224+ Value *StartPtrV = StartPtr->getValue ();
225+ uint64_t DerefBytes = StartPtrV->getPointerDereferenceableBytes (
222226 DL, CheckForNonNull, CheckForFreed);
223227
224- if (CheckForNonNull || CheckForFreed)
228+ if (DerefBytes && ( CheckForNonNull || CheckForFreed) )
225229 return false ;
226230
227231 const SCEV *Step = AR->getStepRecurrence (SE);
232+ Type *WiderTy = SE.getWiderType (MaxBTC->getType (), Step->getType ());
233+ const SCEV *DerefBytesSCEV = SE.getConstant (WiderTy, DerefBytes);
234+
235+ // Check if we have a suitable dereferenceable assumption we can use.
236+ if (!StartPtrV->canBeFreed ()) {
237+ RetainedKnowledge DerefRK = getKnowledgeValidInContext (
238+ StartPtrV, {Attribute::Dereferenceable}, *AC,
239+ L->getLoopPredecessor ()->getTerminator (), DT);
240+ if (DerefRK) {
241+ DerefBytesSCEV = SE.getUMaxExpr (
242+ DerefBytesSCEV, SE.getConstant (WiderTy, DerefRK.ArgValue ));
243+ }
244+ }
245+
246+ if (DerefBytesSCEV->isZero ())
247+ return false ;
248+
228249 bool IsKnownNonNegative = SE.isKnownNonNegative (Step);
229250 if (!IsKnownNonNegative && !SE.isKnownNegative (Step))
230251 return false ;
231252
232- Type *WiderTy = SE.getWiderType (MaxBTC->getType (), Step->getType ());
233253 Step = SE.getNoopOrSignExtend (Step, WiderTy);
234254 MaxBTC = SE.getNoopOrZeroExtend (MaxBTC, WiderTy);
235255
@@ -256,24 +276,23 @@ static bool evaluatePtrAddRecAtMaxBTCWillNotWrap(const SCEVAddRecExpr *AR,
256276 const SCEV *EndBytes = addSCEVNoOverflow (StartOffset, OffsetEndBytes, SE);
257277 if (!EndBytes)
258278 return false ;
259- return SE.isKnownPredicate (CmpInst::ICMP_ULE, EndBytes,
260- SE.getConstant (WiderTy, DerefBytes));
279+ return SE.isKnownPredicate (CmpInst::ICMP_ULE, EndBytes, DerefBytesSCEV);
261280 }
262281
263282 // For negative steps check if
264283 // * StartOffset >= (MaxBTC * Step + EltSize)
265284 // * StartOffset <= DerefBytes.
266285 assert (SE.isKnownNegative (Step) && " must be known negative" );
267286 return SE.isKnownPredicate (CmpInst::ICMP_SGE, StartOffset, OffsetEndBytes) &&
268- SE.isKnownPredicate (CmpInst::ICMP_ULE, StartOffset,
269- SE.getConstant (WiderTy, DerefBytes));
287+ SE.isKnownPredicate (CmpInst::ICMP_ULE, StartOffset, DerefBytesSCEV);
270288}
271289
272290std::pair<const SCEV *, const SCEV *> llvm::getStartAndEndForAccess (
273291 const Loop *Lp, const SCEV *PtrExpr, Type *AccessTy, const SCEV *BTC,
274292 const SCEV *MaxBTC, ScalarEvolution *SE,
275293 DenseMap<std::pair<const SCEV *, Type *>,
276- std::pair<const SCEV *, const SCEV *>> *PointerBounds) {
294+ std::pair<const SCEV *, const SCEV *>> *PointerBounds,
295+ DominatorTree *DT, AssumptionCache *AC) {
277296 std::pair<const SCEV *, const SCEV *> *PtrBoundsPair;
278297 if (PointerBounds) {
279298 auto [Iter, Ins] = PointerBounds->insert (
@@ -308,8 +327,8 @@ std::pair<const SCEV *, const SCEV *> llvm::getStartAndEndForAccess(
308327 // sets ScEnd to the maximum unsigned value for the type. Note that LAA
309328 // separately checks that accesses cannot wrap, so unsigned max
310329 // represents an upper bound.
311- if (evaluatePtrAddRecAtMaxBTCWillNotWrap (AR, MaxBTC, EltSizeSCEV, *SE,
312- DL )) {
330+ if (evaluatePtrAddRecAtMaxBTCWillNotWrap (AR, MaxBTC, EltSizeSCEV, *SE, DL,
331+ DT, AC )) {
313332 ScEnd = AR->evaluateAtIteration (MaxBTC, *SE);
314333 } else {
315334 ScEnd = SE->getAddExpr (
@@ -356,9 +375,9 @@ void RuntimePointerChecking::insert(Loop *Lp, Value *Ptr, const SCEV *PtrExpr,
356375 bool NeedsFreeze) {
357376 const SCEV *SymbolicMaxBTC = PSE.getSymbolicMaxBackedgeTakenCount ();
358377 const SCEV *BTC = PSE.getBackedgeTakenCount ();
359- const auto &[ScStart, ScEnd] =
360- getStartAndEndForAccess ( Lp, PtrExpr, AccessTy, BTC, SymbolicMaxBTC,
361- PSE. getSE (), & DC.getPointerBounds ());
378+ const auto &[ScStart, ScEnd] = getStartAndEndForAccess (
379+ Lp, PtrExpr, AccessTy, BTC, SymbolicMaxBTC, PSE. getSE () ,
380+ &DC. getPointerBounds (), DC. getDT (), DC.getAC ());
362381 assert (!isa<SCEVCouldNotCompute>(ScStart) &&
363382 !isa<SCEVCouldNotCompute>(ScEnd) &&
364383 " must be able to compute both start and end expressions" );
@@ -1961,13 +1980,15 @@ bool MemoryDepChecker::areAccessesCompletelyBeforeOrAfter(const SCEV *Src,
19611980 const SCEV *BTC = PSE.getBackedgeTakenCount ();
19621981 const SCEV *SymbolicMaxBTC = PSE.getSymbolicMaxBackedgeTakenCount ();
19631982 ScalarEvolution &SE = *PSE.getSE ();
1964- const auto &[SrcStart_, SrcEnd_] = getStartAndEndForAccess (
1965- InnermostLoop, Src, SrcTy, BTC, SymbolicMaxBTC, &SE, &PointerBounds);
1983+ const auto &[SrcStart_, SrcEnd_] =
1984+ getStartAndEndForAccess (InnermostLoop, Src, SrcTy, BTC, SymbolicMaxBTC,
1985+ &SE, &PointerBounds, DT, AC);
19661986 if (isa<SCEVCouldNotCompute>(SrcStart_) || isa<SCEVCouldNotCompute>(SrcEnd_))
19671987 return false ;
19681988
1969- const auto &[SinkStart_, SinkEnd_] = getStartAndEndForAccess (
1970- InnermostLoop, Sink, SinkTy, BTC, SymbolicMaxBTC, &SE, &PointerBounds);
1989+ const auto &[SinkStart_, SinkEnd_] =
1990+ getStartAndEndForAccess (InnermostLoop, Sink, SinkTy, BTC, SymbolicMaxBTC,
1991+ &SE, &PointerBounds, DT, AC);
19711992 if (isa<SCEVCouldNotCompute>(SinkStart_) ||
19721993 isa<SCEVCouldNotCompute>(SinkEnd_))
19731994 return false ;
@@ -3003,7 +3024,7 @@ LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE,
30033024 const TargetTransformInfo *TTI,
30043025 const TargetLibraryInfo *TLI, AAResults *AA,
30053026 DominatorTree *DT, LoopInfo *LI,
3006- bool AllowPartial)
3027+ AssumptionCache *AC, bool AllowPartial)
30073028 : PSE(std::make_unique<PredicatedScalarEvolution>(*SE, *L)),
30083029 PtrRtChecking (nullptr ), TheLoop(L), AllowPartial(AllowPartial) {
30093030 unsigned MaxTargetVectorWidthInBits = std::numeric_limits<unsigned >::max ();
@@ -3013,8 +3034,8 @@ LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE,
30133034 MaxTargetVectorWidthInBits =
30143035 TTI->getRegisterBitWidth (TargetTransformInfo::RGK_FixedWidthVector) * 2 ;
30153036
3016- DepChecker = std::make_unique<MemoryDepChecker>(*PSE, L, SymbolicStrides,
3017- MaxTargetVectorWidthInBits);
3037+ DepChecker = std::make_unique<MemoryDepChecker>(
3038+ *PSE, AC, DT, L, SymbolicStrides, MaxTargetVectorWidthInBits);
30183039 PtrRtChecking = std::make_unique<RuntimePointerChecking>(*DepChecker, SE);
30193040 if (canAnalyzeLoop ())
30203041 CanVecMem = analyzeLoop (AA, LI, TLI, DT);
@@ -3083,7 +3104,7 @@ const LoopAccessInfo &LoopAccessInfoManager::getInfo(Loop &L,
30833104 // or if it was created with a different value of AllowPartial.
30843105 if (Inserted || It->second ->hasAllowPartial () != AllowPartial)
30853106 It->second = std::make_unique<LoopAccessInfo>(&L, &SE, TTI, TLI, &AA, &DT,
3086- &LI, AllowPartial);
3107+ &LI, AC, AllowPartial);
30873108
30883109 return *It->second ;
30893110}
@@ -3126,7 +3147,8 @@ LoopAccessInfoManager LoopAccessAnalysis::run(Function &F,
31263147 auto &LI = FAM.getResult <LoopAnalysis>(F);
31273148 auto &TTI = FAM.getResult <TargetIRAnalysis>(F);
31283149 auto &TLI = FAM.getResult <TargetLibraryAnalysis>(F);
3129- return LoopAccessInfoManager (SE, AA, DT, LI, &TTI, &TLI);
3150+ auto &AC = FAM.getResult <AssumptionAnalysis>(F);
3151+ return LoopAccessInfoManager (SE, AA, DT, LI, &TTI, &TLI, &AC);
31303152}
31313153
31323154AnalysisKey LoopAccessAnalysis::Key;
0 commit comments