2323#include " llvm/ADT/SmallVector.h"
2424#include " llvm/Analysis/AliasAnalysis.h"
2525#include " llvm/Analysis/AliasSetTracker.h"
26+ #include " llvm/Analysis/AssumeBundleQueries.h"
27+ #include " llvm/Analysis/AssumptionCache.h"
2628#include " llvm/Analysis/LoopAnalysisManager.h"
2729#include " llvm/Analysis/LoopInfo.h"
2830#include " llvm/Analysis/LoopIterator.h"
@@ -208,28 +210,52 @@ static const SCEV *mulSCEVOverflow(const SCEV *A, const SCEV *B,
208210
209211// / Return true, if evaluating \p AR at \p MaxBTC cannot wrap, because \p AR at
210212// / \p MaxBTC is guaranteed inbounds of the accessed object.
211- static bool evaluatePtrAddRecAtMaxBTCWillNotWrap ( const SCEVAddRecExpr *AR,
212- const SCEV *MaxBTC ,
213- const SCEV *EltSize,
214- ScalarEvolution &SE ,
215- const DataLayout &DL ) {
213+ static bool
214+ evaluatePtrAddRecAtMaxBTCWillNotWrap ( const SCEVAddRecExpr *AR ,
215+ const SCEV *MaxBTC, const SCEV *EltSize,
216+ ScalarEvolution &SE, const DataLayout &DL ,
217+ AssumptionCache *AC, DominatorTree *DT ) {
216218 auto *PointerBase = SE.getPointerBase (AR->getStart ());
217219 auto *StartPtr = dyn_cast<SCEVUnknown>(PointerBase);
218220 if (!StartPtr)
219221 return false ;
222+ const Loop *L = AR->getLoop ();
220223 bool CheckForNonNull, CheckForFreed;
221- uint64_t DerefBytes = StartPtr->getValue ()->getPointerDereferenceableBytes (
224+ Value *StartPtrV = StartPtr->getValue ();
225+ uint64_t DerefBytes = StartPtrV->getPointerDereferenceableBytes (
222226 DL, CheckForNonNull, CheckForFreed);
223227
224- if (CheckForNonNull || CheckForFreed)
228+ if (DerefBytes && ( CheckForNonNull || CheckForFreed) )
225229 return false ;
226230
227231 const SCEV *Step = AR->getStepRecurrence (SE);
232+ Type *WiderTy = SE.getWiderType (MaxBTC->getType (), Step->getType ());
233+ const SCEV *DerefBytesSCEV = SE.getConstant (WiderTy, DerefBytes);
234+
235+ // Check if we have a suitable dereferenceable assumption we can use.
236+ RetainedKnowledge DerefRK;
237+ if (!StartPtrV->canBeFreed () &&
238+ getKnowledgeForValue (
239+ StartPtrV, {Attribute::Dereferenceable}, *AC,
240+ [&](RetainedKnowledge RK, Instruction *Assume, auto ) {
241+ if (!isValidAssumeForContext (
242+ Assume, L->getLoopPredecessor ()->getTerminator (), DT))
243+ return false ;
244+ if (RK.AttrKind == Attribute::Dereferenceable) {
245+ DerefRK = std::max (DerefRK, RK);
246+ return true ;
247+ }
248+ return false ;
249+ }) &&
250+ DerefRK.ArgValue ) {
251+ DerefBytesSCEV = SE.getUMaxExpr (DerefBytesSCEV,
252+ SE.getConstant (WiderTy, DerefRK.ArgValue ));
253+ }
254+
228255 bool IsKnownNonNegative = SE.isKnownNonNegative (Step);
229256 if (!IsKnownNonNegative && !SE.isKnownNegative (Step))
230257 return false ;
231258
232- Type *WiderTy = SE.getWiderType (MaxBTC->getType (), Step->getType ());
233259 Step = SE.getNoopOrSignExtend (Step, WiderTy);
234260 MaxBTC = SE.getNoopOrZeroExtend (MaxBTC, WiderTy);
235261
@@ -256,24 +282,23 @@ static bool evaluatePtrAddRecAtMaxBTCWillNotWrap(const SCEVAddRecExpr *AR,
256282 const SCEV *EndBytes = addSCEVNoOverflow (StartOffset, OffsetEndBytes, SE);
257283 if (!EndBytes)
258284 return false ;
259- return SE.isKnownPredicate (CmpInst::ICMP_ULE, EndBytes,
260- SE.getConstant (WiderTy, DerefBytes));
285+ return SE.isKnownPredicate (CmpInst::ICMP_ULE, EndBytes, DerefBytesSCEV);
261286 }
262287
263288 // For negative steps check if
264289 // * StartOffset >= (MaxBTC * Step + EltSize)
265290 // * StartOffset <= DerefBytes.
266291 assert (SE.isKnownNegative (Step) && " must be known negative" );
267292 return SE.isKnownPredicate (CmpInst::ICMP_SGE, StartOffset, OffsetEndBytes) &&
268- SE.isKnownPredicate (CmpInst::ICMP_ULE, StartOffset,
269- SE.getConstant (WiderTy, DerefBytes));
293+ SE.isKnownPredicate (CmpInst::ICMP_ULE, StartOffset, DerefBytesSCEV);
270294}
271295
272296std::pair<const SCEV *, const SCEV *> llvm::getStartAndEndForAccess (
273297 const Loop *Lp, const SCEV *PtrExpr, Type *AccessTy, const SCEV *BTC,
274298 const SCEV *MaxBTC, ScalarEvolution *SE,
275299 DenseMap<std::pair<const SCEV *, Type *>,
276- std::pair<const SCEV *, const SCEV *>> *PointerBounds) {
300+ std::pair<const SCEV *, const SCEV *>> *PointerBounds,
301+ AssumptionCache *AC, DominatorTree *DT) {
277302 std::pair<const SCEV *, const SCEV *> *PtrBoundsPair;
278303 if (PointerBounds) {
279304 auto [Iter, Ins] = PointerBounds->insert (
@@ -308,8 +333,8 @@ std::pair<const SCEV *, const SCEV *> llvm::getStartAndEndForAccess(
308333 // sets ScEnd to the maximum unsigned value for the type. Note that LAA
309334 // separately checks that accesses cannot wrap, so unsigned max
310335 // represents an upper bound.
311- if (evaluatePtrAddRecAtMaxBTCWillNotWrap (AR, MaxBTC, EltSizeSCEV, *SE,
312- DL )) {
336+ if (evaluatePtrAddRecAtMaxBTCWillNotWrap (AR, MaxBTC, EltSizeSCEV, *SE, DL,
337+ AC, DT )) {
313338 ScEnd = AR->evaluateAtIteration (MaxBTC, *SE);
314339 } else {
315340 ScEnd = SE->getAddExpr (
@@ -356,9 +381,9 @@ void RuntimePointerChecking::insert(Loop *Lp, Value *Ptr, const SCEV *PtrExpr,
356381 bool NeedsFreeze) {
357382 const SCEV *SymbolicMaxBTC = PSE.getSymbolicMaxBackedgeTakenCount ();
358383 const SCEV *BTC = PSE.getBackedgeTakenCount ();
359- const auto &[ScStart, ScEnd] =
360- getStartAndEndForAccess ( Lp, PtrExpr, AccessTy, BTC, SymbolicMaxBTC,
361- PSE. getSE (), & DC.getPointerBounds ());
384+ const auto &[ScStart, ScEnd] = getStartAndEndForAccess (
385+ Lp, PtrExpr, AccessTy, BTC, SymbolicMaxBTC, PSE. getSE () ,
386+ &DC. getPointerBounds (), DC. getAC (), DC.getDT ());
362387 assert (!isa<SCEVCouldNotCompute>(ScStart) &&
363388 !isa<SCEVCouldNotCompute>(ScEnd) &&
364389 " must be able to compute both start and end expressions" );
@@ -2011,10 +2036,10 @@ MemoryDepChecker::getDependenceDistanceStrideAndSize(
20112036 const SCEV *SymbolicMaxBTC = PSE.getSymbolicMaxBackedgeTakenCount ();
20122037 const auto &[SrcStart_, SrcEnd_] =
20132038 getStartAndEndForAccess (InnermostLoop, Src, ATy, BTC, SymbolicMaxBTC,
2014- PSE.getSE (), &PointerBounds);
2039+ PSE.getSE (), &PointerBounds, AC, DT );
20152040 const auto &[SinkStart_, SinkEnd_] =
20162041 getStartAndEndForAccess (InnermostLoop, Sink, BTy, BTC, SymbolicMaxBTC,
2017- PSE.getSE (), &PointerBounds);
2042+ PSE.getSE (), &PointerBounds, AC, DT );
20182043 if (!isa<SCEVCouldNotCompute>(SrcStart_) &&
20192044 !isa<SCEVCouldNotCompute>(SrcEnd_) &&
20202045 !isa<SCEVCouldNotCompute>(SinkStart_) &&
@@ -2975,7 +3000,7 @@ LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE,
29753000 const TargetTransformInfo *TTI,
29763001 const TargetLibraryInfo *TLI, AAResults *AA,
29773002 DominatorTree *DT, LoopInfo *LI,
2978- bool AllowPartial)
3003+ AssumptionCache *AC, bool AllowPartial)
29793004 : PSE(std::make_unique<PredicatedScalarEvolution>(*SE, *L)),
29803005 PtrRtChecking (nullptr ), TheLoop(L), AllowPartial(AllowPartial) {
29813006 unsigned MaxTargetVectorWidthInBits = std::numeric_limits<unsigned >::max ();
@@ -2985,8 +3010,8 @@ LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE,
29853010 MaxTargetVectorWidthInBits =
29863011 TTI->getRegisterBitWidth (TargetTransformInfo::RGK_FixedWidthVector) * 2 ;
29873012
2988- DepChecker = std::make_unique<MemoryDepChecker>(*PSE, L, SymbolicStrides,
2989- MaxTargetVectorWidthInBits);
3013+ DepChecker = std::make_unique<MemoryDepChecker>(
3014+ *PSE, AC, DT, L, SymbolicStrides, MaxTargetVectorWidthInBits);
29903015 PtrRtChecking = std::make_unique<RuntimePointerChecking>(*DepChecker, SE);
29913016 if (canAnalyzeLoop ())
29923017 CanVecMem = analyzeLoop (AA, LI, TLI, DT);
@@ -3055,7 +3080,7 @@ const LoopAccessInfo &LoopAccessInfoManager::getInfo(Loop &L,
30553080 // or if it was created with a different value of AllowPartial.
30563081 if (Inserted || It->second ->hasAllowPartial () != AllowPartial)
30573082 It->second = std::make_unique<LoopAccessInfo>(&L, &SE, TTI, TLI, &AA, &DT,
3058- &LI, AllowPartial);
3083+ &LI, AC, AllowPartial);
30593084
30603085 return *It->second ;
30613086}
@@ -3098,7 +3123,8 @@ LoopAccessInfoManager LoopAccessAnalysis::run(Function &F,
30983123 auto &LI = FAM.getResult <LoopAnalysis>(F);
30993124 auto &TTI = FAM.getResult <TargetIRAnalysis>(F);
31003125 auto &TLI = FAM.getResult <TargetLibraryAnalysis>(F);
3101- return LoopAccessInfoManager (SE, AA, DT, LI, &TTI, &TLI);
3126+ auto &AC = FAM.getResult <AssumptionAnalysis>(F);
3127+ return LoopAccessInfoManager (SE, AA, DT, LI, &TTI, &TLI, &AC);
31023128}
31033129
31043130AnalysisKey LoopAccessAnalysis::Key;
0 commit comments