@@ -22,27 +22,35 @@ spec:
2222 annotations :
2323 description : container {{`{{ $labels.container }}`}} of pod {{`{{ $labels.pod }}`}} experienced OOM event(s); count={{`{{ $value }}`}}
2424 expr : container_oom_events_total > 0
# Memory growth alerts - thresholds calibrated from baseline memory profiling.
# See MEMORY_ANALYSIS.md for details on normal operational memory patterns.
- alert: operator-controller-memory-growth
  # Rate of change (bytes/sec) of the summed working-set memory of the
  # operator-controller "manager" containers, evaluated over a 5m subquery.
  # Threshold: 200kB/sec. Baseline profiling shows episodic growth of
  # 132.4kB/sec during e2e tests, which is considered normal.
  expr: deriv(sum(container_memory_working_set_bytes{pod=~"operator-controller.*",container="manager"})[5m:]) > 200_000
  # The condition must hold for 5 minutes before the alert fires.
  for: 5m
  # Once fired, the alert stays active for a day after the condition clears.
  keep_firing_for: 1d
  annotations:
    description: ' operator-controller pod memory usage growing at a high rate for 5 minutes: {{`{{ $value | humanize }}`}}B/sec'
- alert: catalogd-memory-growth
  # Rate of change (bytes/sec) of the summed working-set memory of the
  # catalogd "manager" containers, evaluated over a 5m subquery.
  # Threshold: 200kB/sec, kept aligned with the operator-controller growth
  # alert for consistency.
  expr: deriv(sum(container_memory_working_set_bytes{pod=~"catalogd.*",container="manager"})[5m:]) > 200_000
  # The condition must hold for 5 minutes before the alert fires.
  for: 5m
  # Once fired, the alert stays active for a day after the condition clears.
  keep_firing_for: 1d
  annotations:
    description: ' catalogd pod memory usage growing at a high rate for 5 minutes: {{`{{ $value | humanize }}`}}B/sec'
# Memory usage alerts - thresholds calibrated for test/development environments.
# Production deployments may need different thresholds depending on workload.
- alert: operator-controller-memory-usage
  # Summed working-set memory (bytes) of the operator-controller "manager"
  # containers.
  # Threshold: 150MB. Baseline profiling shows a 107.9MB peak is normal, with
  # usage stabilizing at 78-88MB.
  expr: sum(container_memory_working_set_bytes{pod=~"operator-controller.*",container="manager"}) > 150_000_000
  # The condition must hold for 5 minutes before the alert fires.
  for: 5m
  # Once fired, the alert stays active for a day after the condition clears.
  keep_firing_for: 1d
  annotations:
    description: ' operator-controller pod using high memory resources for the last 5 minutes: {{`{{ $value | humanize }}`}}B'
- alert: catalogd-memory-usage
  # Summed working-set memory (bytes) of the catalogd "manager" containers.
  # Threshold: 75MB. Baseline profiling shows a 16.9MB peak, well under the
  # threshold.
  expr: sum(container_memory_working_set_bytes{pod=~"catalogd.*",container="manager"}) > 75_000_000
  # The condition must hold for 5 minutes before the alert fires.
  for: 5m
  # Once fired, the alert stays active for a day after the condition clears.
  keep_firing_for: 1d
  annotations:
    description: ' catalogd pod using high memory resources for the last 5 minutes: {{`{{ $value | humanize }}`}}B'
0 commit comments