Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -281,13 +281,14 @@ test-experimental-e2e: KIND_CLUSTER_NAME := operator-controller-e2e
test-experimental-e2e: GO_BUILD_EXTRA_FLAGS := -cover
test-experimental-e2e: COVERAGE_NAME := experimental-e2e
test-experimental-e2e: export MANIFEST := $(EXPERIMENTAL_RELEASE_MANIFEST)
test-experimental-e2e: PROMETHEUS_VALUES := helm/prom_experimental.yaml
test-experimental-e2e: run-internal image-registry prometheus e2e e2e-coverage kind-clean #HELP Run experimental e2e test suite on local kind cluster

.PHONY: prometheus
prometheus: PROMETHEUS_NAMESPACE := olmv1-system
prometheus: PROMETHEUS_VERSION := v0.83.0
prometheus: $(KUSTOMIZE) #EXHELP Deploy Prometheus into specified namespace
./hack/test/install-prometheus.sh $(PROMETHEUS_NAMESPACE) $(PROMETHEUS_VERSION) $(VERSION)
./hack/test/install-prometheus.sh $(PROMETHEUS_NAMESPACE) $(PROMETHEUS_VERSION) $(VERSION) $(PROMETHEUS_VALUES)

.PHONY: test-extension-developer-e2e
test-extension-developer-e2e: SOURCE_MANIFEST := $(STANDARD_E2E_MANIFEST)
Expand Down
12 changes: 9 additions & 3 deletions hack/test/install-prometheus.sh
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,10 @@ set -euo pipefail

help="install-prometheus.sh is used to set up prometheus monitoring for e2e testing.
Usage:
install-prometheus.sh [PROMETHEUS_NAMESPACE] [PROMETHEUS_VERSION] [GIT_VERSION]
install-prometheus.sh [PROMETHEUS_NAMESPACE] [PROMETHEUS_VERSION] [GIT_VERSION] [PROMETHEUS_VALUES]
"

if [[ "$#" -ne 3 ]]; then
if [[ "$#" -lt 3 || "$#" -gt 4 ]]; then
echo "Illegal number of arguments passed"
echo "${help}"
exit 1
Expand All @@ -18,6 +18,12 @@ fi
PROMETHEUS_NAMESPACE="$1"
PROMETHEUS_VERSION="$2"
GIT_VERSION="$3"
PROMETHEUS_VALUES="${4:-}"

if [ -n "${PROMETHEUS_VALUES}" ]; then
echo "Adding ${PROMETHEUS_VALUES} to templating"
PROMETHEUS_VALUES="--values ${PROMETHEUS_VALUES}"
fi

TMPDIR="$(mktemp -d)"
trap 'echo "Cleaning up $TMPDIR"; rm -rf "$TMPDIR"' EXIT
Expand All @@ -36,7 +42,7 @@ echo "Waiting for Prometheus Operator pod to become ready..."
kubectl wait --for=condition=Ready pod -n "$PROMETHEUS_NAMESPACE" -l app.kubernetes.io/name=prometheus-operator

echo "Applying prometheus Helm chart..."
${HELM} template prometheus helm/prometheus | sed "s/cert-git-version/cert-${VERSION}/g" | kubectl apply -f -
${HELM} template prometheus helm/prometheus ${PROMETHEUS_VALUES} | sed "s/cert-git-version/cert-${VERSION}/g" | kubectl apply -f -

echo "Waiting for metrics scraper to become ready..."
kubectl wait --for=create pods -n "$PROMETHEUS_NAMESPACE" prometheus-prometheus-0 --timeout=60s
Expand Down
14 changes: 14 additions & 0 deletions helm/prom_experimental.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# experimental values for OLMv1 prometheus
# This is a YAML-formatted file.
# Declare variables to be passed into your templates.
# Quote the threshold values to avoid the helm templater interpreting them

# List of options to include
options:
operatorController:
thresholds:
memoryGrowth: "200_000"
memoryUsage: "150_000_000"
catalogd:
thresholds:
memoryGrowth: "200_000"
Original file line number Diff line number Diff line change
Expand Up @@ -25,48 +25,48 @@ spec:
- alert: operator-controller-memory-growth
annotations:
description: 'operator-controller pod memory usage growing at a high rate for 5 minutes: {{`{{ $value | humanize }}`}}B/sec'
expr: deriv(sum(container_memory_working_set_bytes{pod=~"operator-controller.*",container="manager"})[5m:]) > 100_000
expr: deriv(sum(container_memory_working_set_bytes{pod=~"operator-controller.*",container="manager"})[5m:]) > {{ .Values.options.operatorController.thresholds.memoryGrowth }}
for: 5m
keep_firing_for: 1d
- alert: catalogd-memory-growth
annotations:
description: 'catalogd pod memory usage growing at a high rate for 5 minutes: {{`{{ $value | humanize }}`}}B/sec'
expr: deriv(sum(container_memory_working_set_bytes{pod=~"catalogd.*",container="manager"})[5m:]) > 100_000
expr: deriv(sum(container_memory_working_set_bytes{pod=~"catalogd.*",container="manager"})[5m:]) > {{ .Values.options.catalogd.thresholds.memoryGrowth }}
for: 5m
keep_firing_for: 1d
- alert: operator-controller-memory-usage
annotations:
description: 'operator-controller pod using high memory resources for the last 5 minutes: {{`{{ $value | humanize }}`}}B'
expr: sum(container_memory_working_set_bytes{pod=~"operator-controller.*",container="manager"}) > 100_000_000
expr: sum(container_memory_working_set_bytes{pod=~"operator-controller.*",container="manager"}) > {{ .Values.options.operatorController.thresholds.memoryUsage }}
for: 5m
keep_firing_for: 1d
- alert: catalogd-memory-usage
annotations:
description: 'catalogd pod using high memory resources for the last 5 minutes: {{`{{ $value | humanize }}`}}B'
expr: sum(container_memory_working_set_bytes{pod=~"catalogd.*",container="manager"}) > 75_000_000
expr: sum(container_memory_working_set_bytes{pod=~"catalogd.*",container="manager"}) > {{ .Values.options.catalogd.thresholds.memoryUsage }}
for: 5m
keep_firing_for: 1d
- alert: operator-controller-cpu-usage
annotations:
description: 'operator-controller using high cpu resource for 5 minutes: {{`{{ $value | printf "%.2f" }}`}}%'
expr: rate(container_cpu_usage_seconds_total{pod=~"operator-controller.*",container="manager"}[5m]) * 100 > 20
expr: rate(container_cpu_usage_seconds_total{pod=~"operator-controller.*",container="manager"}[5m]) * 100 > {{ .Values.options.operatorController.thresholds.cpuUsage }}
for: 5m
keep_firing_for: 1d
- alert: catalogd-cpu-usage
annotations:
description: 'catalogd using high cpu resources for 5 minutes: {{`{{ $value | printf "%.2f" }}`}}%'
expr: rate(container_cpu_usage_seconds_total{pod=~"catalogd.*",container="manager"}[5m]) * 100 > 20
expr: rate(container_cpu_usage_seconds_total{pod=~"catalogd.*",container="manager"}[5m]) * 100 > {{ .Values.options.catalogd.thresholds.cpuUsage }}
for: 5m
keep_firing_for: 1d
- alert: operator-controller-api-call-rate
annotations:
description: 'operator-controller making excessive API calls for 5 minutes: {{`{{ $value | printf "%.2f" }}`}}/sec'
expr: sum(rate(rest_client_requests_total{job=~"operator-controller-service"}[5m])) > 10
expr: sum(rate(rest_client_requests_total{job=~"operator-controller-service"}[5m])) > {{ .Values.options.operatorController.thresholds.apiCallRate }}
for: 5m
keep_firing_for: 1d
- alert: catalogd-api-call-rate
annotations:
description: 'catalogd making excessive API calls for 5 minutes: {{`{{ $value | printf "%.2f" }}`}}/sec'
expr: sum(rate(rest_client_requests_total{job=~"catalogd-service"}[5m])) > 5
expr: sum(rate(rest_client_requests_total{job=~"catalogd-service"}[5m])) > {{ .Values.options.catalogd.thresholds.apiCallRate }}
for: 5m
keep_firing_for: 1d
11 changes: 11 additions & 0 deletions helm/prometheus/values.yaml
Original file line number Diff line number Diff line change
@@ -1,13 +1,24 @@
# Default values for OLMv1.
# This is a YAML-formatted file.
# Declare variables to be passed into your templates.
# Quote the threshold values to avoid the helm templater interpreting them

# List of components to include
options:
operatorController:
enabled: true
thresholds:
memoryGrowth: "100_000"
memoryUsage: "100_000_000"
cpuUsage: 20
apiCallRate: 10
catalogd:
enabled: true
thresholds:
memoryGrowth: "100_000"
memoryUsage: "75_000_000"
cpuUsage: 20
apiCallRate: 5

# The set of namespaces
namespaces:
Expand Down