From c01a078ac376687e9823225f5e249b7d33bd714a Mon Sep 17 00:00:00 2001 From: Vlado Djerek Date: Thu, 26 Jun 2025 16:45:53 +0200 Subject: [PATCH 01/14] init ai-starter-kit --- .gitignore | 4 + ai/ai-starter-kit/Makefile | 43 + .../ci/terraform/default_env.tfvars | 9 + ai/ai-starter-kit/ci/terraform/main.tf | 108 +++ ai/ai-starter-kit/ci/terraform/outputs.tf | 15 + ai/ai-starter-kit/ci/terraform/variables.tf | 26 + ai/ai-starter-kit/ci/test_hub.py | 59 ++ ai/ai-starter-kit/cloudbuild.yaml | 208 +++++ .../helm-chart/ai-starter-kit/.helmignore | 23 + .../helm-chart/ai-starter-kit/Chart.yaml | 45 + .../ai-starter-kit/files/chat_bot.ipynb | 312 +++++++ .../ai-starter-kit/files/download_models.py | 17 + .../files/multi-agent-ollama.ipynb | 525 ++++++++++++ .../files/multi-agent-ramalama.ipynb | 466 ++++++++++ .../ai-starter-kit/files/multi-agent.ipynb | 687 +++++++++++++++ .../helm-chart/ai-starter-kit/files/ray.ipynb | 798 ++++++++++++++++++ .../ai-starter-kit/files/requirements.txt | 10 + .../ai-starter-kit/files/welcome.ipynb | 104 +++ .../ai-starter-kit/networkpolicy.yaml | 46 + .../ai-starter-kit/templates/NOTES.txt | 1 + .../ai-starter-kit/templates/_helpers.tpl | 62 ++ .../ai-starter-kit/templates/configmaps.yaml | 18 + .../ai-starter-kit/templates/hf-secret.yaml | 13 + .../ai-starter-kit/templates/local-pv.yaml | 16 + .../ai-starter-kit/templates/pvc-ray.yaml | 28 + .../ai-starter-kit/templates/pvc.yaml | 28 + .../templates/ramalama-deployment.yaml | 49 ++ .../helm-chart/ai-starter-kit/values-gke.yaml | 121 +++ .../helm-chart/ai-starter-kit/values.yaml | 192 +++++ ai/ai-starter-kit/notebooks/multi-agent.ipynb | 621 ++++++++++++++ ai/ai-starter-kit/notebooks/test_ollama.py | 11 + ai/ai-starter-kit/notebooks/test_ray.py | 12 + 32 files changed, 4677 insertions(+) create mode 100644 ai/ai-starter-kit/Makefile create mode 100644 ai/ai-starter-kit/ci/terraform/default_env.tfvars create mode 100644 ai/ai-starter-kit/ci/terraform/main.tf create mode 100644 ai/ai-starter-kit/ci/terraform/outputs.tf create mode 100644 ai/ai-starter-kit/ci/terraform/variables.tf create mode 100644 ai/ai-starter-kit/ci/test_hub.py create mode 100644 ai/ai-starter-kit/cloudbuild.yaml create mode 100644 ai/ai-starter-kit/helm-chart/ai-starter-kit/.helmignore create mode 100644 ai/ai-starter-kit/helm-chart/ai-starter-kit/Chart.yaml create mode 100644 ai/ai-starter-kit/helm-chart/ai-starter-kit/files/chat_bot.ipynb create mode 100644 ai/ai-starter-kit/helm-chart/ai-starter-kit/files/download_models.py create mode 100644 ai/ai-starter-kit/helm-chart/ai-starter-kit/files/multi-agent-ollama.ipynb create mode 100644 ai/ai-starter-kit/helm-chart/ai-starter-kit/files/multi-agent-ramalama.ipynb create mode 100644 ai/ai-starter-kit/helm-chart/ai-starter-kit/files/multi-agent.ipynb create mode 100644 ai/ai-starter-kit/helm-chart/ai-starter-kit/files/ray.ipynb create mode 100644 ai/ai-starter-kit/helm-chart/ai-starter-kit/files/requirements.txt create mode 100644 ai/ai-starter-kit/helm-chart/ai-starter-kit/files/welcome.ipynb create mode 100644 ai/ai-starter-kit/helm-chart/ai-starter-kit/networkpolicy.yaml create mode 100644 ai/ai-starter-kit/helm-chart/ai-starter-kit/templates/NOTES.txt create mode 100644 ai/ai-starter-kit/helm-chart/ai-starter-kit/templates/_helpers.tpl create mode 100644 ai/ai-starter-kit/helm-chart/ai-starter-kit/templates/configmaps.yaml create mode 100644 ai/ai-starter-kit/helm-chart/ai-starter-kit/templates/hf-secret.yaml create mode 100644 
ai/ai-starter-kit/helm-chart/ai-starter-kit/templates/local-pv.yaml create mode 100644 ai/ai-starter-kit/helm-chart/ai-starter-kit/templates/pvc-ray.yaml create mode 100644 ai/ai-starter-kit/helm-chart/ai-starter-kit/templates/pvc.yaml create mode 100644 ai/ai-starter-kit/helm-chart/ai-starter-kit/templates/ramalama-deployment.yaml create mode 100644 ai/ai-starter-kit/helm-chart/ai-starter-kit/values-gke.yaml create mode 100644 ai/ai-starter-kit/helm-chart/ai-starter-kit/values.yaml create mode 100644 ai/ai-starter-kit/notebooks/multi-agent.ipynb create mode 100644 ai/ai-starter-kit/notebooks/test_ollama.py create mode 100644 ai/ai-starter-kit/notebooks/test_ray.py diff --git a/.gitignore b/.gitignore index 6ef5822c8..f8e65abf8 100644 --- a/.gitignore +++ b/.gitignore @@ -43,3 +43,7 @@ cscope.* /bazel-* *.pyc + +# Helm chart dependecies cache +**/Chart.lock +**/charts/*.tgz \ No newline at end of file diff --git a/ai/ai-starter-kit/Makefile b/ai/ai-starter-kit/Makefile new file mode 100644 index 000000000..eead31a50 --- /dev/null +++ b/ai/ai-starter-kit/Makefile @@ -0,0 +1,43 @@ +lint: + helm lint helm-chart/ai-starter-kit + +dep_update: + helm dependency update helm-chart/ai-starter-kit + +install: + helm upgrade --install ai-starter-kit helm-chart/ai-starter-kit --set huggingface.token="your_hf_token" --timeout 10m -f helm-chart/ai-starter-kit/values.yaml + +install_gke: + helm upgrade --install ai-starter-kit helm-chart/ai-starter-kit --set huggingface.token="your_hf_token" --timeout 10m -f helm-chart/ai-starter-kit/values-gke.yaml + +start: + mkdir -p /tmp/models-cache + minikube start --cpus 4 --memory 15000 --mount --mount-string="/tmp/models-cache:/tmp/models-cache" + +uninstall: + helm uninstall ai-starter-kit + +destroy: + minikube delete + +validate_jupyterhub: + kubectl get pods; \ + kubectl wait --for=condition=Ready pods -l 'component!=continuous-image-puller' --timeout=1800s; \ + kubectl get pods; \ + kubectl get services; \ + kubectl port-forward service/ai-starter-kit-jupyterhub-proxy-public 8081:80 & \ + PID=$$!; \ + echo "Port-forward PID=$${PID}"; \ + sleep 5s; \ + python3 ./ci/test_hub.py "127.0.0.1:8081"; \ + kill $$PID + +validate_ray: + kubectl wait --for=condition=Ready pods -l 'app.kubernetes.io/created-by=kuberay-operator' --timeout=1800s; \ + kubectl get pods; \ + kubectl get services; \ + kubectl port-forward service/ai-starter-kit-kuberay-head-svc 8265:8265 & \ + PID=$$!; \ + sleep 10s; \ + ray job submit --address=http://127.0.0.1:8265 -- python -c "import ray; ray.init(); print(ray.cluster_resources())"; \ + kill $$PID diff --git a/ai/ai-starter-kit/ci/terraform/default_env.tfvars b/ai/ai-starter-kit/ci/terraform/default_env.tfvars new file mode 100644 index 000000000..573c66bab --- /dev/null +++ b/ai/ai-starter-kit/ci/terraform/default_env.tfvars @@ -0,0 +1,9 @@ +project_id = "" +default_resource_name = "" + +cluster_name = "" # Leave empty to use the default name (default_resource_name) +cluster_location = "us-central1" +private_cluster = false +autopilot_cluster = true + +service_account_name = "" # Leave empty to use the default name diff --git a/ai/ai-starter-kit/ci/terraform/main.tf b/ai/ai-starter-kit/ci/terraform/main.tf new file mode 100644 index 000000000..5ab94ee89 --- /dev/null +++ b/ai/ai-starter-kit/ci/terraform/main.tf @@ -0,0 +1,108 @@ +terraform { + + required_providers { + kubectl = { + source = "gavinbunney/kubectl" + version = ">= 1.19.0" + } + } +} +data "google_client_config" "default" {} + + +data "google_project" "project" { + 
project_id = var.project_id +} + + +locals { + cluster_name = var.cluster_name != "" ? var.cluster_name : var.default_resource_name +} + +module "gke_cluster" { + source = "github.com/ai-on-gke/common-infra/common/infrastructure?ref=main" + + project_id = var.project_id + cluster_name = local.cluster_name + cluster_location = var.cluster_location + autopilot_cluster = var.autopilot_cluster + private_cluster = var.private_cluster + create_network = false + network_name = "default" + subnetwork_name = "default" + enable_gpu = true + gpu_pools = [ + { + name = "gpu-pool-l4" + machine_type = "g2-standard-24" + node_locations = "us-central1-a" ## comment to autofill node_location based on cluster_location + autoscaling = true + min_count = 1 + max_count = 3 + disk_size_gb = 100 + disk_type = "pd-balanced" + enable_gcfs = true + logging_variant = "DEFAULT" + accelerator_count = 2 + accelerator_type = "nvidia-l4" + gpu_driver_version = "DEFAULT" + } + ] + ray_addon_enabled = false +} + +locals { + #ca_certificate = base64decode(module.gke_cluster.ca_certificate) + cluster_membership_id = var.cluster_membership_id == "" ? local.cluster_name : var.cluster_membership_id + host = var.private_cluster ? "https://connectgateway.googleapis.com/v1/projects/${data.google_project.project.number}/locations/${var.cluster_location}/gkeMemberships/${local.cluster_membership_id}" : "https://${module.gke_cluster.endpoint}" + +} + +provider "kubernetes" { + alias = "ai_starter_kit" + host = local.host + token = data.google_client_config.default.access_token + cluster_ca_certificate = var.private_cluster ? "" : base64decode(module.gke_cluster.ca_certificate) + + dynamic "exec" { + for_each = var.private_cluster ? [1] : [] + content { + api_version = "client.authentication.k8s.io/v1beta1" + command = "gke-gcloud-auth-plugin" + } + } +} + +locals { + service_account_name = var.service_account_name != "" ? var.service_account_name : var.default_resource_name +} + + +module "ai_starter_kit_workload_identity" { + providers = { + kubernetes = kubernetes.ai_starter_kit + } + source = "terraform-google-modules/kubernetes-engine/google//modules/workload-identity" + name = local.service_account_name + namespace = "default" + roles = ["roles/storage.objectUser"] + project_id = var.project_id + depends_on = [module.gke_cluster] +} + +provider "kubectl" { + alias = "ai_starter_kit" + apply_retry_count = 15 + host = local.host + token = data.google_client_config.default.access_token + cluster_ca_certificate = var.private_cluster ? "" : base64decode(module.gke_cluster.ca_certificate) + load_config_file = true + + dynamic "exec" { + for_each = var.private_cluster ? 
[1] : [] + content { + api_version = "client.authentication.k8s.io/v1beta1" + command = "gke-gcloud-auth-plugin" + } + } +} diff --git a/ai/ai-starter-kit/ci/terraform/outputs.tf b/ai/ai-starter-kit/ci/terraform/outputs.tf new file mode 100644 index 000000000..006f5b55b --- /dev/null +++ b/ai/ai-starter-kit/ci/terraform/outputs.tf @@ -0,0 +1,15 @@ + +output "gke_cluster_name" { + value = local.cluster_name + description = "GKE cluster name" +} + +output "gke_cluster_location" { + value = var.cluster_location + description = "GKE cluster location" +} + +output "project_id" { + value = var.project_id + description = "GKE cluster location" +} diff --git a/ai/ai-starter-kit/ci/terraform/variables.tf b/ai/ai-starter-kit/ci/terraform/variables.tf new file mode 100644 index 000000000..10c4d6390 --- /dev/null +++ b/ai/ai-starter-kit/ci/terraform/variables.tf @@ -0,0 +1,26 @@ +variable "project_id" { + type = string +} +variable "default_resource_name" { + type = string +} +variable "cluster_name" { + type = string +} +variable "cluster_location" { + type = string +} +variable "autopilot_cluster" { + type = bool +} +variable "private_cluster" { + type = bool +} +variable "cluster_membership_id" { + type = string + description = "require to use connectgateway for private clusters, default: cluster_name" + default = "" +} +variable "service_account_name" { + type = string +} diff --git a/ai/ai-starter-kit/ci/test_hub.py b/ai/ai-starter-kit/ci/test_hub.py new file mode 100644 index 000000000..a7a40436b --- /dev/null +++ b/ai/ai-starter-kit/ci/test_hub.py @@ -0,0 +1,59 @@ +import sys +import requests +from packaging.version import Version as V + + +def test_hub_up(hub_url): + r = requests.get(hub_url) + r.raise_for_status() + print("JupyterHub up.") + + +def test_api_root(hub_url): + """ + Tests the hub api's root endpoint (/). The hub's version should be returned. + + A typical jupyterhub logging response to this test: + + [I 2019-09-25 12:03:12.051 JupyterHub log:174] 200 GET /hub/api (test@127.0.0.1) 9.57ms + """ + r = requests.get(hub_url + "/hub/api") + r.raise_for_status() + info = r.json() + version = info["version"] + assert V("4") <= V(version) <= V("5.5"), f"version {version} must be between 4 and 5.5" + print("JupyterHub Rest API is working.") + + +def test_hub_login(hub_url): + """ + Tests the hub dummy authenticator login credentials. Login credentials retrieve + from /jupyter_config/config.yaml. After successfully login, user will be + redirected to /hub/spawn. 
+ """ + username, password = "user", "sneakypass" + session = requests.Session() + + response = session.get(hub_url + "/hub/login") + response.raise_for_status() + + auth_params = {} + if "_xsrf" in session.cookies: + auth_params = {"_xsrf": session.cookies["_xsrf"]} + + response = session.post( + hub_url + "/hub/login", + params=auth_params, + data={"username": username, "password": password}, + allow_redirects=True, + ) + response.raise_for_status() + assert (hub_url + "/hub/spawn-pending/user") in response.url, f"unexpected response url: got {response.url}, expected {hub_url}/hub/spawn-pending/user" + print("JupyterHub login success.") + + +hub_url = "http://" + sys.argv[1] + +test_hub_up(hub_url) +test_api_root(hub_url) +test_hub_login(hub_url) diff --git a/ai/ai-starter-kit/cloudbuild.yaml b/ai/ai-starter-kit/cloudbuild.yaml new file mode 100644 index 000000000..332063066 --- /dev/null +++ b/ai/ai-starter-kit/cloudbuild.yaml @@ -0,0 +1,208 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +steps: +- id: "ai-starter-kit: validate platform" + name: "gcr.io/${PROJECT_ID}/terraform" + dir: "/workspace/ci/terraform" + script: | + terraform init -no-color + terraform validate -no-color + waitFor: ["-"] + +- id: 'ai-starter-kit: create gke cluster' + name: "gcr.io/${PROJECT_ID}/terraform" + dir: "/workspace/ci/terraform" + env: + - "KUBE_LOAD_CONFIG_FILE=false" + entrypoint: 'sh' + args: + - '-c' + - | + set -e + + echo "fail" > /workspace/ai_starter_kit_cluster_result.txt + terraform apply \ + -var-file=default_env.tfvars \ + -var=project_id=$PROJECT_ID \ + -var=default_resource_name=ml-ai-starter-kit-${SHORT_SHA}-${_BUILD_ID} \ + -var=cluster_name=ml-ai-starter-kit-${SHORT_SHA}-${_BUILD_ID}-cluster \ + -var=cluster_location=${_REGION} \ + -var=private_cluster=false \ + -var=autopilot_cluster=${_AUTOPILOT_CLUSTER} \ + -var=service_account_name=ml-ai-starter-kit-${SHORT_SHA}-${_BUILD_ID}-sa \ + -auto-approve -no-color + echo "pass" > /workspace/ai_starter_kit_cluster_result.txt + allowFailure: true + waitFor: ['ai-starter-kit: validate platform'] + +- id: 'ai-starter-kit: generate kubeconfig' + name: 'gcr.io/cloud-builders/gcloud' + args: + - 'container' + - 'clusters' + - 'get-credentials' + - 'ml-ai-starter-kit-${SHORT_SHA}-${_BUILD_ID}-cluster' + - '--region=${_REGION}' + - '--project=${PROJECT_ID}' + waitFor: ['ai-starter-kit: create gke cluster'] + +- id: 'ai-starter-kit: make install_gke' + name: "gcr.io/cloud-builders/kubectl" + env: + - "CLOUDSDK_COMPUTE_ZONE=${_REGION}" + - "CLOUDSDK_CONTAINER_CLUSTER=ml-${SHORT_SHA}-${_BUILD_ID}-cluster" + entrypoint: 'sh' + args: + - '-c' + - | + set -e + + echo "fail" > /workspace/ai_starter_kit_make_install_gke_result.txt + apt update + apt install curl make --assume-yes + curl -fsSL -o get_helm.sh https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 + chmod 700 get_helm.sh + /workspace/get_helm.sh + make install_gke + sleep 300s + echo "pass" > /workspace/ai_starter_kit_make_install_gke_result.txt + 
allowFailure: true + waitFor: ['ai-starter-kit: generate kubeconfig'] + +- id: 'ai-starter-kit: test jupyterhub' + name: "gcr.io/cloud-builders/kubectl" + env: + - "CLOUDSDK_COMPUTE_ZONE=${_REGION}" + - "CLOUDSDK_CONTAINER_CLUSTER=ml-${SHORT_SHA}-${_BUILD_ID}-cluster" + entrypoint: 'sh' + args: + - '-c' + - | + set -e + + echo "fail" > /workspace/ai_starter_kit_jupyterhub_test_result.txt + + apt update + apt install make python3-venv --assume-yes + apt install python3-pip --assume-yes + pip install pyyaml requests packaging + + make validate_jupyterhub + + echo "pass" > /workspace/ai_starter_kit_jupyterhub_test_result.txt + allowFailure: true + waitFor: ['ai-starter-kit: make install_gke'] + +- id: "ai-starter-kit: test ray cluster" + name: "gcr.io/cloud-builders/kubectl" + env: + - "CLOUDSDK_COMPUTE_ZONE=${_REGION}" + - "CLOUDSDK_CONTAINER_CLUSTER=ml-${SHORT_SHA}-${_PR_NUMBER}-${_BUILD_ID}-cluster" + entrypoint: 'sh' + args: + - '-c' + - | + set -e + echo "fail" > /workspace/ai_starter_kit_ray_result.txt + + apt update + apt install make python3-venv --assume-yes + apt install python3-pip --assume-yes + pip install ray==2.41.0 "ray[data,train,tune,serve]" + + make validate_ray + + echo "pass" > /workspace/ai_starter_kit_ray_result.txt + allowFailure: true + waitFor: ['ai-starter-kit: make install_gke'] + +- id: 'ai-starter-kit: cleanup gke cluster' + name: "gcr.io/${PROJECT_ID}/terraform" + dir: "/workspace/ci/terraform" + env: + - "KUBE_LOAD_CONFIG_FILE=false" + entrypoint: 'sh' + args: + - '-c' + - | + set -e + + echo "fail" > /workspace/ai_starter_kit_cleanup_result.txt + terraform destroy \ + -var-file=default_env.tfvars \ + -var=project_id=$PROJECT_ID \ + -var=default_resource_name=ml-ai-starter-kit-${SHORT_SHA}-${_BUILD_ID} \ + -var=cluster_name=ml-ai-starter-kit-${SHORT_SHA}-${_BUILD_ID}-cluster \ + -var=cluster_location=${_REGION} \ + -var=private_cluster=false \ + -var=autopilot_cluster=${_AUTOPILOT_CLUSTER} \ + -var=service_account_name=ml-ai-starter-kit-${SHORT_SHA}-${_BUILD_ID}-sa \ + -auto-approve -no-color + echo "pass" > /workspace/ai_starter_kit_cleanup_result.txt + allowFailure: true + waitFor: ['ai-starter-kit: test jupyterhub', 'ai-starter-kit: test ray cluster'] + +- id: 'check result' + name: "ubuntu" + entrypoint: 'bash' + args: + - '-c' + - | + set -e + + echo "pass" > /workspace/check_result.txt + + if [[ $(cat /workspace/ai_starter_kit_cluster_result.txt) != "pass" ]]; then + echo "ai starter kit cluster creation failed" + echo "error" > /workspace/check_result.txt + fi + + if [[ $(cat /workspace/ai_starter_kit_make_install_gke_result.txt) != "pass" ]]; then + echo "ai starter kit make install_gke failed" + echo "error" > /workspace/check_result.txt + fi + + if [[ $(cat /workspace/ai_starter_kit_jupyterhub_test_result.txt) != "pass" ]]; then + echo "ai starter kit jupyterhub test failed" + echo "error" > /workspace/check_result.txt + fi + + if [[ $(cat /workspace/ai_starter_kit_ray_result.txt) != "pass" ]]; then + echo "ai starter kit ray test failed" + echo "error" > /workspace/check_result.txt + fi + + if [[ $(cat /workspace/ai_starter_kit_cleanup_result.txt) != "pass" ]]; then + echo "ai starter kit clean up failed" + echo "error" > /workspace/check_result.txt + fi + + if [[ $(cat /workspace/check_result.txt) != "pass" ]]; then + cat /workspace/check_result.txt + exit 1 + fi + waitFor: ['ai-starter-kit: cleanup gke cluster'] + +substitutions: + _AUTOPILOT_CLUSTER: "true" + _REGION: us-east4 + # _USER_NAME: github + _BUILD_ID: "1234567" #${BUILD_ID:0:8} +# 
logsBucket: gs://ai-on-gke-qss-build-logs +options: + logging: CLOUD_LOGGING_ONLY + substitutionOption: "ALLOW_LOOSE" + machineType: "E2_HIGHCPU_8" +timeout: 5400s diff --git a/ai/ai-starter-kit/helm-chart/ai-starter-kit/.helmignore b/ai/ai-starter-kit/helm-chart/ai-starter-kit/.helmignore new file mode 100644 index 000000000..0e8a0eb36 --- /dev/null +++ b/ai/ai-starter-kit/helm-chart/ai-starter-kit/.helmignore @@ -0,0 +1,23 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. +.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*.orig +*~ +# Various IDEs +.project +.idea/ +*.tmproj +.vscode/ diff --git a/ai/ai-starter-kit/helm-chart/ai-starter-kit/Chart.yaml b/ai/ai-starter-kit/helm-chart/ai-starter-kit/Chart.yaml new file mode 100644 index 000000000..64f9e5f13 --- /dev/null +++ b/ai/ai-starter-kit/helm-chart/ai-starter-kit/Chart.yaml @@ -0,0 +1,45 @@ +apiVersion: v2 +name: ai-starter-kit +description: A Helm chart for Kubernetes + +# A chart can be either an 'application' or a 'library' chart. +# +# Application charts are a collection of templates that can be packaged into versioned archives +# to be deployed. +# +# Library charts provide useful utilities or functions for the chart developer. They're included as +# a dependency of application charts to inject those utilities and functions into the rendering +# pipeline. Library charts do not define any templates and therefore cannot be deployed. +type: application + +# This is the chart version. This version number should be incremented each time you make changes +# to the chart and its templates, including the app version. +# Versions are expected to follow Semantic Versioning (https://semver.org/) +version: 0.1.0 + +# This is the version number of the application being deployed. This version number should be +# incremented each time you make changes to the application. Versions are not expected to +# follow Semantic Versioning. They should reflect the version the application is using. +# It is recommended to use it with quotes. 
+appVersion: "1.16.0" + + +dependencies: + - name: kuberay-operator + condition: ray-cluster.enabled + version: "1.3.0" + repository: "https://ray-project.github.io/kuberay-helm" + - condition: ray-cluster.enabled + name: ray-cluster + version: "1.3.0" + repository: "https://ray-project.github.io/kuberay-helm" + - name: jupyterhub + version: "4.2.0" + repository: "https://hub.jupyter.org/helm-chart/" + - name: mlflow + version: "0.12.0" + repository: "https://community-charts.github.io/helm-charts" + - name: ollama + condition: ollama.enabled + version: "1.27.0" + repository: "https://helm.otwld.com" diff --git a/ai/ai-starter-kit/helm-chart/ai-starter-kit/files/chat_bot.ipynb b/ai/ai-starter-kit/helm-chart/ai-starter-kit/files/chat_bot.ipynb new file mode 100644 index 000000000..0834cf6c3 --- /dev/null +++ b/ai/ai-starter-kit/helm-chart/ai-starter-kit/files/chat_bot.ipynb @@ -0,0 +1,312 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "e9e3dd59-b4d9-4de5-a6aa-a72d1480ac77", + "metadata": {}, + "outputs": [], + "source": [ + "from ollama import Client\n", + "\n", + "client = Client(\n", + " host='http://ai-starter-kit-ollama:11434',\n", + " headers={'x-some-header': 'some-value'}\n", + ")\n", + "\n", + "def get_response(prompt):\n", + " response = client.chat(model='gemma3', messages=[\n", + " {\n", + " 'role': 'user',\n", + " 'content': prompt,\n", + " },\n", + " ])\n", + " return response.message.content" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "dd1513d4-18c5-46d7-8260-f90be004d315", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/javascript": "(function(root) {\n function now() {\n return new Date();\n }\n\n const force = true;\n const py_version = '3.7.3'.replace('rc', '-rc.').replace('.dev', '-dev.');\n const reloading = false;\n const Bokeh = root.Bokeh;\n\n // Set a timeout for this load but only if we are not already initializing\n if (typeof (root._bokeh_timeout) === \"undefined\" || (force || !root._bokeh_is_initializing)) {\n root._bokeh_timeout = Date.now() + 5000;\n root._bokeh_failed_load = false;\n }\n\n function run_callbacks() {\n try {\n root._bokeh_onload_callbacks.forEach(function(callback) {\n if (callback != null)\n callback();\n });\n } finally {\n delete root._bokeh_onload_callbacks;\n }\n console.debug(\"Bokeh: all callbacks have finished\");\n }\n\n function load_libs(css_urls, js_urls, js_modules, js_exports, callback) {\n if (css_urls == null) css_urls = [];\n if (js_urls == null) js_urls = [];\n if (js_modules == null) js_modules = [];\n if (js_exports == null) js_exports = {};\n\n root._bokeh_onload_callbacks.push(callback);\n\n if (root._bokeh_is_loading > 0) {\n // Don't load bokeh if it is still initializing\n console.debug(\"Bokeh: BokehJS is being loaded, scheduling callback at\", now());\n return null;\n } else if (js_urls.length === 0 && js_modules.length === 0 && Object.keys(js_exports).length === 0) {\n // There is nothing to load\n run_callbacks();\n return null;\n }\n\n function on_load() {\n root._bokeh_is_loading--;\n if (root._bokeh_is_loading === 0) {\n console.debug(\"Bokeh: all BokehJS libraries/stylesheets loaded\");\n run_callbacks()\n }\n }\n window._bokeh_on_load = on_load\n\n function on_error(e) {\n const src_el = e.srcElement\n console.error(\"failed to load \" + (src_el.href || src_el.src));\n }\n\n const skip = [];\n if (window.requirejs) {\n 
window.requirejs.config({'packages': {}, 'paths': {}, 'shim': {}});\n root._bokeh_is_loading = css_urls.length + 0;\n } else {\n root._bokeh_is_loading = css_urls.length + js_urls.length + js_modules.length + Object.keys(js_exports).length;\n }\n\n const existing_stylesheets = []\n const links = document.getElementsByTagName('link')\n for (let i = 0; i < links.length; i++) {\n const link = links[i]\n if (link.href != null) {\n existing_stylesheets.push(link.href)\n }\n }\n for (let i = 0; i < css_urls.length; i++) {\n const url = css_urls[i];\n const escaped = encodeURI(url)\n if (existing_stylesheets.indexOf(escaped) !== -1) {\n on_load()\n continue;\n }\n const element = document.createElement(\"link\");\n element.onload = on_load;\n element.onerror = on_error;\n element.rel = \"stylesheet\";\n element.type = \"text/css\";\n element.href = url;\n console.debug(\"Bokeh: injecting link tag for BokehJS stylesheet: \", url);\n document.body.appendChild(element);\n } var existing_scripts = []\n const scripts = document.getElementsByTagName('script')\n for (let i = 0; i < scripts.length; i++) {\n var script = scripts[i]\n if (script.src != null) {\n existing_scripts.push(script.src)\n }\n }\n for (let i = 0; i < js_urls.length; i++) {\n const url = js_urls[i];\n const escaped = encodeURI(url)\n if (skip.indexOf(escaped) !== -1 || existing_scripts.indexOf(escaped) !== -1) {\n if (!window.requirejs) {\n on_load();\n }\n continue;\n }\n const element = document.createElement('script');\n element.onload = on_load;\n element.onerror = on_error;\n element.async = false;\n element.src = url;\n console.debug(\"Bokeh: injecting script tag for BokehJS library: \", url);\n document.head.appendChild(element);\n }\n for (let i = 0; i < js_modules.length; i++) {\n const url = js_modules[i];\n const escaped = encodeURI(url)\n if (skip.indexOf(escaped) !== -1 || existing_scripts.indexOf(escaped) !== -1) {\n if (!window.requirejs) {\n on_load();\n }\n continue;\n }\n var element = document.createElement('script');\n element.onload = on_load;\n element.onerror = on_error;\n element.async = false;\n element.src = url;\n element.type = \"module\";\n console.debug(\"Bokeh: injecting script tag for BokehJS library: \", url);\n document.head.appendChild(element);\n }\n for (const name in js_exports) {\n const url = js_exports[name];\n const escaped = encodeURI(url)\n if (skip.indexOf(escaped) >= 0 || root[name] != null) {\n if (!window.requirejs) {\n on_load();\n }\n continue;\n }\n var element = document.createElement('script');\n element.onerror = on_error;\n element.async = false;\n element.type = \"module\";\n console.debug(\"Bokeh: injecting script tag for BokehJS library: \", url);\n element.textContent = `\n import ${name} from \"${url}\"\n window.${name} = ${name}\n window._bokeh_on_load()\n `\n document.head.appendChild(element);\n }\n if (!js_urls.length && !js_modules.length) {\n on_load()\n }\n };\n\n function inject_raw_css(css) {\n const element = document.createElement(\"style\");\n element.appendChild(document.createTextNode(css));\n document.body.appendChild(element);\n }\n\n const js_urls = [\"https://cdn.holoviz.org/panel/1.7.5/dist/bundled/reactiveesm/es-module-shims@^1.10.0/dist/es-module-shims.min.js\", \"https://cdn.bokeh.org/bokeh/release/bokeh-3.7.3.min.js\", \"https://cdn.bokeh.org/bokeh/release/bokeh-gl-3.7.3.min.js\", \"https://cdn.bokeh.org/bokeh/release/bokeh-widgets-3.7.3.min.js\", \"https://cdn.bokeh.org/bokeh/release/bokeh-tables-3.7.3.min.js\", 
\"https://cdn.holoviz.org/panel/1.7.5/dist/panel.min.js\"];\n const js_modules = [];\n const js_exports = {};\n const css_urls = [];\n const inline_js = [ function(Bokeh) {\n Bokeh.set_log_level(\"info\");\n },\nfunction(Bokeh) {} // ensure no trailing comma for IE\n ];\n\n function run_inline_js() {\n if ((root.Bokeh !== undefined) || (force === true)) {\n for (let i = 0; i < inline_js.length; i++) {\n try {\n inline_js[i].call(root, root.Bokeh);\n } catch(e) {\n if (!reloading) {\n throw e;\n }\n }\n }\n // Cache old bokeh versions\n if (Bokeh != undefined && !reloading) {\n var NewBokeh = root.Bokeh;\n if (Bokeh.versions === undefined) {\n Bokeh.versions = new Map();\n }\n if (NewBokeh.version !== Bokeh.version) {\n Bokeh.versions.set(NewBokeh.version, NewBokeh)\n }\n root.Bokeh = Bokeh;\n }\n } else if (Date.now() < root._bokeh_timeout) {\n setTimeout(run_inline_js, 100);\n } else if (!root._bokeh_failed_load) {\n console.log(\"Bokeh: BokehJS failed to load within specified timeout.\");\n root._bokeh_failed_load = true;\n }\n root._bokeh_is_initializing = false\n }\n\n function load_or_wait() {\n // Implement a backoff loop that tries to ensure we do not load multiple\n // versions of Bokeh and its dependencies at the same time.\n // In recent versions we use the root._bokeh_is_initializing flag\n // to determine whether there is an ongoing attempt to initialize\n // bokeh, however for backward compatibility we also try to ensure\n // that we do not start loading a newer (Panel>=1.0 and Bokeh>3) version\n // before older versions are fully initialized.\n if (root._bokeh_is_initializing && Date.now() > root._bokeh_timeout) {\n // If the timeout and bokeh was not successfully loaded we reset\n // everything and try loading again\n root._bokeh_timeout = Date.now() + 5000;\n root._bokeh_is_initializing = false;\n root._bokeh_onload_callbacks = undefined;\n root._bokeh_is_loading = 0\n console.log(\"Bokeh: BokehJS was loaded multiple times but one version failed to initialize.\");\n load_or_wait();\n } else if (root._bokeh_is_initializing || (typeof root._bokeh_is_initializing === \"undefined\" && root._bokeh_onload_callbacks !== undefined)) {\n setTimeout(load_or_wait, 100);\n } else {\n root._bokeh_is_initializing = true\n root._bokeh_onload_callbacks = []\n const bokeh_loaded = root.Bokeh != null && (root.Bokeh.version === py_version || (root.Bokeh.versions !== undefined && root.Bokeh.versions.has(py_version)));\n if (!reloading && !bokeh_loaded) {\n if (root.Bokeh) {\n root.Bokeh = undefined;\n }\n console.debug(\"Bokeh: BokehJS not loaded, scheduling load and callback at\", now());\n }\n load_libs(css_urls, js_urls, js_modules, js_exports, function() {\n console.debug(\"Bokeh: BokehJS plotting callback run at\", now());\n run_inline_js();\n });\n }\n }\n // Give older versions of the autoload script a head-start to ensure\n // they initialize before we start loading newer version.\n setTimeout(load_or_wait, 100)\n}(window));", + "application/vnd.holoviews_load.v0+json": "" + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/javascript": "\nif ((window.PyViz === undefined) || (window.PyViz instanceof HTMLElement)) {\n window.PyViz = {comms: {}, comm_status:{}, kernels:{}, receivers: {}, plot_index: []}\n}\n\n\n function JupyterCommManager() {\n }\n\n JupyterCommManager.prototype.register_target = function(plot_id, comm_id, msg_handler) {\n if (window.comm_manager || ((window.Jupyter !== undefined) && (Jupyter.notebook.kernel != null))) {\n var 
comm_manager = window.comm_manager || Jupyter.notebook.kernel.comm_manager;\n comm_manager.register_target(comm_id, function(comm) {\n comm.on_msg(msg_handler);\n });\n } else if ((plot_id in window.PyViz.kernels) && (window.PyViz.kernels[plot_id])) {\n window.PyViz.kernels[plot_id].registerCommTarget(comm_id, function(comm) {\n comm.onMsg = msg_handler;\n });\n } else if (typeof google != 'undefined' && google.colab.kernel != null) {\n google.colab.kernel.comms.registerTarget(comm_id, (comm) => {\n var messages = comm.messages[Symbol.asyncIterator]();\n function processIteratorResult(result) {\n var message = result.value;\n var content = {data: message.data, comm_id};\n var buffers = []\n for (var buffer of message.buffers || []) {\n buffers.push(new DataView(buffer))\n }\n var metadata = message.metadata || {};\n var msg = {content, buffers, metadata}\n msg_handler(msg);\n return messages.next().then(processIteratorResult);\n }\n return messages.next().then(processIteratorResult);\n })\n }\n }\n\n JupyterCommManager.prototype.get_client_comm = function(plot_id, comm_id, msg_handler) {\n if (comm_id in window.PyViz.comms) {\n return window.PyViz.comms[comm_id];\n } else if (window.comm_manager || ((window.Jupyter !== undefined) && (Jupyter.notebook.kernel != null))) {\n var comm_manager = window.comm_manager || Jupyter.notebook.kernel.comm_manager;\n var comm = comm_manager.new_comm(comm_id, {}, {}, {}, comm_id);\n if (msg_handler) {\n comm.on_msg(msg_handler);\n }\n } else if ((plot_id in window.PyViz.kernels) && (window.PyViz.kernels[plot_id])) {\n var comm = window.PyViz.kernels[plot_id].connectToComm(comm_id);\n let retries = 0;\n const open = () => {\n if (comm.active) {\n comm.open();\n } else if (retries > 3) {\n console.warn('Comm target never activated')\n } else {\n retries += 1\n setTimeout(open, 500)\n }\n }\n if (comm.active) {\n comm.open();\n } else {\n setTimeout(open, 500)\n }\n if (msg_handler) {\n comm.onMsg = msg_handler;\n }\n } else if (typeof google != 'undefined' && google.colab.kernel != null) {\n var comm_promise = google.colab.kernel.comms.open(comm_id)\n comm_promise.then((comm) => {\n window.PyViz.comms[comm_id] = comm;\n if (msg_handler) {\n var messages = comm.messages[Symbol.asyncIterator]();\n function processIteratorResult(result) {\n var message = result.value;\n var content = {data: message.data};\n var metadata = message.metadata || {comm_id};\n var msg = {content, metadata}\n msg_handler(msg);\n return messages.next().then(processIteratorResult);\n }\n return messages.next().then(processIteratorResult);\n }\n })\n var sendClosure = (data, metadata, buffers, disposeOnDone) => {\n return comm_promise.then((comm) => {\n comm.send(data, metadata, buffers, disposeOnDone);\n });\n };\n var comm = {\n send: sendClosure\n };\n }\n window.PyViz.comms[comm_id] = comm;\n return comm;\n }\n window.PyViz.comm_manager = new JupyterCommManager();\n \n\n\nvar JS_MIME_TYPE = 'application/javascript';\nvar HTML_MIME_TYPE = 'text/html';\nvar EXEC_MIME_TYPE = 'application/vnd.holoviews_exec.v0+json';\nvar CLASS_NAME = 'output';\n\n/**\n * Render data to the DOM node\n */\nfunction render(props, node) {\n var div = document.createElement(\"div\");\n var script = document.createElement(\"script\");\n node.appendChild(div);\n node.appendChild(script);\n}\n\n/**\n * Handle when a new output is added\n */\nfunction handle_add_output(event, handle) {\n var output_area = handle.output_area;\n var output = handle.output;\n if ((output.data == undefined) || 
(!output.data.hasOwnProperty(EXEC_MIME_TYPE))) {\n return\n }\n var id = output.metadata[EXEC_MIME_TYPE][\"id\"];\n var toinsert = output_area.element.find(\".\" + CLASS_NAME.split(' ')[0]);\n if (id !== undefined) {\n var nchildren = toinsert.length;\n var html_node = toinsert[nchildren-1].children[0];\n html_node.innerHTML = output.data[HTML_MIME_TYPE];\n var scripts = [];\n var nodelist = html_node.querySelectorAll(\"script\");\n for (var i in nodelist) {\n if (nodelist.hasOwnProperty(i)) {\n scripts.push(nodelist[i])\n }\n }\n\n scripts.forEach( function (oldScript) {\n var newScript = document.createElement(\"script\");\n var attrs = [];\n var nodemap = oldScript.attributes;\n for (var j in nodemap) {\n if (nodemap.hasOwnProperty(j)) {\n attrs.push(nodemap[j])\n }\n }\n attrs.forEach(function(attr) { newScript.setAttribute(attr.name, attr.value) });\n newScript.appendChild(document.createTextNode(oldScript.innerHTML));\n oldScript.parentNode.replaceChild(newScript, oldScript);\n });\n if (JS_MIME_TYPE in output.data) {\n toinsert[nchildren-1].children[1].textContent = output.data[JS_MIME_TYPE];\n }\n output_area._hv_plot_id = id;\n if ((window.Bokeh !== undefined) && (id in Bokeh.index)) {\n window.PyViz.plot_index[id] = Bokeh.index[id];\n } else {\n window.PyViz.plot_index[id] = null;\n }\n } else if (output.metadata[EXEC_MIME_TYPE][\"server_id\"] !== undefined) {\n var bk_div = document.createElement(\"div\");\n bk_div.innerHTML = output.data[HTML_MIME_TYPE];\n var script_attrs = bk_div.children[0].attributes;\n for (var i = 0; i < script_attrs.length; i++) {\n toinsert[toinsert.length - 1].childNodes[1].setAttribute(script_attrs[i].name, script_attrs[i].value);\n }\n // store reference to server id on output_area\n output_area._bokeh_server_id = output.metadata[EXEC_MIME_TYPE][\"server_id\"];\n }\n}\n\n/**\n * Handle when an output is cleared or removed\n */\nfunction handle_clear_output(event, handle) {\n var id = handle.cell.output_area._hv_plot_id;\n var server_id = handle.cell.output_area._bokeh_server_id;\n if (((id === undefined) || !(id in PyViz.plot_index)) && (server_id !== undefined)) { return; }\n var comm = window.PyViz.comm_manager.get_client_comm(\"hv-extension-comm\", \"hv-extension-comm\", function () {});\n if (server_id !== null) {\n comm.send({event_type: 'server_delete', 'id': server_id});\n return;\n } else if (comm !== null) {\n comm.send({event_type: 'delete', 'id': id});\n }\n delete PyViz.plot_index[id];\n if ((window.Bokeh !== undefined) & (id in window.Bokeh.index)) {\n var doc = window.Bokeh.index[id].model.document\n doc.clear();\n const i = window.Bokeh.documents.indexOf(doc);\n if (i > -1) {\n window.Bokeh.documents.splice(i, 1);\n }\n }\n}\n\n/**\n * Handle kernel restart event\n */\nfunction handle_kernel_cleanup(event, handle) {\n delete PyViz.comms[\"hv-extension-comm\"];\n window.PyViz.plot_index = {}\n}\n\n/**\n * Handle update_display_data messages\n */\nfunction handle_update_output(event, handle) {\n handle_clear_output(event, {cell: {output_area: handle.output_area}})\n handle_add_output(event, handle)\n}\n\nfunction register_renderer(events, OutputArea) {\n function append_mime(data, metadata, element) {\n // create a DOM node to render to\n var toinsert = this.create_output_subarea(\n metadata,\n CLASS_NAME,\n EXEC_MIME_TYPE\n );\n this.keyboard_manager.register_events(toinsert);\n // Render to node\n var props = {data: data, metadata: metadata[EXEC_MIME_TYPE]};\n render(props, toinsert[0]);\n element.append(toinsert);\n return toinsert\n 
}\n\n events.on('output_added.OutputArea', handle_add_output);\n events.on('output_updated.OutputArea', handle_update_output);\n events.on('clear_output.CodeCell', handle_clear_output);\n events.on('delete.Cell', handle_clear_output);\n events.on('kernel_ready.Kernel', handle_kernel_cleanup);\n\n OutputArea.prototype.register_mime_type(EXEC_MIME_TYPE, append_mime, {\n safe: true,\n index: 0\n });\n}\n\nif (window.Jupyter !== undefined) {\n try {\n var events = require('base/js/events');\n var OutputArea = require('notebook/js/outputarea').OutputArea;\n if (OutputArea.prototype.mime_types().indexOf(EXEC_MIME_TYPE) == -1) {\n register_renderer(events, OutputArea);\n }\n } catch(err) {\n }\n}\n", + "application/vnd.holoviews_load.v0+json": "" + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.holoviews_exec.v0+json": "", + "text/html": [ + "
\n", + "
\n", + "
\n", + "" + ] + }, + "metadata": { + "application/vnd.holoviews_exec.v0+json": { + "id": "b6fd14e0-f8d2-46e7-9c4d-722893d04d7e" + } + }, + "output_type": "display_data" + }, + { + "data": {}, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.holoviews_exec.v0+json": "", + "text/html": [ + "
\n", + "
\n", + "
\n", + "" + ], + "text/plain": [ + "Column\n", + " [0] TextInput(placeholder='Enter text here…')\n", + " [1] Row\n", + " [0] Button(name='Chat!')\n", + " [2] ParamFunction(function, _pane=Column, defer_load=False, height=300, loading_indicator=True, sizing_mode='fixed', width=300)" + ] + }, + "execution_count": 2, + "metadata": { + "application/vnd.holoviews_exec.v0+json": { + "id": "2854d6b0-689d-4dc0-8861-1834489708e9" + } + }, + "output_type": "execute_result" + } + ], + "source": [ + "import panel as pn # GUI\n", + "pn.extension()\n", + "\n", + "panels = [] # collect display \n", + "context = [ ] # accumulate messages\n", + "\n", + "\n", + "def collect_messages(_):\n", + " prompt = inp.value_input\n", + " inp.value = ''\n", + " if (not prompt):\n", + " return pn.Column(*panels)\n", + "\n", + " response = get_response(prompt)\n", + " context.append({'role':'user', 'content':f\"{prompt}\"})\n", + " context.append({'role':'assistant', 'content':f\"{response}\"})\n", + " panels.append(\n", + " pn.Row('User:', pn.pane.Markdown(prompt, width=600)))\n", + " panels.append(\n", + " pn.Row('Assistant:', pn.pane.Markdown(response, width=600)))\n", + " \n", + " return pn.Column(*panels)\n", + "\n", + "\n", + "inp = pn.widgets.TextInput(value=\"Hi\", placeholder='Enter text here…')\n", + "button_conversation = pn.widgets.Button(name=\"Chat!\")\n", + "interactive_conversation = pn.bind(collect_messages, button_conversation)\n", + "dashboard = pn.Column(\n", + " inp,\n", + " pn.Row(button_conversation),\n", + " pn.panel(interactive_conversation, loading_indicator=True, height=300, width=300),\n", + ")\n", + "\n", + "dashboard" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.11" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/ai/ai-starter-kit/helm-chart/ai-starter-kit/files/download_models.py b/ai/ai-starter-kit/helm-chart/ai-starter-kit/files/download_models.py new file mode 100644 index 000000000..aee733734 --- /dev/null +++ b/ai/ai-starter-kit/helm-chart/ai-starter-kit/files/download_models.py @@ -0,0 +1,17 @@ +from huggingface_hub import snapshot_download + +# --- Model Download --- +# List your desired Hugging Face model names here +model_names = [ + "Qwen/Qwen3-Embedding-0.6B", +] + +for model_name in model_names: + print(f"--- Downloading {model_name} ---") + try: + snapshot_download(repo_id=model_name) + print(f"Successfully cached {model_name}") + except Exception as e: + print(f"Failed to download {model_name}. Error: {e}") + +print("--- Model download process finished. ---") diff --git a/ai/ai-starter-kit/helm-chart/ai-starter-kit/files/multi-agent-ollama.ipynb b/ai/ai-starter-kit/helm-chart/ai-starter-kit/files/multi-agent-ollama.ipynb new file mode 100644 index 000000000..c8f8040c5 --- /dev/null +++ b/ai/ai-starter-kit/helm-chart/ai-starter-kit/files/multi-agent-ollama.ipynb @@ -0,0 +1,525 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "079fadd2-200e-4d37-8ae2-be2792e3a24e", + "metadata": {}, + "source": [ + "### Cell 1 - Install RamaLama and verify environment\n", + "\n", + "Installs RamaLama for local model serving, sets up environment variables, and verifies the installation." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "79db57cd-fb72-4b10-b0fb-5e9cd5c007b6", + "metadata": {}, + "outputs": [], + "source": [ + "!pip -q install ollama requests --disable-pip-version-check\n", + "\n", + "import os, subprocess, time, json, requests\n", + "from pathlib import Path\n", + "\n", + "os.environ['OLLAMA_HOST'] = os.getenv('OLLAMA_HOST', 'http://ai-starter-kit-ollama:11434')\n", + "MODEL_NAME = \"qwen2.5:1.5b\"\n", + "MLFLOW_URI = os.getenv(\"MLFLOW_TRACKING_URI\", \"http://ai-starter-kit-mlflow:5000\")\n", + "\n", + "OLLAMA_HOST = os.environ['OLLAMA_HOST']\n", + "\n", + "print(\"Environment Configuration:\")\n", + "print(\"Ollama Host:\", OLLAMA_HOST)\n", + "print(\"Model: \", MODEL_NAME)\n", + "print(\"MLflow: \", MLFLOW_URI)\n", + "print(\"-\" * 60)\n", + "\n", + "try:\n", + " r = requests.get(f\"{OLLAMA_HOST}/api/version\", timeout=5)\n", + " print(\"Ollama version:\", r.json())\n", + "except Exception as e:\n", + " print(\"Note: Ollama service not running. Starting it in next cell...\")" + ] + }, + { + "cell_type": "markdown", + "id": "fe862173-fd9a-41ae-a27b-63875f788024", + "metadata": {}, + "source": [ + "### Cell 2 - Start Ollama service and pull model\n", + "\n", + "Starts the Ollama service if not running, pulls the Qwen 2.5 1.5B model, and verifies it's ready." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "34da3e26-6276-48b7-b3ac-c90359df6547", + "metadata": {}, + "outputs": [], + "source": [ + "import subprocess, time, requests, os\n", + "\n", + "OLLAMA_HOST = os.environ.get('OLLAMA_HOST', 'http://ai-starter-kit-ollama:11434')\n", + "MODEL_NAME = \"qwen2.5:1.5b\"\n", + "\n", + "def check_ollama():\n", + " try:\n", + " r = requests.get(f\"{OLLAMA_HOST}/api/tags\", timeout=2)\n", + " return r.status_code == 200\n", + " except:\n", + " return False\n", + "\n", + "if not check_ollama() and OLLAMA_HOST.startswith(\"http://ai-starter-kit-ollama\"):\n", + " print(\"Starting Ollama service...\")\n", + " try:\n", + " subprocess.Popen([\"ollama\", \"serve\"], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)\n", + " time.sleep(3)\n", + " except Exception as e:\n", + " print(f\"Could not start Ollama automatically: {e}\")\n", + " print(\"Please start Ollama manually with: ollama serve\")\n", + "\n", + "if check_ollama():\n", + " print(\"Ollama service is running\")\n", + " \n", + " print(f\"\\nPulling model {MODEL_NAME}...\")\n", + " try:\n", + " r = requests.get(f\"{OLLAMA_HOST}/api/tags\")\n", + " models = r.json().get('models', [])\n", + " model_exists = any(m.get('name') == MODEL_NAME for m in models)\n", + " \n", + " if not model_exists:\n", + " pull_data = {\"name\": MODEL_NAME}\n", + " r = requests.post(f\"{OLLAMA_HOST}/api/pull\", json=pull_data, stream=True)\n", + " for line in r.iter_lines():\n", + " if line:\n", + " try:\n", + " status = json.loads(line)\n", + " if 'status' in status:\n", + " print(f\" {status['status']}\", end='\\r')\n", + " except:\n", + " pass\n", + " print(f\"\\nModel {MODEL_NAME} pulled successfully\")\n", + " else:\n", + " print(f\"Model {MODEL_NAME} already available\")\n", + " except Exception as e:\n", + " print(f\"Error pulling model: {e}\")\n", + "else:\n", + " print(\"Warning: Ollama service is not running\")\n", + " print(\"Please ensure Ollama is installed and running\")" + ] + }, + { + "cell_type": "markdown", + "id": "8111d705-595e-4e65-8479-bdc76191fa31", + "metadata": {}, + "source": [ + "### Cell 3 - Create OpenAI-compatible API wrapper\n", + "\n", + "Sets up a simple 
FastAPI server that wraps Ollama with an OpenAI-compatible API, including MLflow tracking." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bbea1539-e9ab-460a-9cfc-20a42807f616", + "metadata": {}, + "outputs": [], + "source": [ + "!pip -q install fastapi uvicorn mlflow --disable-pip-version-check\n", + "\n", + "import os, subprocess, time, json, requests, threading\n", + "from pathlib import Path\n", + "\n", + "api_wrapper_code = '''\n", + "import os, time, uuid, requests, json\n", + "from fastapi import FastAPI, Request\n", + "from fastapi.responses import JSONResponse\n", + "import uvicorn\n", + "\n", + "USE_MLFLOW = False\n", + "try:\n", + " import mlflow\n", + " mlflow_uri = os.getenv(\"MLFLOW_TRACKING_URI\")\n", + " if mlflow_uri:\n", + " mlflow.set_tracking_uri(mlflow_uri)\n", + " mlflow.set_experiment(\"ramalama-llm\")\n", + " USE_MLFLOW = True\n", + "except:\n", + " pass\n", + "\n", + "app = FastAPI()\n", + "OLLAMA_HOST = os.getenv(\"OLLAMA_HOST\", \"http://ai-starter-kit-ollama:11434\")\n", + "MODEL_NAME = os.getenv(\"MODEL_NAME\", \"qwen2.5:1.5b\")\n", + "\n", + "@app.get(\"/v1/healthz\")\n", + "async def health():\n", + " return {\"status\": \"ok\", \"model\": MODEL_NAME}\n", + "\n", + "@app.post(\"/v1/chat/completions\")\n", + "async def chat_completions(request: Request):\n", + " t0 = time.time()\n", + " body = await request.json()\n", + " \n", + " messages = body.get(\"messages\", [])\n", + " temperature = body.get(\"temperature\", 0.7)\n", + " max_tokens = body.get(\"max_tokens\", 256)\n", + " \n", + " # Call Ollama API\n", + " ollama_payload = {\n", + " \"model\": MODEL_NAME,\n", + " \"messages\": messages,\n", + " \"stream\": False,\n", + " \"options\": {\n", + " \"temperature\": temperature,\n", + " \"num_predict\": max_tokens\n", + " }\n", + " }\n", + " \n", + " try:\n", + " r = requests.post(f\"{OLLAMA_HOST}/api/chat\", json=ollama_payload, timeout=120)\n", + " r.raise_for_status()\n", + " ollama_response = r.json()\n", + " \n", + " content = ollama_response.get(\"message\", {}).get(\"content\", \"\")\n", + " prompt_tokens = len(\" \".join(m.get(\"content\", \"\") for m in messages).split())\n", + " completion_tokens = len(content.split())\n", + " \n", + " if USE_MLFLOW:\n", + " try:\n", + " with mlflow.start_run():\n", + " mlflow.log_params({\n", + " \"temperature\": temperature,\n", + " \"max_tokens\": max_tokens,\n", + " \"model\": MODEL_NAME\n", + " })\n", + " mlflow.log_metrics({\n", + " \"duration_ms\": int((time.time() - t0) * 1000),\n", + " \"prompt_tokens_approx\": prompt_tokens,\n", + " \"completion_tokens_approx\": completion_tokens,\n", + " \"total_tokens_approx\": prompt_tokens + completion_tokens\n", + " })\n", + " except:\n", + " pass\n", + " \n", + " return {\n", + " \"id\": \"chatcmpl-\" + uuid.uuid4().hex[:8],\n", + " \"object\": \"chat.completion\",\n", + " \"created\": int(time.time()),\n", + " \"model\": MODEL_NAME,\n", + " \"choices\": [{\n", + " \"index\": 0,\n", + " \"message\": {\"role\": \"assistant\", \"content\": content},\n", + " \"finish_reason\": \"stop\"\n", + " }],\n", + " \"usage\": {\n", + " \"prompt_tokens\": prompt_tokens,\n", + " \"completion_tokens\": completion_tokens,\n", + " \"total_tokens\": prompt_tokens + completion_tokens\n", + " }\n", + " }\n", + " except Exception as e:\n", + " return JSONResponse(status_code=500, content={\"error\": str(e)})\n", + "\n", + "if __name__ == \"__main__\":\n", + " uvicorn.run(app, host=\"0.0.0.0\", port=8000)\n", + "'''\n", + "\n", + "with open('/tmp/ollama_wrapper.py', 
'w') as f:\n", + " f.write(api_wrapper_code)\n", + "\n", + "!pkill -f ollama_wrapper.py 2>/dev/null || true\n", + "\n", + "env_vars = f\"\"\"\n", + "export OLLAMA_HOST=\"{os.getenv('OLLAMA_HOST', 'http://ai-starter-kit-ollama:11434')}\"\n", + "export MODEL_NAME=\"qwen2.5:1.5b\"\n", + "export MLFLOW_TRACKING_URI=\"{os.getenv('MLFLOW_TRACKING_URI', 'http://ai-starter-kit-mlflow:5000')}\"\n", + "\"\"\"\n", + "\n", + "!echo '{env_vars}' > /tmp/env_vars.sh\n", + "!bash -c 'source /tmp/env_vars.sh && nohup python /tmp/ollama_wrapper.py > /tmp/wrapper.log 2>&1 &'\n", + "\n", + "print(\"Starting API wrapper...\")\n", + "for i in range(30):\n", + " time.sleep(1)\n", + " try:\n", + " r = requests.get(\"http://localhost:8000/v1/healthz\", timeout=1)\n", + " if r.status_code == 200:\n", + " print(\"API Status:\", r.json())\n", + " print(f\"\\nOpenAI-compatible API running at: http://localhost:8000/v1\")\n", + " print(f\"Health: http://localhost:8000/v1/healthz\")\n", + " print(f\"Chat: http://localhost:8000/v1/chat/completions\")\n", + " break\n", + " except:\n", + " if i % 5 == 0:\n", + " print(f\" Waiting for API to start... ({i}s)\")\n", + " continue\n", + "else:\n", + " print(\"\\nAPI wrapper failed to start. Checking logs:\")\n", + " !tail -20 /tmp/wrapper.log\n", + " print(\"\\nYou can still use direct Ollama API in the next cells.\")" + ] + }, + { + "cell_type": "markdown", + "id": "a411c015-c802-4ca1-81bb-3f4790d9626a", + "metadata": {}, + "source": [ + "### Cell 4 - Basic client + latency test\n", + "\n", + "Tests the OpenAI-compatible API with a simple chat request and measures latency." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3be634e2-a82f-42c9-8e31-57e6868a86ee", + "metadata": {}, + "outputs": [], + "source": [ + "import os, time, requests, json\n", + "\n", + "USE_WRAPPER = True\n", + "BASE_URL = \"http://localhost:8000/v1\" if USE_WRAPPER else os.getenv(\"OLLAMA_HOST\", \"http://ai-starter-kit-ollama:11434\")\n", + "\n", + "def health():\n", + " if USE_WRAPPER:\n", + " r = requests.get(f\"{BASE_URL}/healthz\", timeout=10)\n", + " print(\"Health:\", r.status_code, r.json())\n", + " else:\n", + " r = requests.get(f\"{BASE_URL}/api/tags\", timeout=10)\n", + " print(\"Health:\", r.status_code, \"Models available:\", len(r.json().get('models', [])))\n", + "\n", + "def chat(prompt, temperature=0.4, max_tokens=220):\n", + " if USE_WRAPPER:\n", + " body = {\n", + " \"model\": \"qwen2.5:1.5b\",\n", + " \"temperature\": temperature,\n", + " \"max_tokens\": max_tokens,\n", + " \"messages\": [\n", + " {\"role\": \"system\", \"content\": \"You are a helpful assistant. Be concise.\"},\n", + " {\"role\": \"user\", \"content\": prompt},\n", + " ]\n", + " }\n", + " endpoint = f\"{BASE_URL}/chat/completions\"\n", + " else:\n", + " body = {\n", + " \"model\": \"qwen2.5:1.5b\",\n", + " \"messages\": [\n", + " {\"role\": \"system\", \"content\": \"You are a helpful assistant. 
Be concise.\"},\n", + " {\"role\": \"user\", \"content\": prompt},\n", + " ],\n", + " \"stream\": False,\n", + " \"options\": {\n", + " \"temperature\": temperature,\n", + " \"num_predict\": max_tokens\n", + " }\n", + " }\n", + " endpoint = f\"{BASE_URL}/api/chat\"\n", + " \n", + " t0 = time.time()\n", + " r = requests.post(endpoint, json=body, timeout=120)\n", + " dt = time.time() - t0\n", + " r.raise_for_status()\n", + " \n", + " if USE_WRAPPER:\n", + " response = r.json()\n", + " content = response[\"choices\"][0][\"message\"][\"content\"]\n", + " usage = response.get(\"usage\", {})\n", + " else:\n", + " response = r.json()\n", + " content = response.get(\"message\", {}).get(\"content\", \"\")\n", + " usage = {\"total_tokens\": \"estimated: \" + str(len(content.split()) + len(prompt.split()))}\n", + " \n", + " print(f\"\\nLatency: {dt:.2f}s | usage: {usage}\")\n", + " print(\"\\n---\\n\", content)\n", + " return content\n", + "\n", + "health()\n", + "_ = chat(\"Say 'test ok' then give me one short fun fact about llamas.\")" + ] + }, + { + "cell_type": "markdown", + "id": "553d2756-8949-43e3-8342-71387688e0fa", + "metadata": {}, + "source": [ + "### Cell 5 - Multi-agent pipeline\n", + "\n", + "Implements a simple three-agent workflow (Researcher -> Writer -> Critic) using the local LLM." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0f6713f3-8b60-40b2-ad3c-ebf6db4f66e1", + "metadata": {}, + "outputs": [], + "source": [ + "import os, requests, json, time\n", + "\n", + "BASE_URL = \"http://localhost:8000/v1\" \n", + "OLLAMA_DIRECT = os.getenv(\"OLLAMA_HOST\", \"http://ai-starter-kit-ollama:11434\")\n", + "\n", + "def call_llm(role_prompt, user_message, temperature=0.4, max_tokens=150, use_wrapper=True):\n", + " if use_wrapper:\n", + " body = {\n", + " \"model\": \"qwen2.5:1.5b\",\n", + " \"temperature\": temperature,\n", + " \"max_tokens\": max_tokens,\n", + " \"messages\": [\n", + " {\"role\": \"system\", \"content\": role_prompt},\n", + " {\"role\": \"user\", \"content\": user_message}\n", + " ]\n", + " }\n", + " try:\n", + " r = requests.post(f\"{BASE_URL}/chat/completions\", json=body, timeout=120)\n", + " r.raise_for_status()\n", + " return r.json()[\"choices\"][0][\"message\"][\"content\"]\n", + " except Exception as e:\n", + " return f\"Error: {e}\"\n", + " else:\n", + " body = {\n", + " \"model\": \"qwen2.5:1.5b\",\n", + " \"messages\": [\n", + " {\"role\": \"system\", \"content\": role_prompt},\n", + " {\"role\": \"user\", \"content\": user_message}\n", + " ],\n", + " \"stream\": False,\n", + " \"options\": {\n", + " \"temperature\": temperature,\n", + " \"num_predict\": max_tokens\n", + " }\n", + " }\n", + " try:\n", + " r = requests.post(f\"{OLLAMA_DIRECT}/api/chat\", json=body, timeout=120)\n", + " r.raise_for_status()\n", + " return r.json().get(\"message\", {}).get(\"content\", \"\")\n", + " except Exception as e:\n", + " return f\"Error: {e}\"\n", + "\n", + "print(\"=\" * 60)\n", + "print(\"Running Multi-Agent Workflow with RamaLama/Ollama\")\n", + "print(\"=\" * 60)\n", + "\n", + "task = \"Research the latest advancements in quantum computing as of 2025.\"\n", + "\n", + "try:\n", + " r = requests.get(f\"{BASE_URL}/healthz\", timeout=2)\n", + " use_wrapper = r.status_code == 200\n", + " print(\"Using: OpenAI-compatible wrapper\\n\")\n", + "except:\n", + " use_wrapper = False\n", + " print(\"Using: Direct Ollama API\\n\")\n", + "\n", + "print(\"1. RESEARCHER:\")\n", + "print(\"-\" * 40)\n", + "research_prompt = \"You are a researcher. 
Provide 3-4 key facts about the topic. Be concise and factual.\"\n", + "research_notes = call_llm(research_prompt, task, temperature=0.35, max_tokens=140, use_wrapper=use_wrapper)\n", + "print(research_notes)\n", + "time.sleep(1)\n", + "\n", + "print(\"\\n2. WRITER:\")\n", + "print(\"-\" * 40)\n", + "writer_prompt = \"You are a technical writer. Based on the following notes, write a brief report.\"\n", + "writer_task = f\"Write a report based on these notes:\\n{research_notes}\"\n", + "report = call_llm(writer_prompt, writer_task, temperature=0.55, max_tokens=220, use_wrapper=use_wrapper)\n", + "print(report)\n", + "time.sleep(1)\n", + "\n", + "print(\"\\n3. CRITIC/EDITOR:\")\n", + "print(\"-\" * 40)\n", + "critic_prompt = \"You are an editor. Review the report and provide a final polished version.\"\n", + "critic_task = f\"Review and improve this report:\\n{report}\"\n", + "final_output = call_llm(critic_prompt, critic_task, temperature=0.45, max_tokens=160, use_wrapper=use_wrapper)\n", + "print(final_output)\n", + "\n", + "print(\"\\n\" + \"=\" * 60)\n", + "print(\"Multi-agent workflow complete\")\n", + "print(\"=\" * 60)" + ] + }, + { + "cell_type": "markdown", + "id": "0af596cf-5ba6-42df-a030-61d7a20d6f7b", + "metadata": {}, + "source": [ + "### Cell 6 - MLFlow: connect to tracking server and list recent runs\n", + "\n", + "Connects to MLflow tracking server and displays recent model inference runs with metrics." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "03a1b042-04df-4cd0-9099-4cc763ecfe9d", + "metadata": {}, + "outputs": [], + "source": [ + "!pip -q install mlflow==2.14.3 --disable-pip-version-check\n", + "\n", + "import os, mlflow\n", + "from datetime import datetime\n", + "\n", + "tracking_uri = os.getenv(\"MLFLOW_TRACKING_URI\", \"http://ai-starter-kit-mlflow:5000\")\n", + "mlflow.set_tracking_uri(tracking_uri)\n", + "print(f\"MLflow Tracking URI: {tracking_uri}\")\n", + "\n", + "exp_name = \"ramalama-llm\"\n", + "exp = mlflow.set_experiment(exp_name)\n", + "print(f\"Experiment: {exp.name} (ID: {exp.experiment_id})\")\n", + "print(\"-\" * 60)\n", + "\n", + "client = mlflow.tracking.MlflowClient()\n", + "runs = client.search_runs(\n", + " exp.experiment_id,\n", + " order_by=[\"attributes.start_time DESC\"],\n", + " max_results=10\n", + ")\n", + "\n", + "if not runs:\n", + " print(\"No runs found. 
Run cells 4 or 5 first to generate inference requests.\")\n", + "else:\n", + " print(f\"\\nFound {len(runs)} recent runs:\")\n", + " print(\"-\" * 60)\n", + " \n", + " for i, run in enumerate(runs, 1):\n", + " start_time = datetime.fromtimestamp(run.info.start_time/1000).strftime('%Y-%m-%d %H:%M:%S')\n", + " duration = run.data.metrics.get('duration_ms', 'N/A')\n", + " temp = run.data.params.get('temperature', 'N/A')\n", + " max_tokens = run.data.params.get('max_tokens', 'N/A')\n", + " total_tokens = run.data.metrics.get('total_tokens_approx', 'N/A')\n", + " \n", + " print(f\"\\nRun {i}:\")\n", + " print(f\" ID: {run.info.run_id[:12]}...\")\n", + " print(f\" Time: {start_time}\")\n", + " print(f\" Status: {run.info.status}\")\n", + " print(f\" Temperature: {temp}\")\n", + " print(f\" Max Tokens: {max_tokens}\")\n", + " print(f\" Duration: {duration} ms\")\n", + " print(f\" Total Tokens: {total_tokens}\")\n", + " \n", + " print(\"\\n\" + \"=\" * 60)\n", + " print(\"SUMMARY:\")\n", + " successful = sum(1 for r in runs if r.info.status == 'FINISHED')\n", + " durations = [r.data.metrics.get('duration_ms', 0) for r in runs if r.data.metrics.get('duration_ms')]\n", + " avg_duration = sum(durations) / len(durations) if durations else 0\n", + " \n", + " print(f\" Total Runs: {len(runs)}\")\n", + " print(f\" Successful: {successful}\")\n", + " print(f\" Failed: {len(runs) - successful}\")\n", + " print(f\" Avg Duration: {avg_duration:.1f} ms\" if avg_duration else \" Avg Duration: N/A\")\n", + "\n", + "print(\"\\n\" + \"=\" * 60)\n", + "print(\"MLflow verification complete\")" + ] + } + ], + "metadata": { + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/ai/ai-starter-kit/helm-chart/ai-starter-kit/files/multi-agent-ramalama.ipynb b/ai/ai-starter-kit/helm-chart/ai-starter-kit/files/multi-agent-ramalama.ipynb new file mode 100644 index 000000000..07aff13cc --- /dev/null +++ b/ai/ai-starter-kit/helm-chart/ai-starter-kit/files/multi-agent-ramalama.ipynb @@ -0,0 +1,466 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "079fadd2-200e-4d37-8ae2-be2792e3a24e", + "metadata": {}, + "source": [ + "### Cell 1 - Install RamaLama and verify environment\n", + "\n", + "Installs RamaLama for local model serving, sets up environment variables, and verifies the installation." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "79db57cd-fb72-4b10-b0fb-5e9cd5c007b6", + "metadata": {}, + "outputs": [], + "source": [ + "!pip -q install requests --disable-pip-version-check\n", + "\n", + "import os, time, json, requests\n", + "from pathlib import Path\n", + "\n", + "os.environ['RAMALAMA_HOST'] = 'http://ai-starter-kit-ramalama:8080'\n", + "MODEL_NAME = \"qwen2.5:1.5b\"\n", + "MLFLOW_URI = os.getenv(\"MLFLOW_TRACKING_URI\", \"http://ai-starter-kit-mlflow:5000\")\n", + "\n", + "RAMALAMA_HOST = os.environ['RAMALAMA_HOST']\n", + "\n", + "print(\"Environment Configuration:\")\n", + "print(\"RamaLama Host:\", RAMALAMA_HOST)\n", + "print(\"Model: \", MODEL_NAME)\n", + "print(\"MLflow: \", MLFLOW_URI)\n", + "print(\"-\" * 60)\n", + "\n", + "try:\n", + " r = requests.get(f\"{RAMALAMA_HOST}/v1/models\", timeout=5)\n", + " print(\"RamaLama models:\", r.json())\n", + "except Exception as e:\n", + " print(f\"Error connecting to RamaLama: {e}\")" + ] + }, + { + "cell_type": "markdown", + "id": "fe862173-fd9a-41ae-a27b-63875f788024", + "metadata": {}, + "source": [ + "### Cell 2 - Start RamaLama service and pull model\n", + "\n", + "Starts the RamaLama service if not running, pulls the Qwen 2.5 1.5B model, and verifies it's ready." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "34da3e26-6276-48b7-b3ac-c90359df6547", + "metadata": {}, + "outputs": [], + "source": [ + "import requests, os, json\n", + "\n", + "RAMALAMA_HOST = os.environ.get('RAMALAMA_HOST', 'http://ai-starter-kit-ramalama:8080')\n", + "MODEL_NAME = \"qwen2.5:1.5b\"\n", + "\n", + "def check_ramalama():\n", + " try:\n", + " r = requests.get(f\"{RAMALAMA_HOST}/v1/models\", timeout=2)\n", + " return r.status_code == 200\n", + " except:\n", + " return False\n", + "\n", + "if check_ramalama():\n", + " print(\"RamaLama service is running\")\n", + " \n", + " try:\n", + " r = requests.get(f\"{RAMALAMA_HOST}/v1/models\")\n", + " models = r.json().get('data', [])\n", + " model_exists = any(m.get('id') == MODEL_NAME for m in models) \n", + " if model_exists:\n", + " print(f\"Model {MODEL_NAME} already available\")\n", + " else:\n", + " print(f\"Model {MODEL_NAME} not found; ensure it's pulled in the deployment\")\n", + " except Exception as e:\n", + " print(f\"Error checking model: {e}\")\n", + "else:\n", + " print(\"Warning: RamaLama service is not running\")\n", + " print(\"Please ensure the deployment is healthy\")" + ] + }, + { + "cell_type": "markdown", + "id": "8111d705-595e-4e65-8479-bdc76191fa31", + "metadata": {}, + "source": [ + "### Cell 3 - Create OpenAI-compatible API wrapper\n", + "\n", + "Sets up a simple FastAPI server that wraps RamaLama with an OpenAI-compatible API, including MLflow tracking. Since RamaLama already provides OpenAI compatibility, this acts as a proxy with logging." 
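+ "\n",
+ "The wrapper below is started as a background subprocess. A minimal cleanup sketch for later, assuming the `api_process` handle created in the next cell is still in scope:\n",
+ "\n",
+ "```python\n",
+ "# Stop the background API wrapper started by the next cell.\n",
+ "api_process.terminate()       # ask the wrapper to exit\n",
+ "api_process.wait(timeout=10)  # reap the process\n",
+ "```"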
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bbea1539-e9ab-460a-9cfc-20a42807f616", + "metadata": {}, + "outputs": [], + "source": [ + "!pip -q install fastapi uvicorn mlflow --disable-pip-version-check\n", + "\n", + "import os, threading, time, json\n", + "from pathlib import Path\n", + "\n", + "api_wrapper_code = '''\n", + "import os, time, uuid, requests, json\n", + "from fastapi import FastAPI, Request\n", + "from fastapi.responses import JSONResponse\n", + "import uvicorn\n", + "\n", + "USE_MLFLOW = False\n", + "try:\n", + " import mlflow\n", + " mlflow_uri = os.getenv(\"MLFLOW_TRACKING_URI\")\n", + " if mlflow_uri:\n", + " mlflow.set_tracking_uri(mlflow_uri)\n", + " mlflow.set_experiment(\"ramalama-llm\")\n", + " USE_MLFLOW = True\n", + "except:\n", + " pass\n", + "\n", + "app = FastAPI()\n", + "RAMALAMA_HOST = os.getenv(\"RAMALAMA_HOST\", \"http://127.0.0.1:8080\")\n", + "MODEL_NAME = os.getenv(\"MODEL_NAME\", \"qwen2.5:1.5b\")\n", + "\n", + "@app.get(\"/v1/healthz\")\n", + "async def health():\n", + " return {\"status\": \"ok\", \"model\": MODEL_NAME}\n", + "\n", + "@app.post(\"/v1/chat/completions\")\n", + "async def chat_completions(request: Request):\n", + " t0 = time.time()\n", + " body = await request.json()\n", + " \n", + " messages = body.get(\"messages\", [])\n", + " temperature = body.get(\"temperature\", 0.7)\n", + " max_tokens = body.get(\"max_tokens\", 256)\n", + " \n", + " payload = {\n", + " \"model\": MODEL_NAME,\n", + " \"messages\": messages,\n", + " \"temperature\": temperature,\n", + " \"max_tokens\": max_tokens,\n", + " \"stream\": False\n", + " }\n", + " \n", + " try:\n", + " r = requests.post(f\"{RAMALAMA_HOST}/v1/chat/completions\", json=payload, timeout=120)\n", + " r.raise_for_status()\n", + " response = r.json()\n", + " \n", + " content = response[\"choices\"][0][\"message\"][\"content\"]\n", + " usage = response.get(\"usage\", {})\n", + " prompt_tokens = usage.get(\"prompt_tokens\", len(\" \".join(m.get(\"content\", \"\") for m in messages).split()))\n", + " completion_tokens = usage.get(\"completion_tokens\", len(content.split()))\n", + " total_tokens = prompt_tokens + completion_tokens\n", + " \n", + " if USE_MLFLOW:\n", + " try:\n", + " with mlflow.start_run():\n", + " mlflow.log_params({\n", + " \"temperature\": temperature,\n", + " \"max_tokens\": max_tokens,\n", + " \"model\": MODEL_NAME\n", + " })\n", + " mlflow.log_metrics({\n", + " \"duration_ms\": int((time.time() - t0) * 1000),\n", + " \"prompt_tokens\": prompt_tokens,\n", + " \"completion_tokens\": completion_tokens,\n", + " \"total_tokens\": total_tokens\n", + " })\n", + " except:\n", + " pass\n", + " \n", + " return {\n", + " \"id\": \"chatcmpl-\" + uuid.uuid4().hex[:8],\n", + " \"object\": \"chat.completion\",\n", + " \"created\": int(time.time()),\n", + " \"model\": MODEL_NAME,\n", + " \"choices\": [{\n", + " \"index\": 0,\n", + " \"message\": {\"role\": \"assistant\", \"content\": content},\n", + " \"finish_reason\": \"stop\"\n", + " }],\n", + " \"usage\": {\n", + " \"prompt_tokens\": prompt_tokens,\n", + " \"completion_tokens\": completion_tokens,\n", + " \"total_tokens\": total_tokens\n", + " }\n", + " }\n", + " except Exception as e:\n", + " return JSONResponse(status_code=500, content={\"error\": str(e)})\n", + "\n", + "if __name__ == \"__main__\":\n", + " uvicorn.run(app, host=\"0.0.0.0\", port=8000)\n", + "'''\n", + "\n", + "with open('/tmp/ramalama_wrapper.py', 'w') as f:\n", + " f.write(api_wrapper_code)\n", + "\n", + "def run_api():\n", + " 
subprocess.run([\"python\", \"/tmp/ramalama_wrapper.py\"], capture_output=True)\n", + "\n", + "import subprocess\n", + "api_process = subprocess.Popen(\n", + " [\"python\", \"/tmp/ramalama_wrapper.py\"],\n", + " env={**os.environ, \n", + " \"RAMALAMA_HOST\": os.getenv(\"RAMALAMA_HOST\", \"http://127.0.0.1:8080\"),\n", + " \"MODEL_NAME\": MODEL_NAME,\n", + " \"MLFLOW_TRACKING_URI\": MLFLOW_URI},\n", + " stdout=subprocess.DEVNULL,\n", + " stderr=subprocess.DEVNULL\n", + ")\n", + "\n", + "time.sleep(3)\n", + "\n", + "API_URL = \"http://localhost:8000\"\n", + "try:\n", + " r = requests.get(f\"{API_URL}/v1/healthz\", timeout=5)\n", + " print(\"API Status:\", r.json())\n", + " print(f\"\\nOpenAI-compatible API running at: {API_URL}/v1\")\n", + " print(f\"Health: {API_URL}/v1/healthz\")\n", + " print(f\"Chat: {API_URL}/v1/chat/completions\")\n", + "except Exception as e:\n", + " print(f\"Warning: API wrapper not responding: {e}\")\n", + " print(\"You may need to run the wrapper manually\")" + ] + }, + { + "cell_type": "markdown", + "id": "a411c015-c802-4ca1-81bb-3f4790d9626a", + "metadata": {}, + "source": [ + "### Cell 4 - Basic client + latency test\n", + "\n", + "Tests the OpenAI-compatible API with a simple chat request and measures latency." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3be634e2-a82f-42c9-8e31-57e6868a86ee", + "metadata": {}, + "outputs": [], + "source": [ + "import os, time, requests, json\n", + "\n", + "USE_WRAPPER = True\n", + "BASE_URL = \"http://localhost:8000/v1\" if USE_WRAPPER else os.getenv(\"RAMALAMA_HOST\", \"http://127.0.0.1:8080\")\n", + "\n", + "def health():\n", + " if USE_WRAPPER:\n", + " r = requests.get(f\"{BASE_URL}/healthz\", timeout=10)\n", + " print(\"Health:\", r.status_code, r.json())\n", + " else:\n", + " r = requests.get(f\"{BASE_URL}/v1/models\", timeout=10)\n", + " print(\"Health:\", r.status_code, \"Models available:\", r.json().get('data', []))\n", + "\n", + "def chat(prompt, temperature=0.4, max_tokens=220):\n", + " body = {\n", + " \"model\": \"qwen2.5:1.5b\",\n", + " \"temperature\": temperature,\n", + " \"max_tokens\": max_tokens,\n", + " \"messages\": [\n", + " {\"role\": \"system\", \"content\": \"You are a helpful assistant. Be concise.\"},\n", + " {\"role\": \"user\", \"content\": prompt},\n", + " ],\n", + " \"stream\": False\n", + " }\n", + " endpoint = f\"{BASE_URL}/chat/completions\"\n", + " \n", + " t0 = time.time()\n", + " r = requests.post(endpoint, json=body, timeout=120)\n", + " dt = time.time() - t0\n", + " r.raise_for_status()\n", + " \n", + " response = r.json()\n", + " content = response[\"choices\"][0][\"message\"][\"content\"]\n", + " usage = response.get(\"usage\", {\"total_tokens\": \"estimated: \" + str(len(content.split()) + len(prompt.split()))})\n", + " \n", + " print(f\"\\nLatency: {dt:.2f}s | usage: {usage}\")\n", + " print(\"\\n---\\n\", content)\n", + " return content\n", + "\n", + "health()\n", + "_ = chat(\"Say 'test ok' then give me one short fun fact about llamas.\")" + ] + }, + { + "cell_type": "markdown", + "id": "553d2756-8949-43e3-8342-71387688e0fa", + "metadata": {}, + "source": [ + "### Cell 5 - Multi-agent pipeline\n", + "\n", + "Implements a simple three-agent workflow (Researcher -> Writer -> Critic) using the local LLM." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0f6713f3-8b60-40b2-ad3c-ebf6db4f66e1", + "metadata": {}, + "outputs": [], + "source": [ + "import os, requests, json, time\n", + "\n", + "BASE_URL = \"http://localhost:8000/v1\" \n", + "RAMALAMA_DIRECT = os.getenv(\"RAMALAMA_HOST\", \"http://127.0.0.1:8080\")\n", + "\n", + "def call_llm(role_prompt, user_message, temperature=0.4, max_tokens=150, use_wrapper=True):\n", + " body = {\n", + " \"model\": \"qwen2.5:1.5b\",\n", + " \"messages\": [\n", + " {\"role\": \"system\", \"content\": role_prompt},\n", + " {\"role\": \"user\", \"content\": user_message}\n", + " ],\n", + " \"temperature\": temperature,\n", + " \"max_tokens\": max_tokens,\n", + " \"stream\": False\n", + " }\n", + " if use_wrapper:\n", + " endpoint = f\"{BASE_URL}/chat/completions\"\n", + " else:\n", + " endpoint = f\"{RAMALAMA_DIRECT}/v1/chat/completions\"\n", + " try:\n", + " r = requests.post(endpoint, json=body, timeout=120)\n", + " r.raise_for_status()\n", + " response = r.json()\n", + " return response[\"choices\"][0][\"message\"][\"content\"]\n", + " except Exception as e:\n", + " return f\"Error: {e}\"\n", + "\n", + "print(\"=\" * 60)\n", + "print(\"Running Multi-Agent Workflow with RamaLama\")\n", + "print(\"=\" * 60)\n", + "\n", + "task = \"Research the latest advancements in quantum computing as of 2025.\"\n", + "\n", + "try:\n", + " r = requests.get(f\"{BASE_URL}/healthz\", timeout=2)\n", + " use_wrapper = r.status_code == 200\n", + " print(\"Using: OpenAI-compatible wrapper\\n\")\n", + "except:\n", + " use_wrapper = False\n", + " print(\"Using: Direct RamaLama API\\n\")\n", + "\n", + "print(\"RESEARCHER:\")\n", + "print(\"-\" * 40)\n", + "research_prompt = \"You are a researcher. Provide 3-4 key facts about the topic. Be concise and factual.\"\n", + "research_notes = call_llm(research_prompt, task, temperature=0.35, max_tokens=140, use_wrapper=use_wrapper)\n", + "print(research_notes)\n", + "time.sleep(1)\n", + "\n", + "print(\"\\nWRITER:\")\n", + "print(\"-\" * 40)\n", + "writer_prompt = \"You are a technical writer. Based on the following notes, write a brief report.\"\n", + "writer_task = f\"Write a report based on these notes:\\n{research_notes}\"\n", + "report = call_llm(writer_prompt, writer_task, temperature=0.55, max_tokens=220, use_wrapper=use_wrapper)\n", + "print(report)\n", + "time.sleep(1)\n", + "\n", + "print(\"\\nCRITIC/EDITOR:\")\n", + "print(\"-\" * 40)\n", + "critic_prompt = \"You are an editor. Review the report and provide a final polished version.\"\n", + "critic_task = f\"Review and improve this report:\\n{report}\"\n", + "final_output = call_llm(critic_prompt, critic_task, temperature=0.45, max_tokens=160, use_wrapper=use_wrapper)\n", + "print(final_output)\n", + "\n", + "print(\"\\n\" + \"=\" * 60)\n", + "print(\"Multi-agent workflow complete\")\n", + "print(\"=\" * 60)" + ] + }, + { + "cell_type": "markdown", + "id": "0af596cf-5ba6-42df-a030-61d7a20d6f7b", + "metadata": {}, + "source": [ + "### Cell 6 - MLFlow: connect to tracking server and list recent runs\n", + "\n", + "Connects to MLflow tracking server and displays recent model inference runs with metrics." 
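+ "\n",
+ "If nothing has been routed through the wrapper yet, the listing below will be empty. A minimal sketch for logging one synthetic test run first (same experiment name as the wrapper; the parameter and metric values are placeholders):\n",
+ "\n",
+ "```python\n",
+ "import mlflow\n",
+ "mlflow.set_tracking_uri(\"http://ai-starter-kit-mlflow:5000\")\n",
+ "mlflow.set_experiment(\"ramalama-llm\")\n",
+ "with mlflow.start_run(run_name=\"manual-test\"):\n",
+ "    mlflow.log_params({\"temperature\": 0.4, \"max_tokens\": 220, \"model\": \"qwen2.5:1.5b\"})\n",
+ "    mlflow.log_metrics({\"duration_ms\": 1234, \"total_tokens\": 180})\n",
+ "```"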
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "03a1b042-04df-4cd0-9099-4cc763ecfe9d", + "metadata": {}, + "outputs": [], + "source": [ + "!pip -q install mlflow==2.14.3 --disable-pip-version-check\n", + "\n", + "import os, mlflow\n", + "from datetime import datetime\n", + "\n", + "tracking_uri = os.getenv(\"MLFLOW_TRACKING_URI\", \"http://ai-starter-kit-mlflow:5000\")\n", + "mlflow.set_tracking_uri(tracking_uri)\n", + "print(f\"MLflow Tracking URI: {tracking_uri}\")\n", + "\n", + "exp_name = \"ramalama-llm\"\n", + "exp = mlflow.set_experiment(exp_name)\n", + "print(f\"Experiment: {exp.name} (ID: {exp.experiment_id})\")\n", + "print(\"-\" * 60)\n", + "\n", + "client = mlflow.tracking.MlflowClient()\n", + "runs = client.search_runs(\n", + " exp.experiment_id,\n", + " order_by=[\"attributes.start_time DESC\"],\n", + " max_results=10\n", + ")\n", + "\n", + "if not runs:\n", + " print(\"No runs found. Run cells 4 or 5 first to generate inference requests.\")\n", + "else:\n", + " print(f\"\\nFound {len(runs)} recent runs:\")\n", + " print(\"-\" * 60)\n", + " \n", + " for i, run in enumerate(runs, 1):\n", + " start_time = datetime.fromtimestamp(run.info.start_time/1000).strftime('%Y-%m-%d %H:%M:%S')\n", + " duration = run.data.metrics.get('duration_ms', 'N/A')\n", + " temp = run.data.params.get('temperature', 'N/A')\n", + " max_tokens = run.data.params.get('max_tokens', 'N/A')\n", + " total_tokens = run.data.metrics.get('total_tokens', 'N/A')\n", + " \n", + " print(f\"\\nRun {i}:\")\n", + " print(f\" ID: {run.info.run_id[:12]}...\")\n", + " print(f\" Time: {start_time}\")\n", + " print(f\" Status: {run.info.status}\")\n", + " print(f\" Temperature: {temp}\")\n", + " print(f\" Max Tokens: {max_tokens}\")\n", + " print(f\" Duration: {duration} ms\")\n", + " print(f\" Total Tokens: {total_tokens}\")\n", + " \n", + " print(\"\\n\" + \"=\" * 60)\n", + " print(\"SUMMARY:\")\n", + " successful = sum(1 for r in runs if r.info.status == 'FINISHED')\n", + " durations = [r.data.metrics.get('duration_ms', 0) for r in runs if r.data.metrics.get('duration_ms')]\n", + " avg_duration = sum(durations) / len(durations) if durations else 0\n", + " \n", + " print(f\" Total Runs: {len(runs)}\")\n", + " print(f\" Successful: {successful}\")\n", + " print(f\" Failed: {len(runs) - successful}\")\n", + " print(f\" Avg Duration: {avg_duration:.1f} ms\" if avg_duration else \" Avg Duration: N/A\")\n", + "\n", + "print(\"\\n\" + \"=\" * 60)\n", + "print(\"MLflow verification complete\")" + ] + } + ], + "metadata": { + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/ai/ai-starter-kit/helm-chart/ai-starter-kit/files/multi-agent.ipynb b/ai/ai-starter-kit/helm-chart/ai-starter-kit/files/multi-agent.ipynb new file mode 100644 index 000000000..23189a639 --- /dev/null +++ b/ai/ai-starter-kit/helm-chart/ai-starter-kit/files/multi-agent.ipynb @@ -0,0 +1,687 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "079fadd2-200e-4d37-8ae2-be2792e3a24e", + "metadata": {}, + "source": [ + "### Cell 1 - Initialize Ray endpoints and verify dashboard\n", + "\n", + "Installs requests, derives the Ray head host from RAY_ADDRESS, builds Dashboard/Serve/MLflow URLs, reads an Hugging Face token, and prints the endpoints plus the Jobs API version for a quick health check." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "79db57cd-fb72-4b10-b0fb-5e9cd5c007b6", + "metadata": {}, + "outputs": [], + "source": [ + "!pip -q install requests==2.* --disable-pip-version-check\n", + "\n", + "import os, textwrap, base64, time, json, requests\n", + "from string import Template\n", + "\n", + "raw_addr = os.getenv(\"RAY_ADDRESS\", \"ray://ai-starter-kit-kuberay-head-svc:10001\")\n", + "if raw_addr.startswith(\"ray://\"):\n", + " HEAD_HOST = raw_addr.split(\"://\", 1)[1].split(\":\", 1)[0]\n", + "else:\n", + " HEAD_HOST = raw_addr.split(\":\", 1)[0] or \"ai-starter-kit-kuberay-head-svc\"\n", + "\n", + "DASH_URL = f\"http://{HEAD_HOST}:8265\"\n", + "SERVE_PORT = int(os.getenv(\"SERVE_PORT\", \"8000\"))\n", + "SERVE_ROUTE = \"/v1\"\n", + "\n", + "HF_TOKEN_PATH = \"/etc/secrets/huggingface/token\"\n", + "HF_TOKEN = \"\"\n", + "if os.path.exists(HF_TOKEN_PATH):\n", + " try:\n", + " HF_TOKEN = open(HF_TOKEN_PATH).read().strip()\n", + " except Exception:\n", + " HF_TOKEN = \"\"\n", + "\n", + "print(\"Head host:\", HEAD_HOST)\n", + "print(\"Jobs API :\", f\"{DASH_URL}/api/jobs/\")\n", + "print(\"Serve URL:\", f\"http://{HEAD_HOST}:{SERVE_PORT}{SERVE_ROUTE}\")\n", + "print(\"MLflow :\", os.getenv(\"MLFLOW_TRACKING_URI\", \"http://ai-starter-kit-mlflow:5000\"))\n", + "\n", + "print(\"Jobs API version:\", requests.get(f\"{DASH_URL}/api/version\", timeout=10).json())\n" + ] + }, + { + "cell_type": "markdown", + "id": "fe862173-fd9a-41ae-a27b-63875f788024", + "metadata": {}, + "source": [ + "### Cell 2 - Deploy a minimal Ray Serve smoke test and verify readiness\n", + "\n", + "Submits a tiny FastAPI app to Ray Serve (one /healthz endpoint under /smoke) as a Ray Job, installing FastAPI on the fly. It polls the Jobs API for status and hits :8000/smoke/healthz up to 60 seconds, printing when the service responds 200 (i.e., smoke test passes)." 
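+ "\n",
+ "Once the smoke check passes, the test app can be torn down again; a rough sketch (run on the cluster, e.g. submitted as another Ray job):\n",
+ "\n",
+ "```python\n",
+ "# Optional cleanup: serve.shutdown() removes all Serve applications, including /smoke.\n",
+ "from ray import serve\n",
+ "serve.shutdown()\n",
+ "```"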
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "34da3e26-6276-48b7-b3ac-c90359df6547", + "metadata": {}, + "outputs": [], + "source": [ + "import os, base64, textwrap, time, requests\n", + "\n", + "DASH_URL = \"http://ai-starter-kit-kuberay-head-svc:8265\"\n", + "\n", + "print(\"Jobs API:\", requests.get(f\"{DASH_URL}/api/version\", timeout=10).json())\n", + "\n", + "serve_py = textwrap.dedent(\"\"\"\n", + " from fastapi import FastAPI\n", + " from ray import serve\n", + " serve.start(detached=True, http_options={\"host\":\"0.0.0.0\",\"port\":8000})\n", + " app = FastAPI()\n", + "\n", + " @serve.deployment(name=\"smoke\", num_replicas=1)\n", + " @serve.ingress(app)\n", + " class Smoke:\n", + " @app.get(\"/healthz\")\n", + " async def health(self): return {\"ok\": True}\n", + "\n", + " serve.run(Smoke.bind(), route_prefix=\"/smoke\")\n", + " print(\"READY: smoke\", flush=True)\n", + "\"\"\").strip()\n", + "\n", + "b64 = base64.b64encode(serve_py.encode()).decode()\n", + "entry = f'python -c \"import base64; exec(base64.b64decode(\\'{b64}\\'))\"'\n", + "submit = requests.post(f\"{DASH_URL}/api/jobs/\", json={\"entrypoint\": entry, \"runtime_env\": {\"pip\": [\"fastapi>=0.110\"]}}, timeout=60).json()\n", + "job_id = submit[\"job_id\"]\n", + "print(\"Job:\", job_id)\n", + "\n", + "svc = \"http://ai-starter-kit-kuberay-head-svc:8000/smoke/healthz\"\n", + "for i in range(60):\n", + " s = requests.get(f\"{DASH_URL}/api/jobs/{job_id}\", timeout=10).json()[\"status\"]\n", + " try:\n", + " r = requests.get(svc, timeout=2)\n", + " print(f\"tick {i:02d}: job={s}, health={r.status_code}\")\n", + " if r.status_code == 200:\n", + " print(\"Smoke OK\")\n", + " break\n", + " except Exception as e:\n", + " print(f\"tick {i:02d}: job={s}, health=ERR {e}\")\n", + " time.sleep(1)" + ] + }, + { + "cell_type": "markdown", + "id": "8111d705-595e-4e65-8479-bdc76191fa31", + "metadata": {}, + "source": [ + "### Cell 3 - Deploy model on Ray Serve with llama-cpp\n", + "\n", + "Packages and submits a Ray Job that spins up a Ray Serve app exposing /v1/healthz and /v1/chat/completions. It downloads the preferred GGUF from Hugging Face, initializes llama-cpp-python, logs to MLflow, and prints the deployed health/chat URLs." 
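+ "\n",
+ "The GGUF download and model load can take several minutes. A small sketch for following progress through the Jobs REST API, assuming the `job` response and `DASH_URL` from the cell below:\n",
+ "\n",
+ "```python\n",
+ "import requests\n",
+ "job_id = job.get(\"job_id\")\n",
+ "status = requests.get(f\"{DASH_URL}/api/jobs/{job_id}\", timeout=10).json()[\"status\"]\n",
+ "logs = requests.get(f\"{DASH_URL}/api/jobs/{job_id}/logs\", timeout=10).json().get(\"logs\", \"\")\n",
+ "print(status)\n",
+ "print(logs[-2000:])  # tail of the driver log: download / load / READY markers\n",
+ "```"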
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bbea1539-e9ab-460a-9cfc-20a42807f616", + "metadata": {}, + "outputs": [], + "source": [ + "import os, base64, textwrap, requests\n", + "\n", + "HEAD = os.environ.get(\"RAY_HEAD_SVC\", \"ai-starter-kit-kuberay-head-svc\")\n", + "DASH_URL = f\"http://{HEAD}:8265\"\n", + "SERVE_PORT = 8000\n", + "SERVE_ROUTE = \"/v1\"\n", + "\n", + "runtime_env = {\n", + " \"pip\": [\n", + " \"fastapi==0.110.0\",\n", + " \"uvicorn==0.23.2\",\n", + " \"huggingface_hub==0.25.2\",\n", + " \"llama-cpp-python==0.3.16\", \n", + " \"hf_transfer==0.1.6\",\n", + " \"mlflow==2.14.3\", \n", + " ],\n", + " \"env_vars\": {\n", + " \"HF_HUB_ENABLE_HF_TRANSFER\": \"1\",\n", + " \"HUGGINGFACE_HUB_TOKEN\": os.environ.get(\"HUGGINGFACE_HUB_TOKEN\", \"\"),\n", + " \"SERVE_PORT\": str(SERVE_PORT),\n", + "\n", + " \"MODEL_REPO\": \"Qwen/Qwen2.5-1.5B-Instruct-GGUF\",\n", + " \"GGUF_PREF_ORDER\": \"q4_k_m,q4_0,q3_k_m,q2_k\",\n", + "\n", + " \"LLM_CONTEXT\": os.environ.get(\"LLM_CONTEXT\", \"1024\"),\n", + " \"LLM_MAX_TOKENS\": os.environ.get(\"LLM_MAX_TOKENS\", \"256\"),\n", + " \"SERVER_MAX_NEW_TOKENS\": os.environ.get(\"SERVER_MAX_NEW_TOKENS\", \"512\"),\n", + "\n", + " \"LLM_THREADS\": os.environ.get(\"LLM_THREADS\", \"6\"),\n", + " \"OMP_NUM_THREADS\": os.environ.get(\"OMP_NUM_THREADS\", \"6\"),\n", + " \"GPU_LAYERS\": \"0\", \n", + " \n", + " \"PIP_PREFER_BINARY\": \"1\",\n", + " \"CMAKE_ARGS\": \"-DGGML_OPENMP=OFF -DLLAMA_NATIVE=OFF\",\n", + "\n", + " \"HF_HOME\": \"/tmp/hf-cache\",\n", + " \"TRANSFORMERS_CACHE\": \"/tmp/hf-cache\",\n", + "\n", + " \"MLFLOW_TRACKING_URI\": os.environ.get(\"MLFLOW_TRACKING_URI\", \"\"),\n", + " \"MLFLOW_EXPERIMENT_NAME\": os.environ.get(\"MLFLOW_EXPERIMENT_NAME\", \"ray-llama-cpp\"),\n", + " },\n", + "}\n", + "\n", + "serve_py = textwrap.dedent(f\"\"\"\n", + "import os, time, multiprocessing, uuid\n", + "from typing import List, Dict, Any\n", + "from fastapi import FastAPI, Request\n", + "from fastapi.responses import JSONResponse\n", + "from huggingface_hub import HfApi, hf_hub_download\n", + "from ray import serve\n", + "from llama_cpp import Llama\n", + "\n", + "USE_MLFLOW = False\n", + "try:\n", + " import mlflow\n", + " if os.getenv(\"MLFLOW_TRACKING_URI\"):\n", + " mlflow.set_tracking_uri(os.getenv(\"MLFLOW_TRACKING_URI\"))\n", + " mlflow.set_experiment(os.getenv(\"MLFLOW_EXPERIMENT_NAME\",\"ray-llama-cpp\"))\n", + " USE_MLFLOW = True\n", + "except Exception as _e:\n", + " USE_MLFLOW = False\n", + "\n", + "SERVE_PORT = int(os.getenv(\"SERVE_PORT\", \"{SERVE_PORT}\"))\n", + "SERVE_ROUTE = \"{SERVE_ROUTE}\"\n", + "MODEL_REPO = os.getenv(\"MODEL_REPO\", \"Qwen/Qwen2.5-1.5B-Instruct-GGUF\")\n", + "GGUF_PREFS = [s.strip() for s in os.getenv(\"GGUF_PREF_ORDER\",\"q4_k_m,q4_0,q3_k_m,q2_k\").split(\",\") if s.strip()]\n", + "CTX_LEN = int(os.getenv(\"LLM_CONTEXT\", \"2048\"))\n", + "MAX_TOKENS = int(os.getenv(\"LLM_MAX_TOKENS\", \"256\"))\n", + "HF_TOKEN = os.getenv(\"HUGGINGFACE_HUB_TOKEN\") or None\n", + "\n", + "serve.start(detached=True, http_options={{\"host\":\"0.0.0.0\", \"port\":SERVE_PORT}})\n", + "app = FastAPI()\n", + "\n", + "def pick_one_file(repo_id: str, prefs):\n", + " api = HfApi()\n", + " files = api.list_repo_files(repo_id=repo_id, repo_type=\"model\", token=HF_TOKEN)\n", + " ggufs = [f for f in files if f.lower().endswith(\".gguf\")]\n", + " if not ggufs:\n", + " raise RuntimeError(f\"No .gguf files visible in {{repo_id}}\")\n", + " for pref in prefs:\n", + " for f in ggufs:\n", + " if pref.lower() in 
f.lower():\n", + " return f\n", + " return ggufs[0]\n", + "\n", + "def pick_chat_format(repo: str, fname: str) -> str:\n", + " return \"qwen\"\n", + "\n", + "@serve.deployment(name=\"qwen\", num_replicas=1, ray_actor_options={{\"num_cpus\": 6}})\n", + "@serve.ingress(app)\n", + "class OpenAICompatLlama:\n", + " def __init__(self, repo_id: str = MODEL_REPO):\n", + " target = pick_one_file(repo_id, GGUF_PREFS)\n", + " print(f\"[env] model repo: {{repo_id}} file: {{target}}\", flush=True)\n", + " local_dir = \"/tmp/hf-gguf\"; os.makedirs(local_dir, exist_ok=True)\n", + "\n", + " gguf_path = hf_hub_download(\n", + " repo_id=repo_id, filename=target, token=HF_TOKEN,\n", + " local_dir=local_dir, local_dir_use_symlinks=False,\n", + " force_download=False, resume_download=True\n", + " )\n", + " print(f\"[download] done: {{gguf_path}}\", flush=True)\n", + "\n", + " n_threads = int(os.getenv(\"LLM_THREADS\", max(2, (multiprocessing.cpu_count() or 4)//2)))\n", + " print(f\"[load] llama-cpp-python | ctx={{CTX_LEN}} threads={{n_threads}} gpu_layers={{int(os.getenv('GPU_LAYERS','0'))}}\", flush=True)\n", + "\n", + " self.model_file = os.path.basename(gguf_path)\n", + " self.model_repo = repo_id\n", + " chat_format = pick_chat_format(self.model_repo, self.model_file)\n", + " print(f\"[load] chat_format={{chat_format}}\", flush=True)\n", + "\n", + " self.llm = Llama(\n", + " model_path=gguf_path,\n", + " n_ctx=CTX_LEN,\n", + " n_threads=n_threads,\n", + " n_batch=256, \n", + " n_gpu_layers=int(os.getenv(\"GPU_LAYERS\",\"0\")),\n", + " chat_format=chat_format,\n", + " verbose=False\n", + " )\n", + " print(\"[ready] model loaded\", flush=True)\n", + "\n", + " @app.get(\"/healthz\")\n", + " async def health(self):\n", + " return {{\"status\":\"ok\"}}\n", + "\n", + " @app.post(\"/chat/completions\")\n", + " async def chat_completions(self, request: Request):\n", + " t0 = time.time()\n", + " body = await request.json()\n", + "\n", + " messages = body.get(\"messages\", [])\n", + " temperature = float(body.get(\"temperature\", 0.2))\n", + " req_max = body.get(\"max_tokens\", None)\n", + " stop_words = (body.get(\"stop\", []) or []) + [\"<|im_end|>\", \"\"]\n", + "\n", + " SERVER_MAX = int(os.getenv(\"SERVER_MAX_NEW_TOKENS\", \"512\"))\n", + " max_tokens = int(req_max if isinstance(req_max, int) else MAX_TOKENS)\n", + " max_tokens = max(32, min(max_tokens, CTX_LEN - 128, SERVER_MAX))\n", + "\n", + " rid = \"chatcmpl-\" + uuid.uuid4().hex[:24]\n", + " created = int(time.time())\n", + " model_name = f\"{{self.model_repo}}/{{self.model_file}}\"\n", + "\n", + " try:\n", + " result = self.llm.create_chat_completion(\n", + " messages=messages,\n", + " temperature=temperature,\n", + " max_tokens=max_tokens,\n", + " top_k=50,\n", + " top_p=0.9,\n", + " repeat_penalty=1.1,\n", + " stop=stop_words,\n", + " )\n", + " out_text = (result[\"choices\"][0][\"message\"][\"content\"] or \"\").strip()\n", + " usage_raw = result.get(\"usage\") or {{}}\n", + " p_tokens = int(usage_raw.get(\"prompt_tokens\") or 0)\n", + " c_tokens = int(usage_raw.get(\"completion_tokens\") or 0)\n", + " err = None\n", + " except Exception as e:\n", + " out_text = \"\"\n", + " p_tokens = c_tokens = 0\n", + " err = str(e)\n", + "\n", + " if USE_MLFLOW:\n", + " try:\n", + " dur_ms = int((time.time()-t0) * 1000)\n", + " with mlflow.start_run(run_name=\"chat\"):\n", + " mlflow.set_tags({{\n", + " \"model_repo\": self.model_repo,\n", + " \"model_file\": self.model_file,\n", + " \"framework\": \"llama-cpp-python\",\n", + " }})\n", + " 
mlflow.log_params({{\n", + " \"temperature\": temperature,\n", + " \"max_tokens\": max_tokens,\n", + " \"ctx\": CTX_LEN,\n", + " }})\n", + " if not (p_tokens and c_tokens):\n", + " p_tokens = p_tokens or max(1, len(\" \".join(m.get(\"content\",\"\") for m in messages).split()))\n", + " c_tokens = c_tokens or max(0, len(out_text.split()))\n", + " mlflow.log_metrics({{\n", + " \"duration_ms\": dur_ms,\n", + " \"prompt_tokens_approx\": p_tokens,\n", + " \"completion_tokens_approx\": c_tokens,\n", + " \"total_tokens_approx\": p_tokens + c_tokens,\n", + " }})\n", + " except Exception:\n", + " pass\n", + "\n", + " if err:\n", + " return JSONResponse(status_code=500, content={{\"error\": err, \"type\":\"generation_error\"}})\n", + "\n", + " usage = {{\n", + " \"prompt_tokens\": p_tokens,\n", + " \"completion_tokens\": c_tokens,\n", + " \"total_tokens\": p_tokens + c_tokens,\n", + " }}\n", + " return {{\n", + " \"id\": rid,\n", + " \"object\": \"chat.completion\",\n", + " \"created\": created,\n", + " \"model\": model_name,\n", + " \"choices\": [\n", + " {{\n", + " \"index\": 0,\n", + " \"message\": {{\"role\":\"assistant\",\"content\": out_text}},\n", + " \"finish_reason\": \"stop\"\n", + " }}\n", + " ],\n", + " \"usage\": usage\n", + " }}\n", + "\n", + "serve.run(OpenAICompatLlama.bind(), route_prefix=SERVE_ROUTE)\n", + "print(\"READY\", flush=True)\n", + "\"\"\").strip()\n", + "\n", + "payload = base64.b64encode(serve_py.encode()).decode()\n", + "entrypoint = 'python -c \"import base64,sys;exec(base64.b64decode(\\'{}\\').decode())\"'.format(payload)\n", + "\n", + "job = requests.post(\n", + " f\"{DASH_URL}/api/jobs/\",\n", + " json={\n", + " \"entrypoint\": entrypoint,\n", + " \"runtime_env\": runtime_env,\n", + " \"metadata\": {\"job_name\": \"serve-qwen2_5-llama_cpp-openai\"},\n", + " },\n", + " timeout=45\n", + ").json()\n", + "\n", + "print(\"Job:\", job.get(\"job_id\"))\n", + "print(\"Health:\", f\"http://{HEAD}:{SERVE_PORT}{SERVE_ROUTE}/healthz\")\n", + "print(\"Chat: \", f\"http://{HEAD}:{SERVE_PORT}{SERVE_ROUTE}/chat/completions\")" + ] + }, + { + "cell_type": "markdown", + "id": "a411c015-c802-4ca1-81bb-3f4790d9626a", + "metadata": {}, + "source": [ + "### Cell 4 - Basic client + latency test\n", + "\n", + "Calls /v1/healthz and then sends an OpenAI-style chat request to /v1/chat/completions with a short prompt. Prints latency and token usage, returning the assistant text." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3be634e2-a82f-42c9-8e31-57e6868a86ee", + "metadata": {}, + "outputs": [], + "source": [ + "import os, time, requests, json\n", + "\n", + "HEAD = os.environ.get(\"RAY_HEAD_SVC\", \"ai-starter-kit-kuberay-head-svc\")\n", + "SERVE_PORT = 8000\n", + "BASE_URL = f\"http://{HEAD}:{SERVE_PORT}/v1\"\n", + "\n", + "def health():\n", + " r = requests.get(f\"{BASE_URL}/healthz\", timeout=10)\n", + " print(\"Health:\", r.status_code, r.json())\n", + "\n", + "def chat(prompt, temperature=0.4, max_tokens=220, stop=None):\n", + " body = {\n", + " \"model\": \"qwen2.5-1.5b-instruct-gguf\",\n", + " \"temperature\": float(temperature),\n", + " \"max_tokens\": int(max_tokens),\n", + " \"messages\": [\n", + " {\"role\": \"system\", \"content\": \"You are Qwen2.5 Instruct running on a tiny CPU host. 
Be concise, complete sentences.\"},\n", + " {\"role\": \"user\", \"content\": prompt},\n", + " ],\n", + " }\n", + " if stop:\n", + " body[\"stop\"] = stop\n", + "\n", + " t0 = time.time()\n", + " r = requests.post(f\"{BASE_URL}/chat/completions\", json=body, timeout=300)\n", + " dt = time.time() - t0\n", + " r.raise_for_status()\n", + " out = r.json()[\"choices\"][0][\"message\"][\"content\"]\n", + " usage = r.json().get(\"usage\", {})\n", + " print(f\"\\nLatency: {dt:.2f}s | usage: {usage}\")\n", + " print(\"\\n---\\n\", out)\n", + " return out\n", + "\n", + "health()\n", + "_ = chat(\"Say 'test ok' then give me one short fun fact about llamas.\", stop=[\"<|im_end|>\"])" + ] + }, + { + "cell_type": "markdown", + "id": "553d2756-8949-43e3-8342-71387688e0fa", + "metadata": {}, + "source": [ + "### Cell 5 - Multi-agent (Autogen) pipeline\n", + "\n", + "Installs Autogen, configures OpenAIWrapper to hit Ray Serve /v1 endpoint, warms up the model, then runs a simple three-agent workflow (Researcher -> Writer -> Critic) to produce and refine a short report." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0f6713f3-8b60-40b2-ad3c-ebf6db4f66e1", + "metadata": {}, + "outputs": [], + "source": [ + "import os, requests, json, time\n", + "\n", + "HEAD = os.environ.get(\"RAY_HEAD_SVC\", \"ai-starter-kit-kuberay-head-svc\")\n", + "SERVE_PORT = 8000\n", + "BASE_URL = f\"http://{HEAD}:{SERVE_PORT}/v1\"\n", + "\n", + "def call_llm(role_prompt, user_message, temperature=0.4, max_tokens=150):\n", + " body = {\n", + " \"model\": \"qwen2.5-1.5b-instruct-gguf\",\n", + " \"temperature\": temperature,\n", + " \"max_tokens\": max_tokens,\n", + " \"messages\": [\n", + " {\"role\": \"system\", \"content\": role_prompt},\n", + " {\"role\": \"user\", \"content\": user_message}\n", + " ]\n", + " }\n", + " try:\n", + " r = requests.post(f\"{BASE_URL}/chat/completions\", json=body, timeout=120)\n", + " r.raise_for_status()\n", + " return r.json()[\"choices\"][0][\"message\"][\"content\"]\n", + " except Exception as e:\n", + " return f\"Error: {e}\"\n", + "\n", + "# Try to use autogen if available, otherwise use direct implementation\n", + "USE_AUTOGEN = False\n", + "\n", + "try:\n", + " import autogen\n", + " from autogen import AssistantAgent, UserProxyAgent\n", + " USE_AUTOGEN = True\n", + " print(\"Using autogen for multi-agent workflow\")\n", + "except ImportError:\n", + " try:\n", + " print(\"Installing autogen dependencies...\")\n", + " !pip install -q pyautogen~=0.2.35 python-dotenv tiktoken \"numpy<2,>=1.17.0\" --disable-pip-version-check 2>/dev/null\n", + " import autogen\n", + " from autogen import AssistantAgent, UserProxyAgent\n", + " USE_AUTOGEN = True\n", + " print(\"Autogen installed successfully\")\n", + " except:\n", + " print(\"Using direct implementation (autogen not available)\")\n", + " USE_AUTOGEN = False\n", + "\n", + "if USE_AUTOGEN:\n", + " config_list = [\n", + " {\n", + " \"model\": \"qwen2.5-1.5b-instruct-gguf\",\n", + " \"base_url\": BASE_URL,\n", + " \"api_key\": \"local\",\n", + " \"price\": [0.0, 0.0],\n", + " }\n", + " ]\n", + " \n", + " llm = autogen.OpenAIWrapper(config_list=config_list)\n", + " \n", + " try:\n", + " r = llm.create(messages=[{\"role\":\"user\",\"content\":\"Say 'test ok'.\"}], temperature=0.2, max_tokens=16)\n", + " print(\"Warmup:\", r.choices[0].message.content)\n", + " except Exception as e:\n", + " print(\"Warmup skipped:\", e)\n", + " \n", + " user_proxy = UserProxyAgent(\n", + " name=\"UserProxy\",\n", + " system_message=\"You are the human 
admin. Initiate the task.\",\n", + " code_execution_config=False,\n", + " human_input_mode=\"NEVER\",\n", + " )\n", + " \n", + " researcher = AssistantAgent(\n", + " name=\"Researcher\",\n", + " system_message=(\n", + " \"You are a researcher. Gather concise, verified facts on the topic. \"\n", + " \"Return 3-4 bullet points. Keep under 100 words total.\"\n", + " ),\n", + " llm_config={\"config_list\": config_list, \"temperature\": 0.35, \"max_tokens\": 140, \"timeout\": 120},\n", + " )\n", + " \n", + " writer = AssistantAgent(\n", + " name=\"Writer\",\n", + " system_message=(\n", + " \"You are a writer. Using the Researcher's notes, produce a clear report under 160 words.\"\n", + " ),\n", + " llm_config={\"config_list\": config_list, \"temperature\": 0.55, \"max_tokens\": 220, \"timeout\": 180},\n", + " )\n", + " \n", + " critic = AssistantAgent(\n", + " name=\"Critic\",\n", + " system_message=(\n", + " \"You are a critic. Review the Writer's report for accuracy and clarity. \"\n", + " \"Present the final polished text under 140 words.\"\n", + " ),\n", + " llm_config={\"config_list\": config_list, \"temperature\": 0.45, \"max_tokens\": 160, \"timeout\": 120},\n", + " )\n", + " \n", + " def run_sequential(task):\n", + " print(\"\\n\" + \"=\" * 60)\n", + " print(\"Running Multi-Agent Workflow (with autogen)\")\n", + " print(\"=\" * 60)\n", + " \n", + " research_response = researcher.generate_reply(messages=[{\"content\": task, \"role\": \"user\"}])\n", + " research_notes = research_response if isinstance(research_response, str) else research_response.get(\"content\", \"[no output]\")\n", + " print(\"\\n1. RESEARCHER:\")\n", + " print(\"-\" * 40)\n", + " print(research_notes)\n", + " \n", + " writer_prompt = f\"Using these research notes, write the report:\\n{research_notes}\"\n", + " writer_response = writer.generate_reply(messages=[{\"content\": writer_prompt, \"role\": \"user\"}])\n", + " report = writer_response if isinstance(writer_response, str) else writer_response.get(\"content\", \"[no output]\")\n", + " print(\"\\n2. WRITER:\")\n", + " print(\"-\" * 40)\n", + " print(report)\n", + " \n", + " critic_prompt = f\"Review this report:\\n{report}\"\n", + " critic_response = critic.generate_reply(messages=[{\"content\": critic_prompt, \"role\": \"user\"}])\n", + " final_text = critic_response if isinstance(critic_response, str) else critic_response.get(\"content\", \"[no output]\")\n", + " print(\"\\n3. CRITIC/EDITOR:\")\n", + " print(\"-\" * 40)\n", + " print(final_text)\n", + " return final_text\n", + " \n", + " task = \"Research the latest advancements in quantum computing as of 2025. Gather key facts, then write a short report.\"\n", + " final_output = run_sequential(task)\n", + " \n", + "else:\n", + " print(\"=\" * 60)\n", + " print(\"Running Multi-Agent Workflow (direct implementation)\")\n", + " print(\"=\" * 60)\n", + " \n", + " task = \"Research the latest advancements in quantum computing as of 2025.\"\n", + " \n", + " print(\"\\n1. RESEARCHER:\")\n", + " print(\"-\" * 40)\n", + " research_prompt = \"You are a researcher. Provide 3-4 key facts about the topic. Be concise and factual.\"\n", + " research_notes = call_llm(research_prompt, task, temperature=0.35, max_tokens=140)\n", + " print(research_notes)\n", + " time.sleep(1) \n", + " \n", + " print(\"\\n2. WRITER:\")\n", + " print(\"-\" * 40)\n", + " writer_prompt = \"You are a technical writer. 
Based on the following notes, write a brief report.\"\n", + " writer_task = f\"Write a report based on these notes:\\n{research_notes}\"\n", + " report = call_llm(writer_prompt, writer_task, temperature=0.55, max_tokens=220)\n", + " print(report)\n", + " time.sleep(1)\n", + " \n", + " print(\"\\n3. CRITIC/EDITOR:\")\n", + " print(\"-\" * 40)\n", + " critic_prompt = \"You are an editor. Review the report and provide a final polished version.\"\n", + " critic_task = f\"Review and improve this report:\\n{report}\"\n", + " final_output = call_llm(critic_prompt, critic_task, temperature=0.45, max_tokens=160)\n", + " print(final_output)\n", + "\n", + "print(\"\\n\" + \"=\" * 60)\n", + "print(\"Multi-agent workflow complete\")\n", + "print(\"=\" * 60)" + ] + }, + { + "cell_type": "markdown", + "id": "0af596cf-5ba6-42df-a030-61d7a20d6f7b", + "metadata": {}, + "source": [ + "### Cell 6 - MLFlow: connect to tracking server and list recent chat runs\n", + "\n", + "Installs MLflow, sets the tracking URI and experiment, then queries and prints the latest runs with key params/metrics (temperature, max_tokens, duration) to verify Serve logging." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "03a1b042-04df-4cd0-9099-4cc763ecfe9d", + "metadata": {}, + "outputs": [], + "source": [ + "!pip -q install mlflow==2.14.3 --disable-pip-version-check\n", + "\n", + "import os, mlflow\n", + "from datetime import datetime\n", + "\n", + "tracking_uri = os.getenv(\"MLFLOW_TRACKING_URI\", \"http://ai-starter-kit-mlflow:5000\")\n", + "mlflow.set_tracking_uri(tracking_uri)\n", + "print(f\"MLflow Tracking URI: {tracking_uri}\")\n", + "\n", + "exp_name = os.getenv(\"MLFLOW_EXPERIMENT_NAME\", \"ray-llama-cpp\")\n", + "exp = mlflow.set_experiment(exp_name)\n", + "print(f\"Experiment: {exp.name} (ID: {exp.experiment_id})\")\n", + "print(\"-\" * 60)\n", + "\n", + "client = mlflow.tracking.MlflowClient()\n", + "runs = client.search_runs(\n", + " exp.experiment_id, \n", + " order_by=[\"attributes.start_time DESC\"], \n", + " max_results=10\n", + ")\n", + "\n", + "if not runs:\n", + " print(\"No runs found. 
Run cells 4 or 5 first to generate inference requests.\")\n", + "else:\n", + " print(f\"\\nFound {len(runs)} recent runs:\")\n", + " print(\"-\" * 60)\n", + " \n", + " for i, run in enumerate(runs, 1):\n", + " start_time = datetime.fromtimestamp(run.info.start_time/1000).strftime('%Y-%m-%d %H:%M:%S')\n", + " duration = run.data.metrics.get('duration_ms', 'N/A')\n", + " temp = run.data.params.get('temperature', 'N/A')\n", + " max_tokens = run.data.params.get('max_tokens', 'N/A')\n", + " total_tokens = run.data.metrics.get('total_tokens_approx', 'N/A')\n", + " \n", + " print(f\"\\nRun {i}:\")\n", + " print(f\" ID: {run.info.run_id[:12]}...\")\n", + " print(f\" Time: {start_time}\")\n", + " print(f\" Status: {run.info.status}\")\n", + " print(f\" Temperature: {temp}\")\n", + " print(f\" Max Tokens: {max_tokens}\")\n", + " print(f\" Duration: {duration} ms\")\n", + " print(f\" Total Tokens: {total_tokens}\")\n", + " \n", + " print(\"\\n\" + \"=\" * 60)\n", + " print(\"SUMMARY:\")\n", + " successful = sum(1 for r in runs if r.info.status == 'FINISHED')\n", + " durations = [r.data.metrics.get('duration_ms', 0) for r in runs if r.data.metrics.get('duration_ms')]\n", + " avg_duration = sum(durations) / len(durations) if durations else 0\n", + " \n", + " print(f\" Total Runs: {len(runs)}\")\n", + " print(f\" Successful: {successful}\")\n", + " print(f\" Failed: {len(runs) - successful}\")\n", + " print(f\" Avg Duration: {avg_duration:.1f} ms\" if avg_duration else \" Avg Duration: N/A\")\n", + "\n", + "print(\"\\n\" + \"=\" * 60)\n", + "print(\"MLflow verification complete\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.9" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/ai/ai-starter-kit/helm-chart/ai-starter-kit/files/ray.ipynb b/ai/ai-starter-kit/helm-chart/ai-starter-kit/files/ray.ipynb new file mode 100644 index 000000000..dae93a357 --- /dev/null +++ b/ai/ai-starter-kit/helm-chart/ai-starter-kit/files/ray.ipynb @@ -0,0 +1,798 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "e4a6ac7c-5c73-42a9-8b74-420788321543", + "metadata": { + "collapsed": true, + "jupyter": { + "outputs_hidden": true + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Defaulting to user installation because normal site-packages is not writeable\n", + "Collecting ray==2.41.0\n", + " Downloading ray-2.41.0-cp311-cp311-manylinux2014_x86_64.whl.metadata (18 kB)\n", + "Collecting click>=7.0 (from ray==2.41.0)\n", + " Downloading click-8.2.1-py3-none-any.whl.metadata (2.5 kB)\n", + "Collecting filelock (from ray==2.41.0)\n", + " Downloading filelock-3.19.1-py3-none-any.whl.metadata (2.1 kB)\n", + "Requirement already satisfied: jsonschema in /opt/bitnami/miniconda/lib/python3.11/site-packages (from ray==2.41.0) (4.22.0)\n", + "Collecting msgpack<2.0.0,>=1.0.0 (from ray==2.41.0)\n", + " Downloading msgpack-1.1.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (8.4 kB)\n", + "Requirement already satisfied: packaging in /opt/bitnami/miniconda/lib/python3.11/site-packages (from ray==2.41.0) (23.2)\n", + "Collecting protobuf!=3.19.5,>=3.15.3 (from ray==2.41.0)\n", + " Downloading 
protobuf-6.32.0-cp39-abi3-manylinux2014_x86_64.whl.metadata (593 bytes)\n", + "Requirement already satisfied: pyyaml in /opt/bitnami/miniconda/lib/python3.11/site-packages (from ray==2.41.0) (6.0.1)\n", + "Requirement already satisfied: aiosignal in /opt/bitnami/miniconda/lib/python3.11/site-packages (from ray==2.41.0) (1.3.1)\n", + "Requirement already satisfied: frozenlist in /opt/bitnami/miniconda/lib/python3.11/site-packages (from ray==2.41.0) (1.4.1)\n", + "Requirement already satisfied: requests in /opt/bitnami/miniconda/lib/python3.11/site-packages (from ray==2.41.0) (2.31.0)\n", + "Requirement already satisfied: attrs>=22.2.0 in /opt/bitnami/miniconda/lib/python3.11/site-packages (from jsonschema->ray==2.41.0) (23.2.0)\n", + "Requirement already satisfied: jsonschema-specifications>=2023.03.6 in /opt/bitnami/miniconda/lib/python3.11/site-packages (from jsonschema->ray==2.41.0) (2023.12.1)\n", + "Requirement already satisfied: referencing>=0.28.4 in /opt/bitnami/miniconda/lib/python3.11/site-packages (from jsonschema->ray==2.41.0) (0.35.1)\n", + "Requirement already satisfied: rpds-py>=0.7.1 in /opt/bitnami/miniconda/lib/python3.11/site-packages (from jsonschema->ray==2.41.0) (0.18.1)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /opt/bitnami/miniconda/lib/python3.11/site-packages (from requests->ray==2.41.0) (2.0.4)\n", + "Requirement already satisfied: idna<4,>=2.5 in /opt/bitnami/miniconda/lib/python3.11/site-packages (from requests->ray==2.41.0) (3.7)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /opt/bitnami/miniconda/lib/python3.11/site-packages (from requests->ray==2.41.0) (2.1.0)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /opt/bitnami/miniconda/lib/python3.11/site-packages (from requests->ray==2.41.0) (2024.2.2)\n", + "Downloading ray-2.41.0-cp311-cp311-manylinux2014_x86_64.whl (67.3 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m67.3/67.3 MB\u001b[0m \u001b[31m25.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m:00:01\u001b[0m00:01\u001b[0m\n", + "\u001b[?25hDownloading click-8.2.1-py3-none-any.whl (102 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m102.2/102.2 kB\u001b[0m \u001b[31m11.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading msgpack-1.1.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (429 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m430.0/430.0 kB\u001b[0m \u001b[31m41.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading protobuf-6.32.0-cp39-abi3-manylinux2014_x86_64.whl (322 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m322.0/322.0 kB\u001b[0m \u001b[31m33.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading filelock-3.19.1-py3-none-any.whl (15 kB)\n", + "Installing collected packages: protobuf, msgpack, filelock, click, ray\n", + "\u001b[33m WARNING: The scripts ray, rllib, serve and tune are installed in '/tmp/models-cache/bin' which is not on PATH.\n", + " Consider adding this directory to PATH or, if you prefer to suppress this warning, use --no-warn-script-location.\u001b[0m\u001b[33m\n", + "\u001b[0mSuccessfully installed click-8.2.1 filelock-3.19.1 msgpack-1.1.1 protobuf-6.32.0 ray-2.41.0\n" + ] + } + ], + "source": [ + "!pip install ray==2.41.0" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": 
"788f1517-251c-4171-af7d-f4c7a5073d71", + "metadata": { + "collapsed": true, + "jupyter": { + "outputs_hidden": true + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Defaulting to user installation because normal site-packages is not writeable\n", + "Collecting numpy\n", + " Downloading numpy-2.3.2-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (62 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m62.1/62.1 kB\u001b[0m \u001b[31m3.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting mlflow\n", + " Downloading mlflow-3.3.1-py3-none-any.whl.metadata (30 kB)\n", + "Collecting tensorflow\n", + " Downloading tensorflow-2.20.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.5 kB)\n", + "Requirement already satisfied: ray[client,default,serve] in ./models-cache/lib/python3.11/site-packages (2.41.0)\n", + "Collecting mlflow-skinny==3.3.1 (from mlflow)\n", + " Downloading mlflow_skinny-3.3.1-py3-none-any.whl.metadata (31 kB)\n", + "Collecting mlflow-tracing==3.3.1 (from mlflow)\n", + " Downloading mlflow_tracing-3.3.1-py3-none-any.whl.metadata (19 kB)\n", + "Collecting Flask<4 (from mlflow)\n", + " Downloading flask-3.1.2-py3-none-any.whl.metadata (3.2 kB)\n", + "Requirement already satisfied: alembic!=1.10.0,<2 in /opt/bitnami/miniconda/lib/python3.11/site-packages (from mlflow) (1.13.1)\n", + "Collecting cryptography<46,>=43.0.0 (from mlflow)\n", + " Downloading cryptography-45.0.6-cp311-abi3-manylinux_2_34_x86_64.whl.metadata (5.7 kB)\n", + "Requirement already satisfied: docker<8,>=4.0.0 in /opt/bitnami/miniconda/lib/python3.11/site-packages (from mlflow) (7.1.0)\n", + "Collecting graphene<4 (from mlflow)\n", + " Downloading graphene-3.4.3-py2.py3-none-any.whl.metadata (6.9 kB)\n", + "Collecting gunicorn<24 (from mlflow)\n", + " Downloading gunicorn-23.0.0-py3-none-any.whl.metadata (4.4 kB)\n", + "Collecting matplotlib<4 (from mlflow)\n", + " Downloading matplotlib-3.10.5-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (11 kB)\n", + "Collecting pandas<3 (from mlflow)\n", + " Downloading pandas-2.3.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (91 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m91.2/91.2 kB\u001b[0m \u001b[31m12.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting pyarrow<22,>=4.0.0 (from mlflow)\n", + " Downloading pyarrow-21.0.0-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (3.3 kB)\n", + "Collecting scikit-learn<2 (from mlflow)\n", + " Downloading scikit_learn-1.7.1-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (11 kB)\n", + "Collecting scipy<2 (from mlflow)\n", + " Downloading scipy-1.16.1-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (61 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m62.0/62.0 kB\u001b[0m \u001b[31m8.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: sqlalchemy<3,>=1.4.0 in /opt/bitnami/miniconda/lib/python3.11/site-packages (from mlflow) (2.0.30)\n", + "Collecting cachetools<7,>=5.0.0 (from mlflow-skinny==3.3.1->mlflow)\n", + " Downloading cachetools-6.2.0-py3-none-any.whl.metadata (5.4 kB)\n", + "Requirement already satisfied: click<9,>=7.0 in ./models-cache/lib/python3.11/site-packages (from mlflow-skinny==3.3.1->mlflow) (8.2.1)\n", + "Collecting 
cloudpickle<4 (from mlflow-skinny==3.3.1->mlflow)\n", + " Downloading cloudpickle-3.1.1-py3-none-any.whl.metadata (7.1 kB)\n", + "Collecting databricks-sdk<1,>=0.20.0 (from mlflow-skinny==3.3.1->mlflow)\n", + " Downloading databricks_sdk-0.64.0-py3-none-any.whl.metadata (39 kB)\n", + "Collecting fastapi<1 (from mlflow-skinny==3.3.1->mlflow)\n", + " Downloading fastapi-0.116.1-py3-none-any.whl.metadata (28 kB)\n", + "Collecting gitpython<4,>=3.1.9 (from mlflow-skinny==3.3.1->mlflow)\n", + " Downloading gitpython-3.1.45-py3-none-any.whl.metadata (13 kB)\n", + "Collecting importlib_metadata!=4.7.0,<9,>=3.7.0 (from mlflow-skinny==3.3.1->mlflow)\n", + " Downloading importlib_metadata-8.7.0-py3-none-any.whl.metadata (4.8 kB)\n", + "Collecting opentelemetry-api<3,>=1.9.0 (from mlflow-skinny==3.3.1->mlflow)\n", + " Downloading opentelemetry_api-1.36.0-py3-none-any.whl.metadata (1.5 kB)\n", + "Collecting opentelemetry-sdk<3,>=1.9.0 (from mlflow-skinny==3.3.1->mlflow)\n", + " Downloading opentelemetry_sdk-1.36.0-py3-none-any.whl.metadata (1.5 kB)\n", + "Requirement already satisfied: packaging<26 in /opt/bitnami/miniconda/lib/python3.11/site-packages (from mlflow-skinny==3.3.1->mlflow) (23.2)\n", + "Requirement already satisfied: protobuf<7,>=3.12.0 in ./models-cache/lib/python3.11/site-packages (from mlflow-skinny==3.3.1->mlflow) (6.32.0)\n", + "Requirement already satisfied: pydantic<3,>=1.10.8 in /opt/bitnami/miniconda/lib/python3.11/site-packages (from mlflow-skinny==3.3.1->mlflow) (2.7.3)\n", + "Requirement already satisfied: pyyaml<7,>=5.1 in /opt/bitnami/miniconda/lib/python3.11/site-packages (from mlflow-skinny==3.3.1->mlflow) (6.0.1)\n", + "Requirement already satisfied: requests<3,>=2.17.3 in /opt/bitnami/miniconda/lib/python3.11/site-packages (from mlflow-skinny==3.3.1->mlflow) (2.31.0)\n", + "Collecting sqlparse<1,>=0.4.0 (from mlflow-skinny==3.3.1->mlflow)\n", + " Downloading sqlparse-0.5.3-py3-none-any.whl.metadata (3.9 kB)\n", + "Requirement already satisfied: typing-extensions<5,>=4.0.0 in /opt/bitnami/miniconda/lib/python3.11/site-packages (from mlflow-skinny==3.3.1->mlflow) (4.12.2)\n", + "Collecting uvicorn<1 (from mlflow-skinny==3.3.1->mlflow)\n", + " Downloading uvicorn-0.35.0-py3-none-any.whl.metadata (6.5 kB)\n", + "Collecting absl-py>=1.0.0 (from tensorflow)\n", + " Downloading absl_py-2.3.1-py3-none-any.whl.metadata (3.3 kB)\n", + "Collecting astunparse>=1.6.0 (from tensorflow)\n", + " Downloading astunparse-1.6.3-py2.py3-none-any.whl.metadata (4.4 kB)\n", + "Collecting flatbuffers>=24.3.25 (from tensorflow)\n", + " Downloading flatbuffers-25.2.10-py2.py3-none-any.whl.metadata (875 bytes)\n", + "Collecting gast!=0.5.0,!=0.5.1,!=0.5.2,>=0.2.1 (from tensorflow)\n", + " Downloading gast-0.6.0-py3-none-any.whl.metadata (1.3 kB)\n", + "Collecting google_pasta>=0.1.1 (from tensorflow)\n", + " Downloading google_pasta-0.2.0-py3-none-any.whl.metadata (814 bytes)\n", + "Collecting libclang>=13.0.0 (from tensorflow)\n", + " Downloading libclang-18.1.1-py2.py3-none-manylinux2010_x86_64.whl.metadata (5.2 kB)\n", + "Collecting opt_einsum>=2.3.2 (from tensorflow)\n", + " Downloading opt_einsum-3.4.0-py3-none-any.whl.metadata (6.3 kB)\n", + "Requirement already satisfied: setuptools in /opt/bitnami/miniconda/lib/python3.11/site-packages (from tensorflow) (69.5.1)\n", + "Requirement already satisfied: six>=1.12.0 in /opt/bitnami/miniconda/lib/python3.11/site-packages (from tensorflow) (1.16.0)\n", + "Collecting termcolor>=1.1.0 (from tensorflow)\n", + " Downloading 
termcolor-3.1.0-py3-none-any.whl.metadata (6.4 kB)\n", + "Collecting wrapt>=1.11.0 (from tensorflow)\n", + " Downloading wrapt-1.17.3-cp311-cp311-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl.metadata (6.4 kB)\n", + "Collecting grpcio<2.0,>=1.24.3 (from tensorflow)\n", + " Downloading grpcio-1.74.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.8 kB)\n", + "Collecting tensorboard~=2.20.0 (from tensorflow)\n", + " Downloading tensorboard-2.20.0-py3-none-any.whl.metadata (1.8 kB)\n", + "Collecting keras>=3.10.0 (from tensorflow)\n", + " Downloading keras-3.11.3-py3-none-any.whl.metadata (5.9 kB)\n", + "Collecting h5py>=3.11.0 (from tensorflow)\n", + " Downloading h5py-3.14.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (2.7 kB)\n", + "Collecting ml_dtypes<1.0.0,>=0.5.1 (from tensorflow)\n", + " Downloading ml_dtypes-0.5.3-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (8.9 kB)\n", + "Requirement already satisfied: filelock in ./models-cache/lib/python3.11/site-packages (from ray[client,default,serve]) (3.19.1)\n", + "Requirement already satisfied: jsonschema in /opt/bitnami/miniconda/lib/python3.11/site-packages (from ray[client,default,serve]) (4.22.0)\n", + "Requirement already satisfied: msgpack<2.0.0,>=1.0.0 in ./models-cache/lib/python3.11/site-packages (from ray[client,default,serve]) (1.1.1)\n", + "Requirement already satisfied: aiosignal in /opt/bitnami/miniconda/lib/python3.11/site-packages (from ray[client,default,serve]) (1.3.1)\n", + "Requirement already satisfied: frozenlist in /opt/bitnami/miniconda/lib/python3.11/site-packages (from ray[client,default,serve]) (1.4.1)\n", + "Requirement already satisfied: aiohttp>=3.7 in /opt/bitnami/miniconda/lib/python3.11/site-packages (from ray[client,default,serve]) (3.9.5)\n", + "Collecting aiohttp-cors (from ray[client,default,serve])\n", + " Downloading aiohttp_cors-0.8.1-py3-none-any.whl.metadata (20 kB)\n", + "Collecting colorful (from ray[client,default,serve])\n", + " Downloading colorful-0.5.7-py2.py3-none-any.whl.metadata (16 kB)\n", + "Collecting opencensus (from ray[client,default,serve])\n", + " Downloading opencensus-0.11.4-py2.py3-none-any.whl.metadata (12 kB)\n", + "Requirement already satisfied: prometheus-client>=0.7.1 in /opt/bitnami/miniconda/lib/python3.11/site-packages (from ray[client,default,serve]) (0.20.0)\n", + "Collecting smart-open (from ray[client,default,serve])\n", + " Downloading smart_open-7.3.0.post1-py3-none-any.whl.metadata (24 kB)\n", + "Collecting virtualenv!=20.21.1,>=20.0.24 (from ray[client,default,serve])\n", + " Downloading virtualenv-20.34.0-py3-none-any.whl.metadata (4.6 kB)\n", + "Collecting py-spy>=0.2.0 (from ray[client,default,serve])\n", + " Downloading py_spy-0.4.1-py2.py3-none-manylinux_2_5_x86_64.manylinux1_x86_64.whl.metadata (510 bytes)\n", + "Collecting starlette (from ray[client,default,serve])\n", + " Downloading starlette-0.47.3-py3-none-any.whl.metadata (6.2 kB)\n", + "Collecting watchfiles (from ray[client,default,serve])\n", + " Downloading watchfiles-1.1.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.9 kB)\n", + "Requirement already satisfied: attrs>=17.3.0 in /opt/bitnami/miniconda/lib/python3.11/site-packages (from aiohttp>=3.7->ray[client,default,serve]) (23.2.0)\n", + "Requirement already satisfied: multidict<7.0,>=4.5 in /opt/bitnami/miniconda/lib/python3.11/site-packages (from aiohttp>=3.7->ray[client,default,serve]) (6.0.5)\n", + "Requirement already 
satisfied: yarl<2.0,>=1.0 in /opt/bitnami/miniconda/lib/python3.11/site-packages (from aiohttp>=3.7->ray[client,default,serve]) (1.9.4)\n", + "Requirement already satisfied: Mako in /opt/bitnami/miniconda/lib/python3.11/site-packages (from alembic!=1.10.0,<2->mlflow) (1.3.5)\n", + "Requirement already satisfied: wheel<1.0,>=0.23.0 in /opt/bitnami/miniconda/lib/python3.11/site-packages (from astunparse>=1.6.0->tensorflow) (0.43.0)\n", + "Requirement already satisfied: cffi>=1.14 in /opt/bitnami/miniconda/lib/python3.11/site-packages (from cryptography<46,>=43.0.0->mlflow) (1.16.0)\n", + "Requirement already satisfied: urllib3>=1.26.0 in /opt/bitnami/miniconda/lib/python3.11/site-packages (from docker<8,>=4.0.0->mlflow) (2.1.0)\n", + "Collecting blinker>=1.9.0 (from Flask<4->mlflow)\n", + " Downloading blinker-1.9.0-py3-none-any.whl.metadata (1.6 kB)\n", + "Collecting itsdangerous>=2.2.0 (from Flask<4->mlflow)\n", + " Downloading itsdangerous-2.2.0-py3-none-any.whl.metadata (1.9 kB)\n", + "Requirement already satisfied: jinja2>=3.1.2 in /opt/bitnami/miniconda/lib/python3.11/site-packages (from Flask<4->mlflow) (3.1.4)\n", + "Requirement already satisfied: markupsafe>=2.1.1 in /opt/bitnami/miniconda/lib/python3.11/site-packages (from Flask<4->mlflow) (2.1.5)\n", + "Collecting werkzeug>=3.1.0 (from Flask<4->mlflow)\n", + " Downloading werkzeug-3.1.3-py3-none-any.whl.metadata (3.7 kB)\n", + "Collecting graphql-core<3.3,>=3.1 (from graphene<4->mlflow)\n", + " Downloading graphql_core-3.2.6-py3-none-any.whl.metadata (11 kB)\n", + "Collecting graphql-relay<3.3,>=3.1 (from graphene<4->mlflow)\n", + " Downloading graphql_relay-3.2.0-py3-none-any.whl.metadata (12 kB)\n", + "Requirement already satisfied: python-dateutil<3,>=2.7.0 in /opt/bitnami/miniconda/lib/python3.11/site-packages (from graphene<4->mlflow) (2.9.0.post0)\n", + "Collecting rich (from keras>=3.10.0->tensorflow)\n", + " Downloading rich-14.1.0-py3-none-any.whl.metadata (18 kB)\n", + "Collecting namex (from keras>=3.10.0->tensorflow)\n", + " Downloading namex-0.1.0-py3-none-any.whl.metadata (322 bytes)\n", + "Collecting optree (from keras>=3.10.0->tensorflow)\n", + " Downloading optree-0.17.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (33 kB)\n", + "Collecting contourpy>=1.0.1 (from matplotlib<4->mlflow)\n", + " Downloading contourpy-1.3.3-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (5.5 kB)\n", + "Collecting cycler>=0.10 (from matplotlib<4->mlflow)\n", + " Downloading cycler-0.12.1-py3-none-any.whl.metadata (3.8 kB)\n", + "Collecting fonttools>=4.22.0 (from matplotlib<4->mlflow)\n", + " Downloading fonttools-4.59.1-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (108 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m108.9/108.9 kB\u001b[0m \u001b[31m11.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting kiwisolver>=1.3.1 (from matplotlib<4->mlflow)\n", + " Downloading kiwisolver-1.4.9-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (6.3 kB)\n", + "Collecting pillow>=8 (from matplotlib<4->mlflow)\n", + " Downloading pillow-11.3.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (9.0 kB)\n", + "Collecting pyparsing>=2.3.1 (from matplotlib<4->mlflow)\n", + " Downloading pyparsing-3.2.3-py3-none-any.whl.metadata (5.0 kB)\n", + "Collecting pytz>=2020.1 (from pandas<3->mlflow)\n", + " Downloading pytz-2025.2-py2.py3-none-any.whl.metadata (22 kB)\n", + "Collecting 
tzdata>=2022.7 (from pandas<3->mlflow)\n", + " Downloading tzdata-2025.2-py2.py3-none-any.whl.metadata (1.4 kB)\n", + "Requirement already satisfied: annotated-types>=0.4.0 in /opt/bitnami/miniconda/lib/python3.11/site-packages (from pydantic<3,>=1.10.8->mlflow-skinny==3.3.1->mlflow) (0.7.0)\n", + "Requirement already satisfied: pydantic-core==2.18.4 in /opt/bitnami/miniconda/lib/python3.11/site-packages (from pydantic<3,>=1.10.8->mlflow-skinny==3.3.1->mlflow) (2.18.4)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /opt/bitnami/miniconda/lib/python3.11/site-packages (from requests<3,>=2.17.3->mlflow-skinny==3.3.1->mlflow) (2.0.4)\n", + "Requirement already satisfied: idna<4,>=2.5 in /opt/bitnami/miniconda/lib/python3.11/site-packages (from requests<3,>=2.17.3->mlflow-skinny==3.3.1->mlflow) (3.7)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /opt/bitnami/miniconda/lib/python3.11/site-packages (from requests<3,>=2.17.3->mlflow-skinny==3.3.1->mlflow) (2024.2.2)\n", + "Collecting joblib>=1.2.0 (from scikit-learn<2->mlflow)\n", + " Downloading joblib-1.5.1-py3-none-any.whl.metadata (5.6 kB)\n", + "Collecting threadpoolctl>=3.1.0 (from scikit-learn<2->mlflow)\n", + " Downloading threadpoolctl-3.6.0-py3-none-any.whl.metadata (13 kB)\n", + "Requirement already satisfied: greenlet!=0.4.17 in /opt/bitnami/miniconda/lib/python3.11/site-packages (from sqlalchemy<3,>=1.4.0->mlflow) (3.0.3)\n", + "Requirement already satisfied: anyio<5,>=3.6.2 in /opt/bitnami/miniconda/lib/python3.11/site-packages (from starlette->ray[client,default,serve]) (4.4.0)\n", + "Collecting markdown>=2.6.8 (from tensorboard~=2.20.0->tensorflow)\n", + " Downloading markdown-3.8.2-py3-none-any.whl.metadata (5.1 kB)\n", + "Collecting tensorboard-data-server<0.8.0,>=0.7.0 (from tensorboard~=2.20.0->tensorflow)\n", + " Downloading tensorboard_data_server-0.7.2-py3-none-manylinux_2_31_x86_64.whl.metadata (1.1 kB)\n", + "Collecting distlib<1,>=0.3.7 (from virtualenv!=20.21.1,>=20.0.24->ray[client,default,serve])\n", + " Downloading distlib-0.4.0-py2.py3-none-any.whl.metadata (5.2 kB)\n", + "Requirement already satisfied: platformdirs<5,>=3.9.1 in /opt/bitnami/miniconda/lib/python3.11/site-packages (from virtualenv!=20.21.1,>=20.0.24->ray[client,default,serve]) (3.10.0)\n", + "Requirement already satisfied: jsonschema-specifications>=2023.03.6 in /opt/bitnami/miniconda/lib/python3.11/site-packages (from jsonschema->ray[client,default,serve]) (2023.12.1)\n", + "Requirement already satisfied: referencing>=0.28.4 in /opt/bitnami/miniconda/lib/python3.11/site-packages (from jsonschema->ray[client,default,serve]) (0.35.1)\n", + "Requirement already satisfied: rpds-py>=0.7.1 in /opt/bitnami/miniconda/lib/python3.11/site-packages (from jsonschema->ray[client,default,serve]) (0.18.1)\n", + "Collecting opencensus-context>=0.1.3 (from opencensus->ray[client,default,serve])\n", + " Downloading opencensus_context-0.1.3-py2.py3-none-any.whl.metadata (3.3 kB)\n", + "Collecting google-api-core<3.0.0,>=1.0.0 (from opencensus->ray[client,default,serve])\n", + " Downloading google_api_core-2.25.1-py3-none-any.whl.metadata (3.0 kB)\n", + "Requirement already satisfied: h11>=0.8 in /opt/bitnami/miniconda/lib/python3.11/site-packages (from uvicorn[standard]; extra == \"serve\"->ray[client,default,serve]) (0.14.0)\n", + "Collecting httptools>=0.6.3 (from uvicorn[standard]; extra == \"serve\"->ray[client,default,serve])\n", + " Downloading 
httptools-0.6.4-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.6 kB)\n", + "Collecting python-dotenv>=0.13 (from uvicorn[standard]; extra == \"serve\"->ray[client,default,serve])\n", + " Downloading python_dotenv-1.1.1-py3-none-any.whl.metadata (24 kB)\n", + "Collecting uvloop>=0.15.1 (from uvicorn[standard]; extra == \"serve\"->ray[client,default,serve])\n", + " Downloading uvloop-0.21.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.9 kB)\n", + "Collecting websockets>=10.4 (from uvicorn[standard]; extra == \"serve\"->ray[client,default,serve])\n", + " Downloading websockets-15.0.1-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.8 kB)\n", + "Requirement already satisfied: sniffio>=1.1 in /opt/bitnami/miniconda/lib/python3.11/site-packages (from anyio<5,>=3.6.2->starlette->ray[client,default,serve]) (1.3.1)\n", + "Requirement already satisfied: pycparser in /opt/bitnami/miniconda/lib/python3.11/site-packages (from cffi>=1.14->cryptography<46,>=43.0.0->mlflow) (2.21)\n", + "Collecting google-auth~=2.0 (from databricks-sdk<1,>=0.20.0->mlflow-skinny==3.3.1->mlflow)\n", + " Downloading google_auth-2.40.3-py2.py3-none-any.whl.metadata (6.2 kB)\n", + "Collecting gitdb<5,>=4.0.1 (from gitpython<4,>=3.1.9->mlflow-skinny==3.3.1->mlflow)\n", + " Downloading gitdb-4.0.12-py3-none-any.whl.metadata (1.2 kB)\n", + "Collecting googleapis-common-protos<2.0.0,>=1.56.2 (from google-api-core<3.0.0,>=1.0.0->opencensus->ray[client,default,serve])\n", + " Downloading googleapis_common_protos-1.70.0-py3-none-any.whl.metadata (9.3 kB)\n", + "Collecting proto-plus<2.0.0,>=1.22.3 (from google-api-core<3.0.0,>=1.0.0->opencensus->ray[client,default,serve])\n", + " Downloading proto_plus-1.26.1-py3-none-any.whl.metadata (2.2 kB)\n", + "Collecting zipp>=3.20 (from importlib_metadata!=4.7.0,<9,>=3.7.0->mlflow-skinny==3.3.1->mlflow)\n", + " Downloading zipp-3.23.0-py3-none-any.whl.metadata (3.6 kB)\n", + "Collecting opentelemetry-semantic-conventions==0.57b0 (from opentelemetry-sdk<3,>=1.9.0->mlflow-skinny==3.3.1->mlflow)\n", + " Downloading opentelemetry_semantic_conventions-0.57b0-py3-none-any.whl.metadata (2.4 kB)\n", + "Collecting markdown-it-py>=2.2.0 (from rich->keras>=3.10.0->tensorflow)\n", + " Downloading markdown_it_py-4.0.0-py3-none-any.whl.metadata (7.3 kB)\n", + "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /opt/bitnami/miniconda/lib/python3.11/site-packages (from rich->keras>=3.10.0->tensorflow) (2.18.0)\n", + "Collecting smmap<6,>=3.0.1 (from gitdb<5,>=4.0.1->gitpython<4,>=3.1.9->mlflow-skinny==3.3.1->mlflow)\n", + " Downloading smmap-5.0.2-py3-none-any.whl.metadata (4.3 kB)\n", + "Collecting cachetools<7,>=5.0.0 (from mlflow-skinny==3.3.1->mlflow)\n", + " Downloading cachetools-5.5.2-py3-none-any.whl.metadata (5.4 kB)\n", + "Collecting pyasn1-modules>=0.2.1 (from google-auth~=2.0->databricks-sdk<1,>=0.20.0->mlflow-skinny==3.3.1->mlflow)\n", + " Downloading pyasn1_modules-0.4.2-py3-none-any.whl.metadata (3.5 kB)\n", + "Collecting rsa<5,>=3.1.4 (from google-auth~=2.0->databricks-sdk<1,>=0.20.0->mlflow-skinny==3.3.1->mlflow)\n", + " Downloading rsa-4.9.1-py3-none-any.whl.metadata (5.6 kB)\n", + "Collecting mdurl~=0.1 (from markdown-it-py>=2.2.0->rich->keras>=3.10.0->tensorflow)\n", + " Downloading mdurl-0.1.2-py3-none-any.whl.metadata (1.6 kB)\n", + "Collecting pyasn1<0.7.0,>=0.6.1 (from 
pyasn1-modules>=0.2.1->google-auth~=2.0->databricks-sdk<1,>=0.20.0->mlflow-skinny==3.3.1->mlflow)\n", + " Downloading pyasn1-0.6.1-py3-none-any.whl.metadata (8.4 kB)\n", + "Downloading numpy-2.3.2-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl (16.9 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m16.9/16.9 MB\u001b[0m \u001b[31m119.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n", + "\u001b[?25hDownloading mlflow-3.3.1-py3-none-any.whl (26.4 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m26.4/26.4 MB\u001b[0m \u001b[31m87.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m:00:01\u001b[0m00:01\u001b[0m\n", + "\u001b[?25hDownloading mlflow_skinny-3.3.1-py3-none-any.whl (2.0 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.0/2.0 MB\u001b[0m \u001b[31m104.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading mlflow_tracing-3.3.1-py3-none-any.whl (1.1 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.1/1.1 MB\u001b[0m \u001b[31m67.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading tensorflow-2.20.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (620.6 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m620.6/620.6 MB\u001b[0m \u001b[31m2.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m:00:01\u001b[0m00:01\u001b[0m\n", + "\u001b[?25hDownloading absl_py-2.3.1-py3-none-any.whl (135 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m135.8/135.8 kB\u001b[0m \u001b[31m16.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading astunparse-1.6.3-py2.py3-none-any.whl (12 kB)\n", + "Downloading cryptography-45.0.6-cp311-abi3-manylinux_2_34_x86_64.whl (4.5 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m4.5/4.5 MB\u001b[0m \u001b[31m121.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m\n", + "\u001b[?25hDownloading fastapi-0.116.1-py3-none-any.whl (95 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m95.6/95.6 kB\u001b[0m \u001b[31m10.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading flask-3.1.2-py3-none-any.whl (103 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m103.3/103.3 kB\u001b[0m \u001b[31m12.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading flatbuffers-25.2.10-py2.py3-none-any.whl (30 kB)\n", + "Downloading gast-0.6.0-py3-none-any.whl (21 kB)\n", + "Downloading google_pasta-0.2.0-py3-none-any.whl (57 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m57.5/57.5 kB\u001b[0m \u001b[31m7.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading graphene-3.4.3-py2.py3-none-any.whl (114 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m114.9/114.9 kB\u001b[0m \u001b[31m16.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading grpcio-1.74.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (6.2 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.2/6.2 MB\u001b[0m \u001b[31m114.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n", + "\u001b[?25hDownloading 
gunicorn-23.0.0-py3-none-any.whl (85 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m85.0/85.0 kB\u001b[0m \u001b[31m11.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading h5py-3.14.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (4.5 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m4.5/4.5 MB\u001b[0m \u001b[31m128.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m\n", + "\u001b[?25hDownloading keras-3.11.3-py3-none-any.whl (1.4 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.4/1.4 MB\u001b[0m \u001b[31m81.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading libclang-18.1.1-py2.py3-none-manylinux2010_x86_64.whl (24.5 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m24.5/24.5 MB\u001b[0m \u001b[31m102.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n", + "\u001b[?25hDownloading matplotlib-3.10.5-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (8.7 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m8.7/8.7 MB\u001b[0m \u001b[31m133.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n", + "\u001b[?25hDownloading ml_dtypes-0.5.3-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl (4.9 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m4.9/4.9 MB\u001b[0m \u001b[31m108.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m\n", + "\u001b[?25hDownloading opt_einsum-3.4.0-py3-none-any.whl (71 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m71.9/71.9 kB\u001b[0m \u001b[31m10.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading pandas-2.3.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (12.4 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m12.4/12.4 MB\u001b[0m \u001b[31m138.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m0:01\u001b[0m\n", + "\u001b[?25hDownloading py_spy-0.4.1-py2.py3-none-manylinux_2_5_x86_64.manylinux1_x86_64.whl (2.8 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.8/2.8 MB\u001b[0m \u001b[31m116.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading pyarrow-21.0.0-cp311-cp311-manylinux_2_28_x86_64.whl (42.8 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m42.8/42.8 MB\u001b[0m \u001b[31m44.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m:00:01\u001b[0m00:01\u001b[0m\n", + "\u001b[?25hDownloading scikit_learn-1.7.1-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (9.7 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m9.7/9.7 MB\u001b[0m \u001b[31m144.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n", + "\u001b[?25hDownloading scipy-1.16.1-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (35.4 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m35.4/35.4 MB\u001b[0m \u001b[31m56.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m:00:01\u001b[0m00:01\u001b[0m\n", + "\u001b[?25hDownloading starlette-0.47.3-py3-none-any.whl (72 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m73.0/73.0 
kB\u001b[0m \u001b[31m10.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading tensorboard-2.20.0-py3-none-any.whl (5.5 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.5/5.5 MB\u001b[0m \u001b[31m121.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m\n", + "\u001b[?25hDownloading termcolor-3.1.0-py3-none-any.whl (7.7 kB)\n", + "Downloading virtualenv-20.34.0-py3-none-any.whl (6.0 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.0/6.0 MB\u001b[0m \u001b[31m125.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m\n", + "\u001b[?25hDownloading wrapt-1.17.3-cp311-cp311-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl (82 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m82.4/82.4 kB\u001b[0m \u001b[31m11.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading aiohttp_cors-0.8.1-py3-none-any.whl (25 kB)\n", + "Downloading colorful-0.5.7-py2.py3-none-any.whl (201 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m201.5/201.5 kB\u001b[0m \u001b[31m24.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading opencensus-0.11.4-py2.py3-none-any.whl (128 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m128.2/128.2 kB\u001b[0m \u001b[31m18.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading smart_open-7.3.0.post1-py3-none-any.whl (61 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m61.9/61.9 kB\u001b[0m \u001b[31m9.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading watchfiles-1.1.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (453 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m453.1/453.1 kB\u001b[0m \u001b[31m49.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading blinker-1.9.0-py3-none-any.whl (8.5 kB)\n", + "Downloading cloudpickle-3.1.1-py3-none-any.whl (20 kB)\n", + "Downloading contourpy-1.3.3-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl (355 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m355.2/355.2 kB\u001b[0m \u001b[31m40.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading cycler-0.12.1-py3-none-any.whl (8.3 kB)\n", + "Downloading databricks_sdk-0.64.0-py3-none-any.whl (703 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m703.4/703.4 kB\u001b[0m \u001b[31m52.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading distlib-0.4.0-py2.py3-none-any.whl (469 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m469.0/469.0 kB\u001b[0m \u001b[31m44.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading fonttools-4.59.1-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (5.0 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.0/5.0 MB\u001b[0m \u001b[31m118.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m\n", + "\u001b[?25hDownloading gitpython-3.1.45-py3-none-any.whl (208 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m208.2/208.2 kB\u001b[0m \u001b[31m25.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + 
"\u001b[?25hDownloading google_api_core-2.25.1-py3-none-any.whl (160 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m160.8/160.8 kB\u001b[0m \u001b[31m21.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading graphql_core-3.2.6-py3-none-any.whl (203 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m203.4/203.4 kB\u001b[0m \u001b[31m28.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading graphql_relay-3.2.0-py3-none-any.whl (16 kB)\n", + "Downloading httptools-0.6.4-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (459 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m459.8/459.8 kB\u001b[0m \u001b[31m44.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading importlib_metadata-8.7.0-py3-none-any.whl (27 kB)\n", + "Downloading itsdangerous-2.2.0-py3-none-any.whl (16 kB)\n", + "Downloading joblib-1.5.1-py3-none-any.whl (307 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m307.7/307.7 kB\u001b[0m \u001b[31m33.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading kiwisolver-1.4.9-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (1.4 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.4/1.4 MB\u001b[0m \u001b[31m80.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading markdown-3.8.2-py3-none-any.whl (106 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m106.8/106.8 kB\u001b[0m \u001b[31m14.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading opencensus_context-0.1.3-py2.py3-none-any.whl (5.1 kB)\n", + "Downloading opentelemetry_api-1.36.0-py3-none-any.whl (65 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m65.6/65.6 kB\u001b[0m \u001b[31m8.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading opentelemetry_sdk-1.36.0-py3-none-any.whl (119 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m120.0/120.0 kB\u001b[0m \u001b[31m16.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading opentelemetry_semantic_conventions-0.57b0-py3-none-any.whl (201 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m201.6/201.6 kB\u001b[0m \u001b[31m23.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading pillow-11.3.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl (6.6 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.6/6.6 MB\u001b[0m \u001b[31m136.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m\n", + "\u001b[?25hDownloading pyparsing-3.2.3-py3-none-any.whl (111 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m111.1/111.1 kB\u001b[0m \u001b[31m14.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading python_dotenv-1.1.1-py3-none-any.whl (20 kB)\n", + "Downloading pytz-2025.2-py2.py3-none-any.whl (509 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m509.2/509.2 kB\u001b[0m \u001b[31m52.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading sqlparse-0.5.3-py3-none-any.whl (44 kB)\n", + "\u001b[2K 
\u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m44.4/44.4 kB\u001b[0m \u001b[31m6.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading tensorboard_data_server-0.7.2-py3-none-manylinux_2_31_x86_64.whl (6.6 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.6/6.6 MB\u001b[0m \u001b[31m133.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m\n", + "\u001b[?25hDownloading threadpoolctl-3.6.0-py3-none-any.whl (18 kB)\n", + "Downloading tzdata-2025.2-py2.py3-none-any.whl (347 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m347.8/347.8 kB\u001b[0m \u001b[31m40.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading uvicorn-0.35.0-py3-none-any.whl (66 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m66.4/66.4 kB\u001b[0m \u001b[31m9.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading uvloop-0.21.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (4.0 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m4.0/4.0 MB\u001b[0m \u001b[31m135.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading websockets-15.0.1-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (182 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m182.3/182.3 kB\u001b[0m \u001b[31m24.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading werkzeug-3.1.3-py3-none-any.whl (224 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m224.5/224.5 kB\u001b[0m \u001b[31m29.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading namex-0.1.0-py3-none-any.whl (5.9 kB)\n", + "Downloading optree-0.17.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl (402 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m402.0/402.0 kB\u001b[0m \u001b[31m43.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading rich-14.1.0-py3-none-any.whl (243 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m243.4/243.4 kB\u001b[0m \u001b[31m31.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading gitdb-4.0.12-py3-none-any.whl (62 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m62.8/62.8 kB\u001b[0m \u001b[31m8.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading google_auth-2.40.3-py2.py3-none-any.whl (216 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m216.1/216.1 kB\u001b[0m \u001b[31m25.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading cachetools-5.5.2-py3-none-any.whl (10 kB)\n", + "Downloading googleapis_common_protos-1.70.0-py3-none-any.whl (294 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m294.5/294.5 kB\u001b[0m \u001b[31m39.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading markdown_it_py-4.0.0-py3-none-any.whl (87 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m87.3/87.3 kB\u001b[0m \u001b[31m12.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading proto_plus-1.26.1-py3-none-any.whl (50 kB)\n", + "\u001b[2K 
\u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m50.2/50.2 kB\u001b[0m \u001b[31m7.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading zipp-3.23.0-py3-none-any.whl (10 kB)\n", + "Downloading mdurl-0.1.2-py3-none-any.whl (10.0 kB)\n", + "Downloading pyasn1_modules-0.4.2-py3-none-any.whl (181 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m181.3/181.3 kB\u001b[0m \u001b[31m23.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading rsa-4.9.1-py3-none-any.whl (34 kB)\n", + "Downloading smmap-5.0.2-py3-none-any.whl (24 kB)\n", + "Downloading pyasn1-0.6.1-py3-none-any.whl (83 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m83.1/83.1 kB\u001b[0m \u001b[31m10.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hInstalling collected packages: pytz, py-spy, opencensus-context, namex, libclang, flatbuffers, distlib, colorful, zipp, wrapt, werkzeug, websockets, virtualenv, uvloop, uvicorn, tzdata, threadpoolctl, termcolor, tensorboard-data-server, sqlparse, smmap, python-dotenv, pyparsing, pyasn1, pyarrow, proto-plus, pillow, optree, opt_einsum, numpy, mdurl, markdown, kiwisolver, joblib, itsdangerous, httptools, gunicorn, grpcio, graphql-core, googleapis-common-protos, google_pasta, gast, fonttools, cycler, cloudpickle, cachetools, blinker, astunparse, absl-py, watchfiles, tensorboard, starlette, smart-open, scipy, rsa, pyasn1-modules, pandas, ml_dtypes, markdown-it-py, importlib_metadata, h5py, graphql-relay, gitdb, Flask, cryptography, contourpy, scikit-learn, rich, opentelemetry-api, matplotlib, graphene, google-auth, gitpython, fastapi, aiohttp-cors, opentelemetry-semantic-conventions, keras, google-api-core, databricks-sdk, tensorflow, opentelemetry-sdk, opencensus, mlflow-tracing, mlflow-skinny, mlflow\n", + "\u001b[33m WARNING: The script websockets is installed in '/tmp/models-cache/bin' which is not on PATH.\n", + " Consider adding this directory to PATH or, if you prefer to suppress this warning, use --no-warn-script-location.\u001b[0m\u001b[33m\n", + "\u001b[0m\u001b[33m WARNING: The script virtualenv is installed in '/tmp/models-cache/bin' which is not on PATH.\n", + " Consider adding this directory to PATH or, if you prefer to suppress this warning, use --no-warn-script-location.\u001b[0m\u001b[33m\n", + "\u001b[0m\u001b[33m WARNING: The script uvicorn is installed in '/tmp/models-cache/bin' which is not on PATH.\n", + " Consider adding this directory to PATH or, if you prefer to suppress this warning, use --no-warn-script-location.\u001b[0m\u001b[33m\n", + "\u001b[0m\u001b[33m WARNING: The script sqlformat is installed in '/tmp/models-cache/bin' which is not on PATH.\n", + " Consider adding this directory to PATH or, if you prefer to suppress this warning, use --no-warn-script-location.\u001b[0m\u001b[33m\n", + "\u001b[0m\u001b[33m WARNING: The script dotenv is installed in '/tmp/models-cache/bin' which is not on PATH.\n", + " Consider adding this directory to PATH or, if you prefer to suppress this warning, use --no-warn-script-location.\u001b[0m\u001b[33m\n", + "\u001b[0m\u001b[33m WARNING: The scripts f2py and numpy-config are installed in '/tmp/models-cache/bin' which is not on PATH.\n", + " Consider adding this directory to PATH or, if you prefer to suppress this warning, use --no-warn-script-location.\u001b[0m\u001b[33m\n", + "\u001b[0m\u001b[33m WARNING: The script markdown_py is installed in '/tmp/models-cache/bin' 
which is not on PATH.\n", + " Consider adding this directory to PATH or, if you prefer to suppress this warning, use --no-warn-script-location.\u001b[0m\u001b[33m\n", + "\u001b[0m\u001b[33m WARNING: The script gunicorn is installed in '/tmp/models-cache/bin' which is not on PATH.\n", + " Consider adding this directory to PATH or, if you prefer to suppress this warning, use --no-warn-script-location.\u001b[0m\u001b[33m\n", + "\u001b[0m\u001b[33m WARNING: The scripts fonttools, pyftmerge, pyftsubset and ttx are installed in '/tmp/models-cache/bin' which is not on PATH.\n", + " Consider adding this directory to PATH or, if you prefer to suppress this warning, use --no-warn-script-location.\u001b[0m\u001b[33m\n", + "\u001b[0m\u001b[33m WARNING: The script watchfiles is installed in '/tmp/models-cache/bin' which is not on PATH.\n", + " Consider adding this directory to PATH or, if you prefer to suppress this warning, use --no-warn-script-location.\u001b[0m\u001b[33m\n", + "\u001b[0m\u001b[33m WARNING: The script tensorboard is installed in '/tmp/models-cache/bin' which is not on PATH.\n", + " Consider adding this directory to PATH or, if you prefer to suppress this warning, use --no-warn-script-location.\u001b[0m\u001b[33m\n", + "\u001b[0m\u001b[33m WARNING: The scripts pyrsa-decrypt, pyrsa-encrypt, pyrsa-keygen, pyrsa-priv2pub, pyrsa-sign and pyrsa-verify are installed in '/tmp/models-cache/bin' which is not on PATH.\n", + " Consider adding this directory to PATH or, if you prefer to suppress this warning, use --no-warn-script-location.\u001b[0m\u001b[33m\n", + "\u001b[0m\u001b[33m WARNING: The script markdown-it is installed in '/tmp/models-cache/bin' which is not on PATH.\n", + " Consider adding this directory to PATH or, if you prefer to suppress this warning, use --no-warn-script-location.\u001b[0m\u001b[33m\n", + "\u001b[0m\u001b[33m WARNING: The script flask is installed in '/tmp/models-cache/bin' which is not on PATH.\n", + " Consider adding this directory to PATH or, if you prefer to suppress this warning, use --no-warn-script-location.\u001b[0m\u001b[33m\n", + "\u001b[0m\u001b[33m WARNING: The script fastapi is installed in '/tmp/models-cache/bin' which is not on PATH.\n", + " Consider adding this directory to PATH or, if you prefer to suppress this warning, use --no-warn-script-location.\u001b[0m\u001b[33m\n", + "\u001b[0m\u001b[33m WARNING: The scripts import_pb_to_tensorboard, saved_model_cli, tensorboard, tf_upgrade_v2, tflite_convert and toco are installed in '/tmp/models-cache/bin' which is not on PATH.\n", + " Consider adding this directory to PATH or, if you prefer to suppress this warning, use --no-warn-script-location.\u001b[0m\u001b[33m\n", + "\u001b[0m\u001b[33m WARNING: The script mlflow is installed in '/tmp/models-cache/bin' which is not on PATH.\n", + " Consider adding this directory to PATH or, if you prefer to suppress this warning, use --no-warn-script-location.\u001b[0m\u001b[33m\n", + "\u001b[0m\u001b[33m WARNING: The script mlflow is installed in '/tmp/models-cache/bin' which is not on PATH.\n", + " Consider adding this directory to PATH or, if you prefer to suppress this warning, use --no-warn-script-location.\u001b[0m\u001b[33m\n", + "\u001b[0m\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. 
This behaviour is the source of the following dependency conflicts.\n", + "pyopenssl 24.1.0 requires cryptography<43,>=41.0.5, but you have cryptography 45.0.6 which is incompatible.\u001b[0m\u001b[31m\n", + "\u001b[0mSuccessfully installed Flask-3.1.2 absl-py-2.3.1 aiohttp-cors-0.8.1 astunparse-1.6.3 blinker-1.9.0 cachetools-5.5.2 cloudpickle-3.1.1 colorful-0.5.7 contourpy-1.3.3 cryptography-45.0.6 cycler-0.12.1 databricks-sdk-0.64.0 distlib-0.4.0 fastapi-0.116.1 flatbuffers-25.2.10 fonttools-4.59.1 gast-0.6.0 gitdb-4.0.12 gitpython-3.1.45 google-api-core-2.25.1 google-auth-2.40.3 google_pasta-0.2.0 googleapis-common-protos-1.70.0 graphene-3.4.3 graphql-core-3.2.6 graphql-relay-3.2.0 grpcio-1.74.0 gunicorn-23.0.0 h5py-3.14.0 httptools-0.6.4 importlib_metadata-8.7.0 itsdangerous-2.2.0 joblib-1.5.1 keras-3.11.3 kiwisolver-1.4.9 libclang-18.1.1 markdown-3.8.2 markdown-it-py-4.0.0 matplotlib-3.10.5 mdurl-0.1.2 ml_dtypes-0.5.3 mlflow-3.3.1 mlflow-skinny-3.3.1 mlflow-tracing-3.3.1 namex-0.1.0 numpy-2.3.2 opencensus-0.11.4 opencensus-context-0.1.3 opentelemetry-api-1.36.0 opentelemetry-sdk-1.36.0 opentelemetry-semantic-conventions-0.57b0 opt_einsum-3.4.0 optree-0.17.0 pandas-2.3.2 pillow-11.3.0 proto-plus-1.26.1 py-spy-0.4.1 pyarrow-21.0.0 pyasn1-0.6.1 pyasn1-modules-0.4.2 pyparsing-3.2.3 python-dotenv-1.1.1 pytz-2025.2 rich-14.1.0 rsa-4.9.1 scikit-learn-1.7.1 scipy-1.16.1 smart-open-7.3.0.post1 smmap-5.0.2 sqlparse-0.5.3 starlette-0.47.3 tensorboard-2.20.0 tensorboard-data-server-0.7.2 tensorflow-2.20.0 termcolor-3.1.0 threadpoolctl-3.6.0 tzdata-2025.2 uvicorn-0.35.0 uvloop-0.21.0 virtualenv-20.34.0 watchfiles-1.1.0 websockets-15.0.1 werkzeug-3.1.3 wrapt-1.17.3 zipp-3.23.0\n" + ] + } + ], + "source": [ + "!pip install numpy mlflow tensorflow \"ray[serve,default,client]\"" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "e8deec5c-6239-4087-8a4d-27c091e9fc3c", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2025-08-27 12:00:23.577265: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.\n", + "2025-08-27 12:00:23.626853: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n", + "To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", + "2025-08-27 12:00:25.157402: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.\n", + "2025/08/27 12:00:27 INFO mlflow.tracking.fluent: Experiment with name 'Diabetes_Prediction_TensorFlow' does not exist. Creating a new experiment.\n", + "2025/08/27 12:00:27 WARNING mlflow.tracking.context.registry: Encountered unexpected error during resolving tags: 'getpwuid(): uid not found: 1001'\n", + "2025/08/27 12:00:27 WARNING mlflow.utils.git_utils: Failed to import Git (the Git executable is probably not on your PATH), so Git SHA is not available. 
Error: Failed to initialize: Bad git executable.\n", + "The git executable must be specified in one of the following ways:\n", + " - be included in your $PATH\n", + " - be set via $GIT_PYTHON_GIT_EXECUTABLE\n", + " - explicitly set via git.refresh()\n", + "\n", + "All git commands will error until this is rectified.\n", + "\n", + "This initial message can be silenced or aggravated in the future by setting the\n", + "$GIT_PYTHON_REFRESH environment variable. Use one of the following values:\n", + " - quiet|q|silence|s|silent|none|n|0: for no message or exception\n", + " - warn|w|warning|log|l|1: for a warning message (logging level CRITICAL, displayed by default)\n", + " - error|e|exception|raise|r|2: for a raised exception\n", + "\n", + "Example:\n", + " export GIT_PYTHON_REFRESH=quiet\n", + "\n", + "/tmp/models-cache/lib/python3.11/site-packages/keras/src/layers/core/dense.py:92: UserWarning: Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.\n", + " super().__init__(activity_regularizer=activity_regularizer, **kwargs)\n", + "2025-08-27 12:00:29.352582: E external/local_xla/xla/stream_executor/cuda/cuda_platform.cc:51] failed call to cuInit: INTERNAL: CUDA error: Failed call to cuInit: UNKNOWN ERROR (303)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "🏃 View run classy-wren-479 at: http://ai-starter-kit-mlflow:5000/#/experiments/1/runs/7ca28b8521a049dc8a014d4235909db3\n", + "🧪 View experiment at: http://ai-starter-kit-mlflow:5000/#/experiments/1\n" + ] + } + ], + "source": [ + "import mlflow\n", + "import mlflow.tensorflow\n", + "import numpy as np\n", + "\n", + "from sklearn.datasets import load_diabetes\n", + "from sklearn.model_selection import train_test_split\n", + "from sklearn.preprocessing import StandardScaler\n", + "\n", + "import tensorflow as tf\n", + "from tensorflow import keras\n", + "from tensorflow.keras import layers\n", + "\n", + "# -------------------\n", + "# Prepare Data\n", + "# -------------------\n", + "data = load_diabetes()\n", + "X = data.data\n", + "y = data.target.reshape(-1, 1)\n", + "\n", + "X_train, X_test, y_train, y_test = train_test_split(\n", + " X, y, test_size=0.2, random_state=42\n", + ")\n", + "\n", + "# -------------------\n", + "# Define Model\n", + "# -------------------\n", + "def create_model(input_dim):\n", + " model = keras.Sequential([\n", + " layers.Dense(64, activation=\"relu\", input_shape=(input_dim,)),\n", + " layers.Dense(32, activation=\"relu\"),\n", + " layers.Dense(1) # regression output\n", + " ])\n", + " model.compile(optimizer=\"adam\", loss=\"mse\", metrics=[\"mse\"])\n", + " return model\n", + "\n", + "input_dim = X_train.shape[1]\n", + "epochs = 50\n", + "batch_size = 32\n", + "\n", + "mlflow.set_experiment(\"Diabetes_Prediction_TensorFlow\")\n", + "\n", + "with mlflow.start_run():\n", + " mlflow.log_param(\"epochs\", epochs)\n", + " mlflow.log_param(\"batch_size\", batch_size)\n", + " mlflow.log_param(\"optimizer\", \"adam\")\n", + " mlflow.log_param(\"loss_fn\", \"mse\")\n", + " mlflow.log_param(\"input_features\", input_dim)\n", + "\n", + " model = create_model(input_dim)\n", + "\n", + " # Train\n", + " history = model.fit(\n", + " X_train, y_train,\n", + " validation_data=(X_test, y_test),\n", + " epochs=epochs,\n", + " batch_size=batch_size,\n", + " verbose=0\n", + " )\n", + "\n", + " # Evaluation\n", + " loss, mse = model.evaluate(X_test, y_test, verbose=0)\n", + " rmse = 
np.sqrt(mse)\n", + "\n", + " mlflow.log_metric(\"mse\", mse)\n", + " mlflow.log_metric(\"rmse\", rmse)" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "556ae0b2-6fa6-4271-9e7d-553cd7056aab", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/opt/bitnami/miniconda/lib/python3.11/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", + " from .autonotebook import tqdm as notebook_tqdm\n", + "2025-08-27 12:00:35,162\tINFO util.py:154 -- Missing packages: ['ipywidgets']. Run `pip install -U ipywidgets`, then restart the notebook server for rich notebook output.\n", + "2025-08-27 12:00:35,747\tINFO worker.py:1514 -- Using address ray://ai-starter-kit-kuberay-head-svc:10001 set in the environment variable RAY_ADDRESS\n", + "2025-08-27 12:00:35,748\tINFO client_builder.py:244 -- Passing the following kwargs to ray.init() on the server: log_to_driver\n", + "SIGTERM handler is not set because current thread is not the main thread.\n", + "2025-08-27 12:00:40,043\tWARNING utils.py:1591 -- Python patch version mismatch: The cluster was started with:\n", + " Ray: 2.41.0\n", + " Python: 3.11.11\n", + "This process on Ray Client was started with:\n", + " Ray: 2.41.0\n", + " Python: 3.11.9\n", + "\n", + "\u001b[36m(ProxyActor pid=2818, ip=10.23.2.212)\u001b[0m INFO 2025-08-27 05:00:48,855 proxy 10.23.2.212 -- Proxy starting on node 08fb6ed7e6841998dfb9cc9f99c999cd4317663f09d65db617dbd3dc (HTTP port: 8000).\n", + "\u001b[36m(ProxyActor pid=2818)\u001b[0m INFO 2025-08-27 05:00:48,855 proxy 10.23.2.212 -- Proxy starting on node 08fb6ed7e6841998dfb9cc9f99c999cd4317663f09d65db617dbd3dc (HTTP port: 8000).\n", + "INFO 2025-08-27 12:00:49,116 serve 124 -- Started Serve in namespace \"serve\".\n", + "\u001b[36m(ProxyActor pid=2818, ip=10.23.2.212)\u001b[0m INFO 2025-08-27 05:00:49,087 proxy 10.23.2.212 -- Got updated endpoints: {}.\n", + "\u001b[36m(ProxyActor pid=2818)\u001b[0m INFO 2025-08-27 05:00:49,087 proxy 10.23.2.212 -- Got updated endpoints: {}.\n", + "\u001b[36m(ServeController pid=2739, ip=10.23.2.212)\u001b[0m INFO 2025-08-27 05:00:49,258 controller 2739 -- Deploying new version of Deployment(name='TensorFlowMLflowDeployment', app='default') (initial target replicas: 1).\n", + "\u001b[36m(ServeController pid=2739)\u001b[0m INFO 2025-08-27 05:00:49,258 controller 2739 -- Deploying new version of Deployment(name='TensorFlowMLflowDeployment', app='default') (initial target replicas: 1).\n", + "\u001b[36m(ProxyActor pid=2818, ip=10.23.2.212)\u001b[0m INFO 2025-08-27 05:00:49,262 proxy 10.23.2.212 -- Got updated endpoints: {Deployment(name='TensorFlowMLflowDeployment', app='default'): EndpointInfo(route='/predict', app_is_cross_language=False)}.\n", + "\u001b[36m(ProxyActor pid=2818)\u001b[0m INFO 2025-08-27 05:00:49,262 proxy 10.23.2.212 -- Got updated endpoints: {Deployment(name='TensorFlowMLflowDeployment', app='default'): EndpointInfo(route='/predict', app_is_cross_language=False)}.\n", + "\u001b[36m(ServeController pid=2739, ip=10.23.2.212)\u001b[0m INFO 2025-08-27 05:00:49,363 controller 2739 -- Adding 1 replica to Deployment(name='TensorFlowMLflowDeployment', app='default').\n", + "\u001b[36m(ServeController pid=2739)\u001b[0m INFO 2025-08-27 05:00:49,363 controller 2739 -- Adding 1 replica to Deployment(name='TensorFlowMLflowDeployment', app='default').\n", + "\u001b[36m(ServeController pid=2739, 
ip=10.23.2.212)\u001b[0m WARNING 2025-08-27 05:01:19,429 controller 2739 -- Deployment 'TensorFlowMLflowDeployment' in application 'default' has 1 replicas that have taken more than 30s to be scheduled. This may be due to waiting for the cluster to auto-scale or for a runtime environment to be installed. Resources required for each replica: {\"CPU\": 1}, total resources available: {\"CPU\": 7.0}. Use `ray status` for more details.\n", + "\u001b[36m(ServeController pid=2739)\u001b[0m WARNING 2025-08-27 05:01:19,429 controller 2739 -- Deployment 'TensorFlowMLflowDeployment' in application 'default' has 1 replicas that have taken more than 30s to be scheduled. This may be due to waiting for the cluster to auto-scale or for a runtime environment to be installed. Resources required for each replica: {\"CPU\": 1}, total resources available: {\"CPU\": 7.0}. Use `ray status` for more details.\n", + "\u001b[36m(ServeController pid=2739, ip=10.23.2.212)\u001b[0m WARNING 2025-08-27 05:01:49,531 controller 2739 -- Deployment 'TensorFlowMLflowDeployment' in application 'default' has 1 replicas that have taken more than 30s to be scheduled. This may be due to waiting for the cluster to auto-scale or for a runtime environment to be installed. Resources required for each replica: {\"CPU\": 1}, total resources available: {\"CPU\": 7.0}. Use `ray status` for more details.\n", + "\u001b[36m(ServeController pid=2739)\u001b[0m WARNING 2025-08-27 05:01:49,531 controller 2739 -- Deployment 'TensorFlowMLflowDeployment' in application 'default' has 1 replicas that have taken more than 30s to be scheduled. This may be due to waiting for the cluster to auto-scale or for a runtime environment to be installed. Resources required for each replica: {\"CPU\": 1}, total resources available: {\"CPU\": 7.0}. 
Use `ray status` for more details.\n", + "\u001b[36m(ServeReplica:default:TensorFlowMLflowDeployment pid=2265, ip=10.23.2.212)\u001b[0m 2025-08-27 05:02:02.743930: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n", + "\u001b[36m(ServeReplica:default:TensorFlowMLflowDeployment pid=2265, ip=10.23.2.212)\u001b[0m To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", + "\u001b[36m(ServeReplica:default:TensorFlowMLflowDeployment pid=2265)\u001b[0m 2025-08-27 05:02:02.743930: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n", + "\u001b[36m(ServeReplica:default:TensorFlowMLflowDeployment pid=2265)\u001b[0m To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", + "\u001b[36m(ServeReplica:default:TensorFlowMLflowDeployment pid=2265)\u001b[0m 2025-08-27 05:02:07.748054: E external/local_xla/xla/stream_executor/cuda/cuda_platform.cc:51] failed call to cuInit: INTERNAL: CUDA error: Failed call to cuInit: UNKNOWN ERROR (303)\n", + "\u001b[36m(ServeReplica:default:TensorFlowMLflowDeployment pid=2265, ip=10.23.2.212)\u001b[0m 2025-08-27 05:02:07.748054: E external/local_xla/xla/stream_executor/cuda/cuda_platform.cc:51] failed call to cuInit: INTERNAL: CUDA error: Failed call to cuInit: UNKNOWN ERROR (303)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[36m(ServeReplica:default:TensorFlowMLflowDeployment pid=2265)\u001b[0m Loading model...\n", + "\u001b[36m(ServeReplica:default:TensorFlowMLflowDeployment pid=2265)\u001b[0m Model loaded successfully.\n", + "\u001b[36m(ServeReplica:default:TensorFlowMLflowDeployment pid=2265, ip=10.23.2.212)\u001b[0m Loading model...\n", + "\u001b[36m(ServeReplica:default:TensorFlowMLflowDeployment pid=2265, ip=10.23.2.212)\u001b[0m Model loaded successfully.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO 2025-08-27 12:02:08,716 serve 124 -- Application 'default' is ready at http://127.0.0.1:8000/predict.\n", + "INFO 2025-08-27 12:02:08,718 serve 124 -- Deployed app 'default' successfully.\n", + "\u001b[36m(ServeReplica:default:TensorFlowMLflowDeployment pid=2265)\u001b[0m INFO 2025-08-27 05:02:08,967 default_TensorFlowMLflowDeployment 0zpbmyix 63d6b1b4-67f7-4a5c-ad92-6d7989e3cf5a -- CALL __call__ OK 150.7ms\n", + "\u001b[36m(ServeReplica:default:TensorFlowMLflowDeployment pid=2265, ip=10.23.2.212)\u001b[0m INFO 2025-08-27 05:02:08,967 default_TensorFlowMLflowDeployment 0zpbmyix 63d6b1b4-67f7-4a5c-ad92-6d7989e3cf5a -- CALL __call__ OK 150.7ms\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 85ms/step\n", + "\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 85ms/step3.2.212)\u001b[0m \n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\u001b[36m(ProxyActor pid=1775)\u001b[0m INFO 2025-08-27 05:02:09,833 proxy 10.23.2.213 -- Proxy starting on node 45179940bb5a43115519b525607191a8f1a059b70c5c61c14cee8a0f (HTTP port: 8000).\n", + "\u001b[36m(ProxyActor pid=1775, ip=10.23.2.212)\u001b[0m INFO 2025-08-27 05:02:09,833 proxy 10.23.2.213 
-- Proxy starting on node 45179940bb5a43115519b525607191a8f1a059b70c5c61c14cee8a0f (HTTP port: 8000).\n", + "\u001b[36m(ProxyActor pid=1775)\u001b[0m INFO 2025-08-27 05:02:09,992 proxy 10.23.2.213 -- Got updated endpoints: {Deployment(name='TensorFlowMLflowDeployment', app='default'): EndpointInfo(route='/predict', app_is_cross_language=False)}.\n", + "\u001b[36m(ProxyActor pid=1775, ip=10.23.2.212)\u001b[0m INFO 2025-08-27 05:02:09,992 proxy 10.23.2.213 -- Got updated endpoints: {Deployment(name='TensorFlowMLflowDeployment', app='default'): EndpointInfo(route='/predict', app_is_cross_language=False)}.\n" + ] + } + ], + "source": [ + "import numpy as np\n", + "import mlflow.tensorflow\n", + "import tensorflow as tf\n", + "from starlette.requests import Request\n", + "from typing import Dict\n", + "\n", + "from ray import serve\n", + "import ray\n", + "\n", + "\n", + "@serve.deployment(\n", + " ray_actor_options={\n", + " \"runtime_env\": {\n", + " \"pip\": [\"tensorflow\"]\n", + " },\n", + " }\n", + ")\n", + "class TensorFlowMLflowDeployment:\n", + " def __init__(self):\n", + " print(\"Loading model...\")\n", + " self.model = model\n", + " print(\"Model loaded successfully.\")\n", + "\n", + " async def __call__(self, input_data) -> Dict:\n", + " try:\n", + " if isinstance(input_data, Request):\n", + " data = await input_data.json()\n", + " else:\n", + " data = input_data\n", + " features = data.get(\"features\", None)\n", + " if features is None:\n", + " return {\"error\": \"Missing 'features' in request\"}\n", + " X = np.array(features).reshape(1, -1)\n", + "\n", + " # Make prediction with TensorFlow model\n", + " prediction = self.model.predict(X).flatten().tolist()\n", + "\n", + " return {\"prediction\": prediction}\n", + " except Exception as e:\n", + " return {\"error\": str(e)}\n", + "\n", + "\n", + "# Bind and deploy\n", + "app = TensorFlowMLflowDeployment.bind()\n", + "handle = serve.run(app, route_prefix=\"/predict\")" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "e264af73-6634-412b-9cbc-86b79c18e775", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "{'prediction': [179.46218872070312]}" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "json_data = {\"features\": [0.0380759, 0.0506801, 0.0616962, 0.0218724, -0.0442235, -0.0348208, -0.0434008, -0.00259226, 0.0199084, -0.0176461]}\n", + "response = handle.remote(json_data)\n", + "await response" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.9" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/ai/ai-starter-kit/helm-chart/ai-starter-kit/files/requirements.txt b/ai/ai-starter-kit/helm-chart/ai-starter-kit/files/requirements.txt new file mode 100644 index 000000000..ec119b4e1 --- /dev/null +++ b/ai/ai-starter-kit/helm-chart/ai-starter-kit/files/requirements.txt @@ -0,0 +1,10 @@ +transformers +torch +tensorflow +huggingface_hub +numpy +ipywidgets +mlflow==2.19.0 +ollama +panel +ray==2.41.0 diff --git a/ai/ai-starter-kit/helm-chart/ai-starter-kit/files/welcome.ipynb b/ai/ai-starter-kit/helm-chart/ai-starter-kit/files/welcome.ipynb new file mode 100644 index 
000000000..19bc4bdb6 --- /dev/null +++ b/ai/ai-starter-kit/helm-chart/ai-starter-kit/files/welcome.ipynb @@ -0,0 +1,104 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 5, + "id": "8048aa56-4549-4afa-b8b0-d111cc7020c3", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[[0.7645573019981384, 0.14142529666423798], [0.13549786806106567, 0.5999557375907898]]\n" + ] + } + ], + "source": [ + "# Requires transformers>=4.51.0\n", + "\n", + "import torch\n", + "import torch.nn.functional as F\n", + "\n", + "from torch import Tensor\n", + "from transformers import AutoTokenizer, AutoModel\n", + "\n", + "\n", + "def last_token_pool(last_hidden_states: Tensor,\n", + " attention_mask: Tensor) -> Tensor:\n", + " left_padding = (attention_mask[:, -1].sum() == attention_mask.shape[0])\n", + " if left_padding:\n", + " return last_hidden_states[:, -1]\n", + " else:\n", + " sequence_lengths = attention_mask.sum(dim=1) - 1\n", + " batch_size = last_hidden_states.shape[0]\n", + " return last_hidden_states[torch.arange(batch_size, device=last_hidden_states.device), sequence_lengths]\n", + "\n", + "\n", + "def get_detailed_instruct(task_description: str, query: str) -> str:\n", + " return f'Instruct: {task_description}\\nQuery:{query}'\n", + "\n", + "# Each query must come with a one-sentence instruction that describes the task\n", + "task = 'Given a web search query, retrieve relevant passages that answer the query'\n", + "\n", + "queries = [\n", + " get_detailed_instruct(task, 'What is the capital of China?'),\n", + " get_detailed_instruct(task, 'Explain gravity')\n", + "]\n", + "# No need to add instruction for retrieval documents\n", + "documents = [\n", + " \"The capital of China is Beijing.\",\n", + " \"Gravity is a force that attracts two bodies towards each other. 
It gives weight to physical objects and is responsible for the movement of planets around the sun.\"\n", + "]\n", + "input_texts = queries + documents\n", + "\n", + "tokenizer = AutoTokenizer.from_pretrained('Qwen/Qwen3-Embedding-0.6B', padding_side='left')\n", + "model = AutoModel.from_pretrained('Qwen/Qwen3-Embedding-0.6B')\n", + "\n", + "# We recommend enabling flash_attention_2 for better acceleration and memory saving.\n", + "# model = AutoModel.from_pretrained('Qwen/Qwen3-Embedding-0.6B', attn_implementation=\"flash_attention_2\", torch_dtype=torch.float16).cuda()\n", + "\n", + "max_length = 8192\n", + "\n", + "# Tokenize the input texts\n", + "batch_dict = tokenizer(\n", + " input_texts,\n", + " padding=True,\n", + " truncation=True,\n", + " max_length=max_length,\n", + " return_tensors=\"pt\",\n", + ")\n", + "batch_dict.to(model.device)\n", + "outputs = model(**batch_dict)\n", + "embeddings = last_token_pool(outputs.last_hidden_state, batch_dict['attention_mask'])\n", + "\n", + "# normalize embeddings\n", + "embeddings = F.normalize(embeddings, p=2, dim=1)\n", + "scores = (embeddings[:2] @ embeddings[2:].T)\n", + "print(scores.tolist())\n", + "# [[0.7645568251609802, 0.14142508804798126], [0.13549736142158508, 0.5999549627304077]]\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.10" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/ai/ai-starter-kit/helm-chart/ai-starter-kit/networkpolicy.yaml b/ai/ai-starter-kit/helm-chart/ai-starter-kit/networkpolicy.yaml new file mode 100644 index 000000000..d985d3ba8 --- /dev/null +++ b/ai/ai-starter-kit/helm-chart/ai-starter-kit/networkpolicy.yaml @@ -0,0 +1,46 @@ +apiVersion: networking.k8s.io/v1 +kind: NetworkPolicy +metadata: + name: allow-singleuser-egress-to-ray + namespace: default +spec: + podSelector: + matchLabels: + app: jupyterhub + component: singleuser-server + release: ai-starter-kit + policyTypes: ["Egress"] + egress: + - to: + - podSelector: + matchLabels: + ray.io/node-type: head + ports: + - protocol: TCP + port: 8265 + - protocol: TCP + port: 8000 + - protocol: TCP + port: 10001 +--- +apiVersion: networking.k8s.io/v1 +kind: NetworkPolicy +metadata: + name: allow-singleuser-egress-to-mlflow + namespace: default +spec: + podSelector: + matchLabels: + app: jupyterhub + component: singleuser-server + release: ai-starter-kit + policyTypes: ["Egress"] + egress: + - to: + - podSelector: + matchLabels: + app.kubernetes.io/name: mlflow + app.kubernetes.io/instance: ai-starter-kit + ports: + - protocol: TCP + port: 5000 \ No newline at end of file diff --git a/ai/ai-starter-kit/helm-chart/ai-starter-kit/templates/NOTES.txt b/ai/ai-starter-kit/helm-chart/ai-starter-kit/templates/NOTES.txt new file mode 100644 index 000000000..4e33a20ed --- /dev/null +++ b/ai/ai-starter-kit/helm-chart/ai-starter-kit/templates/NOTES.txt @@ -0,0 +1 @@ +AI Starter Kit installed. 
Enjoy \ No newline at end of file diff --git a/ai/ai-starter-kit/helm-chart/ai-starter-kit/templates/_helpers.tpl b/ai/ai-starter-kit/helm-chart/ai-starter-kit/templates/_helpers.tpl new file mode 100644 index 000000000..cf0c5e081 --- /dev/null +++ b/ai/ai-starter-kit/helm-chart/ai-starter-kit/templates/_helpers.tpl @@ -0,0 +1,62 @@ +{{/* +Expand the name of the chart. +*/}} +{{- define "ai-starter-kit.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Create a default fully qualified app name. +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). +If release name contains chart name it will be used as a full name. +*/}} +{{- define "ai-starter-kit.fullname" -}} +{{- if .Values.fullnameOverride }} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- $name := default .Chart.Name .Values.nameOverride }} +{{- if contains $name .Release.Name }} +{{- .Release.Name | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} +{{- end }} +{{- end }} +{{- end }} + +{{/* +Create chart name and version as used by the chart label. +*/}} +{{- define "ai-starter-kit.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Common labels +*/}} +{{- define "ai-starter-kit.labels" -}} +helm.sh/chart: {{ include "ai-starter-kit.chart" . }} +{{ include "ai-starter-kit.selectorLabels" . }} +{{- if .Chart.AppVersion }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +{{- end }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- end }} + +{{/* +Selector labels +*/}} +{{- define "ai-starter-kit.selectorLabels" -}} +app.kubernetes.io/name: {{ include "ai-starter-kit.name" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end }} + +{{/* +Create the name of the service account to use +*/}} +{{- define "ai-starter-kit.serviceAccountName" -}} +{{- if .Values.serviceAccount.create }} +{{- default (include "ai-starter-kit.fullname" .) .Values.serviceAccount.name }} +{{- else }} +{{- default "default" .Values.serviceAccount.name }} +{{- end }} +{{- end }} diff --git a/ai/ai-starter-kit/helm-chart/ai-starter-kit/templates/configmaps.yaml b/ai/ai-starter-kit/helm-chart/ai-starter-kit/templates/configmaps.yaml new file mode 100644 index 000000000..e03429ee9 --- /dev/null +++ b/ai/ai-starter-kit/helm-chart/ai-starter-kit/templates/configmaps.yaml @@ -0,0 +1,18 @@ +--- +{{- /* +Create a single ConfigMap with all initialization files for the jupyterhub singleuser pod. +This ConfigMap is mounted as a volume. 
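+A quick way to inspect the rendered result after an install (the ConfigMap name is hardcoded in this template):
+  kubectl get configmap ai-starter-kit-init-files -o yaml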
+*/ -}} +apiVersion: v1 +kind: ConfigMap +metadata: + name: ai-starter-kit-init-files + labels: + app.kubernetes.io/managed-by: {{ $.Release.Service | quote }} + app.kubernetes.io/instance: {{ $.Release.Name | quote }} + helm.sh/chart: "{{ $.Chart.Name }}-{{ $.Chart.Version }}" +data: +{{- range $path, $bytes := .Files.Glob "files/*" }} + {{ base $path | quote }}: |- +{{ $bytes | toString | nindent 4 }} +{{- end }} diff --git a/ai/ai-starter-kit/helm-chart/ai-starter-kit/templates/hf-secret.yaml b/ai/ai-starter-kit/helm-chart/ai-starter-kit/templates/hf-secret.yaml new file mode 100644 index 000000000..308b0a94a --- /dev/null +++ b/ai/ai-starter-kit/helm-chart/ai-starter-kit/templates/hf-secret.yaml @@ -0,0 +1,13 @@ +{{- if .Values.huggingface.token }} +apiVersion: v1 +kind: Secret +metadata: + name: {{ .Release.Name }}-hf-token-secret + labels: + app.kubernetes.io/managed-by: {{ .Release.Service | quote }} + app.kubernetes.io/instance: {{ .Release.Name | quote }} + helm.sh/chart: "{{ .Chart.Name }}-{{ .Chart.Version }}" +type: Opaque +stringData: + token: {{ .Values.huggingface.token }} +{{- end }} \ No newline at end of file diff --git a/ai/ai-starter-kit/helm-chart/ai-starter-kit/templates/local-pv.yaml b/ai/ai-starter-kit/helm-chart/ai-starter-kit/templates/local-pv.yaml new file mode 100644 index 000000000..0797b93e3 --- /dev/null +++ b/ai/ai-starter-kit/helm-chart/ai-starter-kit/templates/local-pv.yaml @@ -0,0 +1,16 @@ +{{- if .Values.localPersistence.enabled }} +apiVersion: v1 +kind: PersistentVolume +metadata: + name: {{ .Release.Name }}-models-cache-pv + labels: + type: local +spec: + storageClassName: manual + capacity: + storage: {{ .Values.modelsCachePvc.size }} + accessModes: + - ReadWriteOnce + hostPath: + path: "{{ .Values.localPersistence.hostPath }}" +{{- end }} diff --git a/ai/ai-starter-kit/helm-chart/ai-starter-kit/templates/pvc-ray.yaml b/ai/ai-starter-kit/helm-chart/ai-starter-kit/templates/pvc-ray.yaml new file mode 100644 index 000000000..cb4ae5b1d --- /dev/null +++ b/ai/ai-starter-kit/helm-chart/ai-starter-kit/templates/pvc-ray.yaml @@ -0,0 +1,28 @@ +{{- if .Values.rayPvc.enabled -}} +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: {{ .Release.Name }}-ray-pvc + labels: + app.kubernetes.io/managed-by: {{ .Release.Service | quote }} + app.kubernetes.io/instance: {{ .Release.Name | quote }} + helm.sh/chart: "{{ .Chart.Name }}-{{ .Chart.Version }}" +spec: + accessModes: + {{- toYaml .Values.rayPvc.accessModes | nindent 4 }} + {{- if .Values.localPersistence.enabled }} + storageClassName: manual + {{- else }} + {{- /* + If storageClassName is set to a specific class, it will be used. + If storageClassName is set to an empty string (""), no storage class will be used for provisioning. + If storageClassName is null or omitted, the default storage class will be used. 
+ */}} + {{- if or .Values.rayPvc.storageClassName (eq .Values.rayPvc.storageClassName "") }} + storageClassName: {{ .Values.rayPvc.storageClassName | quote }} + {{- end }} + {{- end }} + resources: + requests: + storage: {{ .Values.rayPvc.size }} +{{- end -}} diff --git a/ai/ai-starter-kit/helm-chart/ai-starter-kit/templates/pvc.yaml b/ai/ai-starter-kit/helm-chart/ai-starter-kit/templates/pvc.yaml new file mode 100644 index 000000000..36ba98fdc --- /dev/null +++ b/ai/ai-starter-kit/helm-chart/ai-starter-kit/templates/pvc.yaml @@ -0,0 +1,28 @@ +{{- if .Values.modelsCachePvc.enabled -}} +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: {{ .Release.Name }}-models-cache-pvc + labels: + app.kubernetes.io/managed-by: {{ .Release.Service | quote }} + app.kubernetes.io/instance: {{ .Release.Name | quote }} + helm.sh/chart: "{{ .Chart.Name }}-{{ .Chart.Version }}" +spec: + accessModes: + {{- toYaml .Values.modelsCachePvc.accessModes | nindent 4 }} + {{- if .Values.localPersistence.enabled }} + storageClassName: manual + {{- else }} + {{- /* + If storageClassName is set to a specific class, it will be used. + If storageClassName is set to an empty string (""), no storage class will be used for provisioning. + If storageClassName is null or omitted, the default storage class will be used. + */}} + {{- if or .Values.modelsCachePvc.storageClassName (eq .Values.modelsCachePvc.storageClassName "") }} + storageClassName: {{ .Values.modelsCachePvc.storageClassName | quote }} + {{- end }} + {{- end }} + resources: + requests: + storage: {{ .Values.modelsCachePvc.size }} +{{- end -}} \ No newline at end of file diff --git a/ai/ai-starter-kit/helm-chart/ai-starter-kit/templates/ramalama-deployment.yaml b/ai/ai-starter-kit/helm-chart/ai-starter-kit/templates/ramalama-deployment.yaml new file mode 100644 index 000000000..0d88703d8 --- /dev/null +++ b/ai/ai-starter-kit/helm-chart/ai-starter-kit/templates/ramalama-deployment.yaml @@ -0,0 +1,49 @@ +--- +{{- if .Values.ramalama.enabled }} +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "ai-starter-kit.fullname" . }}-ramalama + labels: + {{- include "ai-starter-kit.labels" . | nindent 4 }} + app.kubernetes.io/component: ramalama +spec: + selector: + matchLabels: + {{- include "ai-starter-kit.selectorLabels" . | nindent 6 }} + app.kubernetes.io/component: ramalama + template: + metadata: + labels: + {{- include "ai-starter-kit.selectorLabels" . | nindent 8 }} + app.kubernetes.io/component: ramalama + spec: + containers: + - name: ramalama + image: "{{ .Values.ramalama.image.repository }}:{{ .Values.ramalama.image.tag }}" + imagePullPolicy: {{ .Values.ramalama.image.pullPolicy }} + command: + {{- toYaml .Values.ramalama.command | nindent 10 }} + ports: + - name: http + containerPort: 8080 + protocol: TCP +--- +apiVersion: v1 +kind: Service +metadata: + name: {{ include "ai-starter-kit.fullname" . }}-ramalama + labels: + {{- include "ai-starter-kit.labels" . | nindent 4 }} + app.kubernetes.io/component: ramalama +spec: + type: ClusterIP + ports: + - name: http + port: 8080 + targetPort: http + protocol: TCP + selector: + {{- include "ai-starter-kit.selectorLabels" . 
| nindent 4 }} + app.kubernetes.io/component: ramalama +{{- end }} \ No newline at end of file diff --git a/ai/ai-starter-kit/helm-chart/ai-starter-kit/values-gke.yaml b/ai/ai-starter-kit/helm-chart/ai-starter-kit/values-gke.yaml new file mode 100644 index 000000000..02e4ea619 --- /dev/null +++ b/ai/ai-starter-kit/helm-chart/ai-starter-kit/values-gke.yaml @@ -0,0 +1,121 @@ +jupyterhub: + singleuser: + initContainers: + # This init cntainer makes sure that home folder that we mount has correct owner + - name: chown-home-mount-dir + image: jupyterhub/k8s-singleuser-sample:4.2.0 + securityContext: + runAsUser: 0 + command: ["chown", "jovyan", "/home/jovyan"] + volumeMounts: + - name: home + mountPath: /home/jovyan + subPath: jupyterhub_workspace + + - name: model-initializer + image: jupyterhub/k8s-singleuser-sample:4.2.0 + env: + - name: HF_TOKEN + valueFrom: + secretKeyRef: + name: ai-starter-kit-hf-token-secret + key: token + command: + - /bin/sh + - -c + - | + set -e + pip install -r /tmp/requirements.txt + + python /tmp/download_models.py + + # populate workspace with initial notebook files + for f in /tmp/*.ipynb; do + if [ -f "$f" ]; then + # Use cp -n to not overwrite existing files. + cp -n "$f" /home/jovyan/ + fi + done + volumeMounts: + # This 'home' volume is created by the helm chart's 'homeMountPath' option. + # We mount it to initContainer too, so all downloads and installations are persisted in this mounted home folder. + - name: home + mountPath: /home/jovyan + subPath: jupyterhub_workspace + - name: init-files + mountPath: /tmp + readOnly: true + resources: + requests: + cpu: "2" + memory: 16Gi + ephemeral-storage: 10Gi + limits: + cpu: "4" + memory: 32Gi + ephemeral-storage: 10Gi + cloudMetadata: + # Without this disabled, the GKE Autopilot Warden will raise an error about container with escalated privilieges + blockWithIptables: false + memory: + limit: 32G + guarantee: 16G + extraResource: + limits: + ephemeral-storage: '10G' + guarantees: + ephemeral-storage: '10G' + scheduling: + userScheduler: + # For now we disable the scheduler because GKE Autopilot Warden does not allow using of a custom scheduler + enabled: false + +ray-cluster: + enabled: true + image: + tag: 2.41.0-py312-gpu + head: + resources: + requests: + cpu: "4" + memory: "4G" + ephemeral-storage: 10Gi + limits: + cpu: "8" + memory: "6G" + ephemeral-storage: 10Gi + worker: + containerEnv: + - name: PYTHONPATH + value: "/mnt/ray-storage/libraries" + - name: TMPDIR + value: "/mnt/ray-storage/temp" + resources: + requests: + cpu: "4" + memory: "4G" + ephemeral-storage: 10Gi + limits: + cpu: "8" + memory: "6G" + ephemeral-storage: 10Gi + volumes: + - name: ray-pvc-storage + persistentVolumeClaim: + claimName: "ai-starter-kit-ray-pvc" # this value should'n be hardcoded. The actual value should be: {{ .Release.Name }}-models-cache-pvc + volumeMounts: + - name: ray-pvc-storage + mountPath: /mnt/ray-storage + podSecurityContext: + fsGroup: 1000 + +rayPvc: + enabled: true + +modelsCachePvc: + storageClassName: "standard-rwx" + accessModes: + - ReadWriteMany + +localPersistence: + enabled: false diff --git a/ai/ai-starter-kit/helm-chart/ai-starter-kit/values.yaml b/ai/ai-starter-kit/helm-chart/ai-starter-kit/values.yaml new file mode 100644 index 000000000..c770d519f --- /dev/null +++ b/ai/ai-starter-kit/helm-chart/ai-starter-kit/values.yaml @@ -0,0 +1,192 @@ +jupyterhub: + nameOverride: "jupyterhub" + # This value has to be null in order to apply release name on this chart's resources. 
+ # https://github.com/jupyterhub/zero-to-jupyterhub-k8s/blob/b4b51301ac886511c643cc5d428b15ff38006bee/jupyterhub/values.yaml#L1 + fullnameOverride: + + singleuser: + networkPolicy: + enabled: false + defaultUrl: "/lab/tree/welcome.ipynb" + image: + name: jupyterhub/k8s-singleuser-sample + tag: "4.2.0" + initContainers: + # This init cntainer makes sure that home folder that we mount has correct owner + - name: chown-home-mount-dir + image: jupyterhub/k8s-singleuser-sample:4.2.0 + securityContext: + runAsUser: 0 + command: ["chown", "jovyan", "/home/jovyan"] + volumeMounts: + - name: home + mountPath: /home/jovyan + subPath: jupyterhub_workspace + + - name: model-initializer + image: jupyterhub/k8s-singleuser-sample:4.2.0 + env: + - name: HF_TOKEN + valueFrom: + secretKeyRef: + name: ai-starter-kit-hf-token-secret + key: token + command: + - /bin/sh + - -c + - | + set -e + pip install -r /tmp/requirements.txt + + python /tmp/download_models.py + + # populate workspace with initial notebook files + for f in /tmp/*.ipynb; do + if [ -f "$f" ]; then + # Use cp -n to not overwrite existing files. + cp -n "$f" /home/jovyan/ + fi + done + volumeMounts: + # This 'home' volume is created by the helm chart's 'homeMountPath' option. + # We mount it to initContainer too, so all downloads and installations are persisted in this mounted home folder. + - name: home + mountPath: /home/jovyan + subPath: jupyterhub_workspace + - name: init-files + mountPath: /tmp + readOnly: true + + storage: + type: static + static: + pvcName: "ai-starter-kit-models-cache-pvc" + subPath: "jupyterhub_workspace" + capacity: 20Gi + homeMountPath: /home/jovyan + extraVolumes: + - name: init-files + configMap: + name: "ai-starter-kit-init-files" + # This environment variables list have its own format: https://z2jh.jupyter.org/en/latest/resources/reference.html#singleuser-extraenv + extraEnv: + HF_TOKEN: + name: HF_TOKEN + valueFrom: + secretKeyRef: + name: ai-starter-kit-hf-token-secret + key: token + RAY_ADDRESS: "ray://ai-starter-kit-kuberay-head-svc:10001" + MLFLOW_TRACKING_URI: "http://ai-starter-kit-mlflow:5000" + hub: + networkPolicy: + enabled: false + db: + type: sqlite-pvc + pvc: + annotations: + # Without this helm will not keep the pvc after uninstallation + # https://github.com/jupyterhub/zero-to-jupyterhub-k8s/issues/3718 + helm.sh/resource-policy: keep + extraConfig: + 00-dummy-authenticator: | + c.DummyAuthenticator.password = "sneakypass" + 01-spawner-timeouts: | + c.KubeSpawner.start_timeout = 1800 + proxy: + chp: + networkPolicy: + enabled: false + traefik: + networkPolicy: + enabled: false + +ray-cluster: + image: + tag: "2.41.0-py312-cpu-aarch64" + head: + serviceType: ClusterIP + resources: + requests: + cpu: "1" + memory: "2G" + ephemeral-storage: 10Gi + limits: + cpu: "4" + memory: "8G" + ephemeral-storage: 10Gi + worker: + resources: + requests: + cpu: "1" + memory: "2G" + ephemeral-storage: 10Gi + limits: + cpu: "4" + memory: "8G" + ephemeral-storage: 10Gi + +mlflow: + resources: + requests: + cpu: 100m + memory: 512Mi + limits: + cpu: 1000m + memory: 1Gi + +huggingface: + # Provide your Hugging Face token here to download gated or private models. + # It is recommended to set this via --set or a separate values file, e.g., + # --set huggingface.token=hf_... + token: "" + +rayPvc: + enabled: false + storageClassName: "standard-rwo" + accessModes: + - ReadWriteOnce + size: 100Gi + +modelsCachePvc: + enabled: true + # To use the default StorageClass, set storageClassName to null or omit it. 
+ # To use a specific StorageClass (e.g. "standard-rwo" on GKE), provide its name. + # To create a PVC that doesn't request any StorageClass, set it to an empty string (""). + storageClassName: "standard-rwo" + accessModes: + - ReadWriteOnce + size: 10Gi + +localPersistence: + # For local development with minikube, this allows persisting the models-cache + # on the host machine, surviving `minikube stop/start`. + # 1. Create a directory on your host: `mkdir -p /tmp/models-cache` + # 2. Start minikube with the mount: `minikube start --mount --mount-string="/tmp/models-cache:/tmp/models-cache"` + # 3. Set enabled to true below, or via `--set localPersistence.enabled=true` + enabled: true + # This path must match the destination path inside the minikube node. + hostPath: "/tmp/models-cache" + +ollama: + enabled: true + ollama: + models: + pull: + - gemma3 + persistentVolume: + enabled: true + existingClaim: "ai-starter-kit-models-cache-pvc" + subPath: "ollama" + + +ramalama: + enabled: true + command: + - /bin/sh + - -c + - ramalama pull qwen2.5:1.5b && ramalama serve qwen2.5:1.5b --port 8080 + image: + repository: "quay.io/ramalama/ramalama" + tag: "latest" + pullPolicy: IfNotPresent diff --git a/ai/ai-starter-kit/notebooks/multi-agent.ipynb b/ai/ai-starter-kit/notebooks/multi-agent.ipynb new file mode 100644 index 000000000..ea2f3caa1 --- /dev/null +++ b/ai/ai-starter-kit/notebooks/multi-agent.ipynb @@ -0,0 +1,621 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "079fadd2-200e-4d37-8ae2-be2792e3a24e", + "metadata": {}, + "source": [ + "### Cell 1 - Initialize Ray endpoints and verify dashboard\n", + "\n", + "Installs requests, derives the Ray head host from RAY_ADDRESS, builds Dashboard/Serve/MLflow URLs, reads a Hugging Face token, and prints the endpoints plus the Jobs API version for a quick health check."
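+ "\n", + "If you prefer a shell check, the same Jobs API endpoint can be queried directly (a minimal sketch; the host assumes this chart's default Ray head service name):\n", + "\n", + "```bash\n", + "curl -s http://ai-starter-kit-kuberay-head-svc:8265/api/version\n", + "```"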
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "79db57cd-fb72-4b10-b0fb-5e9cd5c007b6", + "metadata": {}, + "outputs": [], + "source": [ + "!pip -q install requests==2.* --disable-pip-version-check\n", + "\n", + "import os, textwrap, base64, time, json, requests\n", + "from string import Template\n", + "\n", + "raw_addr = os.getenv(\"RAY_ADDRESS\", \"ray://ai-starter-kit-kuberay-head-svc:10001\")\n", + "if raw_addr.startswith(\"ray://\"):\n", + " HEAD_HOST = raw_addr.split(\"://\", 1)[1].split(\":\", 1)[0]\n", + "else:\n", + " HEAD_HOST = raw_addr.split(\":\", 1)[0] or \"ai-starter-kit-kuberay-head-svc\"\n", + "\n", + "DASH_URL = f\"http://{HEAD_HOST}:8265\"\n", + "SERVE_PORT = int(os.getenv(\"SERVE_PORT\", \"8000\"))\n", + "SERVE_ROUTE = \"/v1\"\n", + "\n", + "HF_TOKEN_PATH = \"/etc/secrets/huggingface/token\"\n", + "HF_TOKEN = \"\"\n", + "if os.path.exists(HF_TOKEN_PATH):\n", + " try:\n", + " HF_TOKEN = open(HF_TOKEN_PATH).read().strip()\n", + " except Exception:\n", + " HF_TOKEN = \"\"\n", + "\n", + "print(\"Head host:\", HEAD_HOST)\n", + "print(\"Jobs API :\", f\"{DASH_URL}/api/jobs/\")\n", + "print(\"Serve URL:\", f\"http://{HEAD_HOST}:{SERVE_PORT}{SERVE_ROUTE}\")\n", + "print(\"MLflow :\", os.getenv(\"MLFLOW_TRACKING_URI\", \"http://ai-starter-kit-mlflow:5000\"))\n", + "\n", + "print(\"Jobs API version:\", requests.get(f\"{DASH_URL}/api/version\", timeout=10).json())\n" + ] + }, + { + "cell_type": "markdown", + "id": "fe862173-fd9a-41ae-a27b-63875f788024", + "metadata": {}, + "source": [ + "### Cell 2 - Deploy a minimal Ray Serve smoke test and verify readiness\n", + "\n", + "Submits a tiny FastAPI app to Ray Serve (one /healthz endpoint under /smoke) as a Ray Job, installing FastAPI on the fly. It polls the Jobs API for status and hits :8000/smoke/healthz up to 60 seconds, printing when the service responds 200 (i.e., smoke test passes)." 
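+ "\n", + "Once the job is submitted, the same readiness probe can be run from a terminal inside the cluster network (a sketch; the service name assumes this chart's default release name):\n", + "\n", + "```bash\n", + "curl -s http://ai-starter-kit-kuberay-head-svc:8000/smoke/healthz\n", + "# expected response: {\"ok\": true}\n", + "```"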
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "34da3e26-6276-48b7-b3ac-c90359df6547", + "metadata": {}, + "outputs": [], + "source": [ + "import os, base64, textwrap, time, requests\n", + "\n", + "DASH_URL = \"http://ai-starter-kit-kuberay-head-svc:8265\"\n", + "\n", + "print(\"Jobs API:\", requests.get(f\"{DASH_URL}/api/version\", timeout=10).json())\n", + "\n", + "serve_py = textwrap.dedent(\"\"\"\n", + " from fastapi import FastAPI\n", + " from ray import serve\n", + " serve.start(detached=True, http_options={\"host\":\"0.0.0.0\",\"port\":8000})\n", + " app = FastAPI()\n", + "\n", + " @serve.deployment(name=\"smoke\", num_replicas=1)\n", + " @serve.ingress(app)\n", + " class Smoke:\n", + " @app.get(\"/healthz\")\n", + " async def health(self): return {\"ok\": True}\n", + "\n", + " serve.run(Smoke.bind(), route_prefix=\"/smoke\")\n", + " print(\"READY: smoke\", flush=True)\n", + "\"\"\").strip()\n", + "\n", + "b64 = base64.b64encode(serve_py.encode()).decode()\n", + "entry = f'python -c \"import base64; exec(base64.b64decode(\\'{b64}\\'))\"'\n", + "submit = requests.post(f\"{DASH_URL}/api/jobs/\", json={\"entrypoint\": entry, \"runtime_env\": {\"pip\": [\"fastapi>=0.110\"]}}, timeout=60).json()\n", + "job_id = submit[\"job_id\"]\n", + "print(\"Job:\", job_id)\n", + "\n", + "svc = \"http://ai-starter-kit-kuberay-head-svc:8000/smoke/healthz\"\n", + "for i in range(60):\n", + " s = requests.get(f\"{DASH_URL}/api/jobs/{job_id}\", timeout=10).json()[\"status\"]\n", + " try:\n", + " r = requests.get(svc, timeout=2)\n", + " print(f\"tick {i:02d}: job={s}, health={r.status_code}\")\n", + " if r.status_code == 200:\n", + " print(\"Smoke OK\")\n", + " break\n", + " except Exception as e:\n", + " print(f\"tick {i:02d}: job={s}, health=ERR {e}\")\n", + " time.sleep(1)" + ] + }, + { + "cell_type": "markdown", + "id": "8111d705-595e-4e65-8479-bdc76191fa31", + "metadata": {}, + "source": [ + "### Cell 3 - Deploy model on Ray Serve with llama-cpp\n", + "\n", + "Packages and submits a Ray Job that spins up a Ray Serve app exposing /v1/healthz and /v1/chat/completions. It downloads the preferred GGUF from Hugging Face, initializes llama-cpp-python, logs to MLflow, and prints the deployed health/chat URLs." 
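+ "\n", + "For reference, the deployed route accepts OpenAI-style chat requests; a minimal curl sketch is shown below (the body fields mirror what the serving code reads: messages, temperature, max_tokens, stop; any model field is ignored because the server always answers with the single loaded GGUF):\n", + "\n", + "```bash\n", + "curl -s -H 'Content-Type: application/json' -d '{\"messages\": [{\"role\": \"user\", \"content\": \"Say hello\"}], \"max_tokens\": 32}' http://ai-starter-kit-kuberay-head-svc:8000/v1/chat/completions\n", + "```"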
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bbea1539-e9ab-460a-9cfc-20a42807f616", + "metadata": {}, + "outputs": [], + "source": [ + "import os, base64, textwrap, requests\n", + "\n", + "HEAD = os.environ.get(\"RAY_HEAD_SVC\", \"ai-starter-kit-kuberay-head-svc\")\n", + "DASH_URL = f\"http://{HEAD}:8265\"\n", + "SERVE_PORT = 8000\n", + "SERVE_ROUTE = \"/v1\"\n", + "\n", + "runtime_env = {\n", + " \"pip\": [\n", + " \"fastapi==0.110.0\",\n", + " \"uvicorn==0.23.2\",\n", + " \"huggingface_hub==0.25.2\",\n", + " \"llama-cpp-python==0.3.16\", \n", + " \"hf_transfer==0.1.6\",\n", + " \"mlflow==2.14.3\", \n", + " ],\n", + " \"env_vars\": {\n", + " \"HF_HUB_ENABLE_HF_TRANSFER\": \"1\",\n", + " \"HUGGINGFACE_HUB_TOKEN\": os.environ.get(\"HUGGINGFACE_HUB_TOKEN\", \"\"),\n", + " \"SERVE_PORT\": str(SERVE_PORT),\n", + "\n", + " \"MODEL_REPO\": \"Qwen/Qwen2.5-1.5B-Instruct-GGUF\",\n", + " \"GGUF_PREF_ORDER\": \"q4_k_m,q4_0,q3_k_m,q2_k\",\n", + "\n", + " \"LLM_CONTEXT\": os.environ.get(\"LLM_CONTEXT\", \"1024\"),\n", + " \"LLM_MAX_TOKENS\": os.environ.get(\"LLM_MAX_TOKENS\", \"256\"),\n", + " \"SERVER_MAX_NEW_TOKENS\": os.environ.get(\"SERVER_MAX_NEW_TOKENS\", \"512\"),\n", + "\n", + " \"LLM_THREADS\": os.environ.get(\"LLM_THREADS\", \"6\"),\n", + " \"OMP_NUM_THREADS\": os.environ.get(\"OMP_NUM_THREADS\", \"6\"),\n", + " \"GPU_LAYERS\": \"0\", \n", + " \n", + " \"PIP_PREFER_BINARY\": \"1\",\n", + " \"CMAKE_ARGS\": \"-DGGML_OPENMP=OFF -DLLAMA_NATIVE=OFF\",\n", + "\n", + " \"HF_HOME\": \"/tmp/hf-cache\",\n", + " \"TRANSFORMERS_CACHE\": \"/tmp/hf-cache\",\n", + "\n", + " \"MLFLOW_TRACKING_URI\": os.environ.get(\"MLFLOW_TRACKING_URI\", \"\"),\n", + " \"MLFLOW_EXPERIMENT_NAME\": os.environ.get(\"MLFLOW_EXPERIMENT_NAME\", \"ray-llama-cpp\"),\n", + " },\n", + "}\n", + "\n", + "serve_py = textwrap.dedent(f\"\"\"\n", + "import os, time, multiprocessing, uuid\n", + "from typing import List, Dict, Any\n", + "from fastapi import FastAPI, Request\n", + "from fastapi.responses import JSONResponse\n", + "from huggingface_hub import HfApi, hf_hub_download\n", + "from ray import serve\n", + "from llama_cpp import Llama\n", + "\n", + "USE_MLFLOW = False\n", + "try:\n", + " import mlflow\n", + " if os.getenv(\"MLFLOW_TRACKING_URI\"):\n", + " mlflow.set_tracking_uri(os.getenv(\"MLFLOW_TRACKING_URI\"))\n", + " mlflow.set_experiment(os.getenv(\"MLFLOW_EXPERIMENT_NAME\",\"ray-llama-cpp\"))\n", + " USE_MLFLOW = True\n", + "except Exception as _e:\n", + " USE_MLFLOW = False\n", + "\n", + "SERVE_PORT = int(os.getenv(\"SERVE_PORT\", \"{SERVE_PORT}\"))\n", + "SERVE_ROUTE = \"{SERVE_ROUTE}\"\n", + "MODEL_REPO = os.getenv(\"MODEL_REPO\", \"Qwen/Qwen2.5-1.5B-Instruct-GGUF\")\n", + "GGUF_PREFS = [s.strip() for s in os.getenv(\"GGUF_PREF_ORDER\",\"q4_k_m,q4_0,q3_k_m,q2_k\").split(\",\") if s.strip()]\n", + "CTX_LEN = int(os.getenv(\"LLM_CONTEXT\", \"2048\"))\n", + "MAX_TOKENS = int(os.getenv(\"LLM_MAX_TOKENS\", \"256\"))\n", + "HF_TOKEN = os.getenv(\"HUGGINGFACE_HUB_TOKEN\") or None\n", + "\n", + "serve.start(detached=True, http_options={{\"host\":\"0.0.0.0\", \"port\":SERVE_PORT}})\n", + "app = FastAPI()\n", + "\n", + "def pick_one_file(repo_id: str, prefs):\n", + " api = HfApi()\n", + " files = api.list_repo_files(repo_id=repo_id, repo_type=\"model\", token=HF_TOKEN)\n", + " ggufs = [f for f in files if f.lower().endswith(\".gguf\")]\n", + " if not ggufs:\n", + " raise RuntimeError(f\"No .gguf files visible in {{repo_id}}\")\n", + " for pref in prefs:\n", + " for f in ggufs:\n", + " if pref.lower() in 
f.lower():\n", + " return f\n", + " return ggufs[0]\n", + "\n", + "def pick_chat_format(repo: str, fname: str) -> str:\n", + " return \"qwen\"\n", + "\n", + "@serve.deployment(name=\"qwen\", num_replicas=1, ray_actor_options={{\"num_cpus\": 6}})\n", + "@serve.ingress(app)\n", + "class OpenAICompatLlama:\n", + " def __init__(self, repo_id: str = MODEL_REPO):\n", + " target = pick_one_file(repo_id, GGUF_PREFS)\n", + " print(f\"[env] model repo: {{repo_id}} file: {{target}}\", flush=True)\n", + " local_dir = \"/tmp/hf-gguf\"; os.makedirs(local_dir, exist_ok=True)\n", + "\n", + " gguf_path = hf_hub_download(\n", + " repo_id=repo_id, filename=target, token=HF_TOKEN,\n", + " local_dir=local_dir, local_dir_use_symlinks=False,\n", + " force_download=False, resume_download=True\n", + " )\n", + " print(f\"[download] done: {{gguf_path}}\", flush=True)\n", + "\n", + " n_threads = int(os.getenv(\"LLM_THREADS\", max(2, (multiprocessing.cpu_count() or 4)//2)))\n", + " print(f\"[load] llama-cpp-python | ctx={{CTX_LEN}} threads={{n_threads}} gpu_layers={{int(os.getenv('GPU_LAYERS','0'))}}\", flush=True)\n", + "\n", + " self.model_file = os.path.basename(gguf_path)\n", + " self.model_repo = repo_id\n", + " chat_format = pick_chat_format(self.model_repo, self.model_file)\n", + " print(f\"[load] chat_format={{chat_format}}\", flush=True)\n", + "\n", + " self.llm = Llama(\n", + " model_path=gguf_path,\n", + " n_ctx=CTX_LEN,\n", + " n_threads=n_threads,\n", + " n_batch=256, \n", + " n_gpu_layers=int(os.getenv(\"GPU_LAYERS\",\"0\")),\n", + " chat_format=chat_format,\n", + " verbose=False\n", + " )\n", + " print(\"[ready] model loaded\", flush=True)\n", + "\n", + " @app.get(\"/healthz\")\n", + " async def health(self):\n", + " return {{\"status\":\"ok\"}}\n", + "\n", + " @app.post(\"/chat/completions\")\n", + " async def chat_completions(self, request: Request):\n", + " t0 = time.time()\n", + " body = await request.json()\n", + "\n", + " messages = body.get(\"messages\", [])\n", + " temperature = float(body.get(\"temperature\", 0.2))\n", + " req_max = body.get(\"max_tokens\", None)\n", + " stop_words = (body.get(\"stop\", []) or []) + [\"<|im_end|>\", \"\"]\n", + "\n", + " SERVER_MAX = int(os.getenv(\"SERVER_MAX_NEW_TOKENS\", \"512\"))\n", + " max_tokens = int(req_max if isinstance(req_max, int) else MAX_TOKENS)\n", + " max_tokens = max(32, min(max_tokens, CTX_LEN - 128, SERVER_MAX))\n", + "\n", + " rid = \"chatcmpl-\" + uuid.uuid4().hex[:24]\n", + " created = int(time.time())\n", + " model_name = f\"{{self.model_repo}}/{{self.model_file}}\"\n", + "\n", + " try:\n", + " result = self.llm.create_chat_completion(\n", + " messages=messages,\n", + " temperature=temperature,\n", + " max_tokens=max_tokens,\n", + " top_k=50,\n", + " top_p=0.9,\n", + " repeat_penalty=1.1,\n", + " stop=stop_words,\n", + " )\n", + " out_text = (result[\"choices\"][0][\"message\"][\"content\"] or \"\").strip()\n", + " usage_raw = result.get(\"usage\") or {{}}\n", + " p_tokens = int(usage_raw.get(\"prompt_tokens\") or 0)\n", + " c_tokens = int(usage_raw.get(\"completion_tokens\") or 0)\n", + " err = None\n", + " except Exception as e:\n", + " out_text = \"\"\n", + " p_tokens = c_tokens = 0\n", + " err = str(e)\n", + "\n", + " if USE_MLFLOW:\n", + " try:\n", + " dur_ms = int((time.time()-t0) * 1000)\n", + " with mlflow.start_run(run_name=\"chat\"):\n", + " mlflow.set_tags({{\n", + " \"model_repo\": self.model_repo,\n", + " \"model_file\": self.model_file,\n", + " \"framework\": \"llama-cpp-python\",\n", + " }})\n", + " 
mlflow.log_params({{\n", + " \"temperature\": temperature,\n", + " \"max_tokens\": max_tokens,\n", + " \"ctx\": CTX_LEN,\n", + " }})\n", + " if not (p_tokens and c_tokens):\n", + " p_tokens = p_tokens or max(1, len(\" \".join(m.get(\"content\",\"\") for m in messages).split()))\n", + " c_tokens = c_tokens or max(0, len(out_text.split()))\n", + " mlflow.log_metrics({{\n", + " \"duration_ms\": dur_ms,\n", + " \"prompt_tokens_approx\": p_tokens,\n", + " \"completion_tokens_approx\": c_tokens,\n", + " \"total_tokens_approx\": p_tokens + c_tokens,\n", + " }})\n", + " except Exception:\n", + " pass\n", + "\n", + " if err:\n", + " return JSONResponse(status_code=500, content={{\"error\": err, \"type\":\"generation_error\"}})\n", + "\n", + " usage = {{\n", + " \"prompt_tokens\": p_tokens,\n", + " \"completion_tokens\": c_tokens,\n", + " \"total_tokens\": p_tokens + c_tokens,\n", + " }}\n", + " return {{\n", + " \"id\": rid,\n", + " \"object\": \"chat.completion\",\n", + " \"created\": created,\n", + " \"model\": model_name,\n", + " \"choices\": [\n", + " {{\n", + " \"index\": 0,\n", + " \"message\": {{\"role\":\"assistant\",\"content\": out_text}},\n", + " \"finish_reason\": \"stop\"\n", + " }}\n", + " ],\n", + " \"usage\": usage\n", + " }}\n", + "\n", + "serve.run(OpenAICompatLlama.bind(), route_prefix=SERVE_ROUTE)\n", + "print(\"READY\", flush=True)\n", + "\"\"\").strip()\n", + "\n", + "payload = base64.b64encode(serve_py.encode()).decode()\n", + "entrypoint = 'python -c \"import base64,sys;exec(base64.b64decode(\\'{}\\').decode())\"'.format(payload)\n", + "\n", + "job = requests.post(\n", + " f\"{DASH_URL}/api/jobs/\",\n", + " json={\n", + " \"entrypoint\": entrypoint,\n", + " \"runtime_env\": runtime_env,\n", + " \"metadata\": {\"job_name\": \"serve-qwen2_5-llama_cpp-openai\"},\n", + " },\n", + " timeout=45\n", + ").json()\n", + "\n", + "print(\"Job:\", job.get(\"job_id\"))\n", + "print(\"Health:\", f\"http://{HEAD}:{SERVE_PORT}{SERVE_ROUTE}/healthz\")\n", + "print(\"Chat: \", f\"http://{HEAD}:{SERVE_PORT}{SERVE_ROUTE}/chat/completions\")" + ] + }, + { + "cell_type": "markdown", + "id": "a411c015-c802-4ca1-81bb-3f4790d9626a", + "metadata": {}, + "source": [ + "### Cell 4 - Basic client + latency test\n", + "\n", + "Calls /v1/healthz and then sends an OpenAI-style chat request to /v1/chat/completions with a short prompt. Prints latency and token usage, returning the assistant text." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3be634e2-a82f-42c9-8e31-57e6868a86ee", + "metadata": {}, + "outputs": [], + "source": [ + "import os, time, requests, json\n", + "\n", + "HEAD = os.environ.get(\"RAY_HEAD_SVC\", \"ai-starter-kit-kuberay-head-svc\")\n", + "SERVE_PORT = 8000\n", + "BASE_URL = f\"http://{HEAD}:{SERVE_PORT}/v1\"\n", + "\n", + "def health():\n", + " r = requests.get(f\"{BASE_URL}/healthz\", timeout=10)\n", + " print(\"Health:\", r.status_code, r.json())\n", + "\n", + "def chat(prompt, temperature=0.4, max_tokens=220, stop=None):\n", + " body = {\n", + " \"model\": \"qwen2.5-1.5b-instruct-gguf\",\n", + " \"temperature\": float(temperature),\n", + " \"max_tokens\": int(max_tokens),\n", + " \"messages\": [\n", + " {\"role\": \"system\", \"content\": \"You are Qwen2.5 Instruct running on a tiny CPU host. 
Be concise, complete sentences.\"},\n", + " {\"role\": \"user\", \"content\": prompt},\n", + " ],\n", + " }\n", + " if stop:\n", + " body[\"stop\"] = stop\n", + "\n", + " t0 = time.time()\n", + " r = requests.post(f\"{BASE_URL}/chat/completions\", json=body, timeout=300)\n", + " dt = time.time() - t0\n", + " r.raise_for_status()\n", + " out = r.json()[\"choices\"][0][\"message\"][\"content\"]\n", + " usage = r.json().get(\"usage\", {})\n", + " print(f\"\\nLatency: {dt:.2f}s | usage: {usage}\")\n", + " print(\"\\n---\\n\", out)\n", + " return out\n", + "\n", + "health()\n", + "_ = chat(\"Say 'test ok' then give me one short fun fact about llamas.\", stop=[\"<|im_end|>\"])" + ] + }, + { + "cell_type": "markdown", + "id": "553d2756-8949-43e3-8342-71387688e0fa", + "metadata": {}, + "source": [ + "### Cell 5 - Multi-agent (Autogen) pipeline\n", + "\n", + "Installs Autogen, configures OpenAIWrapper to hit Ray Serve /v1 endpoint, warms up the model, then runs a simple three-agent workflow (Researcher -> Writer -> Critic) to produce and refine a short report." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0f6713f3-8b60-40b2-ad3c-ebf6db4f66e1", + "metadata": {}, + "outputs": [], + "source": [ + "!pip -q install pyautogen~=0.2.35 \"flaml[automl]\" --disable-pip-version-check\n", + "\n", + "import os, sys\n", + "\n", + "for p in [\n", + " \"/tmp/models-cache/lib/python3.11/site-packages\", \n", + " os.path.expanduser(\"~/.local/lib/python3.11/site-packages\"), \n", + "]:\n", + " if os.path.isdir(p) and p not in sys.path:\n", + " sys.path.insert(0, p)\n", + "\n", + "import os, autogen\n", + "from autogen import AssistantAgent, UserProxyAgent\n", + "\n", + "HEAD = os.environ.get(\"RAY_HEAD_SVC\", \"ai-starter-kit-kuberay-head-svc\")\n", + "SERVE_PORT = 8000\n", + "BASE_URL = f\"http://{HEAD}:{SERVE_PORT}/v1\" \n", + "\n", + "config_list = [\n", + " {\n", + " \"model\": \"qwen2.5-1.5b-instruct-gguf\", \n", + " \"base_url\": BASE_URL, \n", + " \"api_key\": \"local\", \n", + " \"price\": [0.0, 0.0],\n", + " }\n", + "]\n", + "\n", + "llm = autogen.OpenAIWrapper(config_list=config_list)\n", + "try:\n", + " r = llm.create(messages=[{\"role\":\"user\",\"content\":\"Say 'test ok'.\"}], temperature=0.2, max_tokens=16)\n", + " print(\"Warmup:\", r.choices[0].message.content)\n", + "except Exception as e:\n", + " print(\"Warmup failed:\", e)\n", + "\n", + "user_proxy = UserProxyAgent(\n", + " name=\"UserProxy\",\n", + " system_message=\"You are the human admin. Initiate the task.\",\n", + " code_execution_config=False,\n", + " human_input_mode=\"NEVER\",\n", + ")\n", + "\n", + "researcher = AssistantAgent(\n", + " name=\"Researcher\",\n", + " system_message=(\n", + " \"You are a researcher. Gather concise, verified facts on the topic. \"\n", + " \"Return several bullet points with inline source domains (e.g., nature.com, ibm.com). \"\n", + " \"Keep under 100 words total. No made-up sources. \"\n", + " \"Do not include any special end token.\"\n", + " ),\n", + " llm_config={\"config_list\": config_list, \"temperature\": 0.35, \"max_tokens\": 140, \"timeout\": 300},\n", + ")\n", + "\n", + "writer = AssistantAgent(\n", + " name=\"Writer\",\n", + " system_message=(\n", + " \"You are a writer. Using the Researcher’s notes, produce a clear word report under 160 words. \"\n", + " \"Avoid speculation. Keep it structured and readable. 
\"\n", + " \"Do not include any special end token.\"\n", + " ),\n", + " llm_config={\"config_list\": config_list, \"temperature\": 0.55, \"max_tokens\": 220, \"timeout\": 180},\n", + ")\n", + "\n", + "critic = AssistantAgent(\n", + " name=\"Critic\",\n", + " system_message=(\n", + " \"You are a critic. Review the Writer’s report for accuracy, clarity, and flow.\"\n", + " \"Present the tightened final text and keep it under 140 words. On a new last line output exactly: <|END|>\"\n", + " ),\n", + " llm_config={\"config_list\": config_list, \"temperature\": 0.45, \"max_tokens\": 160, \"timeout\": 300},\n", + ")\n", + "\n", + "def run_sequential(task):\n", + " research_response = researcher.generate_reply(messages=[{\"content\": task, \"role\": \"user\"}])\n", + " research_notes = research_response if isinstance(research_response, str) else research_response.get(\"content\", \"[no output]\")\n", + " print(\"\\nResearch Notes:\\n\", research_notes)\n", + "\n", + " writer_prompt = f\"Using these research notes, write the report:\\n{research_notes}\"\n", + " writer_response = writer.generate_reply(messages=[{\"content\": writer_prompt, \"role\": \"user\"}])\n", + " report = writer_response if isinstance(writer_response, str) else writer_response.get(\"content\", \"[no output]\")\n", + " print(\"\\nDraft Report:\\n\", report)\n", + "\n", + " critic_prompt = f\"Review this report:\\n{report}\"\n", + " critic_response = critic.generate_reply(messages=[{\"content\": critic_prompt, \"role\": \"user\"}])\n", + " final_text = critic_response if isinstance(critic_response, str) else critic_response.get(\"content\", \"[no output]\")\n", + " print(\"\\nFinal Review:\\n\", final_text)\n", + " return final_text\n", + "\n", + "task = \"Research the latest advancements in quantum computing as of 2025. Gather key facts, then write a short report (200–300 words). Have the Critic review and finalize.\"\n", + "final_output = run_sequential(task)" + ] + }, + { + "cell_type": "markdown", + "id": "0af596cf-5ba6-42df-a030-61d7a20d6f7b", + "metadata": {}, + "source": [ + "### Cell 6 - MLFlow: connect to tracking server and list recent chat runs\n", + "\n", + "Installs MLflow, sets the tracking URI and experiment, then queries and prints the latest runs with key params/metrics (temperature, max_tokens, duration) to verify Serve logging." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "03a1b042-04df-4cd0-9099-4cc763ecfe9d", + "metadata": {}, + "outputs": [], + "source": [ + "!pip -q install mlflow==2.14.3 --disable-pip-version-check\n", + "\n", + "import os, mlflow\n", + "from datetime import datetime\n", + "\n", + "tracking_uri = os.getenv(\"MLFLOW_TRACKING_URI\", \"http://ai-starter-kit-mlflow:5000\")\n", + "mlflow.set_tracking_uri(tracking_uri)\n", + "print(f\"MLflow Tracking URI: {tracking_uri}\")\n", + "\n", + "exp_name = os.getenv(\"MLFLOW_EXPERIMENT_NAME\", \"ray-llama-cpp\")\n", + "exp = mlflow.set_experiment(exp_name)\n", + "print(f\"Experiment: {exp.name} (ID: {exp.experiment_id})\")\n", + "print(\"-\" * 60)\n", + "\n", + "client = mlflow.tracking.MlflowClient()\n", + "runs = client.search_runs(\n", + " exp.experiment_id, \n", + " order_by=[\"attributes.start_time DESC\"], \n", + " max_results=10\n", + ")\n", + "\n", + "if not runs:\n", + " print(\"No runs found. 
Run cells 4 or 5 first to generate inference requests.\")\n", + "else:\n", + " print(f\"\\nFound {len(runs)} recent runs:\")\n", + " print(\"-\" * 60)\n", + " \n", + " for i, run in enumerate(runs, 1):\n", + " start_time = datetime.fromtimestamp(run.info.start_time/1000).strftime('%Y-%m-%d %H:%M:%S')\n", + " duration = run.data.metrics.get('duration_ms', 'N/A')\n", + " temp = run.data.params.get('temperature', 'N/A')\n", + " max_tokens = run.data.params.get('max_tokens', 'N/A')\n", + " total_tokens = run.data.metrics.get('total_tokens_approx', 'N/A')\n", + " \n", + " print(f\"\\nRun {i}:\")\n", + " print(f\" ID: {run.info.run_id[:12]}...\")\n", + " print(f\" Time: {start_time}\")\n", + " print(f\" Status: {run.info.status}\")\n", + " print(f\" Temperature: {temp}\")\n", + " print(f\" Max Tokens: {max_tokens}\")\n", + " print(f\" Duration: {duration} ms\")\n", + " print(f\" Total Tokens: {total_tokens}\")\n", + " \n", + " print(\"\\n\" + \"=\" * 60)\n", + " print(\"SUMMARY:\")\n", + " successful = sum(1 for r in runs if r.info.status == 'FINISHED')\n", + " durations = [r.data.metrics.get('duration_ms', 0) for r in runs if r.data.metrics.get('duration_ms')]\n", + " avg_duration = sum(durations) / len(durations) if durations else 0\n", + " \n", + " print(f\" Total Runs: {len(runs)}\")\n", + " print(f\" Successful: {successful}\")\n", + " print(f\" Failed: {len(runs) - successful}\")\n", + " print(f\" Avg Duration: {avg_duration:.1f} ms\" if avg_duration else \" Avg Duration: N/A\")\n", + "\n", + "print(\"\\n\" + \"=\" * 60)\n", + "print(\"MLflow verification complete\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.9" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/ai/ai-starter-kit/notebooks/test_ollama.py b/ai/ai-starter-kit/notebooks/test_ollama.py new file mode 100644 index 000000000..58cf22e05 --- /dev/null +++ b/ai/ai-starter-kit/notebooks/test_ollama.py @@ -0,0 +1,11 @@ +from ollama import Client +client = Client( + host='http://ai-starter-kit-ollama:11434', + headers={'x-some-header': 'some-value'} +) +response = client.chat(model='gemma3', messages=[ + { + 'role': 'user', + 'content': 'Why is the sky blue?', + }, +]) diff --git a/ai/ai-starter-kit/notebooks/test_ray.py b/ai/ai-starter-kit/notebooks/test_ray.py new file mode 100644 index 000000000..b04ef8cd8 --- /dev/null +++ b/ai/ai-starter-kit/notebooks/test_ray.py @@ -0,0 +1,12 @@ +!pip install ray +from ray.job_submission import JobSubmissionClient + +# If using a remote cluster, replace 127.0.0.1 with the head node's IP address or set up port forwarding. +client = JobSubmissionClient() +job_id = client.submit_job( + # Entrypoint shell command to execute + entrypoint="python script.py", + # Path to the local directory that contains the script.py file + runtime_env={"working_dir": "./","excludes": ["/.cache","/.local"]} +) +print(job_id) \ No newline at end of file From d57a1ea1393df3a52c8ebbce327ca10a75476e89 Mon Sep 17 00:00:00 2001 From: Vlado Djerek Date: Thu, 11 Sep 2025 15:48:20 +0200 Subject: [PATCH 02/14] Updated PVCs. 
added GPU support, added MacOS support --- .gitignore | 5 +- ai/ai-starter-kit/Makefile | 35 +- .../helm-chart/ai-starter-kit/README.md | 291 ++++++++ .../ai-starter-kit/files/download_models.py | 29 +- .../files/multi-agent-ollama.ipynb | 10 +- .../templates/generic-device-plugin.yaml | 65 ++ .../ai-starter-kit/templates/pvc-mc-only.yaml | 28 + .../templates/ramalama-deployment.yaml | 23 +- .../ai-starter-kit/values-addon.yaml | 13 + .../ai-starter-kit/values-gke-gpu.yaml | 219 ++++++ .../ai-starter-kit/values-gke-ollama-gpu.yaml | 44 ++ .../values-gke-ramalama-gpu.yaml | 38 ++ .../helm-chart/ai-starter-kit/values-gke.yaml | 43 +- .../ai-starter-kit/values-macos-gpu.yaml | 98 +++ .../helm-chart/ai-starter-kit/values.yaml | 7 + ai/ai-starter-kit/notebooks/multi-agent.ipynb | 621 ------------------ ai/ai-starter-kit/notebooks/test_ollama.py | 11 - ai/ai-starter-kit/notebooks/test_ray.py | 12 - 18 files changed, 918 insertions(+), 674 deletions(-) create mode 100644 ai/ai-starter-kit/helm-chart/ai-starter-kit/README.md create mode 100644 ai/ai-starter-kit/helm-chart/ai-starter-kit/templates/generic-device-plugin.yaml create mode 100644 ai/ai-starter-kit/helm-chart/ai-starter-kit/templates/pvc-mc-only.yaml create mode 100644 ai/ai-starter-kit/helm-chart/ai-starter-kit/values-addon.yaml create mode 100644 ai/ai-starter-kit/helm-chart/ai-starter-kit/values-gke-gpu.yaml create mode 100644 ai/ai-starter-kit/helm-chart/ai-starter-kit/values-gke-ollama-gpu.yaml create mode 100644 ai/ai-starter-kit/helm-chart/ai-starter-kit/values-gke-ramalama-gpu.yaml create mode 100644 ai/ai-starter-kit/helm-chart/ai-starter-kit/values-macos-gpu.yaml delete mode 100644 ai/ai-starter-kit/notebooks/multi-agent.ipynb delete mode 100644 ai/ai-starter-kit/notebooks/test_ollama.py delete mode 100644 ai/ai-starter-kit/notebooks/test_ray.py diff --git a/.gitignore b/.gitignore index f8e65abf8..0635abd98 100644 --- a/.gitignore +++ b/.gitignore @@ -46,4 +46,7 @@ cscope.* # Helm chart dependecies cache **/Chart.lock -**/charts/*.tgz \ No newline at end of file +**/charts/*.tgz + +# Helm chart output directory +ai/ai-starter-kit/out \ No newline at end of file diff --git a/ai/ai-starter-kit/Makefile b/ai/ai-starter-kit/Makefile index eead31a50..40717cb20 100644 --- a/ai/ai-starter-kit/Makefile +++ b/ai/ai-starter-kit/Makefile @@ -1,21 +1,48 @@ +.PHONY: check_hf_token check_OCI_target package_helm lint dep_update install install_gke start uninstall push_helm + +check_hf_token: +ifndef HF_TOKEN + $(error HF_TOKEN is not set) +endif + +check_OCI_target: +ifndef OCI_HELM_TARGET + $(error OCI_HELM_TARGET is not set) +endif + +package_helm: + helm package helm-chart/ai-starter-kit/ --destination out/ + +push_helm: check_OCI_target + helm push out/ai-starter-kit* oci://$$OCI_HELM_TARGET + lint: helm lint helm-chart/ai-starter-kit dep_update: helm dependency update helm-chart/ai-starter-kit -install: - helm upgrade --install ai-starter-kit helm-chart/ai-starter-kit --set huggingface.token="your_hf_token" --timeout 10m -f helm-chart/ai-starter-kit/values.yaml +install: check_hf_token + helm upgrade --install ai-starter-kit helm-chart/ai-starter-kit --set huggingface.token="$$HF_TOKEN" --timeout 10m -f helm-chart/ai-starter-kit/values.yaml + +install_gke: check_hf_token + helm upgrade --install ai-starter-kit helm-chart/ai-starter-kit --set huggingface.token="$$HF_TOKEN" --timeout 10m -f helm-chart/ai-starter-kit/values-gke.yaml -install_gke: - helm upgrade --install ai-starter-kit helm-chart/ai-starter-kit --set 
huggingface.token="your_hf_token" --timeout 10m -f helm-chart/ai-starter-kit/values-gke.yaml +install_gke_gpu: check_hf_token + helm upgrade --install ai-starter-kit helm-chart/ai-starter-kit --set huggingface.token="$$HF_TOKEN" --timeout 10m -f helm-chart/ai-starter-kit/values-gke-gpu.yaml start: mkdir -p /tmp/models-cache minikube start --cpus 4 --memory 15000 --mount --mount-string="/tmp/models-cache:/tmp/models-cache" +start_gpu: + mkdir -p $HOME/models-cache + minikube start --driver krunkit --cpus 4 --memory 15000 --mount --mount-string="$HOME/models-cache:$HOME/models-cache" + uninstall: helm uninstall ai-starter-kit + kubectl delete pod jupyter-user + kubectl delete pvc ai-starter-kit-jupyterhub-hub-db-dir destroy: minikube delete diff --git a/ai/ai-starter-kit/helm-chart/ai-starter-kit/README.md b/ai/ai-starter-kit/helm-chart/ai-starter-kit/README.md new file mode 100644 index 000000000..741d27007 --- /dev/null +++ b/ai/ai-starter-kit/helm-chart/ai-starter-kit/README.md @@ -0,0 +1,291 @@ +# AI Starter Kit + +A comprehensive Helm chart for deploying a complete AI/ML development environment on Kubernetes. This starter kit provides a ready-to-use platform with JupyterHub notebooks, model serving capabilities, and experiment tracking - perfect for teams starting their AI journey or prototyping AI applications. + +## Purpose + +The AI Starter Kit simplifies the deployment of AI infrastructure by providing: + +- **JupyterHub**: Multi-user notebook environment with pre-configured AI/ML libraries +- **Model Serving**: Support for both Ollama and Ramalama model servers +- **MLflow**: Experiment tracking and model management +- **GPU Support**: Configurations for GPU acceleration on GKE and macOS +- **Model Caching**: Persistent storage for efficient model management +- **Example Notebooks**: Pre-loaded notebooks to get you started immediately + +## Prerequisites + +### General Requirements +- Kubernetes cluster (minikube, GKE) +- Helm 3.x installed +- kubectl configured to access your cluster +- Hugging Face token for accessing models + +### Platform-Specific Requirements + +#### Minikube (Local Development) +- Docker Desktop or similar container runtime +- Minimum 4 CPU cores and 16GB RAM available +- 40GB+ free disk space + +#### GKE (Google Kubernetes Engine) +- Google Cloud CLI (`gcloud`) installed and configured +- Appropriate GCP permissions to create clusters + +#### macOS with GPU (Apple Silicon) +- macOS with Apple Silicon (M1/M2/M3/M4) +- minikube with krunkit driver +- 16GB+ RAM recommended + +## Installation + +### Quick Start (Minikube) + +1. **Start minikube with persistent storage:** +```bash +minikube start --cpus 4 --memory 15000 \ + --mount --mount-string="/tmp/models-cache:/tmp/models-cache" +``` + +2. **Install the chart:** +```bash +helm install ai-starter-kit . \ + --set huggingface.token="YOUR_HF_TOKEN" \ + -f values.yaml +``` + +3. **Access JupyterHub:** +```bash +kubectl port-forward svc/ai-starter-kit-jupyterhub-proxy-public 8080:80 +``` +Navigate to http://localhost:8080 and login with any username and password `sneakypass` + +### GKE Deployment + +1. **Create a GKE Autopilot cluster:** +```bash +export REGION=us-central1 +export CLUSTER_NAME="ai-starter-cluster" +export PROJECT_ID=$(gcloud config get project) + +gcloud container clusters create-auto ${CLUSTER_NAME} \ + --project=${PROJECT_ID} \ + --region=${REGION} \ + --release-channel=rapid \ + --labels=created-by=ai-on-gke,guide=ai-starter-kit +``` + +2. 
**Get cluster credentials:** +```bash +gcloud container clusters get-credentials ${CLUSTER_NAME} --location=${REGION} +``` + +3. **Install the chart with GKE-specific values:** +```bash +helm install ai-starter-kit . \ + --set huggingface.token="YOUR_HF_TOKEN" \ + -f values.yaml \ + -f values-gke.yaml +``` + +### GKE with GPU (Ollama) + +For GPU-accelerated model serving with Ollama: + +```bash +helm install ai-starter-kit . \ + --set huggingface.token="YOUR_HF_TOKEN" \ + -f values-gke.yaml \ + -f values-ollama-gpu.yaml +``` + +### GKE with GPU (Ramalama) + +For GPU-accelerated model serving with Ramalama: + +```bash +helm install ai-starter-kit . \ + --set huggingface.token="YOUR_HF_TOKEN" \ + -f values-gke.yaml \ + -f values-ramalama-gpu.yaml +``` + +### macOS with Apple Silicon GPU + +1. **Start minikube with krunkit driver:** +```bash +minikube start --driver krunkit \ + --cpus 8 --memory 16000 --disk-size 40000mb \ + --mount --mount-string="/tmp/models-cache:/tmp/models-cache" +``` + +2. **Install with macOS GPU support:** +```bash +helm install ai-starter-kit . \ + --set huggingface.token="YOUR_HF_TOKEN" \ + -f values.yaml \ + -f values-macos.yaml +``` + +## Configuration + +### Key Configuration Options + +| Parameter | Description | Default | +|-----------|-------------|---------| +| `huggingface.token` | HuggingFace token for models | `"YOUR_HF_TOKEN"` | +| `ollama.enabled` | Enable Ollama model server | `true` | +| `ramalama.enabled` | Enable Ramalama model server | `true` | +| `modelsCachePvc.size` | Size of model cache storage | `10Gi` | +| `jupyterhub.singleuser.defaultUrl` | Default notebook path | `/lab/tree/welcome.ipynb` | +| `mlflow.enabled` | Enable MLflow tracking server | `true` | + +### Storage Configuration + +The chart supports different storage configurations: + +- **Local Development**: Uses hostPath volumes with minikube mount +- **GKE**: Uses standard GKE storage classes (`standard-rwo`, `standard-rwx`) +- **Custom**: Configure via `modelsCachePvc.storageClassName` + +### Model Servers + +#### Ollama +Ollama is enabled by default and provides: +- Easy model management +- REST API for inference +- Support for popular models (Llama, Gemma, Qwen, etc.) +- GPU acceleration support + +#### Ramalama +Ramalama provides: +- Alternative model serving solution +- Support for CUDA and Metal (macOS) acceleration +- Lightweight deployment option + +You can run either Ollama or Ramalama, but not both simultaneously. Toggle using: +```yaml +ollama: + enabled: true/false +ramalama: + enabled: true/false +``` + +## Usage + +### Accessing Services + +#### JupyterHub +```bash +# Port forward to access JupyterHub +kubectl port-forward svc/ai-starter-kit-jupyterhub-proxy-public 8080:80 +# Access at: http://localhost:8080 +# Default password: sneakypass +``` + +#### MLflow +```bash +# Port forward to access MLflow UI +kubectl port-forward svc/ai-starter-kit-mlflow 5000:5000 +# Access at: http://localhost:5000 +``` + +#### Ollama/Ramalama API +```bash +# For Ollama +kubectl port-forward svc/ai-starter-kit-ollama 11434:11434 + +# For Ramalama +kubectl port-forward svc/ai-starter-kit-ramalama 8080:8080 +``` + +### Pre-loaded Example Notebooks + +The JupyterHub environment comes with pre-loaded example notebooks: +- `chat_bot.ipynb`: Simple chatbot interface using Ollama for conversational AI. +- `multi-agent-ollama.ipynb`: Multi-agent workflow demonstration using Ollama. +- `multi-agent-ramalama.ipynb`: Similar multi-agent workflow using RamaLama runtime for comparison. 
+- `welcome.ipynb`: Introduction notebook with embedding model examples using Qwen models. + +These notebooks are automatically copied to your workspace on first login. + +## Architecture + +The AI Starter Kit consists of: + +1. **JupyterHub**: Multi-user notebook server with persistent storage +2. **Model Serving**: Choice of Ollama or Ramalama for LLM inference +3. **MLflow**: Experiment tracking and model registry +4. **Persistent Storage**: Shared model cache to avoid redundant downloads +5. **Init Containers**: Automated setup of models and notebooks + +## Cleanup + +### Uninstall the chart +```bash +helm uninstall ai-starter-kit +``` + +### Delete persistent volumes (optional) +```bash +kubectl delete pvc ai-starter-kit-models-cache-pvc +kubectl delete pvc ai-starter-kit-jupyterhub-hub-db-dir +``` + +### Delete GKE cluster +```bash +gcloud container clusters delete ${CLUSTER_NAME} --region=${REGION} +``` + +### Stop minikube +```bash +minikube stop +minikube delete # To completely remove the cluster +``` + +## Troubleshooting + +### Common Issues + +#### Pods stuck in Pending state +- Check available resources: `kubectl describe pod <pod-name>` +- Increase cluster resources or reduce resource requests + +#### Model download failures +- Verify Hugging Face token is set correctly +- Check internet connectivity from pods +- Increase init container timeout in values + +#### GPU not detected +- Verify GPU nodes are available: `kubectl get nodes -o wide` +- Check GPU driver installation +- Ensure correct node selectors and tolerations + +#### Storage issues +- Verify PVC is bound: `kubectl get pvc` +- Check storage class availability: `kubectl get storageclass` +- Ensure sufficient disk space + +### Debug Commands +```bash +# Check pod status +kubectl get pods -n default + +# View pod logs +kubectl logs -f <pod-name> + +# Describe pod for events +kubectl describe pod <pod-name> + +# Check resource usage +kubectl top nodes +kubectl top pods +``` + +## Resources + +- [JupyterHub Documentation](https://jupyterhub.readthedocs.io/) +- [MLflow Documentation](https://mlflow.org/docs/latest/index.html) +- [Ollama Documentation](https://ollama.ai/docs) +- [Kubernetes Documentation](https://kubernetes.io/docs/) +- [Helm Documentation](https://helm.sh/docs/) \ No newline at end of file diff --git a/ai/ai-starter-kit/helm-chart/ai-starter-kit/files/download_models.py b/ai/ai-starter-kit/helm-chart/ai-starter-kit/files/download_models.py index aee733734..69529726b 100644 --- a/ai/ai-starter-kit/helm-chart/ai-starter-kit/files/download_models.py +++ b/ai/ai-starter-kit/helm-chart/ai-starter-kit/files/download_models.py @@ -1,17 +1,22 @@ +import sys from huggingface_hub import snapshot_download # --- Model Download --- -# List your desired Hugging Face model names here -model_names = [ - "Qwen/Qwen3-Embedding-0.6B", -] +if __name__ == "__main__": + # List your desired Hugging Face model names here + model_names = [ + "Qwen/Qwen3-Embedding-0.6B", + ] -for model_name in model_names: - print(f"--- Downloading {model_name} ---") - try: - snapshot_download(repo_id=model_name) - print(f"Successfully cached {model_name}") - except Exception as e: - print(f"Failed to download {model_name}. Error: {e}") + for model_name in model_names: + print(f"--- Downloading {model_name} ---") + try: + if len(sys.argv) > 1: + snapshot_download(repo_id=model_name, cache_dir=sys.argv[1]) + else: + snapshot_download(repo_id=model_name) + print(f"Successfully cached {model_name}") + except Exception as e: + print(f"Failed to download {model_name}. 
Error: {e}") -print("--- Model download process finished. ---") + print("--- Model download process finished. ---") diff --git a/ai/ai-starter-kit/helm-chart/ai-starter-kit/files/multi-agent-ollama.ipynb b/ai/ai-starter-kit/helm-chart/ai-starter-kit/files/multi-agent-ollama.ipynb index c8f8040c5..a25cfbe2a 100644 --- a/ai/ai-starter-kit/helm-chart/ai-starter-kit/files/multi-agent-ollama.ipynb +++ b/ai/ai-starter-kit/helm-chart/ai-starter-kit/files/multi-agent-ollama.ipynb @@ -5,9 +5,9 @@ "id": "079fadd2-200e-4d37-8ae2-be2792e3a24e", "metadata": {}, "source": [ - "### Cell 1 - Install RamaLama and verify environment\n", + "### Cell 1 - Install Ollama and verify environment\n", "\n", - "Installs RamaLama for local model serving, sets up environment variables, and verifies the installation." + "Installs Ollama for local model serving, sets up environment variables, and verifies the installation." ] }, { @@ -143,7 +143,7 @@ " mlflow_uri = os.getenv(\"MLFLOW_TRACKING_URI\")\n", " if mlflow_uri:\n", " mlflow.set_tracking_uri(mlflow_uri)\n", - " mlflow.set_experiment(\"ramalama-llm\")\n", + " mlflow.set_experiment(\"ollama-llm\")\n", " USE_MLFLOW = True\n", "except:\n", " pass\n", @@ -399,7 +399,7 @@ " return f\"Error: {e}\"\n", "\n", "print(\"=\" * 60)\n", - "print(\"Running Multi-Agent Workflow with RamaLama/Ollama\")\n", + "print(\"Running Multi-Agent Workflow with Ollama\")\n", "print(\"=\" * 60)\n", "\n", "task = \"Research the latest advancements in quantum computing as of 2025.\"\n", @@ -465,7 +465,7 @@ "mlflow.set_tracking_uri(tracking_uri)\n", "print(f\"MLflow Tracking URI: {tracking_uri}\")\n", "\n", - "exp_name = \"ramalama-llm\"\n", + "exp_name = \"ollama-llm\"\n", "exp = mlflow.set_experiment(exp_name)\n", "print(f\"Experiment: {exp.name} (ID: {exp.experiment_id})\")\n", "print(\"-\" * 60)\n", diff --git a/ai/ai-starter-kit/helm-chart/ai-starter-kit/templates/generic-device-plugin.yaml b/ai/ai-starter-kit/helm-chart/ai-starter-kit/templates/generic-device-plugin.yaml new file mode 100644 index 000000000..3e387f5ce --- /dev/null +++ b/ai/ai-starter-kit/helm-chart/ai-starter-kit/templates/generic-device-plugin.yaml @@ -0,0 +1,65 @@ +{{- if .Values.genericDevicePlugin.enabled }} +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: {{ include "ai-starter-kit.fullname" . }}-generic-device-plugin + namespace: kube-system + labels: + {{- include "ai-starter-kit.labels" . | nindent 4 }} + app.kubernetes.io/component: generic-device-plugin +spec: + selector: + matchLabels: + {{- include "ai-starter-kit.selectorLabels" . | nindent 6 }} + app.kubernetes.io/component: generic-device-plugin + template: + metadata: + labels: + {{- include "ai-starter-kit.selectorLabels" . 
| nindent 8 }} + app.kubernetes.io/component: generic-device-plugin + spec: + priorityClassName: system-node-critical + tolerations: + - operator: "Exists" + effect: "NoExecute" + - operator: "Exists" + effect: "NoSchedule" + containers: + - image: {{ .Values.genericDevicePlugin.image.repository | default "squat/generic-device-plugin" }}:{{ .Values.genericDevicePlugin.image.tag | default "latest" }} + imagePullPolicy: {{ .Values.genericDevicePlugin.image.pullPolicy | default "IfNotPresent" }} + name: generic-device-plugin + args: + - --device + - | + name: dri + groups: + - count: {{ .Values.genericDevicePlugin.device.count | default 4 }} + paths: + - path: /dev/dri + resources: + requests: + cpu: {{ .Values.genericDevicePlugin.resources.requests.cpu | default "50m" }} + memory: {{ .Values.genericDevicePlugin.resources.requests.memory | default "10Mi" }} + limits: + cpu: {{ .Values.genericDevicePlugin.resources.limits.cpu | default "50m" }} + memory: {{ .Values.genericDevicePlugin.resources.limits.memory | default "20Mi" }} + ports: + - containerPort: 8080 + name: http + securityContext: + privileged: true + volumeMounts: + - name: device-plugin + mountPath: /var/lib/kubelet/device-plugins + - name: dev + mountPath: /dev + volumes: + - name: device-plugin + hostPath: + path: /var/lib/kubelet/device-plugins + - name: dev + hostPath: + path: /dev + updateStrategy: + type: RollingUpdate +{{- end }} \ No newline at end of file diff --git a/ai/ai-starter-kit/helm-chart/ai-starter-kit/templates/pvc-mc-only.yaml b/ai/ai-starter-kit/helm-chart/ai-starter-kit/templates/pvc-mc-only.yaml new file mode 100644 index 000000000..99179f9f1 --- /dev/null +++ b/ai/ai-starter-kit/helm-chart/ai-starter-kit/templates/pvc-mc-only.yaml @@ -0,0 +1,28 @@ +{{- if .Values.modelsCacheOnlyPvc.enabled -}} +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: {{ .Release.Name }}-models-cache-only-pvc + labels: + app.kubernetes.io/managed-by: {{ .Release.Service | quote }} + app.kubernetes.io/instance: {{ .Release.Name | quote }} + helm.sh/chart: "{{ .Chart.Name }}-{{ .Chart.Version }}" +spec: + accessModes: + {{- toYaml .Values.modelsCacheOnlyPvc.accessModes | nindent 4 }} + {{- if .Values.localPersistence.enabled }} + storageClassName: manual + {{- else }} + {{- /* + If storageClassName is set to a specific class, it will be used. + If storageClassName is set to an empty string (""), no storage class will be used for provisioning. + If storageClassName is null or omitted, the default storage class will be used. + */}} + {{- if or .Values.modelsCacheOnlyPvc.storageClassName (eq .Values.modelsCacheOnlyPvc.storageClassName "") }} + storageClassName: {{ .Values.modelsCacheOnlyPvc.storageClassName | quote }} + {{- end }} + {{- end }} + resources: + requests: + storage: {{ .Values.modelsCacheOnlyPvc.size }} +{{- end -}} diff --git a/ai/ai-starter-kit/helm-chart/ai-starter-kit/templates/ramalama-deployment.yaml b/ai/ai-starter-kit/helm-chart/ai-starter-kit/templates/ramalama-deployment.yaml index 0d88703d8..5650c35c4 100644 --- a/ai/ai-starter-kit/helm-chart/ai-starter-kit/templates/ramalama-deployment.yaml +++ b/ai/ai-starter-kit/helm-chart/ai-starter-kit/templates/ramalama-deployment.yaml @@ -1,4 +1,3 @@ ---- {{- if .Values.ramalama.enabled }} apiVersion: apps/v1 kind: Deployment @@ -8,6 +7,7 @@ metadata: {{- include "ai-starter-kit.labels" . | nindent 4 }} app.kubernetes.io/component: ramalama spec: + replicas: 1 selector: matchLabels: {{- include "ai-starter-kit.selectorLabels" . 
| nindent 6 }} @@ -18,16 +18,25 @@ spec: {{- include "ai-starter-kit.selectorLabels" . | nindent 8 }} app.kubernetes.io/component: ramalama spec: + {{- if .Values.ramalama.nodeSelector }} + nodeSelector: + {{- toYaml .Values.ramalama.nodeSelector | nindent 8 }} + {{- end }} containers: - name: ramalama image: "{{ .Values.ramalama.image.repository }}:{{ .Values.ramalama.image.tag }}" imagePullPolicy: {{ .Values.ramalama.image.pullPolicy }} + {{- if .Values.ramalama.command }} command: {{- toYaml .Values.ramalama.command | nindent 10 }} + {{- end }} ports: - - name: http - containerPort: 8080 - protocol: TCP + - containerPort: 8080 + protocol: TCP + {{- if .Values.ramalama.resources }} + resources: + {{- toYaml .Values.ramalama.resources | nindent 10 }} + {{- end }} --- apiVersion: v1 kind: Service @@ -39,10 +48,10 @@ metadata: spec: type: ClusterIP ports: - - name: http - port: 8080 - targetPort: http + - port: 8080 + targetPort: 8080 protocol: TCP + name: http selector: {{- include "ai-starter-kit.selectorLabels" . | nindent 4 }} app.kubernetes.io/component: ramalama diff --git a/ai/ai-starter-kit/helm-chart/ai-starter-kit/values-addon.yaml b/ai/ai-starter-kit/helm-chart/ai-starter-kit/values-addon.yaml new file mode 100644 index 000000000..b01d2d6bb --- /dev/null +++ b/ai/ai-starter-kit/helm-chart/ai-starter-kit/values-addon.yaml @@ -0,0 +1,13 @@ +ray-cluster: + enabled: false + +ramalama: + enabled: true + command: + - /bin/sh + - -c + - ramalama pull qwen2.5:1.5b && ramalama serve qwen2.5:1.5b --port 8080 + image: + repository: "quay.io/ramalama/ramalama" + tag: "latest" + pullPolicy: IfNotPresent diff --git a/ai/ai-starter-kit/helm-chart/ai-starter-kit/values-gke-gpu.yaml b/ai/ai-starter-kit/helm-chart/ai-starter-kit/values-gke-gpu.yaml new file mode 100644 index 000000000..b2d48a624 --- /dev/null +++ b/ai/ai-starter-kit/helm-chart/ai-starter-kit/values-gke-gpu.yaml @@ -0,0 +1,219 @@ +jupyterhub: + singleuser: + initContainers: + - name: chown-home-mount-dir + image: jupyterhub/k8s-singleuser-sample:4.2.0 + securityContext: + runAsUser: 0 + command: ["chown", "jovyan", "/home/jovyan"] + volumeMounts: + - name: home + mountPath: /home/jovyan + subPath: jupyterhub_workspace + + - name: model-initializer + image: jupyterhub/k8s-singleuser-sample:4.2.0 + env: + - name: HF_TOKEN + valueFrom: + secretKeyRef: + name: ai-starter-kit-hf-token-secret + key: token + command: + - /bin/sh + - -c + - | + set -e + pip install -r /tmp/requirements.txt + python /tmp/download_models.py "/mnt/jovyan/jupyter_models" + for f in /tmp/*.ipynb; do + if [ -f "$f" ]; then + cp -n "$f" /home/jovyan/ + fi + done + volumeMounts: + - name: home + mountPath: /home/jovyan + subPath: jupyterhub_workspace + - name: init-files + mountPath: /tmp + readOnly: true + resources: + requests: + cpu: "2" + memory: 16Gi + ephemeral-storage: 10Gi + limits: + cpu: "4" + memory: 32Gi + ephemeral-storage: 10Gi + + extraResource: + limits: + ephemeral-storage: '10G' + nvidia.com/gpu: 1 + guarantees: + ephemeral-storage: '10G' + nvidia.com/gpu: 1 + + nodeSelector: + cloud.google.com/gke-accelerator: nvidia-l4 + + image: + name: jupyter/tensorflow-notebook # - gpu optimzied img + tag: "latest" + + storage: + type: static + static: + pvcName: "ai-starter-kit-models-cache-pvc" + subPath: "jupyterhub_workspace" + capacity: 20Gi + homeMountPath: /home/jovyan + extraVolumes: + - name: init-files + configMap: + name: "ai-starter-kit-init-files" + - name: models-cache + persistentVolumeClaim: + claimName: ai-starter-kit-models-cache-only-pvc 
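+      # The models-cache claim above is the ReadWriteMany cache (ai-starter-kit-models-cache-only-pvc);
+      # it is mounted at /mnt/jovyan via extraVolumeMounts below, where the model-initializer init
+      # container places Hugging Face snapshots (/mnt/jovyan/jupyter_models), so user pods can share
+      # one model cache instead of re-downloading models.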
+ extraVolumeMounts: + - mountPath: "/mnt/jovyan" + name: models-cache + + cloudMetadata: + blockWithIptables: false + memory: + limit: 32G + guarantee: 16G + + profileList: + - display_name: "GPU Environment" + description: "Jupyter environment with GPU" + default: true + kubespawner_override: + extra_resource_limits: + nvidia.com/gpu: "1" + extra_resource_guarantees: + nvidia.com/gpu: "1" + node_selector: + cloud.google.com/gke-accelerator: nvidia-l4 + #priority_class_name: "high-priority" + + scheduling: + userScheduler: + enabled: false + +ray-cluster: + enabled: true + image: + tag: 2.41.0-py312-gpu + + head: + nodeSelector: + cloud.google.com/gke-accelerator: nvidia-l4 + resources: + requests: + cpu: "4" + memory: "4G" + ephemeral-storage: 10Gi + nvidia.com/gpu: 1 + limits: + cpu: "8" + memory: "6G" + ephemeral-storage: 10Gi + nvidia.com/gpu: 1 + + worker: + nodeSelector: + cloud.google.com/gke-accelerator: nvidia-l4 + containerEnv: + - name: PYTHONPATH + value: "/mnt/ray-storage/libraries" + - name: TMPDIR + value: "/mnt/ray-storage/temp" + - name: CUDA_VISIBLE_DEVICES + value: "0" + - name: NVIDIA_VISIBLE_DEVICES + value: "all" + resources: + requests: + cpu: "4" + memory: "4G" + ephemeral-storage: 10Gi + nvidia.com/gpu: 1 + limits: + cpu: "8" + memory: "6G" + ephemeral-storage: 10Gi + nvidia.com/gpu: 1 + volumes: + - name: ray-pvc-storage + persistentVolumeClaim: + claimName: "{{ .Release.Name }}-ray-pvc" + volumeMounts: + - name: ray-pvc-storage + mountPath: /mnt/ray-storage + podSecurityContext: + fsGroup: 1000 + +ollama: + enabled: true + ollama: + models: + pull: + - gemma3 + persistentVolume: + enabled: true + existingClaim: "ai-starter-kit-models-cache-only-pvc" + subPath: "ollama" + +ramalama: + enabled: true + nodeSelector: + cloud.google.com/gke-accelerator: nvidia-l4 + command: + - /bin/sh + - -c + - | + echo "Autopilot GPU node - starting GPU server..." + nvidia-smi + echo "Pulling model..." + ramalama pull qwen2.5:1.5b + echo "Starting GPU server..." 
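+      # qwen2.5:1.5b is a small model that should fit comfortably on the single nvidia-l4 selected above;
+      # '--device cuda' runs inference on the GPU and '--port 8080' matches the ramalama Service port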
+ ramalama serve qwen2.5:1.5b --port 8080 --device cuda + image: + repository: "quay.io/ramalama/cuda" + tag: "latest" + pullPolicy: IfNotPresent + resources: + requests: + cpu: "2" + memory: "4Gi" + nvidia.com/gpu: 1 + limits: + cpu: "4" + memory: "8Gi" + nvidia.com/gpu: 1 + +rayPvc: + enabled: true + +modelsCachePvc: + storageClassName: "premium-rwo" + accessModes: + - ReadWriteOnce + size: 20Gi + +modelsCacheOnlyPvc: + enabled: true + storageClassName: "standard-rwx" + accessModes: + - ReadWriteMany + size: 100Gi + +localPersistence: + enabled: false + +genericDevicePlugin: + enabled: false diff --git a/ai/ai-starter-kit/helm-chart/ai-starter-kit/values-gke-ollama-gpu.yaml b/ai/ai-starter-kit/helm-chart/ai-starter-kit/values-gke-ollama-gpu.yaml new file mode 100644 index 000000000..b600cba99 --- /dev/null +++ b/ai/ai-starter-kit/helm-chart/ai-starter-kit/values-gke-ollama-gpu.yaml @@ -0,0 +1,44 @@ +ramalama: + enabled: false # to avoid running two model servers (ollama + ramalama) at the same time + +ollama: + enabled: true + + nodeSelector: + cloud.google.com/gke-accelerator: nvidia-l4 + + image: + repository: ollama/ollama + tag: "latest" + pullPolicy: IfNotPresent + + resources: + requests: + cpu: "2" + memory: "8Gi" + nvidia.com/gpu: 1 + limits: + cpu: "4" + memory: "16Gi" + nvidia.com/gpu: 1 + + ## if GPU nodes will be tainted + # tolerations: + # - key: "nvidia.com/gpu" + # operator: "Exists" + # effect: "NoSchedule" + + ollama: + models: + pull: + - qwen2.5:1.5b + - gemma3 + + persistentVolume: + enabled: true + existingClaim: "ai-starter-kit-models-cache-pvc" + subPath: "ollama" + + service: + type: ClusterIP + port: 11434 \ No newline at end of file diff --git a/ai/ai-starter-kit/helm-chart/ai-starter-kit/values-gke-ramalama-gpu.yaml b/ai/ai-starter-kit/helm-chart/ai-starter-kit/values-gke-ramalama-gpu.yaml new file mode 100644 index 000000000..dcbead050 --- /dev/null +++ b/ai/ai-starter-kit/helm-chart/ai-starter-kit/values-gke-ramalama-gpu.yaml @@ -0,0 +1,38 @@ +ollama: + enabled: false # to avoid running two model servers (ollama + ramalama) at the same time + +ramalama: + enabled: true + + nodeSelector: + cloud.google.com/gke-accelerator: nvidia-l4 + + image: + repository: quay.io/ramalama/cuda + tag: "latest" + pullPolicy: IfNotPresent + + resources: + requests: + cpu: "2" + memory: "4Gi" + nvidia.com/gpu: 1 + limits: + cpu: "4" + memory: "8Gi" + nvidia.com/gpu: 1 + + ## if GPU nodes will be tainted + # tolerations: + # - key: "nvidia.com/gpu" + # operator: "Exists" + # effect: "NoSchedule" + + command: + - /bin/sh + - -c + - | + set -e + echo "GPU info:" && nvidia-smi || true + echo "Pulling model..." && ramalama pull qwen2.5:1.5b + echo "Starting server..." 
&& exec ramalama serve qwen2.5:1.5b --port 8080 --device cuda \ No newline at end of file diff --git a/ai/ai-starter-kit/helm-chart/ai-starter-kit/values-gke.yaml b/ai/ai-starter-kit/helm-chart/ai-starter-kit/values-gke.yaml index 02e4ea619..22849de1d 100644 --- a/ai/ai-starter-kit/helm-chart/ai-starter-kit/values-gke.yaml +++ b/ai/ai-starter-kit/helm-chart/ai-starter-kit/values-gke.yaml @@ -27,7 +27,7 @@ jupyterhub: set -e pip install -r /tmp/requirements.txt - python /tmp/download_models.py + python /tmp/download_models.py "/mnt/jovyan/jupyter_models" # populate workspace with initial notebook files for f in /tmp/*.ipynb; do @@ -42,6 +42,8 @@ jupyterhub: - name: home mountPath: /home/jovyan subPath: jupyterhub_workspace + - name: models-cache + mountPath: /mnt/jovyan - name: init-files mountPath: /tmp readOnly: true @@ -54,6 +56,23 @@ jupyterhub: cpu: "4" memory: 32Gi ephemeral-storage: 10Gi + storage: + type: static + static: + pvcName: "ai-starter-kit-models-cache-pvc" + subPath: "jupyterhub_workspace" + capacity: 20Gi + homeMountPath: /home/jovyan + extraVolumes: + - name: init-files + configMap: + name: "ai-starter-kit-init-files" + - name: models-cache + persistentVolumeClaim: + claimName: ai-starter-kit-models-cache-only-pvc + extraVolumeMounts: + - mountPath: "/mnt/jovyan" + name: models-cache cloudMetadata: # Without this disabled, the GKE Autopilot Warden will raise an error about container with escalated privilieges blockWithIptables: false @@ -109,13 +128,35 @@ ray-cluster: podSecurityContext: fsGroup: 1000 +ollama: + enabled: true + ollama: + models: + pull: + - gemma3 + persistentVolume: + enabled: true + existingClaim: "ai-starter-kit-models-cache-only-pvc" + subPath: "ollama" + rayPvc: enabled: true modelsCachePvc: + storageClassName: "premium-rwo" + accessModes: + - ReadWriteOnce + size: 20Gi + +modelsCacheOnlyPvc: + enabled: true storageClassName: "standard-rwx" accessModes: - ReadWriteMany + size: 100Gi localPersistence: enabled: false + +genericDevicePlugin: + enabled: false diff --git a/ai/ai-starter-kit/helm-chart/ai-starter-kit/values-macos-gpu.yaml b/ai/ai-starter-kit/helm-chart/ai-starter-kit/values-macos-gpu.yaml new file mode 100644 index 000000000..dfa19f41e --- /dev/null +++ b/ai/ai-starter-kit/helm-chart/ai-starter-kit/values-macos-gpu.yaml @@ -0,0 +1,98 @@ +genericDevicePlugin: + enabled: true + image: + repository: "squat/generic-device-plugin" + tag: "latest" + pullPolicy: "IfNotPresent" + device: + count: 6 + resources: + requests: + cpu: "100m" + memory: "50Mi" + limits: + cpu: "150m" + memory: "100Mi" + +jupyterhub: + singleuser: + image: + name: jupyterhub/k8s-singleuser-sample + tag: "4.2.0" + storage: + type: static + static: + pvcName: "ai-starter-kit-models-cache-pvc" + subPath: "jupyterhub_workspace" + capacity: 40Gi + homeMountPath: /home/jovyan + extraVolumes: + - name: init-files + configMap: + name: "ai-starter-kit-init-files" + - name: dev-dri + hostPath: + path: /dev/dri + extraVolumeMounts: + - name: dev-dri + mountPath: /dev/dri + + +ollama: + enabled: true + ollama: + models: + pull: + - gemma3 + - llama3.2 + resources: + requests: + cpu: 1 + memory: 4Gi + squat.ai/dri: "1" + limits: + cpu: 4 + memory: 8Gi + squat.ai/dri: "1" + + persistentVolume: + enabled: true + existingClaim: "ai-starter-kit-models-cache-pvc" + subPath: "ollama" + +ramalama: + enabled: true + command: + - /bin/sh + - -c + - ramalama pull qwen2.5:1.5b && ramalama serve qwen2.5:1.5b --port 8080 --device metal + + resources: + requests: + cpu: 1 + memory: 2Gi + 
squat.ai/dri: "1" + limits: + cpu: 4 + memory: 8Gi + squat.ai/dri: "1" + + extraVolumeMounts: + - name: dev-dri + mountPath: /dev/dri + + extraVolumes: + - name: dev-dri + hostPath: + path: /dev/dri + +modelsCachePvc: + enabled: true + storageClassName: "standard" + accessModes: + - ReadWriteOnce + size: 40Gi + +localPersistence: + enabled: true + hostPath: "/home/models-cache" \ No newline at end of file diff --git a/ai/ai-starter-kit/helm-chart/ai-starter-kit/values.yaml b/ai/ai-starter-kit/helm-chart/ai-starter-kit/values.yaml index c770d519f..3155ea642 100644 --- a/ai/ai-starter-kit/helm-chart/ai-starter-kit/values.yaml +++ b/ai/ai-starter-kit/helm-chart/ai-starter-kit/values.yaml @@ -102,6 +102,7 @@ jupyterhub: enabled: false ray-cluster: + enabled: false image: tag: "2.41.0-py312-cpu-aarch64" head: @@ -158,6 +159,9 @@ modelsCachePvc: - ReadWriteOnce size: 10Gi +modelsCacheOnlyPvc: + enabled: false + localPersistence: # For local development with minikube, this allows persisting the models-cache # on the host machine, surviving `minikube stop/start`. @@ -190,3 +194,6 @@ ramalama: repository: "quay.io/ramalama/ramalama" tag: "latest" pullPolicy: IfNotPresent + +genericDevicePlugin: + enabled: false \ No newline at end of file diff --git a/ai/ai-starter-kit/notebooks/multi-agent.ipynb b/ai/ai-starter-kit/notebooks/multi-agent.ipynb deleted file mode 100644 index ea2f3caa1..000000000 --- a/ai/ai-starter-kit/notebooks/multi-agent.ipynb +++ /dev/null @@ -1,621 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "079fadd2-200e-4d37-8ae2-be2792e3a24e", - "metadata": {}, - "source": [ - "### Cell 1 - Initialize Ray endpoints and verify dashboard\n", - "\n", - "Installs requests, derives the Ray head host from RAY_ADDRESS, builds Dashboard/Serve/MLflow URLs, reads an Hugging Face token, and prints the endpoints plus the Jobs API version for a quick health check." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "79db57cd-fb72-4b10-b0fb-5e9cd5c007b6", - "metadata": {}, - "outputs": [], - "source": [ - "!pip -q install requests==2.* --disable-pip-version-check\n", - "\n", - "import os, textwrap, base64, time, json, requests\n", - "from string import Template\n", - "\n", - "raw_addr = os.getenv(\"RAY_ADDRESS\", \"ray://ai-starter-kit-kuberay-head-svc:10001\")\n", - "if raw_addr.startswith(\"ray://\"):\n", - " HEAD_HOST = raw_addr.split(\"://\", 1)[1].split(\":\", 1)[0]\n", - "else:\n", - " HEAD_HOST = raw_addr.split(\":\", 1)[0] or \"ai-starter-kit-kuberay-head-svc\"\n", - "\n", - "DASH_URL = f\"http://{HEAD_HOST}:8265\"\n", - "SERVE_PORT = int(os.getenv(\"SERVE_PORT\", \"8000\"))\n", - "SERVE_ROUTE = \"/v1\"\n", - "\n", - "HF_TOKEN_PATH = \"/etc/secrets/huggingface/token\"\n", - "HF_TOKEN = \"\"\n", - "if os.path.exists(HF_TOKEN_PATH):\n", - " try:\n", - " HF_TOKEN = open(HF_TOKEN_PATH).read().strip()\n", - " except Exception:\n", - " HF_TOKEN = \"\"\n", - "\n", - "print(\"Head host:\", HEAD_HOST)\n", - "print(\"Jobs API :\", f\"{DASH_URL}/api/jobs/\")\n", - "print(\"Serve URL:\", f\"http://{HEAD_HOST}:{SERVE_PORT}{SERVE_ROUTE}\")\n", - "print(\"MLflow :\", os.getenv(\"MLFLOW_TRACKING_URI\", \"http://ai-starter-kit-mlflow:5000\"))\n", - "\n", - "print(\"Jobs API version:\", requests.get(f\"{DASH_URL}/api/version\", timeout=10).json())\n" - ] - }, - { - "cell_type": "markdown", - "id": "fe862173-fd9a-41ae-a27b-63875f788024", - "metadata": {}, - "source": [ - "### Cell 2 - Deploy a minimal Ray Serve smoke test and verify readiness\n", - "\n", - "Submits a tiny FastAPI app to Ray Serve (one /healthz endpoint under /smoke) as a Ray Job, installing FastAPI on the fly. It polls the Jobs API for status and hits :8000/smoke/healthz up to 60 seconds, printing when the service responds 200 (i.e., smoke test passes)." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "34da3e26-6276-48b7-b3ac-c90359df6547", - "metadata": {}, - "outputs": [], - "source": [ - "import os, base64, textwrap, time, requests\n", - "\n", - "DASH_URL = \"http://ai-starter-kit-kuberay-head-svc:8265\"\n", - "\n", - "print(\"Jobs API:\", requests.get(f\"{DASH_URL}/api/version\", timeout=10).json())\n", - "\n", - "serve_py = textwrap.dedent(\"\"\"\n", - " from fastapi import FastAPI\n", - " from ray import serve\n", - " serve.start(detached=True, http_options={\"host\":\"0.0.0.0\",\"port\":8000})\n", - " app = FastAPI()\n", - "\n", - " @serve.deployment(name=\"smoke\", num_replicas=1)\n", - " @serve.ingress(app)\n", - " class Smoke:\n", - " @app.get(\"/healthz\")\n", - " async def health(self): return {\"ok\": True}\n", - "\n", - " serve.run(Smoke.bind(), route_prefix=\"/smoke\")\n", - " print(\"READY: smoke\", flush=True)\n", - "\"\"\").strip()\n", - "\n", - "b64 = base64.b64encode(serve_py.encode()).decode()\n", - "entry = f'python -c \"import base64; exec(base64.b64decode(\\'{b64}\\'))\"'\n", - "submit = requests.post(f\"{DASH_URL}/api/jobs/\", json={\"entrypoint\": entry, \"runtime_env\": {\"pip\": [\"fastapi>=0.110\"]}}, timeout=60).json()\n", - "job_id = submit[\"job_id\"]\n", - "print(\"Job:\", job_id)\n", - "\n", - "svc = \"http://ai-starter-kit-kuberay-head-svc:8000/smoke/healthz\"\n", - "for i in range(60):\n", - " s = requests.get(f\"{DASH_URL}/api/jobs/{job_id}\", timeout=10).json()[\"status\"]\n", - " try:\n", - " r = requests.get(svc, timeout=2)\n", - " print(f\"tick {i:02d}: job={s}, health={r.status_code}\")\n", - " if r.status_code == 200:\n", - " print(\"Smoke OK\")\n", - " break\n", - " except Exception as e:\n", - " print(f\"tick {i:02d}: job={s}, health=ERR {e}\")\n", - " time.sleep(1)" - ] - }, - { - "cell_type": "markdown", - "id": "8111d705-595e-4e65-8479-bdc76191fa31", - "metadata": {}, - "source": [ - "### Cell 3 - Deploy model on Ray Serve with llama-cpp\n", - "\n", - "Packages and submits a Ray Job that spins up a Ray Serve app exposing /v1/healthz and /v1/chat/completions. It downloads the preferred GGUF from Hugging Face, initializes llama-cpp-python, logs to MLflow, and prints the deployed health/chat URLs." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "bbea1539-e9ab-460a-9cfc-20a42807f616", - "metadata": {}, - "outputs": [], - "source": [ - "import os, base64, textwrap, requests\n", - "\n", - "HEAD = os.environ.get(\"RAY_HEAD_SVC\", \"ai-starter-kit-kuberay-head-svc\")\n", - "DASH_URL = f\"http://{HEAD}:8265\"\n", - "SERVE_PORT = 8000\n", - "SERVE_ROUTE = \"/v1\"\n", - "\n", - "runtime_env = {\n", - " \"pip\": [\n", - " \"fastapi==0.110.0\",\n", - " \"uvicorn==0.23.2\",\n", - " \"huggingface_hub==0.25.2\",\n", - " \"llama-cpp-python==0.3.16\", \n", - " \"hf_transfer==0.1.6\",\n", - " \"mlflow==2.14.3\", \n", - " ],\n", - " \"env_vars\": {\n", - " \"HF_HUB_ENABLE_HF_TRANSFER\": \"1\",\n", - " \"HUGGINGFACE_HUB_TOKEN\": os.environ.get(\"HUGGINGFACE_HUB_TOKEN\", \"\"),\n", - " \"SERVE_PORT\": str(SERVE_PORT),\n", - "\n", - " \"MODEL_REPO\": \"Qwen/Qwen2.5-1.5B-Instruct-GGUF\",\n", - " \"GGUF_PREF_ORDER\": \"q4_k_m,q4_0,q3_k_m,q2_k\",\n", - "\n", - " \"LLM_CONTEXT\": os.environ.get(\"LLM_CONTEXT\", \"1024\"),\n", - " \"LLM_MAX_TOKENS\": os.environ.get(\"LLM_MAX_TOKENS\", \"256\"),\n", - " \"SERVER_MAX_NEW_TOKENS\": os.environ.get(\"SERVER_MAX_NEW_TOKENS\", \"512\"),\n", - "\n", - " \"LLM_THREADS\": os.environ.get(\"LLM_THREADS\", \"6\"),\n", - " \"OMP_NUM_THREADS\": os.environ.get(\"OMP_NUM_THREADS\", \"6\"),\n", - " \"GPU_LAYERS\": \"0\", \n", - " \n", - " \"PIP_PREFER_BINARY\": \"1\",\n", - " \"CMAKE_ARGS\": \"-DGGML_OPENMP=OFF -DLLAMA_NATIVE=OFF\",\n", - "\n", - " \"HF_HOME\": \"/tmp/hf-cache\",\n", - " \"TRANSFORMERS_CACHE\": \"/tmp/hf-cache\",\n", - "\n", - " \"MLFLOW_TRACKING_URI\": os.environ.get(\"MLFLOW_TRACKING_URI\", \"\"),\n", - " \"MLFLOW_EXPERIMENT_NAME\": os.environ.get(\"MLFLOW_EXPERIMENT_NAME\", \"ray-llama-cpp\"),\n", - " },\n", - "}\n", - "\n", - "serve_py = textwrap.dedent(f\"\"\"\n", - "import os, time, multiprocessing, uuid\n", - "from typing import List, Dict, Any\n", - "from fastapi import FastAPI, Request\n", - "from fastapi.responses import JSONResponse\n", - "from huggingface_hub import HfApi, hf_hub_download\n", - "from ray import serve\n", - "from llama_cpp import Llama\n", - "\n", - "USE_MLFLOW = False\n", - "try:\n", - " import mlflow\n", - " if os.getenv(\"MLFLOW_TRACKING_URI\"):\n", - " mlflow.set_tracking_uri(os.getenv(\"MLFLOW_TRACKING_URI\"))\n", - " mlflow.set_experiment(os.getenv(\"MLFLOW_EXPERIMENT_NAME\",\"ray-llama-cpp\"))\n", - " USE_MLFLOW = True\n", - "except Exception as _e:\n", - " USE_MLFLOW = False\n", - "\n", - "SERVE_PORT = int(os.getenv(\"SERVE_PORT\", \"{SERVE_PORT}\"))\n", - "SERVE_ROUTE = \"{SERVE_ROUTE}\"\n", - "MODEL_REPO = os.getenv(\"MODEL_REPO\", \"Qwen/Qwen2.5-1.5B-Instruct-GGUF\")\n", - "GGUF_PREFS = [s.strip() for s in os.getenv(\"GGUF_PREF_ORDER\",\"q4_k_m,q4_0,q3_k_m,q2_k\").split(\",\") if s.strip()]\n", - "CTX_LEN = int(os.getenv(\"LLM_CONTEXT\", \"2048\"))\n", - "MAX_TOKENS = int(os.getenv(\"LLM_MAX_TOKENS\", \"256\"))\n", - "HF_TOKEN = os.getenv(\"HUGGINGFACE_HUB_TOKEN\") or None\n", - "\n", - "serve.start(detached=True, http_options={{\"host\":\"0.0.0.0\", \"port\":SERVE_PORT}})\n", - "app = FastAPI()\n", - "\n", - "def pick_one_file(repo_id: str, prefs):\n", - " api = HfApi()\n", - " files = api.list_repo_files(repo_id=repo_id, repo_type=\"model\", token=HF_TOKEN)\n", - " ggufs = [f for f in files if f.lower().endswith(\".gguf\")]\n", - " if not ggufs:\n", - " raise RuntimeError(f\"No .gguf files visible in {{repo_id}}\")\n", - " for pref in prefs:\n", - " for f in ggufs:\n", - " if pref.lower() in 
f.lower():\n", - " return f\n", - " return ggufs[0]\n", - "\n", - "def pick_chat_format(repo: str, fname: str) -> str:\n", - " return \"qwen\"\n", - "\n", - "@serve.deployment(name=\"qwen\", num_replicas=1, ray_actor_options={{\"num_cpus\": 6}})\n", - "@serve.ingress(app)\n", - "class OpenAICompatLlama:\n", - " def __init__(self, repo_id: str = MODEL_REPO):\n", - " target = pick_one_file(repo_id, GGUF_PREFS)\n", - " print(f\"[env] model repo: {{repo_id}} file: {{target}}\", flush=True)\n", - " local_dir = \"/tmp/hf-gguf\"; os.makedirs(local_dir, exist_ok=True)\n", - "\n", - " gguf_path = hf_hub_download(\n", - " repo_id=repo_id, filename=target, token=HF_TOKEN,\n", - " local_dir=local_dir, local_dir_use_symlinks=False,\n", - " force_download=False, resume_download=True\n", - " )\n", - " print(f\"[download] done: {{gguf_path}}\", flush=True)\n", - "\n", - " n_threads = int(os.getenv(\"LLM_THREADS\", max(2, (multiprocessing.cpu_count() or 4)//2)))\n", - " print(f\"[load] llama-cpp-python | ctx={{CTX_LEN}} threads={{n_threads}} gpu_layers={{int(os.getenv('GPU_LAYERS','0'))}}\", flush=True)\n", - "\n", - " self.model_file = os.path.basename(gguf_path)\n", - " self.model_repo = repo_id\n", - " chat_format = pick_chat_format(self.model_repo, self.model_file)\n", - " print(f\"[load] chat_format={{chat_format}}\", flush=True)\n", - "\n", - " self.llm = Llama(\n", - " model_path=gguf_path,\n", - " n_ctx=CTX_LEN,\n", - " n_threads=n_threads,\n", - " n_batch=256, \n", - " n_gpu_layers=int(os.getenv(\"GPU_LAYERS\",\"0\")),\n", - " chat_format=chat_format,\n", - " verbose=False\n", - " )\n", - " print(\"[ready] model loaded\", flush=True)\n", - "\n", - " @app.get(\"/healthz\")\n", - " async def health(self):\n", - " return {{\"status\":\"ok\"}}\n", - "\n", - " @app.post(\"/chat/completions\")\n", - " async def chat_completions(self, request: Request):\n", - " t0 = time.time()\n", - " body = await request.json()\n", - "\n", - " messages = body.get(\"messages\", [])\n", - " temperature = float(body.get(\"temperature\", 0.2))\n", - " req_max = body.get(\"max_tokens\", None)\n", - " stop_words = (body.get(\"stop\", []) or []) + [\"<|im_end|>\", \"\"]\n", - "\n", - " SERVER_MAX = int(os.getenv(\"SERVER_MAX_NEW_TOKENS\", \"512\"))\n", - " max_tokens = int(req_max if isinstance(req_max, int) else MAX_TOKENS)\n", - " max_tokens = max(32, min(max_tokens, CTX_LEN - 128, SERVER_MAX))\n", - "\n", - " rid = \"chatcmpl-\" + uuid.uuid4().hex[:24]\n", - " created = int(time.time())\n", - " model_name = f\"{{self.model_repo}}/{{self.model_file}}\"\n", - "\n", - " try:\n", - " result = self.llm.create_chat_completion(\n", - " messages=messages,\n", - " temperature=temperature,\n", - " max_tokens=max_tokens,\n", - " top_k=50,\n", - " top_p=0.9,\n", - " repeat_penalty=1.1,\n", - " stop=stop_words,\n", - " )\n", - " out_text = (result[\"choices\"][0][\"message\"][\"content\"] or \"\").strip()\n", - " usage_raw = result.get(\"usage\") or {{}}\n", - " p_tokens = int(usage_raw.get(\"prompt_tokens\") or 0)\n", - " c_tokens = int(usage_raw.get(\"completion_tokens\") or 0)\n", - " err = None\n", - " except Exception as e:\n", - " out_text = \"\"\n", - " p_tokens = c_tokens = 0\n", - " err = str(e)\n", - "\n", - " if USE_MLFLOW:\n", - " try:\n", - " dur_ms = int((time.time()-t0) * 1000)\n", - " with mlflow.start_run(run_name=\"chat\"):\n", - " mlflow.set_tags({{\n", - " \"model_repo\": self.model_repo,\n", - " \"model_file\": self.model_file,\n", - " \"framework\": \"llama-cpp-python\",\n", - " }})\n", - " 
mlflow.log_params({{\n", - " \"temperature\": temperature,\n", - " \"max_tokens\": max_tokens,\n", - " \"ctx\": CTX_LEN,\n", - " }})\n", - " if not (p_tokens and c_tokens):\n", - " p_tokens = p_tokens or max(1, len(\" \".join(m.get(\"content\",\"\") for m in messages).split()))\n", - " c_tokens = c_tokens or max(0, len(out_text.split()))\n", - " mlflow.log_metrics({{\n", - " \"duration_ms\": dur_ms,\n", - " \"prompt_tokens_approx\": p_tokens,\n", - " \"completion_tokens_approx\": c_tokens,\n", - " \"total_tokens_approx\": p_tokens + c_tokens,\n", - " }})\n", - " except Exception:\n", - " pass\n", - "\n", - " if err:\n", - " return JSONResponse(status_code=500, content={{\"error\": err, \"type\":\"generation_error\"}})\n", - "\n", - " usage = {{\n", - " \"prompt_tokens\": p_tokens,\n", - " \"completion_tokens\": c_tokens,\n", - " \"total_tokens\": p_tokens + c_tokens,\n", - " }}\n", - " return {{\n", - " \"id\": rid,\n", - " \"object\": \"chat.completion\",\n", - " \"created\": created,\n", - " \"model\": model_name,\n", - " \"choices\": [\n", - " {{\n", - " \"index\": 0,\n", - " \"message\": {{\"role\":\"assistant\",\"content\": out_text}},\n", - " \"finish_reason\": \"stop\"\n", - " }}\n", - " ],\n", - " \"usage\": usage\n", - " }}\n", - "\n", - "serve.run(OpenAICompatLlama.bind(), route_prefix=SERVE_ROUTE)\n", - "print(\"READY\", flush=True)\n", - "\"\"\").strip()\n", - "\n", - "payload = base64.b64encode(serve_py.encode()).decode()\n", - "entrypoint = 'python -c \"import base64,sys;exec(base64.b64decode(\\'{}\\').decode())\"'.format(payload)\n", - "\n", - "job = requests.post(\n", - " f\"{DASH_URL}/api/jobs/\",\n", - " json={\n", - " \"entrypoint\": entrypoint,\n", - " \"runtime_env\": runtime_env,\n", - " \"metadata\": {\"job_name\": \"serve-qwen2_5-llama_cpp-openai\"},\n", - " },\n", - " timeout=45\n", - ").json()\n", - "\n", - "print(\"Job:\", job.get(\"job_id\"))\n", - "print(\"Health:\", f\"http://{HEAD}:{SERVE_PORT}{SERVE_ROUTE}/healthz\")\n", - "print(\"Chat: \", f\"http://{HEAD}:{SERVE_PORT}{SERVE_ROUTE}/chat/completions\")" - ] - }, - { - "cell_type": "markdown", - "id": "a411c015-c802-4ca1-81bb-3f4790d9626a", - "metadata": {}, - "source": [ - "### Cell 4 - Basic client + latency test\n", - "\n", - "Calls /v1/healthz and then sends an OpenAI-style chat request to /v1/chat/completions with a short prompt. Prints latency and token usage, returning the assistant text." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "3be634e2-a82f-42c9-8e31-57e6868a86ee", - "metadata": {}, - "outputs": [], - "source": [ - "import os, time, requests, json\n", - "\n", - "HEAD = os.environ.get(\"RAY_HEAD_SVC\", \"ai-starter-kit-kuberay-head-svc\")\n", - "SERVE_PORT = 8000\n", - "BASE_URL = f\"http://{HEAD}:{SERVE_PORT}/v1\"\n", - "\n", - "def health():\n", - " r = requests.get(f\"{BASE_URL}/healthz\", timeout=10)\n", - " print(\"Health:\", r.status_code, r.json())\n", - "\n", - "def chat(prompt, temperature=0.4, max_tokens=220, stop=None):\n", - " body = {\n", - " \"model\": \"qwen2.5-1.5b-instruct-gguf\",\n", - " \"temperature\": float(temperature),\n", - " \"max_tokens\": int(max_tokens),\n", - " \"messages\": [\n", - " {\"role\": \"system\", \"content\": \"You are Qwen2.5 Instruct running on a tiny CPU host. 
Be concise, complete sentences.\"},\n", - " {\"role\": \"user\", \"content\": prompt},\n", - " ],\n", - " }\n", - " if stop:\n", - " body[\"stop\"] = stop\n", - "\n", - " t0 = time.time()\n", - " r = requests.post(f\"{BASE_URL}/chat/completions\", json=body, timeout=300)\n", - " dt = time.time() - t0\n", - " r.raise_for_status()\n", - " out = r.json()[\"choices\"][0][\"message\"][\"content\"]\n", - " usage = r.json().get(\"usage\", {})\n", - " print(f\"\\nLatency: {dt:.2f}s | usage: {usage}\")\n", - " print(\"\\n---\\n\", out)\n", - " return out\n", - "\n", - "health()\n", - "_ = chat(\"Say 'test ok' then give me one short fun fact about llamas.\", stop=[\"<|im_end|>\"])" - ] - }, - { - "cell_type": "markdown", - "id": "553d2756-8949-43e3-8342-71387688e0fa", - "metadata": {}, - "source": [ - "### Cell 5 - Multi-agent (Autogen) pipeline\n", - "\n", - "Installs Autogen, configures OpenAIWrapper to hit Ray Serve /v1 endpoint, warms up the model, then runs a simple three-agent workflow (Researcher -> Writer -> Critic) to produce and refine a short report." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "0f6713f3-8b60-40b2-ad3c-ebf6db4f66e1", - "metadata": {}, - "outputs": [], - "source": [ - "!pip -q install pyautogen~=0.2.35 \"flaml[automl]\" --disable-pip-version-check\n", - "\n", - "import os, sys\n", - "\n", - "for p in [\n", - " \"/tmp/models-cache/lib/python3.11/site-packages\", \n", - " os.path.expanduser(\"~/.local/lib/python3.11/site-packages\"), \n", - "]:\n", - " if os.path.isdir(p) and p not in sys.path:\n", - " sys.path.insert(0, p)\n", - "\n", - "import os, autogen\n", - "from autogen import AssistantAgent, UserProxyAgent\n", - "\n", - "HEAD = os.environ.get(\"RAY_HEAD_SVC\", \"ai-starter-kit-kuberay-head-svc\")\n", - "SERVE_PORT = 8000\n", - "BASE_URL = f\"http://{HEAD}:{SERVE_PORT}/v1\" \n", - "\n", - "config_list = [\n", - " {\n", - " \"model\": \"qwen2.5-1.5b-instruct-gguf\", \n", - " \"base_url\": BASE_URL, \n", - " \"api_key\": \"local\", \n", - " \"price\": [0.0, 0.0],\n", - " }\n", - "]\n", - "\n", - "llm = autogen.OpenAIWrapper(config_list=config_list)\n", - "try:\n", - " r = llm.create(messages=[{\"role\":\"user\",\"content\":\"Say 'test ok'.\"}], temperature=0.2, max_tokens=16)\n", - " print(\"Warmup:\", r.choices[0].message.content)\n", - "except Exception as e:\n", - " print(\"Warmup failed:\", e)\n", - "\n", - "user_proxy = UserProxyAgent(\n", - " name=\"UserProxy\",\n", - " system_message=\"You are the human admin. Initiate the task.\",\n", - " code_execution_config=False,\n", - " human_input_mode=\"NEVER\",\n", - ")\n", - "\n", - "researcher = AssistantAgent(\n", - " name=\"Researcher\",\n", - " system_message=(\n", - " \"You are a researcher. Gather concise, verified facts on the topic. \"\n", - " \"Return several bullet points with inline source domains (e.g., nature.com, ibm.com). \"\n", - " \"Keep under 100 words total. No made-up sources. \"\n", - " \"Do not include any special end token.\"\n", - " ),\n", - " llm_config={\"config_list\": config_list, \"temperature\": 0.35, \"max_tokens\": 140, \"timeout\": 300},\n", - ")\n", - "\n", - "writer = AssistantAgent(\n", - " name=\"Writer\",\n", - " system_message=(\n", - " \"You are a writer. Using the Researcher’s notes, produce a clear word report under 160 words. \"\n", - " \"Avoid speculation. Keep it structured and readable. 
\"\n", - " \"Do not include any special end token.\"\n", - " ),\n", - " llm_config={\"config_list\": config_list, \"temperature\": 0.55, \"max_tokens\": 220, \"timeout\": 180},\n", - ")\n", - "\n", - "critic = AssistantAgent(\n", - " name=\"Critic\",\n", - " system_message=(\n", - " \"You are a critic. Review the Writer’s report for accuracy, clarity, and flow.\"\n", - " \"Present the tightened final text and keep it under 140 words. On a new last line output exactly: <|END|>\"\n", - " ),\n", - " llm_config={\"config_list\": config_list, \"temperature\": 0.45, \"max_tokens\": 160, \"timeout\": 300},\n", - ")\n", - "\n", - "def run_sequential(task):\n", - " research_response = researcher.generate_reply(messages=[{\"content\": task, \"role\": \"user\"}])\n", - " research_notes = research_response if isinstance(research_response, str) else research_response.get(\"content\", \"[no output]\")\n", - " print(\"\\nResearch Notes:\\n\", research_notes)\n", - "\n", - " writer_prompt = f\"Using these research notes, write the report:\\n{research_notes}\"\n", - " writer_response = writer.generate_reply(messages=[{\"content\": writer_prompt, \"role\": \"user\"}])\n", - " report = writer_response if isinstance(writer_response, str) else writer_response.get(\"content\", \"[no output]\")\n", - " print(\"\\nDraft Report:\\n\", report)\n", - "\n", - " critic_prompt = f\"Review this report:\\n{report}\"\n", - " critic_response = critic.generate_reply(messages=[{\"content\": critic_prompt, \"role\": \"user\"}])\n", - " final_text = critic_response if isinstance(critic_response, str) else critic_response.get(\"content\", \"[no output]\")\n", - " print(\"\\nFinal Review:\\n\", final_text)\n", - " return final_text\n", - "\n", - "task = \"Research the latest advancements in quantum computing as of 2025. Gather key facts, then write a short report (200–300 words). Have the Critic review and finalize.\"\n", - "final_output = run_sequential(task)" - ] - }, - { - "cell_type": "markdown", - "id": "0af596cf-5ba6-42df-a030-61d7a20d6f7b", - "metadata": {}, - "source": [ - "### Cell 6 - MLFlow: connect to tracking server and list recent chat runs\n", - "\n", - "Installs MLflow, sets the tracking URI and experiment, then queries and prints the latest runs with key params/metrics (temperature, max_tokens, duration) to verify Serve logging." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "03a1b042-04df-4cd0-9099-4cc763ecfe9d", - "metadata": {}, - "outputs": [], - "source": [ - "!pip -q install mlflow==2.14.3 --disable-pip-version-check\n", - "\n", - "import os, mlflow\n", - "from datetime import datetime\n", - "\n", - "tracking_uri = os.getenv(\"MLFLOW_TRACKING_URI\", \"http://ai-starter-kit-mlflow:5000\")\n", - "mlflow.set_tracking_uri(tracking_uri)\n", - "print(f\"MLflow Tracking URI: {tracking_uri}\")\n", - "\n", - "exp_name = os.getenv(\"MLFLOW_EXPERIMENT_NAME\", \"ray-llama-cpp\")\n", - "exp = mlflow.set_experiment(exp_name)\n", - "print(f\"Experiment: {exp.name} (ID: {exp.experiment_id})\")\n", - "print(\"-\" * 60)\n", - "\n", - "client = mlflow.tracking.MlflowClient()\n", - "runs = client.search_runs(\n", - " exp.experiment_id, \n", - " order_by=[\"attributes.start_time DESC\"], \n", - " max_results=10\n", - ")\n", - "\n", - "if not runs:\n", - " print(\"No runs found. 
Run cells 4 or 5 first to generate inference requests.\")\n", - "else:\n", - " print(f\"\\nFound {len(runs)} recent runs:\")\n", - " print(\"-\" * 60)\n", - " \n", - " for i, run in enumerate(runs, 1):\n", - " start_time = datetime.fromtimestamp(run.info.start_time/1000).strftime('%Y-%m-%d %H:%M:%S')\n", - " duration = run.data.metrics.get('duration_ms', 'N/A')\n", - " temp = run.data.params.get('temperature', 'N/A')\n", - " max_tokens = run.data.params.get('max_tokens', 'N/A')\n", - " total_tokens = run.data.metrics.get('total_tokens_approx', 'N/A')\n", - " \n", - " print(f\"\\nRun {i}:\")\n", - " print(f\" ID: {run.info.run_id[:12]}...\")\n", - " print(f\" Time: {start_time}\")\n", - " print(f\" Status: {run.info.status}\")\n", - " print(f\" Temperature: {temp}\")\n", - " print(f\" Max Tokens: {max_tokens}\")\n", - " print(f\" Duration: {duration} ms\")\n", - " print(f\" Total Tokens: {total_tokens}\")\n", - " \n", - " print(\"\\n\" + \"=\" * 60)\n", - " print(\"SUMMARY:\")\n", - " successful = sum(1 for r in runs if r.info.status == 'FINISHED')\n", - " durations = [r.data.metrics.get('duration_ms', 0) for r in runs if r.data.metrics.get('duration_ms')]\n", - " avg_duration = sum(durations) / len(durations) if durations else 0\n", - " \n", - " print(f\" Total Runs: {len(runs)}\")\n", - " print(f\" Successful: {successful}\")\n", - " print(f\" Failed: {len(runs) - successful}\")\n", - " print(f\" Avg Duration: {avg_duration:.1f} ms\" if avg_duration else \" Avg Duration: N/A\")\n", - "\n", - "print(\"\\n\" + \"=\" * 60)\n", - "print(\"MLflow verification complete\")" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.11.9" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/ai/ai-starter-kit/notebooks/test_ollama.py b/ai/ai-starter-kit/notebooks/test_ollama.py deleted file mode 100644 index 58cf22e05..000000000 --- a/ai/ai-starter-kit/notebooks/test_ollama.py +++ /dev/null @@ -1,11 +0,0 @@ -from ollama import Client -client = Client( - host='http://ai-starter-kit-ollama:11434', - headers={'x-some-header': 'some-value'} -) -response = client.chat(model='gemma3', messages=[ - { - 'role': 'user', - 'content': 'Why is the sky blue?', - }, -]) diff --git a/ai/ai-starter-kit/notebooks/test_ray.py b/ai/ai-starter-kit/notebooks/test_ray.py deleted file mode 100644 index b04ef8cd8..000000000 --- a/ai/ai-starter-kit/notebooks/test_ray.py +++ /dev/null @@ -1,12 +0,0 @@ -!pip install ray -from ray.job_submission import JobSubmissionClient - -# If using a remote cluster, replace 127.0.0.1 with the head node's IP address or set up port forwarding. 
-client = JobSubmissionClient() -job_id = client.submit_job( - # Entrypoint shell command to execute - entrypoint="python script.py", - # Path to the local directory that contains the script.py file - runtime_env={"working_dir": "./","excludes": ["/.cache","/.local"]} -) -print(job_id) \ No newline at end of file From 05c0644abe5caf046390d2776b7f55f78882239e Mon Sep 17 00:00:00 2001 From: Vlado Djerek Date: Mon, 29 Sep 2025 16:02:14 +0200 Subject: [PATCH 03/14] remove example values and ci --- .../ci/terraform/default_env.tfvars | 9 - ai/ai-starter-kit/ci/terraform/main.tf | 108 --------- ai/ai-starter-kit/ci/terraform/outputs.tf | 15 -- ai/ai-starter-kit/ci/terraform/variables.tf | 26 --- ai/ai-starter-kit/ci/test_hub.py | 59 ----- ai/ai-starter-kit/cloudbuild.yaml | 208 ----------------- .../helm-chart/ai-starter-kit/Chart.yaml | 2 +- .../ai-starter-kit/values-addon.yaml | 13 -- .../ai-starter-kit/values-gke-gpu.yaml | 219 ------------------ .../ai-starter-kit/values-gke-ollama-gpu.yaml | 44 ---- .../values-gke-ramalama-gpu.yaml | 38 --- .../helm-chart/ai-starter-kit/values-gke.yaml | 162 ------------- .../ai-starter-kit/values-macos-gpu.yaml | 98 -------- 13 files changed, 1 insertion(+), 1000 deletions(-) delete mode 100644 ai/ai-starter-kit/ci/terraform/default_env.tfvars delete mode 100644 ai/ai-starter-kit/ci/terraform/main.tf delete mode 100644 ai/ai-starter-kit/ci/terraform/outputs.tf delete mode 100644 ai/ai-starter-kit/ci/terraform/variables.tf delete mode 100644 ai/ai-starter-kit/ci/test_hub.py delete mode 100644 ai/ai-starter-kit/cloudbuild.yaml delete mode 100644 ai/ai-starter-kit/helm-chart/ai-starter-kit/values-addon.yaml delete mode 100644 ai/ai-starter-kit/helm-chart/ai-starter-kit/values-gke-gpu.yaml delete mode 100644 ai/ai-starter-kit/helm-chart/ai-starter-kit/values-gke-ollama-gpu.yaml delete mode 100644 ai/ai-starter-kit/helm-chart/ai-starter-kit/values-gke-ramalama-gpu.yaml delete mode 100644 ai/ai-starter-kit/helm-chart/ai-starter-kit/values-gke.yaml delete mode 100644 ai/ai-starter-kit/helm-chart/ai-starter-kit/values-macos-gpu.yaml diff --git a/ai/ai-starter-kit/ci/terraform/default_env.tfvars b/ai/ai-starter-kit/ci/terraform/default_env.tfvars deleted file mode 100644 index 573c66bab..000000000 --- a/ai/ai-starter-kit/ci/terraform/default_env.tfvars +++ /dev/null @@ -1,9 +0,0 @@ -project_id = "" -default_resource_name = "" - -cluster_name = "" # Leave empty to use the default name (default_resource_name) -cluster_location = "us-central1" -private_cluster = false -autopilot_cluster = true - -service_account_name = "" # Leave empty to use the default name diff --git a/ai/ai-starter-kit/ci/terraform/main.tf b/ai/ai-starter-kit/ci/terraform/main.tf deleted file mode 100644 index 5ab94ee89..000000000 --- a/ai/ai-starter-kit/ci/terraform/main.tf +++ /dev/null @@ -1,108 +0,0 @@ -terraform { - - required_providers { - kubectl = { - source = "gavinbunney/kubectl" - version = ">= 1.19.0" - } - } -} -data "google_client_config" "default" {} - - -data "google_project" "project" { - project_id = var.project_id -} - - -locals { - cluster_name = var.cluster_name != "" ? 
var.cluster_name : var.default_resource_name -} - -module "gke_cluster" { - source = "github.com/ai-on-gke/common-infra/common/infrastructure?ref=main" - - project_id = var.project_id - cluster_name = local.cluster_name - cluster_location = var.cluster_location - autopilot_cluster = var.autopilot_cluster - private_cluster = var.private_cluster - create_network = false - network_name = "default" - subnetwork_name = "default" - enable_gpu = true - gpu_pools = [ - { - name = "gpu-pool-l4" - machine_type = "g2-standard-24" - node_locations = "us-central1-a" ## comment to autofill node_location based on cluster_location - autoscaling = true - min_count = 1 - max_count = 3 - disk_size_gb = 100 - disk_type = "pd-balanced" - enable_gcfs = true - logging_variant = "DEFAULT" - accelerator_count = 2 - accelerator_type = "nvidia-l4" - gpu_driver_version = "DEFAULT" - } - ] - ray_addon_enabled = false -} - -locals { - #ca_certificate = base64decode(module.gke_cluster.ca_certificate) - cluster_membership_id = var.cluster_membership_id == "" ? local.cluster_name : var.cluster_membership_id - host = var.private_cluster ? "https://connectgateway.googleapis.com/v1/projects/${data.google_project.project.number}/locations/${var.cluster_location}/gkeMemberships/${local.cluster_membership_id}" : "https://${module.gke_cluster.endpoint}" - -} - -provider "kubernetes" { - alias = "ai_starter_kit" - host = local.host - token = data.google_client_config.default.access_token - cluster_ca_certificate = var.private_cluster ? "" : base64decode(module.gke_cluster.ca_certificate) - - dynamic "exec" { - for_each = var.private_cluster ? [1] : [] - content { - api_version = "client.authentication.k8s.io/v1beta1" - command = "gke-gcloud-auth-plugin" - } - } -} - -locals { - service_account_name = var.service_account_name != "" ? var.service_account_name : var.default_resource_name -} - - -module "ai_starter_kit_workload_identity" { - providers = { - kubernetes = kubernetes.ai_starter_kit - } - source = "terraform-google-modules/kubernetes-engine/google//modules/workload-identity" - name = local.service_account_name - namespace = "default" - roles = ["roles/storage.objectUser"] - project_id = var.project_id - depends_on = [module.gke_cluster] -} - -provider "kubectl" { - alias = "ai_starter_kit" - apply_retry_count = 15 - host = local.host - token = data.google_client_config.default.access_token - cluster_ca_certificate = var.private_cluster ? "" : base64decode(module.gke_cluster.ca_certificate) - load_config_file = true - - dynamic "exec" { - for_each = var.private_cluster ? 
[1] : [] - content { - api_version = "client.authentication.k8s.io/v1beta1" - command = "gke-gcloud-auth-plugin" - } - } -} diff --git a/ai/ai-starter-kit/ci/terraform/outputs.tf b/ai/ai-starter-kit/ci/terraform/outputs.tf deleted file mode 100644 index 006f5b55b..000000000 --- a/ai/ai-starter-kit/ci/terraform/outputs.tf +++ /dev/null @@ -1,15 +0,0 @@ - -output "gke_cluster_name" { - value = local.cluster_name - description = "GKE cluster name" -} - -output "gke_cluster_location" { - value = var.cluster_location - description = "GKE cluster location" -} - -output "project_id" { - value = var.project_id - description = "GKE cluster location" -} diff --git a/ai/ai-starter-kit/ci/terraform/variables.tf b/ai/ai-starter-kit/ci/terraform/variables.tf deleted file mode 100644 index 10c4d6390..000000000 --- a/ai/ai-starter-kit/ci/terraform/variables.tf +++ /dev/null @@ -1,26 +0,0 @@ -variable "project_id" { - type = string -} -variable "default_resource_name" { - type = string -} -variable "cluster_name" { - type = string -} -variable "cluster_location" { - type = string -} -variable "autopilot_cluster" { - type = bool -} -variable "private_cluster" { - type = bool -} -variable "cluster_membership_id" { - type = string - description = "require to use connectgateway for private clusters, default: cluster_name" - default = "" -} -variable "service_account_name" { - type = string -} diff --git a/ai/ai-starter-kit/ci/test_hub.py b/ai/ai-starter-kit/ci/test_hub.py deleted file mode 100644 index a7a40436b..000000000 --- a/ai/ai-starter-kit/ci/test_hub.py +++ /dev/null @@ -1,59 +0,0 @@ -import sys -import requests -from packaging.version import Version as V - - -def test_hub_up(hub_url): - r = requests.get(hub_url) - r.raise_for_status() - print("JupyterHub up.") - - -def test_api_root(hub_url): - """ - Tests the hub api's root endpoint (/). The hub's version should be returned. - - A typical jupyterhub logging response to this test: - - [I 2019-09-25 12:03:12.051 JupyterHub log:174] 200 GET /hub/api (test@127.0.0.1) 9.57ms - """ - r = requests.get(hub_url + "/hub/api") - r.raise_for_status() - info = r.json() - version = info["version"] - assert V("4") <= V(version) <= V("5.5"), f"version {version} must be between 4 and 5.5" - print("JupyterHub Rest API is working.") - - -def test_hub_login(hub_url): - """ - Tests the hub dummy authenticator login credentials. Login credentials retrieve - from /jupyter_config/config.yaml. After successfully login, user will be - redirected to /hub/spawn. 
- """ - username, password = "user", "sneakypass" - session = requests.Session() - - response = session.get(hub_url + "/hub/login") - response.raise_for_status() - - auth_params = {} - if "_xsrf" in session.cookies: - auth_params = {"_xsrf": session.cookies["_xsrf"]} - - response = session.post( - hub_url + "/hub/login", - params=auth_params, - data={"username": username, "password": password}, - allow_redirects=True, - ) - response.raise_for_status() - assert (hub_url + "/hub/spawn-pending/user") in response.url, f"unexpected response url: got {response.url}, expected {hub_url}/hub/spawn-pending/user" - print("JupyterHub login success.") - - -hub_url = "http://" + sys.argv[1] - -test_hub_up(hub_url) -test_api_root(hub_url) -test_hub_login(hub_url) diff --git a/ai/ai-starter-kit/cloudbuild.yaml b/ai/ai-starter-kit/cloudbuild.yaml deleted file mode 100644 index 332063066..000000000 --- a/ai/ai-starter-kit/cloudbuild.yaml +++ /dev/null @@ -1,208 +0,0 @@ -# Copyright 2025 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -steps: -- id: "ai-starter-kit: validate platform" - name: "gcr.io/${PROJECT_ID}/terraform" - dir: "/workspace/ci/terraform" - script: | - terraform init -no-color - terraform validate -no-color - waitFor: ["-"] - -- id: 'ai-starter-kit: create gke cluster' - name: "gcr.io/${PROJECT_ID}/terraform" - dir: "/workspace/ci/terraform" - env: - - "KUBE_LOAD_CONFIG_FILE=false" - entrypoint: 'sh' - args: - - '-c' - - | - set -e - - echo "fail" > /workspace/ai_starter_kit_cluster_result.txt - terraform apply \ - -var-file=default_env.tfvars \ - -var=project_id=$PROJECT_ID \ - -var=default_resource_name=ml-ai-starter-kit-${SHORT_SHA}-${_BUILD_ID} \ - -var=cluster_name=ml-ai-starter-kit-${SHORT_SHA}-${_BUILD_ID}-cluster \ - -var=cluster_location=${_REGION} \ - -var=private_cluster=false \ - -var=autopilot_cluster=${_AUTOPILOT_CLUSTER} \ - -var=service_account_name=ml-ai-starter-kit-${SHORT_SHA}-${_BUILD_ID}-sa \ - -auto-approve -no-color - echo "pass" > /workspace/ai_starter_kit_cluster_result.txt - allowFailure: true - waitFor: ['ai-starter-kit: validate platform'] - -- id: 'ai-starter-kit: generate kubeconfig' - name: 'gcr.io/cloud-builders/gcloud' - args: - - 'container' - - 'clusters' - - 'get-credentials' - - 'ml-ai-starter-kit-${SHORT_SHA}-${_BUILD_ID}-cluster' - - '--region=${_REGION}' - - '--project=${PROJECT_ID}' - waitFor: ['ai-starter-kit: create gke cluster'] - -- id: 'ai-starter-kit: make install_gke' - name: "gcr.io/cloud-builders/kubectl" - env: - - "CLOUDSDK_COMPUTE_ZONE=${_REGION}" - - "CLOUDSDK_CONTAINER_CLUSTER=ml-${SHORT_SHA}-${_BUILD_ID}-cluster" - entrypoint: 'sh' - args: - - '-c' - - | - set -e - - echo "fail" > /workspace/ai_starter_kit_make_install_gke_result.txt - apt update - apt install curl make --assume-yes - curl -fsSL -o get_helm.sh https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 - chmod 700 get_helm.sh - /workspace/get_helm.sh - make install_gke - sleep 300s - echo "pass" > /workspace/ai_starter_kit_make_install_gke_result.txt 
- allowFailure: true - waitFor: ['ai-starter-kit: generate kubeconfig'] - -- id: 'ai-starter-kit: test jupyterhub' - name: "gcr.io/cloud-builders/kubectl" - env: - - "CLOUDSDK_COMPUTE_ZONE=${_REGION}" - - "CLOUDSDK_CONTAINER_CLUSTER=ml-${SHORT_SHA}-${_BUILD_ID}-cluster" - entrypoint: 'sh' - args: - - '-c' - - | - set -e - - echo "fail" > /workspace/ai_starter_kit_jupyterhub_test_result.txt - - apt update - apt install make python3-venv --assume-yes - apt install python3-pip --assume-yes - pip install pyyaml requests packaging - - make validate_jupyterhub - - echo "pass" > /workspace/ai_starter_kit_jupyterhub_test_result.txt - allowFailure: true - waitFor: ['ai-starter-kit: make install_gke'] - -- id: "ai-starter-kit: test ray cluster" - name: "gcr.io/cloud-builders/kubectl" - env: - - "CLOUDSDK_COMPUTE_ZONE=${_REGION}" - - "CLOUDSDK_CONTAINER_CLUSTER=ml-${SHORT_SHA}-${_PR_NUMBER}-${_BUILD_ID}-cluster" - entrypoint: 'sh' - args: - - '-c' - - | - set -e - echo "fail" > /workspace/ai_starter_kit_ray_result.txt - - apt update - apt install make python3-venv --assume-yes - apt install python3-pip --assume-yes - pip install ray==2.41.0 "ray[data,train,tune,serve]" - - make validate_ray - - echo "pass" > /workspace/ai_starter_kit_ray_result.txt - allowFailure: true - waitFor: ['ai-starter-kit: make install_gke'] - -- id: 'ai-starter-kit: cleanup gke cluster' - name: "gcr.io/${PROJECT_ID}/terraform" - dir: "/workspace/ci/terraform" - env: - - "KUBE_LOAD_CONFIG_FILE=false" - entrypoint: 'sh' - args: - - '-c' - - | - set -e - - echo "fail" > /workspace/ai_starter_kit_cleanup_result.txt - terraform destroy \ - -var-file=default_env.tfvars \ - -var=project_id=$PROJECT_ID \ - -var=default_resource_name=ml-ai-starter-kit-${SHORT_SHA}-${_BUILD_ID} \ - -var=cluster_name=ml-ai-starter-kit-${SHORT_SHA}-${_BUILD_ID}-cluster \ - -var=cluster_location=${_REGION} \ - -var=private_cluster=false \ - -var=autopilot_cluster=${_AUTOPILOT_CLUSTER} \ - -var=service_account_name=ml-ai-starter-kit-${SHORT_SHA}-${_BUILD_ID}-sa \ - -auto-approve -no-color - echo "pass" > /workspace/ai_starter_kit_cleanup_result.txt - allowFailure: true - waitFor: ['ai-starter-kit: test jupyterhub', 'ai-starter-kit: test ray cluster'] - -- id: 'check result' - name: "ubuntu" - entrypoint: 'bash' - args: - - '-c' - - | - set -e - - echo "pass" > /workspace/check_result.txt - - if [[ $(cat /workspace/ai_starter_kit_cluster_result.txt) != "pass" ]]; then - echo "ai starter kit cluster creation failed" - echo "error" > /workspace/check_result.txt - fi - - if [[ $(cat /workspace/ai_starter_kit_make_install_gke_result.txt) != "pass" ]]; then - echo "ai starter kit make install_gke failed" - echo "error" > /workspace/check_result.txt - fi - - if [[ $(cat /workspace/ai_starter_kit_jupyterhub_test_result.txt) != "pass" ]]; then - echo "ai starter kit jupyterhub test failed" - echo "error" > /workspace/check_result.txt - fi - - if [[ $(cat /workspace/ai_starter_kit_ray_result.txt) != "pass" ]]; then - echo "ai starter kit ray test failed" - echo "error" > /workspace/check_result.txt - fi - - if [[ $(cat /workspace/ai_starter_kit_cleanup_result.txt) != "pass" ]]; then - echo "ai starter kit clean up failed" - echo "error" > /workspace/check_result.txt - fi - - if [[ $(cat /workspace/check_result.txt) != "pass" ]]; then - cat /workspace/check_result.txt - exit 1 - fi - waitFor: ['ai-starter-kit: cleanup gke cluster'] - -substitutions: - _AUTOPILOT_CLUSTER: "true" - _REGION: us-east4 - # _USER_NAME: github - _BUILD_ID: "1234567" #${BUILD_ID:0:8} -# 
logsBucket: gs://ai-on-gke-qss-build-logs -options: - logging: CLOUD_LOGGING_ONLY - substitutionOption: "ALLOW_LOOSE" - machineType: "E2_HIGHCPU_8" -timeout: 5400s diff --git a/ai/ai-starter-kit/helm-chart/ai-starter-kit/Chart.yaml b/ai/ai-starter-kit/helm-chart/ai-starter-kit/Chart.yaml index 64f9e5f13..9bf77a3b5 100644 --- a/ai/ai-starter-kit/helm-chart/ai-starter-kit/Chart.yaml +++ b/ai/ai-starter-kit/helm-chart/ai-starter-kit/Chart.yaml @@ -21,7 +21,7 @@ version: 0.1.0 # incremented each time you make changes to the application. Versions are not expected to # follow Semantic Versioning. They should reflect the version the application is using. # It is recommended to use it with quotes. -appVersion: "1.16.0" +appVersion: "0.1.0" dependencies: diff --git a/ai/ai-starter-kit/helm-chart/ai-starter-kit/values-addon.yaml b/ai/ai-starter-kit/helm-chart/ai-starter-kit/values-addon.yaml deleted file mode 100644 index b01d2d6bb..000000000 --- a/ai/ai-starter-kit/helm-chart/ai-starter-kit/values-addon.yaml +++ /dev/null @@ -1,13 +0,0 @@ -ray-cluster: - enabled: false - -ramalama: - enabled: true - command: - - /bin/sh - - -c - - ramalama pull qwen2.5:1.5b && ramalama serve qwen2.5:1.5b --port 8080 - image: - repository: "quay.io/ramalama/ramalama" - tag: "latest" - pullPolicy: IfNotPresent diff --git a/ai/ai-starter-kit/helm-chart/ai-starter-kit/values-gke-gpu.yaml b/ai/ai-starter-kit/helm-chart/ai-starter-kit/values-gke-gpu.yaml deleted file mode 100644 index b2d48a624..000000000 --- a/ai/ai-starter-kit/helm-chart/ai-starter-kit/values-gke-gpu.yaml +++ /dev/null @@ -1,219 +0,0 @@ -jupyterhub: - singleuser: - initContainers: - - name: chown-home-mount-dir - image: jupyterhub/k8s-singleuser-sample:4.2.0 - securityContext: - runAsUser: 0 - command: ["chown", "jovyan", "/home/jovyan"] - volumeMounts: - - name: home - mountPath: /home/jovyan - subPath: jupyterhub_workspace - - - name: model-initializer - image: jupyterhub/k8s-singleuser-sample:4.2.0 - env: - - name: HF_TOKEN - valueFrom: - secretKeyRef: - name: ai-starter-kit-hf-token-secret - key: token - command: - - /bin/sh - - -c - - | - set -e - pip install -r /tmp/requirements.txt - python /tmp/download_models.py "/mnt/jovyan/jupyter_models" - for f in /tmp/*.ipynb; do - if [ -f "$f" ]; then - cp -n "$f" /home/jovyan/ - fi - done - volumeMounts: - - name: home - mountPath: /home/jovyan - subPath: jupyterhub_workspace - - name: init-files - mountPath: /tmp - readOnly: true - resources: - requests: - cpu: "2" - memory: 16Gi - ephemeral-storage: 10Gi - limits: - cpu: "4" - memory: 32Gi - ephemeral-storage: 10Gi - - extraResource: - limits: - ephemeral-storage: '10G' - nvidia.com/gpu: 1 - guarantees: - ephemeral-storage: '10G' - nvidia.com/gpu: 1 - - nodeSelector: - cloud.google.com/gke-accelerator: nvidia-l4 - - image: - name: jupyter/tensorflow-notebook # - gpu optimzied img - tag: "latest" - - storage: - type: static - static: - pvcName: "ai-starter-kit-models-cache-pvc" - subPath: "jupyterhub_workspace" - capacity: 20Gi - homeMountPath: /home/jovyan - extraVolumes: - - name: init-files - configMap: - name: "ai-starter-kit-init-files" - - name: models-cache - persistentVolumeClaim: - claimName: ai-starter-kit-models-cache-only-pvc - extraVolumeMounts: - - mountPath: "/mnt/jovyan" - name: models-cache - - cloudMetadata: - blockWithIptables: false - memory: - limit: 32G - guarantee: 16G - - profileList: - - display_name: "GPU Environment" - description: "Jupyter environment with GPU" - default: true - kubespawner_override: - extra_resource_limits: 
- nvidia.com/gpu: "1" - extra_resource_guarantees: - nvidia.com/gpu: "1" - node_selector: - cloud.google.com/gke-accelerator: nvidia-l4 - #priority_class_name: "high-priority" - - scheduling: - userScheduler: - enabled: false - -ray-cluster: - enabled: true - image: - tag: 2.41.0-py312-gpu - - head: - nodeSelector: - cloud.google.com/gke-accelerator: nvidia-l4 - resources: - requests: - cpu: "4" - memory: "4G" - ephemeral-storage: 10Gi - nvidia.com/gpu: 1 - limits: - cpu: "8" - memory: "6G" - ephemeral-storage: 10Gi - nvidia.com/gpu: 1 - - worker: - nodeSelector: - cloud.google.com/gke-accelerator: nvidia-l4 - containerEnv: - - name: PYTHONPATH - value: "/mnt/ray-storage/libraries" - - name: TMPDIR - value: "/mnt/ray-storage/temp" - - name: CUDA_VISIBLE_DEVICES - value: "0" - - name: NVIDIA_VISIBLE_DEVICES - value: "all" - resources: - requests: - cpu: "4" - memory: "4G" - ephemeral-storage: 10Gi - nvidia.com/gpu: 1 - limits: - cpu: "8" - memory: "6G" - ephemeral-storage: 10Gi - nvidia.com/gpu: 1 - volumes: - - name: ray-pvc-storage - persistentVolumeClaim: - claimName: "{{ .Release.Name }}-ray-pvc" - volumeMounts: - - name: ray-pvc-storage - mountPath: /mnt/ray-storage - podSecurityContext: - fsGroup: 1000 - -ollama: - enabled: true - ollama: - models: - pull: - - gemma3 - persistentVolume: - enabled: true - existingClaim: "ai-starter-kit-models-cache-only-pvc" - subPath: "ollama" - -ramalama: - enabled: true - nodeSelector: - cloud.google.com/gke-accelerator: nvidia-l4 - command: - - /bin/sh - - -c - - | - echo "Autopilot GPU node - starting GPU server..." - nvidia-smi - echo "Pulling model..." - ramalama pull qwen2.5:1.5b - echo "Starting GPU server..." - ramalama serve qwen2.5:1.5b --port 8080 --device cuda - image: - repository: "quay.io/ramalama/cuda" - tag: "latest" - pullPolicy: IfNotPresent - resources: - requests: - cpu: "2" - memory: "4Gi" - nvidia.com/gpu: 1 - limits: - cpu: "4" - memory: "8Gi" - nvidia.com/gpu: 1 - -rayPvc: - enabled: true - -modelsCachePvc: - storageClassName: "premium-rwo" - accessModes: - - ReadWriteOnce - size: 20Gi - -modelsCacheOnlyPvc: - enabled: true - storageClassName: "standard-rwx" - accessModes: - - ReadWriteMany - size: 100Gi - -localPersistence: - enabled: false - -genericDevicePlugin: - enabled: false diff --git a/ai/ai-starter-kit/helm-chart/ai-starter-kit/values-gke-ollama-gpu.yaml b/ai/ai-starter-kit/helm-chart/ai-starter-kit/values-gke-ollama-gpu.yaml deleted file mode 100644 index b600cba99..000000000 --- a/ai/ai-starter-kit/helm-chart/ai-starter-kit/values-gke-ollama-gpu.yaml +++ /dev/null @@ -1,44 +0,0 @@ -ramalama: - enabled: false # to avoid running two model servers (ollama + ramalama) at the same time - -ollama: - enabled: true - - nodeSelector: - cloud.google.com/gke-accelerator: nvidia-l4 - - image: - repository: ollama/ollama - tag: "latest" - pullPolicy: IfNotPresent - - resources: - requests: - cpu: "2" - memory: "8Gi" - nvidia.com/gpu: 1 - limits: - cpu: "4" - memory: "16Gi" - nvidia.com/gpu: 1 - - ## if GPU nodes will be tainted - # tolerations: - # - key: "nvidia.com/gpu" - # operator: "Exists" - # effect: "NoSchedule" - - ollama: - models: - pull: - - qwen2.5:1.5b - - gemma3 - - persistentVolume: - enabled: true - existingClaim: "ai-starter-kit-models-cache-pvc" - subPath: "ollama" - - service: - type: ClusterIP - port: 11434 \ No newline at end of file diff --git a/ai/ai-starter-kit/helm-chart/ai-starter-kit/values-gke-ramalama-gpu.yaml b/ai/ai-starter-kit/helm-chart/ai-starter-kit/values-gke-ramalama-gpu.yaml deleted file 
mode 100644 index dcbead050..000000000 --- a/ai/ai-starter-kit/helm-chart/ai-starter-kit/values-gke-ramalama-gpu.yaml +++ /dev/null @@ -1,38 +0,0 @@ -ollama: - enabled: false # to avoid running two model servers (ollama + ramalama) at the same time - -ramalama: - enabled: true - - nodeSelector: - cloud.google.com/gke-accelerator: nvidia-l4 - - image: - repository: quay.io/ramalama/cuda - tag: "latest" - pullPolicy: IfNotPresent - - resources: - requests: - cpu: "2" - memory: "4Gi" - nvidia.com/gpu: 1 - limits: - cpu: "4" - memory: "8Gi" - nvidia.com/gpu: 1 - - ## if GPU nodes will be tainted - # tolerations: - # - key: "nvidia.com/gpu" - # operator: "Exists" - # effect: "NoSchedule" - - command: - - /bin/sh - - -c - - | - set -e - echo "GPU info:" && nvidia-smi || true - echo "Pulling model..." && ramalama pull qwen2.5:1.5b - echo "Starting server..." && exec ramalama serve qwen2.5:1.5b --port 8080 --device cuda \ No newline at end of file diff --git a/ai/ai-starter-kit/helm-chart/ai-starter-kit/values-gke.yaml b/ai/ai-starter-kit/helm-chart/ai-starter-kit/values-gke.yaml deleted file mode 100644 index 22849de1d..000000000 --- a/ai/ai-starter-kit/helm-chart/ai-starter-kit/values-gke.yaml +++ /dev/null @@ -1,162 +0,0 @@ -jupyterhub: - singleuser: - initContainers: - # This init container makes sure that the home folder that we mount has the correct owner - - name: chown-home-mount-dir - image: jupyterhub/k8s-singleuser-sample:4.2.0 - securityContext: - runAsUser: 0 - command: ["chown", "jovyan", "/home/jovyan"] - volumeMounts: - - name: home - mountPath: /home/jovyan - subPath: jupyterhub_workspace - - - name: model-initializer - image: jupyterhub/k8s-singleuser-sample:4.2.0 - env: - - name: HF_TOKEN - valueFrom: - secretKeyRef: - name: ai-starter-kit-hf-token-secret - key: token - command: - - /bin/sh - - -c - - | - set -e - pip install -r /tmp/requirements.txt - - python /tmp/download_models.py "/mnt/jovyan/jupyter_models" - - # populate workspace with initial notebook files - for f in /tmp/*.ipynb; do - if [ -f "$f" ]; then - # Use cp -n to not overwrite existing files. - cp -n "$f" /home/jovyan/ - fi - done - volumeMounts: - # This 'home' volume is created by the helm chart's 'homeMountPath' option. - # We mount it to the initContainer too, so all downloads and installations are persisted in this mounted home folder.
- - name: home - mountPath: /home/jovyan - subPath: jupyterhub_workspace - - name: models-cache - mountPath: /mnt/jovyan - - name: init-files - mountPath: /tmp - readOnly: true - resources: - requests: - cpu: "2" - memory: 16Gi - ephemeral-storage: 10Gi - limits: - cpu: "4" - memory: 32Gi - ephemeral-storage: 10Gi - storage: - type: static - static: - pvcName: "ai-starter-kit-models-cache-pvc" - subPath: "jupyterhub_workspace" - capacity: 20Gi - homeMountPath: /home/jovyan - extraVolumes: - - name: init-files - configMap: - name: "ai-starter-kit-init-files" - - name: models-cache - persistentVolumeClaim: - claimName: ai-starter-kit-models-cache-only-pvc - extraVolumeMounts: - - mountPath: "/mnt/jovyan" - name: models-cache - cloudMetadata: - # Without this disabled, the GKE Autopilot Warden will raise an error about containers with escalated privileges - blockWithIptables: false - memory: - limit: 32G - guarantee: 16G - extraResource: - limits: - ephemeral-storage: '10G' - guarantees: - ephemeral-storage: '10G' - scheduling: - userScheduler: - # For now we disable the scheduler because GKE Autopilot Warden does not allow using a custom scheduler - enabled: false - -ray-cluster: - enabled: true - image: - tag: 2.41.0-py312-gpu - head: - resources: - requests: - cpu: "4" - memory: "4G" - ephemeral-storage: 10Gi - limits: - cpu: "8" - memory: "6G" - ephemeral-storage: 10Gi - worker: - containerEnv: - - name: PYTHONPATH - value: "/mnt/ray-storage/libraries" - - name: TMPDIR - value: "/mnt/ray-storage/temp" - resources: - requests: - cpu: "4" - memory: "4G" - ephemeral-storage: 10Gi - limits: - cpu: "8" - memory: "6G" - ephemeral-storage: 10Gi - volumes: - - name: ray-pvc-storage - persistentVolumeClaim: - claimName: "ai-starter-kit-ray-pvc" # this value shouldn't be hardcoded. 
The actual value should be: {{ .Release.Name }}-models-cache-pvc - volumeMounts: - - name: ray-pvc-storage - mountPath: /mnt/ray-storage - podSecurityContext: - fsGroup: 1000 - -ollama: - enabled: true - ollama: - models: - pull: - - gemma3 - persistentVolume: - enabled: true - existingClaim: "ai-starter-kit-models-cache-only-pvc" - subPath: "ollama" - -rayPvc: - enabled: true - -modelsCachePvc: - storageClassName: "premium-rwo" - accessModes: - - ReadWriteOnce - size: 20Gi - -modelsCacheOnlyPvc: - enabled: true - storageClassName: "standard-rwx" - accessModes: - - ReadWriteMany - size: 100Gi - -localPersistence: - enabled: false - -genericDevicePlugin: - enabled: false diff --git a/ai/ai-starter-kit/helm-chart/ai-starter-kit/values-macos-gpu.yaml b/ai/ai-starter-kit/helm-chart/ai-starter-kit/values-macos-gpu.yaml deleted file mode 100644 index dfa19f41e..000000000 --- a/ai/ai-starter-kit/helm-chart/ai-starter-kit/values-macos-gpu.yaml +++ /dev/null @@ -1,98 +0,0 @@ -genericDevicePlugin: - enabled: true - image: - repository: "squat/generic-device-plugin" - tag: "latest" - pullPolicy: "IfNotPresent" - device: - count: 6 - resources: - requests: - cpu: "100m" - memory: "50Mi" - limits: - cpu: "150m" - memory: "100Mi" - -jupyterhub: - singleuser: - image: - name: jupyterhub/k8s-singleuser-sample - tag: "4.2.0" - storage: - type: static - static: - pvcName: "ai-starter-kit-models-cache-pvc" - subPath: "jupyterhub_workspace" - capacity: 40Gi - homeMountPath: /home/jovyan - extraVolumes: - - name: init-files - configMap: - name: "ai-starter-kit-init-files" - - name: dev-dri - hostPath: - path: /dev/dri - extraVolumeMounts: - - name: dev-dri - mountPath: /dev/dri - - -ollama: - enabled: true - ollama: - models: - pull: - - gemma3 - - llama3.2 - resources: - requests: - cpu: 1 - memory: 4Gi - squat.ai/dri: "1" - limits: - cpu: 4 - memory: 8Gi - squat.ai/dri: "1" - - persistentVolume: - enabled: true - existingClaim: "ai-starter-kit-models-cache-pvc" - subPath: "ollama" - -ramalama: - enabled: true - command: - - /bin/sh - - -c - - ramalama pull qwen2.5:1.5b && ramalama serve qwen2.5:1.5b --port 8080 --device metal - - resources: - requests: - cpu: 1 - memory: 2Gi - squat.ai/dri: "1" - limits: - cpu: 4 - memory: 8Gi - squat.ai/dri: "1" - - extraVolumeMounts: - - name: dev-dri - mountPath: /dev/dri - - extraVolumes: - - name: dev-dri - hostPath: - path: /dev/dri - -modelsCachePvc: - enabled: true - storageClassName: "standard" - accessModes: - - ReadWriteOnce - size: 40Gi - -localPersistence: - enabled: true - hostPath: "/home/models-cache" \ No newline at end of file From 6995e3c14996764195417f10dc62cd075010d97a Mon Sep 17 00:00:00 2001 From: Vlado Djerek Date: Mon, 6 Oct 2025 13:56:17 +0200 Subject: [PATCH 04/14] clean up makefile --- ai/ai-starter-kit/Makefile | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/ai/ai-starter-kit/Makefile b/ai/ai-starter-kit/Makefile index 40717cb20..4fc7fd583 100644 --- a/ai/ai-starter-kit/Makefile +++ b/ai/ai-starter-kit/Makefile @@ -25,15 +25,9 @@ dep_update: install: check_hf_token helm upgrade --install ai-starter-kit helm-chart/ai-starter-kit --set huggingface.token="$$HF_TOKEN" --timeout 10m -f helm-chart/ai-starter-kit/values.yaml -install_gke: check_hf_token - helm upgrade --install ai-starter-kit helm-chart/ai-starter-kit --set huggingface.token="$$HF_TOKEN" --timeout 10m -f helm-chart/ai-starter-kit/values-gke.yaml - -install_gke_gpu: check_hf_token - helm upgrade --install ai-starter-kit helm-chart/ai-starter-kit --set 
huggingface.token="$$HF_TOKEN" --timeout 10m -f helm-chart/ai-starter-kit/values-gke-gpu.yaml - start: mkdir -p /tmp/models-cache - minikube start --cpus 4 --memory 15000 --mount --mount-string="/tmp/models-cache:/tmp/models-cache" + minikube start --cpus 4 --memory 15000 --mount --mount-string="$$HOME/models-cache:/tmp/models-cache" start_gpu: mkdir -p $HOME/models-cache From 74322fcae16e7f4175608a954f8f188050f329b4 Mon Sep 17 00:00:00 2001 From: Vlado Djerek Date: Tue, 14 Oct 2025 16:47:17 +0200 Subject: [PATCH 05/14] changes to readme --- .../helm-chart/ai-starter-kit/README.md | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/ai/ai-starter-kit/helm-chart/ai-starter-kit/README.md b/ai/ai-starter-kit/helm-chart/ai-starter-kit/README.md index 741d27007..5ef16c23e 100644 --- a/ai/ai-starter-kit/helm-chart/ai-starter-kit/README.md +++ b/ai/ai-starter-kit/helm-chart/ai-starter-kit/README.md @@ -41,20 +41,26 @@ The AI Starter Kit simplifies the deployment of AI infrastructure by providing: ### Quick Start (Minikube) -1. **Start minikube with persistent storage:** +1. **Create a folder for the persistent storage:** +```bash +mkdir -p /$HOME/models-cache +``` + +2. **Start minikube with persistent storage:** ```bash minikube start --cpus 4 --memory 15000 \ - --mount --mount-string="/tmp/models-cache:/tmp/models-cache" + --mount --mount-string="/$HOME/models-cache:/tmp/models-cache" ``` -2. **Install the chart:** +3. **Install the chart:** ```bash +helm dependency build helm install ai-starter-kit . \ --set huggingface.token="YOUR_HF_TOKEN" \ -f values.yaml ``` -3. **Access JupyterHub:** +4. **Access JupyterHub:** ```bash kubectl port-forward svc/ai-starter-kit-jupyterhub-proxy-public 8080:80 ``` From 2755940c07e430705f8eeb9d29766810d2c2f9e0 Mon Sep 17 00:00:00 2001 From: Vlado Djerek Date: Wed, 15 Oct 2025 16:09:30 +0200 Subject: [PATCH 06/14] update readme and change default password --- ai/ai-starter-kit/helm-chart/ai-starter-kit/README.md | 3 ++- ai/ai-starter-kit/helm-chart/ai-starter-kit/values.yaml | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/ai/ai-starter-kit/helm-chart/ai-starter-kit/README.md b/ai/ai-starter-kit/helm-chart/ai-starter-kit/README.md index 5ef16c23e..965a0636d 100644 --- a/ai/ai-starter-kit/helm-chart/ai-starter-kit/README.md +++ b/ai/ai-starter-kit/helm-chart/ai-starter-kit/README.md @@ -54,7 +54,8 @@ minikube start --cpus 4 --memory 15000 \ 3. **Install the chart:** ```bash -helm dependency build +cd ai/ai-starter-kit/helm-chart/ai-starter-kit +helm dependency update helm install ai-starter-kit . 
\ --set huggingface.token="YOUR_HF_TOKEN" \ -f values.yaml diff --git a/ai/ai-starter-kit/helm-chart/ai-starter-kit/values.yaml b/ai/ai-starter-kit/helm-chart/ai-starter-kit/values.yaml index 3155ea642..f708de297 100644 --- a/ai/ai-starter-kit/helm-chart/ai-starter-kit/values.yaml +++ b/ai/ai-starter-kit/helm-chart/ai-starter-kit/values.yaml @@ -90,7 +90,7 @@ jupyterhub: helm.sh/resource-policy: keep extraConfig: 00-dummy-authenticator: | - c.DummyAuthenticator.password = "sneakypass" + c.DummyAuthenticator.password = "password" 01-spawner-timeouts: | c.KubeSpawner.start_timeout = 1800 proxy: From 2781c923b43e524778fa9d8f9314439285574820 Mon Sep 17 00:00:00 2001 From: Dima Drogovoz Date: Wed, 15 Oct 2025 16:09:22 +0100 Subject: [PATCH 07/14] remove output from ray.ipynb --- .../helm-chart/ai-starter-kit/files/ray.ipynb | 625 +----------------- 1 file changed, 11 insertions(+), 614 deletions(-) diff --git a/ai/ai-starter-kit/helm-chart/ai-starter-kit/files/ray.ipynb b/ai/ai-starter-kit/helm-chart/ai-starter-kit/files/ray.ipynb index dae93a357..387c320b7 100644 --- a/ai/ai-starter-kit/helm-chart/ai-starter-kit/files/ray.ipynb +++ b/ai/ai-starter-kit/helm-chart/ai-starter-kit/files/ray.ipynb @@ -2,549 +2,36 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "id": "e4a6ac7c-5c73-42a9-8b74-420788321543", "metadata": { - "collapsed": true, - "jupyter": { - "outputs_hidden": true - }, "tags": [] }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Defaulting to user installation because normal site-packages is not writeable\n", - "Collecting ray==2.41.0\n", - " Downloading ray-2.41.0-cp311-cp311-manylinux2014_x86_64.whl.metadata (18 kB)\n", - "Collecting click>=7.0 (from ray==2.41.0)\n", - " Downloading click-8.2.1-py3-none-any.whl.metadata (2.5 kB)\n", - "Collecting filelock (from ray==2.41.0)\n", - " Downloading filelock-3.19.1-py3-none-any.whl.metadata (2.1 kB)\n", - "Requirement already satisfied: jsonschema in /opt/bitnami/miniconda/lib/python3.11/site-packages (from ray==2.41.0) (4.22.0)\n", - "Collecting msgpack<2.0.0,>=1.0.0 (from ray==2.41.0)\n", - " Downloading msgpack-1.1.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (8.4 kB)\n", - "Requirement already satisfied: packaging in /opt/bitnami/miniconda/lib/python3.11/site-packages (from ray==2.41.0) (23.2)\n", - "Collecting protobuf!=3.19.5,>=3.15.3 (from ray==2.41.0)\n", - " Downloading protobuf-6.32.0-cp39-abi3-manylinux2014_x86_64.whl.metadata (593 bytes)\n", - "Requirement already satisfied: pyyaml in /opt/bitnami/miniconda/lib/python3.11/site-packages (from ray==2.41.0) (6.0.1)\n", - "Requirement already satisfied: aiosignal in /opt/bitnami/miniconda/lib/python3.11/site-packages (from ray==2.41.0) (1.3.1)\n", - "Requirement already satisfied: frozenlist in /opt/bitnami/miniconda/lib/python3.11/site-packages (from ray==2.41.0) (1.4.1)\n", - "Requirement already satisfied: requests in /opt/bitnami/miniconda/lib/python3.11/site-packages (from ray==2.41.0) (2.31.0)\n", - "Requirement already satisfied: attrs>=22.2.0 in /opt/bitnami/miniconda/lib/python3.11/site-packages (from jsonschema->ray==2.41.0) (23.2.0)\n", - "Requirement already satisfied: jsonschema-specifications>=2023.03.6 in /opt/bitnami/miniconda/lib/python3.11/site-packages (from jsonschema->ray==2.41.0) (2023.12.1)\n", - "Requirement already satisfied: referencing>=0.28.4 in /opt/bitnami/miniconda/lib/python3.11/site-packages (from jsonschema->ray==2.41.0) (0.35.1)\n", - "Requirement 
already satisfied: rpds-py>=0.7.1 in /opt/bitnami/miniconda/lib/python3.11/site-packages (from jsonschema->ray==2.41.0) (0.18.1)\n", - "Requirement already satisfied: charset-normalizer<4,>=2 in /opt/bitnami/miniconda/lib/python3.11/site-packages (from requests->ray==2.41.0) (2.0.4)\n", - "Requirement already satisfied: idna<4,>=2.5 in /opt/bitnami/miniconda/lib/python3.11/site-packages (from requests->ray==2.41.0) (3.7)\n", - "Requirement already satisfied: urllib3<3,>=1.21.1 in /opt/bitnami/miniconda/lib/python3.11/site-packages (from requests->ray==2.41.0) (2.1.0)\n", - "Requirement already satisfied: certifi>=2017.4.17 in /opt/bitnami/miniconda/lib/python3.11/site-packages (from requests->ray==2.41.0) (2024.2.2)\n", - "Downloading ray-2.41.0-cp311-cp311-manylinux2014_x86_64.whl (67.3 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m67.3/67.3 MB\u001b[0m \u001b[31m25.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m:00:01\u001b[0m00:01\u001b[0m\n", - "\u001b[?25hDownloading click-8.2.1-py3-none-any.whl (102 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m102.2/102.2 kB\u001b[0m \u001b[31m11.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading msgpack-1.1.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (429 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m430.0/430.0 kB\u001b[0m \u001b[31m41.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading protobuf-6.32.0-cp39-abi3-manylinux2014_x86_64.whl (322 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m322.0/322.0 kB\u001b[0m \u001b[31m33.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading filelock-3.19.1-py3-none-any.whl (15 kB)\n", - "Installing collected packages: protobuf, msgpack, filelock, click, ray\n", - "\u001b[33m WARNING: The scripts ray, rllib, serve and tune are installed in '/tmp/models-cache/bin' which is not on PATH.\n", - " Consider adding this directory to PATH or, if you prefer to suppress this warning, use --no-warn-script-location.\u001b[0m\u001b[33m\n", - "\u001b[0mSuccessfully installed click-8.2.1 filelock-3.19.1 msgpack-1.1.1 protobuf-6.32.0 ray-2.41.0\n" - ] - } - ], + "outputs": [], "source": [ "!pip install ray==2.41.0" ] }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "id": "788f1517-251c-4171-af7d-f4c7a5073d71", "metadata": { - "collapsed": true, - "jupyter": { - "outputs_hidden": true - }, "tags": [] }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Defaulting to user installation because normal site-packages is not writeable\n", - "Collecting numpy\n", - " Downloading numpy-2.3.2-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (62 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m62.1/62.1 kB\u001b[0m \u001b[31m3.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hCollecting mlflow\n", - " Downloading mlflow-3.3.1-py3-none-any.whl.metadata (30 kB)\n", - "Collecting tensorflow\n", - " Downloading tensorflow-2.20.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.5 kB)\n", - "Requirement already satisfied: ray[client,default,serve] in ./models-cache/lib/python3.11/site-packages (2.41.0)\n", - "Collecting mlflow-skinny==3.3.1 (from mlflow)\n", - " Downloading mlflow_skinny-3.3.1-py3-none-any.whl.metadata (31 
kB)\n", - "Collecting mlflow-tracing==3.3.1 (from mlflow)\n", - " Downloading mlflow_tracing-3.3.1-py3-none-any.whl.metadata (19 kB)\n", - "Collecting Flask<4 (from mlflow)\n", - " Downloading flask-3.1.2-py3-none-any.whl.metadata (3.2 kB)\n", - "Requirement already satisfied: alembic!=1.10.0,<2 in /opt/bitnami/miniconda/lib/python3.11/site-packages (from mlflow) (1.13.1)\n", - "Collecting cryptography<46,>=43.0.0 (from mlflow)\n", - " Downloading cryptography-45.0.6-cp311-abi3-manylinux_2_34_x86_64.whl.metadata (5.7 kB)\n", - "Requirement already satisfied: docker<8,>=4.0.0 in /opt/bitnami/miniconda/lib/python3.11/site-packages (from mlflow) (7.1.0)\n", - "Collecting graphene<4 (from mlflow)\n", - " Downloading graphene-3.4.3-py2.py3-none-any.whl.metadata (6.9 kB)\n", - "Collecting gunicorn<24 (from mlflow)\n", - " Downloading gunicorn-23.0.0-py3-none-any.whl.metadata (4.4 kB)\n", - "Collecting matplotlib<4 (from mlflow)\n", - " Downloading matplotlib-3.10.5-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (11 kB)\n", - "Collecting pandas<3 (from mlflow)\n", - " Downloading pandas-2.3.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (91 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m91.2/91.2 kB\u001b[0m \u001b[31m12.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hCollecting pyarrow<22,>=4.0.0 (from mlflow)\n", - " Downloading pyarrow-21.0.0-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (3.3 kB)\n", - "Collecting scikit-learn<2 (from mlflow)\n", - " Downloading scikit_learn-1.7.1-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (11 kB)\n", - "Collecting scipy<2 (from mlflow)\n", - " Downloading scipy-1.16.1-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (61 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m62.0/62.0 kB\u001b[0m \u001b[31m8.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hRequirement already satisfied: sqlalchemy<3,>=1.4.0 in /opt/bitnami/miniconda/lib/python3.11/site-packages (from mlflow) (2.0.30)\n", - "Collecting cachetools<7,>=5.0.0 (from mlflow-skinny==3.3.1->mlflow)\n", - " Downloading cachetools-6.2.0-py3-none-any.whl.metadata (5.4 kB)\n", - "Requirement already satisfied: click<9,>=7.0 in ./models-cache/lib/python3.11/site-packages (from mlflow-skinny==3.3.1->mlflow) (8.2.1)\n", - "Collecting cloudpickle<4 (from mlflow-skinny==3.3.1->mlflow)\n", - " Downloading cloudpickle-3.1.1-py3-none-any.whl.metadata (7.1 kB)\n", - "Collecting databricks-sdk<1,>=0.20.0 (from mlflow-skinny==3.3.1->mlflow)\n", - " Downloading databricks_sdk-0.64.0-py3-none-any.whl.metadata (39 kB)\n", - "Collecting fastapi<1 (from mlflow-skinny==3.3.1->mlflow)\n", - " Downloading fastapi-0.116.1-py3-none-any.whl.metadata (28 kB)\n", - "Collecting gitpython<4,>=3.1.9 (from mlflow-skinny==3.3.1->mlflow)\n", - " Downloading gitpython-3.1.45-py3-none-any.whl.metadata (13 kB)\n", - "Collecting importlib_metadata!=4.7.0,<9,>=3.7.0 (from mlflow-skinny==3.3.1->mlflow)\n", - " Downloading importlib_metadata-8.7.0-py3-none-any.whl.metadata (4.8 kB)\n", - "Collecting opentelemetry-api<3,>=1.9.0 (from mlflow-skinny==3.3.1->mlflow)\n", - " Downloading opentelemetry_api-1.36.0-py3-none-any.whl.metadata (1.5 kB)\n", - "Collecting opentelemetry-sdk<3,>=1.9.0 (from mlflow-skinny==3.3.1->mlflow)\n", - " Downloading opentelemetry_sdk-1.36.0-py3-none-any.whl.metadata (1.5 kB)\n", - "Requirement already satisfied: 
packaging<26 in /opt/bitnami/miniconda/lib/python3.11/site-packages (from mlflow-skinny==3.3.1->mlflow) (23.2)\n", - "Requirement already satisfied: protobuf<7,>=3.12.0 in ./models-cache/lib/python3.11/site-packages (from mlflow-skinny==3.3.1->mlflow) (6.32.0)\n", - "Requirement already satisfied: pydantic<3,>=1.10.8 in /opt/bitnami/miniconda/lib/python3.11/site-packages (from mlflow-skinny==3.3.1->mlflow) (2.7.3)\n", - "Requirement already satisfied: pyyaml<7,>=5.1 in /opt/bitnami/miniconda/lib/python3.11/site-packages (from mlflow-skinny==3.3.1->mlflow) (6.0.1)\n", - "Requirement already satisfied: requests<3,>=2.17.3 in /opt/bitnami/miniconda/lib/python3.11/site-packages (from mlflow-skinny==3.3.1->mlflow) (2.31.0)\n", - "Collecting sqlparse<1,>=0.4.0 (from mlflow-skinny==3.3.1->mlflow)\n", - " Downloading sqlparse-0.5.3-py3-none-any.whl.metadata (3.9 kB)\n", - "Requirement already satisfied: typing-extensions<5,>=4.0.0 in /opt/bitnami/miniconda/lib/python3.11/site-packages (from mlflow-skinny==3.3.1->mlflow) (4.12.2)\n", - "Collecting uvicorn<1 (from mlflow-skinny==3.3.1->mlflow)\n", - " Downloading uvicorn-0.35.0-py3-none-any.whl.metadata (6.5 kB)\n", - "Collecting absl-py>=1.0.0 (from tensorflow)\n", - " Downloading absl_py-2.3.1-py3-none-any.whl.metadata (3.3 kB)\n", - "Collecting astunparse>=1.6.0 (from tensorflow)\n", - " Downloading astunparse-1.6.3-py2.py3-none-any.whl.metadata (4.4 kB)\n", - "Collecting flatbuffers>=24.3.25 (from tensorflow)\n", - " Downloading flatbuffers-25.2.10-py2.py3-none-any.whl.metadata (875 bytes)\n", - "Collecting gast!=0.5.0,!=0.5.1,!=0.5.2,>=0.2.1 (from tensorflow)\n", - " Downloading gast-0.6.0-py3-none-any.whl.metadata (1.3 kB)\n", - "Collecting google_pasta>=0.1.1 (from tensorflow)\n", - " Downloading google_pasta-0.2.0-py3-none-any.whl.metadata (814 bytes)\n", - "Collecting libclang>=13.0.0 (from tensorflow)\n", - " Downloading libclang-18.1.1-py2.py3-none-manylinux2010_x86_64.whl.metadata (5.2 kB)\n", - "Collecting opt_einsum>=2.3.2 (from tensorflow)\n", - " Downloading opt_einsum-3.4.0-py3-none-any.whl.metadata (6.3 kB)\n", - "Requirement already satisfied: setuptools in /opt/bitnami/miniconda/lib/python3.11/site-packages (from tensorflow) (69.5.1)\n", - "Requirement already satisfied: six>=1.12.0 in /opt/bitnami/miniconda/lib/python3.11/site-packages (from tensorflow) (1.16.0)\n", - "Collecting termcolor>=1.1.0 (from tensorflow)\n", - " Downloading termcolor-3.1.0-py3-none-any.whl.metadata (6.4 kB)\n", - "Collecting wrapt>=1.11.0 (from tensorflow)\n", - " Downloading wrapt-1.17.3-cp311-cp311-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl.metadata (6.4 kB)\n", - "Collecting grpcio<2.0,>=1.24.3 (from tensorflow)\n", - " Downloading grpcio-1.74.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.8 kB)\n", - "Collecting tensorboard~=2.20.0 (from tensorflow)\n", - " Downloading tensorboard-2.20.0-py3-none-any.whl.metadata (1.8 kB)\n", - "Collecting keras>=3.10.0 (from tensorflow)\n", - " Downloading keras-3.11.3-py3-none-any.whl.metadata (5.9 kB)\n", - "Collecting h5py>=3.11.0 (from tensorflow)\n", - " Downloading h5py-3.14.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (2.7 kB)\n", - "Collecting ml_dtypes<1.0.0,>=0.5.1 (from tensorflow)\n", - " Downloading ml_dtypes-0.5.3-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (8.9 kB)\n", - "Requirement already satisfied: filelock in ./models-cache/lib/python3.11/site-packages (from ray[client,default,serve]) (3.19.1)\n", 
- "Requirement already satisfied: jsonschema in /opt/bitnami/miniconda/lib/python3.11/site-packages (from ray[client,default,serve]) (4.22.0)\n", - "Requirement already satisfied: msgpack<2.0.0,>=1.0.0 in ./models-cache/lib/python3.11/site-packages (from ray[client,default,serve]) (1.1.1)\n", - "Requirement already satisfied: aiosignal in /opt/bitnami/miniconda/lib/python3.11/site-packages (from ray[client,default,serve]) (1.3.1)\n", - "Requirement already satisfied: frozenlist in /opt/bitnami/miniconda/lib/python3.11/site-packages (from ray[client,default,serve]) (1.4.1)\n", - "Requirement already satisfied: aiohttp>=3.7 in /opt/bitnami/miniconda/lib/python3.11/site-packages (from ray[client,default,serve]) (3.9.5)\n", - "Collecting aiohttp-cors (from ray[client,default,serve])\n", - " Downloading aiohttp_cors-0.8.1-py3-none-any.whl.metadata (20 kB)\n", - "Collecting colorful (from ray[client,default,serve])\n", - " Downloading colorful-0.5.7-py2.py3-none-any.whl.metadata (16 kB)\n", - "Collecting opencensus (from ray[client,default,serve])\n", - " Downloading opencensus-0.11.4-py2.py3-none-any.whl.metadata (12 kB)\n", - "Requirement already satisfied: prometheus-client>=0.7.1 in /opt/bitnami/miniconda/lib/python3.11/site-packages (from ray[client,default,serve]) (0.20.0)\n", - "Collecting smart-open (from ray[client,default,serve])\n", - " Downloading smart_open-7.3.0.post1-py3-none-any.whl.metadata (24 kB)\n", - "Collecting virtualenv!=20.21.1,>=20.0.24 (from ray[client,default,serve])\n", - " Downloading virtualenv-20.34.0-py3-none-any.whl.metadata (4.6 kB)\n", - "Collecting py-spy>=0.2.0 (from ray[client,default,serve])\n", - " Downloading py_spy-0.4.1-py2.py3-none-manylinux_2_5_x86_64.manylinux1_x86_64.whl.metadata (510 bytes)\n", - "Collecting starlette (from ray[client,default,serve])\n", - " Downloading starlette-0.47.3-py3-none-any.whl.metadata (6.2 kB)\n", - "Collecting watchfiles (from ray[client,default,serve])\n", - " Downloading watchfiles-1.1.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.9 kB)\n", - "Requirement already satisfied: attrs>=17.3.0 in /opt/bitnami/miniconda/lib/python3.11/site-packages (from aiohttp>=3.7->ray[client,default,serve]) (23.2.0)\n", - "Requirement already satisfied: multidict<7.0,>=4.5 in /opt/bitnami/miniconda/lib/python3.11/site-packages (from aiohttp>=3.7->ray[client,default,serve]) (6.0.5)\n", - "Requirement already satisfied: yarl<2.0,>=1.0 in /opt/bitnami/miniconda/lib/python3.11/site-packages (from aiohttp>=3.7->ray[client,default,serve]) (1.9.4)\n", - "Requirement already satisfied: Mako in /opt/bitnami/miniconda/lib/python3.11/site-packages (from alembic!=1.10.0,<2->mlflow) (1.3.5)\n", - "Requirement already satisfied: wheel<1.0,>=0.23.0 in /opt/bitnami/miniconda/lib/python3.11/site-packages (from astunparse>=1.6.0->tensorflow) (0.43.0)\n", - "Requirement already satisfied: cffi>=1.14 in /opt/bitnami/miniconda/lib/python3.11/site-packages (from cryptography<46,>=43.0.0->mlflow) (1.16.0)\n", - "Requirement already satisfied: urllib3>=1.26.0 in /opt/bitnami/miniconda/lib/python3.11/site-packages (from docker<8,>=4.0.0->mlflow) (2.1.0)\n", - "Collecting blinker>=1.9.0 (from Flask<4->mlflow)\n", - " Downloading blinker-1.9.0-py3-none-any.whl.metadata (1.6 kB)\n", - "Collecting itsdangerous>=2.2.0 (from Flask<4->mlflow)\n", - " Downloading itsdangerous-2.2.0-py3-none-any.whl.metadata (1.9 kB)\n", - "Requirement already satisfied: jinja2>=3.1.2 in /opt/bitnami/miniconda/lib/python3.11/site-packages (from 
Flask<4->mlflow) (3.1.4)\n", - "Requirement already satisfied: markupsafe>=2.1.1 in /opt/bitnami/miniconda/lib/python3.11/site-packages (from Flask<4->mlflow) (2.1.5)\n", - "Collecting werkzeug>=3.1.0 (from Flask<4->mlflow)\n", - " Downloading werkzeug-3.1.3-py3-none-any.whl.metadata (3.7 kB)\n", - "Collecting graphql-core<3.3,>=3.1 (from graphene<4->mlflow)\n", - " Downloading graphql_core-3.2.6-py3-none-any.whl.metadata (11 kB)\n", - "Collecting graphql-relay<3.3,>=3.1 (from graphene<4->mlflow)\n", - " Downloading graphql_relay-3.2.0-py3-none-any.whl.metadata (12 kB)\n", - "Requirement already satisfied: python-dateutil<3,>=2.7.0 in /opt/bitnami/miniconda/lib/python3.11/site-packages (from graphene<4->mlflow) (2.9.0.post0)\n", - "Collecting rich (from keras>=3.10.0->tensorflow)\n", - " Downloading rich-14.1.0-py3-none-any.whl.metadata (18 kB)\n", - "Collecting namex (from keras>=3.10.0->tensorflow)\n", - " Downloading namex-0.1.0-py3-none-any.whl.metadata (322 bytes)\n", - "Collecting optree (from keras>=3.10.0->tensorflow)\n", - " Downloading optree-0.17.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (33 kB)\n", - "Collecting contourpy>=1.0.1 (from matplotlib<4->mlflow)\n", - " Downloading contourpy-1.3.3-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (5.5 kB)\n", - "Collecting cycler>=0.10 (from matplotlib<4->mlflow)\n", - " Downloading cycler-0.12.1-py3-none-any.whl.metadata (3.8 kB)\n", - "Collecting fonttools>=4.22.0 (from matplotlib<4->mlflow)\n", - " Downloading fonttools-4.59.1-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (108 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m108.9/108.9 kB\u001b[0m \u001b[31m11.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hCollecting kiwisolver>=1.3.1 (from matplotlib<4->mlflow)\n", - " Downloading kiwisolver-1.4.9-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (6.3 kB)\n", - "Collecting pillow>=8 (from matplotlib<4->mlflow)\n", - " Downloading pillow-11.3.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (9.0 kB)\n", - "Collecting pyparsing>=2.3.1 (from matplotlib<4->mlflow)\n", - " Downloading pyparsing-3.2.3-py3-none-any.whl.metadata (5.0 kB)\n", - "Collecting pytz>=2020.1 (from pandas<3->mlflow)\n", - " Downloading pytz-2025.2-py2.py3-none-any.whl.metadata (22 kB)\n", - "Collecting tzdata>=2022.7 (from pandas<3->mlflow)\n", - " Downloading tzdata-2025.2-py2.py3-none-any.whl.metadata (1.4 kB)\n", - "Requirement already satisfied: annotated-types>=0.4.0 in /opt/bitnami/miniconda/lib/python3.11/site-packages (from pydantic<3,>=1.10.8->mlflow-skinny==3.3.1->mlflow) (0.7.0)\n", - "Requirement already satisfied: pydantic-core==2.18.4 in /opt/bitnami/miniconda/lib/python3.11/site-packages (from pydantic<3,>=1.10.8->mlflow-skinny==3.3.1->mlflow) (2.18.4)\n", - "Requirement already satisfied: charset-normalizer<4,>=2 in /opt/bitnami/miniconda/lib/python3.11/site-packages (from requests<3,>=2.17.3->mlflow-skinny==3.3.1->mlflow) (2.0.4)\n", - "Requirement already satisfied: idna<4,>=2.5 in /opt/bitnami/miniconda/lib/python3.11/site-packages (from requests<3,>=2.17.3->mlflow-skinny==3.3.1->mlflow) (3.7)\n", - "Requirement already satisfied: certifi>=2017.4.17 in /opt/bitnami/miniconda/lib/python3.11/site-packages (from requests<3,>=2.17.3->mlflow-skinny==3.3.1->mlflow) (2024.2.2)\n", - "Collecting joblib>=1.2.0 (from scikit-learn<2->mlflow)\n", - " Downloading 
joblib-1.5.1-py3-none-any.whl.metadata (5.6 kB)\n", - "Collecting threadpoolctl>=3.1.0 (from scikit-learn<2->mlflow)\n", - " Downloading threadpoolctl-3.6.0-py3-none-any.whl.metadata (13 kB)\n", - "Requirement already satisfied: greenlet!=0.4.17 in /opt/bitnami/miniconda/lib/python3.11/site-packages (from sqlalchemy<3,>=1.4.0->mlflow) (3.0.3)\n", - "Requirement already satisfied: anyio<5,>=3.6.2 in /opt/bitnami/miniconda/lib/python3.11/site-packages (from starlette->ray[client,default,serve]) (4.4.0)\n", - "Collecting markdown>=2.6.8 (from tensorboard~=2.20.0->tensorflow)\n", - " Downloading markdown-3.8.2-py3-none-any.whl.metadata (5.1 kB)\n", - "Collecting tensorboard-data-server<0.8.0,>=0.7.0 (from tensorboard~=2.20.0->tensorflow)\n", - " Downloading tensorboard_data_server-0.7.2-py3-none-manylinux_2_31_x86_64.whl.metadata (1.1 kB)\n", - "Collecting distlib<1,>=0.3.7 (from virtualenv!=20.21.1,>=20.0.24->ray[client,default,serve])\n", - " Downloading distlib-0.4.0-py2.py3-none-any.whl.metadata (5.2 kB)\n", - "Requirement already satisfied: platformdirs<5,>=3.9.1 in /opt/bitnami/miniconda/lib/python3.11/site-packages (from virtualenv!=20.21.1,>=20.0.24->ray[client,default,serve]) (3.10.0)\n", - "Requirement already satisfied: jsonschema-specifications>=2023.03.6 in /opt/bitnami/miniconda/lib/python3.11/site-packages (from jsonschema->ray[client,default,serve]) (2023.12.1)\n", - "Requirement already satisfied: referencing>=0.28.4 in /opt/bitnami/miniconda/lib/python3.11/site-packages (from jsonschema->ray[client,default,serve]) (0.35.1)\n", - "Requirement already satisfied: rpds-py>=0.7.1 in /opt/bitnami/miniconda/lib/python3.11/site-packages (from jsonschema->ray[client,default,serve]) (0.18.1)\n", - "Collecting opencensus-context>=0.1.3 (from opencensus->ray[client,default,serve])\n", - " Downloading opencensus_context-0.1.3-py2.py3-none-any.whl.metadata (3.3 kB)\n", - "Collecting google-api-core<3.0.0,>=1.0.0 (from opencensus->ray[client,default,serve])\n", - " Downloading google_api_core-2.25.1-py3-none-any.whl.metadata (3.0 kB)\n", - "Requirement already satisfied: h11>=0.8 in /opt/bitnami/miniconda/lib/python3.11/site-packages (from uvicorn[standard]; extra == \"serve\"->ray[client,default,serve]) (0.14.0)\n", - "Collecting httptools>=0.6.3 (from uvicorn[standard]; extra == \"serve\"->ray[client,default,serve])\n", - " Downloading httptools-0.6.4-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.6 kB)\n", - "Collecting python-dotenv>=0.13 (from uvicorn[standard]; extra == \"serve\"->ray[client,default,serve])\n", - " Downloading python_dotenv-1.1.1-py3-none-any.whl.metadata (24 kB)\n", - "Collecting uvloop>=0.15.1 (from uvicorn[standard]; extra == \"serve\"->ray[client,default,serve])\n", - " Downloading uvloop-0.21.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.9 kB)\n", - "Collecting websockets>=10.4 (from uvicorn[standard]; extra == \"serve\"->ray[client,default,serve])\n", - " Downloading websockets-15.0.1-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.8 kB)\n", - "Requirement already satisfied: sniffio>=1.1 in /opt/bitnami/miniconda/lib/python3.11/site-packages (from anyio<5,>=3.6.2->starlette->ray[client,default,serve]) (1.3.1)\n", - "Requirement already satisfied: pycparser in /opt/bitnami/miniconda/lib/python3.11/site-packages (from cffi>=1.14->cryptography<46,>=43.0.0->mlflow) (2.21)\n", - "Collecting google-auth~=2.0 (from 
databricks-sdk<1,>=0.20.0->mlflow-skinny==3.3.1->mlflow)\n", - " Downloading google_auth-2.40.3-py2.py3-none-any.whl.metadata (6.2 kB)\n", - "Collecting gitdb<5,>=4.0.1 (from gitpython<4,>=3.1.9->mlflow-skinny==3.3.1->mlflow)\n", - " Downloading gitdb-4.0.12-py3-none-any.whl.metadata (1.2 kB)\n", - "Collecting googleapis-common-protos<2.0.0,>=1.56.2 (from google-api-core<3.0.0,>=1.0.0->opencensus->ray[client,default,serve])\n", - " Downloading googleapis_common_protos-1.70.0-py3-none-any.whl.metadata (9.3 kB)\n", - "Collecting proto-plus<2.0.0,>=1.22.3 (from google-api-core<3.0.0,>=1.0.0->opencensus->ray[client,default,serve])\n", - " Downloading proto_plus-1.26.1-py3-none-any.whl.metadata (2.2 kB)\n", - "Collecting zipp>=3.20 (from importlib_metadata!=4.7.0,<9,>=3.7.0->mlflow-skinny==3.3.1->mlflow)\n", - " Downloading zipp-3.23.0-py3-none-any.whl.metadata (3.6 kB)\n", - "Collecting opentelemetry-semantic-conventions==0.57b0 (from opentelemetry-sdk<3,>=1.9.0->mlflow-skinny==3.3.1->mlflow)\n", - " Downloading opentelemetry_semantic_conventions-0.57b0-py3-none-any.whl.metadata (2.4 kB)\n", - "Collecting markdown-it-py>=2.2.0 (from rich->keras>=3.10.0->tensorflow)\n", - " Downloading markdown_it_py-4.0.0-py3-none-any.whl.metadata (7.3 kB)\n", - "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /opt/bitnami/miniconda/lib/python3.11/site-packages (from rich->keras>=3.10.0->tensorflow) (2.18.0)\n", - "Collecting smmap<6,>=3.0.1 (from gitdb<5,>=4.0.1->gitpython<4,>=3.1.9->mlflow-skinny==3.3.1->mlflow)\n", - " Downloading smmap-5.0.2-py3-none-any.whl.metadata (4.3 kB)\n", - "Collecting cachetools<7,>=5.0.0 (from mlflow-skinny==3.3.1->mlflow)\n", - " Downloading cachetools-5.5.2-py3-none-any.whl.metadata (5.4 kB)\n", - "Collecting pyasn1-modules>=0.2.1 (from google-auth~=2.0->databricks-sdk<1,>=0.20.0->mlflow-skinny==3.3.1->mlflow)\n", - " Downloading pyasn1_modules-0.4.2-py3-none-any.whl.metadata (3.5 kB)\n", - "Collecting rsa<5,>=3.1.4 (from google-auth~=2.0->databricks-sdk<1,>=0.20.0->mlflow-skinny==3.3.1->mlflow)\n", - " Downloading rsa-4.9.1-py3-none-any.whl.metadata (5.6 kB)\n", - "Collecting mdurl~=0.1 (from markdown-it-py>=2.2.0->rich->keras>=3.10.0->tensorflow)\n", - " Downloading mdurl-0.1.2-py3-none-any.whl.metadata (1.6 kB)\n", - "Collecting pyasn1<0.7.0,>=0.6.1 (from pyasn1-modules>=0.2.1->google-auth~=2.0->databricks-sdk<1,>=0.20.0->mlflow-skinny==3.3.1->mlflow)\n", - " Downloading pyasn1-0.6.1-py3-none-any.whl.metadata (8.4 kB)\n", - "Downloading numpy-2.3.2-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl (16.9 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m16.9/16.9 MB\u001b[0m \u001b[31m119.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n", - "\u001b[?25hDownloading mlflow-3.3.1-py3-none-any.whl (26.4 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m26.4/26.4 MB\u001b[0m \u001b[31m87.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m:00:01\u001b[0m00:01\u001b[0m\n", - "\u001b[?25hDownloading mlflow_skinny-3.3.1-py3-none-any.whl (2.0 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.0/2.0 MB\u001b[0m \u001b[31m104.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading mlflow_tracing-3.3.1-py3-none-any.whl (1.1 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.1/1.1 MB\u001b[0m \u001b[31m67.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - 
"\u001b[?25hDownloading tensorflow-2.20.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (620.6 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m620.6/620.6 MB\u001b[0m \u001b[31m2.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m:00:01\u001b[0m00:01\u001b[0m\n", - "\u001b[?25hDownloading absl_py-2.3.1-py3-none-any.whl (135 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m135.8/135.8 kB\u001b[0m \u001b[31m16.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading astunparse-1.6.3-py2.py3-none-any.whl (12 kB)\n", - "Downloading cryptography-45.0.6-cp311-abi3-manylinux_2_34_x86_64.whl (4.5 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m4.5/4.5 MB\u001b[0m \u001b[31m121.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m\n", - "\u001b[?25hDownloading fastapi-0.116.1-py3-none-any.whl (95 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m95.6/95.6 kB\u001b[0m \u001b[31m10.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading flask-3.1.2-py3-none-any.whl (103 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m103.3/103.3 kB\u001b[0m \u001b[31m12.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading flatbuffers-25.2.10-py2.py3-none-any.whl (30 kB)\n", - "Downloading gast-0.6.0-py3-none-any.whl (21 kB)\n", - "Downloading google_pasta-0.2.0-py3-none-any.whl (57 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m57.5/57.5 kB\u001b[0m \u001b[31m7.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading graphene-3.4.3-py2.py3-none-any.whl (114 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m114.9/114.9 kB\u001b[0m \u001b[31m16.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading grpcio-1.74.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (6.2 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.2/6.2 MB\u001b[0m \u001b[31m114.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n", - "\u001b[?25hDownloading gunicorn-23.0.0-py3-none-any.whl (85 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m85.0/85.0 kB\u001b[0m \u001b[31m11.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading h5py-3.14.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (4.5 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m4.5/4.5 MB\u001b[0m \u001b[31m128.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m\n", - "\u001b[?25hDownloading keras-3.11.3-py3-none-any.whl (1.4 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.4/1.4 MB\u001b[0m \u001b[31m81.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading libclang-18.1.1-py2.py3-none-manylinux2010_x86_64.whl (24.5 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m24.5/24.5 MB\u001b[0m \u001b[31m102.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n", - "\u001b[?25hDownloading matplotlib-3.10.5-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (8.7 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m 
\u001b[32m8.7/8.7 MB\u001b[0m \u001b[31m133.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n", - "\u001b[?25hDownloading ml_dtypes-0.5.3-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl (4.9 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m4.9/4.9 MB\u001b[0m \u001b[31m108.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m\n", - "\u001b[?25hDownloading opt_einsum-3.4.0-py3-none-any.whl (71 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m71.9/71.9 kB\u001b[0m \u001b[31m10.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading pandas-2.3.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (12.4 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m12.4/12.4 MB\u001b[0m \u001b[31m138.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m0:01\u001b[0m\n", - "\u001b[?25hDownloading py_spy-0.4.1-py2.py3-none-manylinux_2_5_x86_64.manylinux1_x86_64.whl (2.8 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.8/2.8 MB\u001b[0m \u001b[31m116.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading pyarrow-21.0.0-cp311-cp311-manylinux_2_28_x86_64.whl (42.8 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m42.8/42.8 MB\u001b[0m \u001b[31m44.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m:00:01\u001b[0m00:01\u001b[0m\n", - "\u001b[?25hDownloading scikit_learn-1.7.1-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (9.7 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m9.7/9.7 MB\u001b[0m \u001b[31m144.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n", - "\u001b[?25hDownloading scipy-1.16.1-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (35.4 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m35.4/35.4 MB\u001b[0m \u001b[31m56.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m:00:01\u001b[0m00:01\u001b[0m\n", - "\u001b[?25hDownloading starlette-0.47.3-py3-none-any.whl (72 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m73.0/73.0 kB\u001b[0m \u001b[31m10.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading tensorboard-2.20.0-py3-none-any.whl (5.5 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.5/5.5 MB\u001b[0m \u001b[31m121.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m\n", - "\u001b[?25hDownloading termcolor-3.1.0-py3-none-any.whl (7.7 kB)\n", - "Downloading virtualenv-20.34.0-py3-none-any.whl (6.0 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.0/6.0 MB\u001b[0m \u001b[31m125.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m\n", - "\u001b[?25hDownloading wrapt-1.17.3-cp311-cp311-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl (82 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m82.4/82.4 kB\u001b[0m \u001b[31m11.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading aiohttp_cors-0.8.1-py3-none-any.whl (25 kB)\n", - "Downloading colorful-0.5.7-py2.py3-none-any.whl (201 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m201.5/201.5 kB\u001b[0m \u001b[31m24.5 MB/s\u001b[0m eta 
\u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading opencensus-0.11.4-py2.py3-none-any.whl (128 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m128.2/128.2 kB\u001b[0m \u001b[31m18.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading smart_open-7.3.0.post1-py3-none-any.whl (61 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m61.9/61.9 kB\u001b[0m \u001b[31m9.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading watchfiles-1.1.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (453 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m453.1/453.1 kB\u001b[0m \u001b[31m49.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading blinker-1.9.0-py3-none-any.whl (8.5 kB)\n", - "Downloading cloudpickle-3.1.1-py3-none-any.whl (20 kB)\n", - "Downloading contourpy-1.3.3-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl (355 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m355.2/355.2 kB\u001b[0m \u001b[31m40.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading cycler-0.12.1-py3-none-any.whl (8.3 kB)\n", - "Downloading databricks_sdk-0.64.0-py3-none-any.whl (703 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m703.4/703.4 kB\u001b[0m \u001b[31m52.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading distlib-0.4.0-py2.py3-none-any.whl (469 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m469.0/469.0 kB\u001b[0m \u001b[31m44.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading fonttools-4.59.1-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (5.0 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.0/5.0 MB\u001b[0m \u001b[31m118.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m\n", - "\u001b[?25hDownloading gitpython-3.1.45-py3-none-any.whl (208 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m208.2/208.2 kB\u001b[0m \u001b[31m25.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading google_api_core-2.25.1-py3-none-any.whl (160 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m160.8/160.8 kB\u001b[0m \u001b[31m21.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading graphql_core-3.2.6-py3-none-any.whl (203 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m203.4/203.4 kB\u001b[0m \u001b[31m28.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading graphql_relay-3.2.0-py3-none-any.whl (16 kB)\n", - "Downloading httptools-0.6.4-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (459 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m459.8/459.8 kB\u001b[0m \u001b[31m44.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading importlib_metadata-8.7.0-py3-none-any.whl (27 kB)\n", - "Downloading itsdangerous-2.2.0-py3-none-any.whl (16 kB)\n", - "Downloading joblib-1.5.1-py3-none-any.whl (307 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m307.7/307.7 kB\u001b[0m \u001b[31m33.9 MB/s\u001b[0m eta 
\u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading kiwisolver-1.4.9-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (1.4 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.4/1.4 MB\u001b[0m \u001b[31m80.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading markdown-3.8.2-py3-none-any.whl (106 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m106.8/106.8 kB\u001b[0m \u001b[31m14.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading opencensus_context-0.1.3-py2.py3-none-any.whl (5.1 kB)\n", - "Downloading opentelemetry_api-1.36.0-py3-none-any.whl (65 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m65.6/65.6 kB\u001b[0m \u001b[31m8.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading opentelemetry_sdk-1.36.0-py3-none-any.whl (119 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m120.0/120.0 kB\u001b[0m \u001b[31m16.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading opentelemetry_semantic_conventions-0.57b0-py3-none-any.whl (201 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m201.6/201.6 kB\u001b[0m \u001b[31m23.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading pillow-11.3.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl (6.6 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.6/6.6 MB\u001b[0m \u001b[31m136.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m\n", - "\u001b[?25hDownloading pyparsing-3.2.3-py3-none-any.whl (111 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m111.1/111.1 kB\u001b[0m \u001b[31m14.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading python_dotenv-1.1.1-py3-none-any.whl (20 kB)\n", - "Downloading pytz-2025.2-py2.py3-none-any.whl (509 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m509.2/509.2 kB\u001b[0m \u001b[31m52.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading sqlparse-0.5.3-py3-none-any.whl (44 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m44.4/44.4 kB\u001b[0m \u001b[31m6.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading tensorboard_data_server-0.7.2-py3-none-manylinux_2_31_x86_64.whl (6.6 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.6/6.6 MB\u001b[0m \u001b[31m133.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m\n", - "\u001b[?25hDownloading threadpoolctl-3.6.0-py3-none-any.whl (18 kB)\n", - "Downloading tzdata-2025.2-py2.py3-none-any.whl (347 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m347.8/347.8 kB\u001b[0m \u001b[31m40.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading uvicorn-0.35.0-py3-none-any.whl (66 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m66.4/66.4 kB\u001b[0m \u001b[31m9.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading uvloop-0.21.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (4.0 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m4.0/4.0 MB\u001b[0m \u001b[31m135.7 
MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading websockets-15.0.1-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (182 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m182.3/182.3 kB\u001b[0m \u001b[31m24.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading werkzeug-3.1.3-py3-none-any.whl (224 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m224.5/224.5 kB\u001b[0m \u001b[31m29.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading namex-0.1.0-py3-none-any.whl (5.9 kB)\n", - "Downloading optree-0.17.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl (402 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m402.0/402.0 kB\u001b[0m \u001b[31m43.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading rich-14.1.0-py3-none-any.whl (243 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m243.4/243.4 kB\u001b[0m \u001b[31m31.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading gitdb-4.0.12-py3-none-any.whl (62 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m62.8/62.8 kB\u001b[0m \u001b[31m8.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading google_auth-2.40.3-py2.py3-none-any.whl (216 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m216.1/216.1 kB\u001b[0m \u001b[31m25.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading cachetools-5.5.2-py3-none-any.whl (10 kB)\n", - "Downloading googleapis_common_protos-1.70.0-py3-none-any.whl (294 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m294.5/294.5 kB\u001b[0m \u001b[31m39.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading markdown_it_py-4.0.0-py3-none-any.whl (87 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m87.3/87.3 kB\u001b[0m \u001b[31m12.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading proto_plus-1.26.1-py3-none-any.whl (50 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m50.2/50.2 kB\u001b[0m \u001b[31m7.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading zipp-3.23.0-py3-none-any.whl (10 kB)\n", - "Downloading mdurl-0.1.2-py3-none-any.whl (10.0 kB)\n", - "Downloading pyasn1_modules-0.4.2-py3-none-any.whl (181 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m181.3/181.3 kB\u001b[0m \u001b[31m23.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading rsa-4.9.1-py3-none-any.whl (34 kB)\n", - "Downloading smmap-5.0.2-py3-none-any.whl (24 kB)\n", - "Downloading pyasn1-0.6.1-py3-none-any.whl (83 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m83.1/83.1 kB\u001b[0m \u001b[31m10.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hInstalling collected packages: pytz, py-spy, opencensus-context, namex, libclang, flatbuffers, distlib, colorful, zipp, wrapt, werkzeug, websockets, virtualenv, uvloop, uvicorn, tzdata, threadpoolctl, termcolor, tensorboard-data-server, sqlparse, smmap, python-dotenv, pyparsing, pyasn1, pyarrow, proto-plus, pillow, optree, opt_einsum, 
numpy, mdurl, markdown, kiwisolver, joblib, itsdangerous, httptools, gunicorn, grpcio, graphql-core, googleapis-common-protos, google_pasta, gast, fonttools, cycler, cloudpickle, cachetools, blinker, astunparse, absl-py, watchfiles, tensorboard, starlette, smart-open, scipy, rsa, pyasn1-modules, pandas, ml_dtypes, markdown-it-py, importlib_metadata, h5py, graphql-relay, gitdb, Flask, cryptography, contourpy, scikit-learn, rich, opentelemetry-api, matplotlib, graphene, google-auth, gitpython, fastapi, aiohttp-cors, opentelemetry-semantic-conventions, keras, google-api-core, databricks-sdk, tensorflow, opentelemetry-sdk, opencensus, mlflow-tracing, mlflow-skinny, mlflow\n", - "\u001b[33m WARNING: The script websockets is installed in '/tmp/models-cache/bin' which is not on PATH.\n", - " Consider adding this directory to PATH or, if you prefer to suppress this warning, use --no-warn-script-location.\u001b[0m\u001b[33m\n", - "\u001b[0m\u001b[33m WARNING: The script virtualenv is installed in '/tmp/models-cache/bin' which is not on PATH.\n", - " Consider adding this directory to PATH or, if you prefer to suppress this warning, use --no-warn-script-location.\u001b[0m\u001b[33m\n", - "\u001b[0m\u001b[33m WARNING: The script uvicorn is installed in '/tmp/models-cache/bin' which is not on PATH.\n", - " Consider adding this directory to PATH or, if you prefer to suppress this warning, use --no-warn-script-location.\u001b[0m\u001b[33m\n", - "\u001b[0m\u001b[33m WARNING: The script sqlformat is installed in '/tmp/models-cache/bin' which is not on PATH.\n", - " Consider adding this directory to PATH or, if you prefer to suppress this warning, use --no-warn-script-location.\u001b[0m\u001b[33m\n", - "\u001b[0m\u001b[33m WARNING: The script dotenv is installed in '/tmp/models-cache/bin' which is not on PATH.\n", - " Consider adding this directory to PATH or, if you prefer to suppress this warning, use --no-warn-script-location.\u001b[0m\u001b[33m\n", - "\u001b[0m\u001b[33m WARNING: The scripts f2py and numpy-config are installed in '/tmp/models-cache/bin' which is not on PATH.\n", - " Consider adding this directory to PATH or, if you prefer to suppress this warning, use --no-warn-script-location.\u001b[0m\u001b[33m\n", - "\u001b[0m\u001b[33m WARNING: The script markdown_py is installed in '/tmp/models-cache/bin' which is not on PATH.\n", - " Consider adding this directory to PATH or, if you prefer to suppress this warning, use --no-warn-script-location.\u001b[0m\u001b[33m\n", - "\u001b[0m\u001b[33m WARNING: The script gunicorn is installed in '/tmp/models-cache/bin' which is not on PATH.\n", - " Consider adding this directory to PATH or, if you prefer to suppress this warning, use --no-warn-script-location.\u001b[0m\u001b[33m\n", - "\u001b[0m\u001b[33m WARNING: The scripts fonttools, pyftmerge, pyftsubset and ttx are installed in '/tmp/models-cache/bin' which is not on PATH.\n", - " Consider adding this directory to PATH or, if you prefer to suppress this warning, use --no-warn-script-location.\u001b[0m\u001b[33m\n", - "\u001b[0m\u001b[33m WARNING: The script watchfiles is installed in '/tmp/models-cache/bin' which is not on PATH.\n", - " Consider adding this directory to PATH or, if you prefer to suppress this warning, use --no-warn-script-location.\u001b[0m\u001b[33m\n", - "\u001b[0m\u001b[33m WARNING: The script tensorboard is installed in '/tmp/models-cache/bin' which is not on PATH.\n", - " Consider adding this directory to PATH or, if you prefer to suppress this warning, use 
--no-warn-script-location.\u001b[0m\u001b[33m\n", - "\u001b[0m\u001b[33m WARNING: The scripts pyrsa-decrypt, pyrsa-encrypt, pyrsa-keygen, pyrsa-priv2pub, pyrsa-sign and pyrsa-verify are installed in '/tmp/models-cache/bin' which is not on PATH.\n", - " Consider adding this directory to PATH or, if you prefer to suppress this warning, use --no-warn-script-location.\u001b[0m\u001b[33m\n", - "\u001b[0m\u001b[33m WARNING: The script markdown-it is installed in '/tmp/models-cache/bin' which is not on PATH.\n", - " Consider adding this directory to PATH or, if you prefer to suppress this warning, use --no-warn-script-location.\u001b[0m\u001b[33m\n", - "\u001b[0m\u001b[33m WARNING: The script flask is installed in '/tmp/models-cache/bin' which is not on PATH.\n", - " Consider adding this directory to PATH or, if you prefer to suppress this warning, use --no-warn-script-location.\u001b[0m\u001b[33m\n", - "\u001b[0m\u001b[33m WARNING: The script fastapi is installed in '/tmp/models-cache/bin' which is not on PATH.\n", - " Consider adding this directory to PATH or, if you prefer to suppress this warning, use --no-warn-script-location.\u001b[0m\u001b[33m\n", - "\u001b[0m\u001b[33m WARNING: The scripts import_pb_to_tensorboard, saved_model_cli, tensorboard, tf_upgrade_v2, tflite_convert and toco are installed in '/tmp/models-cache/bin' which is not on PATH.\n", - " Consider adding this directory to PATH or, if you prefer to suppress this warning, use --no-warn-script-location.\u001b[0m\u001b[33m\n", - "\u001b[0m\u001b[33m WARNING: The script mlflow is installed in '/tmp/models-cache/bin' which is not on PATH.\n", - " Consider adding this directory to PATH or, if you prefer to suppress this warning, use --no-warn-script-location.\u001b[0m\u001b[33m\n", - "\u001b[0m\u001b[33m WARNING: The script mlflow is installed in '/tmp/models-cache/bin' which is not on PATH.\n", - " Consider adding this directory to PATH or, if you prefer to suppress this warning, use --no-warn-script-location.\u001b[0m\u001b[33m\n", - "\u001b[0m\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. 
This behaviour is the source of the following dependency conflicts.\n", - "pyopenssl 24.1.0 requires cryptography<43,>=41.0.5, but you have cryptography 45.0.6 which is incompatible.\u001b[0m\u001b[31m\n", - "\u001b[0mSuccessfully installed Flask-3.1.2 absl-py-2.3.1 aiohttp-cors-0.8.1 astunparse-1.6.3 blinker-1.9.0 cachetools-5.5.2 cloudpickle-3.1.1 colorful-0.5.7 contourpy-1.3.3 cryptography-45.0.6 cycler-0.12.1 databricks-sdk-0.64.0 distlib-0.4.0 fastapi-0.116.1 flatbuffers-25.2.10 fonttools-4.59.1 gast-0.6.0 gitdb-4.0.12 gitpython-3.1.45 google-api-core-2.25.1 google-auth-2.40.3 google_pasta-0.2.0 googleapis-common-protos-1.70.0 graphene-3.4.3 graphql-core-3.2.6 graphql-relay-3.2.0 grpcio-1.74.0 gunicorn-23.0.0 h5py-3.14.0 httptools-0.6.4 importlib_metadata-8.7.0 itsdangerous-2.2.0 joblib-1.5.1 keras-3.11.3 kiwisolver-1.4.9 libclang-18.1.1 markdown-3.8.2 markdown-it-py-4.0.0 matplotlib-3.10.5 mdurl-0.1.2 ml_dtypes-0.5.3 mlflow-3.3.1 mlflow-skinny-3.3.1 mlflow-tracing-3.3.1 namex-0.1.0 numpy-2.3.2 opencensus-0.11.4 opencensus-context-0.1.3 opentelemetry-api-1.36.0 opentelemetry-sdk-1.36.0 opentelemetry-semantic-conventions-0.57b0 opt_einsum-3.4.0 optree-0.17.0 pandas-2.3.2 pillow-11.3.0 proto-plus-1.26.1 py-spy-0.4.1 pyarrow-21.0.0 pyasn1-0.6.1 pyasn1-modules-0.4.2 pyparsing-3.2.3 python-dotenv-1.1.1 pytz-2025.2 rich-14.1.0 rsa-4.9.1 scikit-learn-1.7.1 scipy-1.16.1 smart-open-7.3.0.post1 smmap-5.0.2 sqlparse-0.5.3 starlette-0.47.3 tensorboard-2.20.0 tensorboard-data-server-0.7.2 tensorflow-2.20.0 termcolor-3.1.0 threadpoolctl-3.6.0 tzdata-2025.2 uvicorn-0.35.0 uvloop-0.21.0 virtualenv-20.34.0 watchfiles-1.1.0 websockets-15.0.1 werkzeug-3.1.3 wrapt-1.17.3 zipp-3.23.0\n" - ] - } - ], + "outputs": [], "source": [ "!pip install numpy mlflow tensorflow \"ray[serve,default,client]\"" ] }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "id": "e8deec5c-6239-4087-8a4d-27c091e9fc3c", "metadata": { "tags": [] }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2025-08-27 12:00:23.577265: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.\n", - "2025-08-27 12:00:23.626853: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n", - "To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", - "2025-08-27 12:00:25.157402: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.\n", - "2025/08/27 12:00:27 INFO mlflow.tracking.fluent: Experiment with name 'Diabetes_Prediction_TensorFlow' does not exist. Creating a new experiment.\n", - "2025/08/27 12:00:27 WARNING mlflow.tracking.context.registry: Encountered unexpected error during resolving tags: 'getpwuid(): uid not found: 1001'\n", - "2025/08/27 12:00:27 WARNING mlflow.utils.git_utils: Failed to import Git (the Git executable is probably not on your PATH), so Git SHA is not available. 
Error: Failed to initialize: Bad git executable.\n", - "The git executable must be specified in one of the following ways:\n", - " - be included in your $PATH\n", - " - be set via $GIT_PYTHON_GIT_EXECUTABLE\n", - " - explicitly set via git.refresh()\n", - "\n", - "All git commands will error until this is rectified.\n", - "\n", - "This initial message can be silenced or aggravated in the future by setting the\n", - "$GIT_PYTHON_REFRESH environment variable. Use one of the following values:\n", - " - quiet|q|silence|s|silent|none|n|0: for no message or exception\n", - " - warn|w|warning|log|l|1: for a warning message (logging level CRITICAL, displayed by default)\n", - " - error|e|exception|raise|r|2: for a raised exception\n", - "\n", - "Example:\n", - " export GIT_PYTHON_REFRESH=quiet\n", - "\n", - "/tmp/models-cache/lib/python3.11/site-packages/keras/src/layers/core/dense.py:92: UserWarning: Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.\n", - " super().__init__(activity_regularizer=activity_regularizer, **kwargs)\n", - "2025-08-27 12:00:29.352582: E external/local_xla/xla/stream_executor/cuda/cuda_platform.cc:51] failed call to cuInit: INTERNAL: CUDA error: Failed call to cuInit: UNKNOWN ERROR (303)\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "🏃 View run classy-wren-479 at: http://ai-starter-kit-mlflow:5000/#/experiments/1/runs/7ca28b8521a049dc8a014d4235909db3\n", - "🧪 View experiment at: http://ai-starter-kit-mlflow:5000/#/experiments/1\n" - ] - } - ], + "outputs": [], "source": [ "import mlflow\n", "import mlflow.tensorflow\n", @@ -615,91 +102,12 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "id": "556ae0b2-6fa6-4271-9e7d-553cd7056aab", "metadata": { "tags": [] }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/opt/bitnami/miniconda/lib/python3.11/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", - " from .autonotebook import tqdm as notebook_tqdm\n", - "2025-08-27 12:00:35,162\tINFO util.py:154 -- Missing packages: ['ipywidgets']. 
Run `pip install -U ipywidgets`, then restart the notebook server for rich notebook output.\n", - "2025-08-27 12:00:35,747\tINFO worker.py:1514 -- Using address ray://ai-starter-kit-kuberay-head-svc:10001 set in the environment variable RAY_ADDRESS\n", - "2025-08-27 12:00:35,748\tINFO client_builder.py:244 -- Passing the following kwargs to ray.init() on the server: log_to_driver\n", - "SIGTERM handler is not set because current thread is not the main thread.\n", - "2025-08-27 12:00:40,043\tWARNING utils.py:1591 -- Python patch version mismatch: The cluster was started with:\n", - " Ray: 2.41.0\n", - " Python: 3.11.11\n", - "This process on Ray Client was started with:\n", - " Ray: 2.41.0\n", - " Python: 3.11.9\n", - "\n", - "\u001b[36m(ProxyActor pid=2818, ip=10.23.2.212)\u001b[0m INFO 2025-08-27 05:00:48,855 proxy 10.23.2.212 -- Proxy starting on node 08fb6ed7e6841998dfb9cc9f99c999cd4317663f09d65db617dbd3dc (HTTP port: 8000).\n", - "\u001b[36m(ProxyActor pid=2818)\u001b[0m INFO 2025-08-27 05:00:48,855 proxy 10.23.2.212 -- Proxy starting on node 08fb6ed7e6841998dfb9cc9f99c999cd4317663f09d65db617dbd3dc (HTTP port: 8000).\n", - "INFO 2025-08-27 12:00:49,116 serve 124 -- Started Serve in namespace \"serve\".\n", - "\u001b[36m(ProxyActor pid=2818, ip=10.23.2.212)\u001b[0m INFO 2025-08-27 05:00:49,087 proxy 10.23.2.212 -- Got updated endpoints: {}.\n", - "\u001b[36m(ProxyActor pid=2818)\u001b[0m INFO 2025-08-27 05:00:49,087 proxy 10.23.2.212 -- Got updated endpoints: {}.\n", - "\u001b[36m(ServeController pid=2739, ip=10.23.2.212)\u001b[0m INFO 2025-08-27 05:00:49,258 controller 2739 -- Deploying new version of Deployment(name='TensorFlowMLflowDeployment', app='default') (initial target replicas: 1).\n", - "\u001b[36m(ServeController pid=2739)\u001b[0m INFO 2025-08-27 05:00:49,258 controller 2739 -- Deploying new version of Deployment(name='TensorFlowMLflowDeployment', app='default') (initial target replicas: 1).\n", - "\u001b[36m(ProxyActor pid=2818, ip=10.23.2.212)\u001b[0m INFO 2025-08-27 05:00:49,262 proxy 10.23.2.212 -- Got updated endpoints: {Deployment(name='TensorFlowMLflowDeployment', app='default'): EndpointInfo(route='/predict', app_is_cross_language=False)}.\n", - "\u001b[36m(ProxyActor pid=2818)\u001b[0m INFO 2025-08-27 05:00:49,262 proxy 10.23.2.212 -- Got updated endpoints: {Deployment(name='TensorFlowMLflowDeployment', app='default'): EndpointInfo(route='/predict', app_is_cross_language=False)}.\n", - "\u001b[36m(ServeController pid=2739, ip=10.23.2.212)\u001b[0m INFO 2025-08-27 05:00:49,363 controller 2739 -- Adding 1 replica to Deployment(name='TensorFlowMLflowDeployment', app='default').\n", - "\u001b[36m(ServeController pid=2739)\u001b[0m INFO 2025-08-27 05:00:49,363 controller 2739 -- Adding 1 replica to Deployment(name='TensorFlowMLflowDeployment', app='default').\n", - "\u001b[36m(ServeController pid=2739, ip=10.23.2.212)\u001b[0m WARNING 2025-08-27 05:01:19,429 controller 2739 -- Deployment 'TensorFlowMLflowDeployment' in application 'default' has 1 replicas that have taken more than 30s to be scheduled. This may be due to waiting for the cluster to auto-scale or for a runtime environment to be installed. Resources required for each replica: {\"CPU\": 1}, total resources available: {\"CPU\": 7.0}. Use `ray status` for more details.\n", - "\u001b[36m(ServeController pid=2739)\u001b[0m WARNING 2025-08-27 05:01:19,429 controller 2739 -- Deployment 'TensorFlowMLflowDeployment' in application 'default' has 1 replicas that have taken more than 30s to be scheduled. 
This may be due to waiting for the cluster to auto-scale or for a runtime environment to be installed. Resources required for each replica: {\"CPU\": 1}, total resources available: {\"CPU\": 7.0}. Use `ray status` for more details.\n", - "\u001b[36m(ServeController pid=2739, ip=10.23.2.212)\u001b[0m WARNING 2025-08-27 05:01:49,531 controller 2739 -- Deployment 'TensorFlowMLflowDeployment' in application 'default' has 1 replicas that have taken more than 30s to be scheduled. This may be due to waiting for the cluster to auto-scale or for a runtime environment to be installed. Resources required for each replica: {\"CPU\": 1}, total resources available: {\"CPU\": 7.0}. Use `ray status` for more details.\n", - "\u001b[36m(ServeController pid=2739)\u001b[0m WARNING 2025-08-27 05:01:49,531 controller 2739 -- Deployment 'TensorFlowMLflowDeployment' in application 'default' has 1 replicas that have taken more than 30s to be scheduled. This may be due to waiting for the cluster to auto-scale or for a runtime environment to be installed. Resources required for each replica: {\"CPU\": 1}, total resources available: {\"CPU\": 7.0}. Use `ray status` for more details.\n", - "\u001b[36m(ServeReplica:default:TensorFlowMLflowDeployment pid=2265, ip=10.23.2.212)\u001b[0m 2025-08-27 05:02:02.743930: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n", - "\u001b[36m(ServeReplica:default:TensorFlowMLflowDeployment pid=2265, ip=10.23.2.212)\u001b[0m To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", - "\u001b[36m(ServeReplica:default:TensorFlowMLflowDeployment pid=2265)\u001b[0m 2025-08-27 05:02:02.743930: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n", - "\u001b[36m(ServeReplica:default:TensorFlowMLflowDeployment pid=2265)\u001b[0m To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", - "\u001b[36m(ServeReplica:default:TensorFlowMLflowDeployment pid=2265)\u001b[0m 2025-08-27 05:02:07.748054: E external/local_xla/xla/stream_executor/cuda/cuda_platform.cc:51] failed call to cuInit: INTERNAL: CUDA error: Failed call to cuInit: UNKNOWN ERROR (303)\n", - "\u001b[36m(ServeReplica:default:TensorFlowMLflowDeployment pid=2265, ip=10.23.2.212)\u001b[0m 2025-08-27 05:02:07.748054: E external/local_xla/xla/stream_executor/cuda/cuda_platform.cc:51] failed call to cuInit: INTERNAL: CUDA error: Failed call to cuInit: UNKNOWN ERROR (303)\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u001b[36m(ServeReplica:default:TensorFlowMLflowDeployment pid=2265)\u001b[0m Loading model...\n", - "\u001b[36m(ServeReplica:default:TensorFlowMLflowDeployment pid=2265)\u001b[0m Model loaded successfully.\n", - "\u001b[36m(ServeReplica:default:TensorFlowMLflowDeployment pid=2265, ip=10.23.2.212)\u001b[0m Loading model...\n", - "\u001b[36m(ServeReplica:default:TensorFlowMLflowDeployment pid=2265, ip=10.23.2.212)\u001b[0m Model loaded successfully.\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "INFO 2025-08-27 12:02:08,716 serve 124 -- Application 'default' is ready at http://127.0.0.1:8000/predict.\n", - "INFO 2025-08-27 12:02:08,718 serve 124 -- Deployed app 'default' successfully.\n", - 
"\u001b[36m(ServeReplica:default:TensorFlowMLflowDeployment pid=2265)\u001b[0m INFO 2025-08-27 05:02:08,967 default_TensorFlowMLflowDeployment 0zpbmyix 63d6b1b4-67f7-4a5c-ad92-6d7989e3cf5a -- CALL __call__ OK 150.7ms\n", - "\u001b[36m(ServeReplica:default:TensorFlowMLflowDeployment pid=2265, ip=10.23.2.212)\u001b[0m INFO 2025-08-27 05:02:08,967 default_TensorFlowMLflowDeployment 0zpbmyix 63d6b1b4-67f7-4a5c-ad92-6d7989e3cf5a -- CALL __call__ OK 150.7ms\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 85ms/step\n", - "\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 85ms/step3.2.212)\u001b[0m \n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\u001b[36m(ProxyActor pid=1775)\u001b[0m INFO 2025-08-27 05:02:09,833 proxy 10.23.2.213 -- Proxy starting on node 45179940bb5a43115519b525607191a8f1a059b70c5c61c14cee8a0f (HTTP port: 8000).\n", - "\u001b[36m(ProxyActor pid=1775, ip=10.23.2.212)\u001b[0m INFO 2025-08-27 05:02:09,833 proxy 10.23.2.213 -- Proxy starting on node 45179940bb5a43115519b525607191a8f1a059b70c5c61c14cee8a0f (HTTP port: 8000).\n", - "\u001b[36m(ProxyActor pid=1775)\u001b[0m INFO 2025-08-27 05:02:09,992 proxy 10.23.2.213 -- Got updated endpoints: {Deployment(name='TensorFlowMLflowDeployment', app='default'): EndpointInfo(route='/predict', app_is_cross_language=False)}.\n", - "\u001b[36m(ProxyActor pid=1775, ip=10.23.2.212)\u001b[0m INFO 2025-08-27 05:02:09,992 proxy 10.23.2.213 -- Got updated endpoints: {Deployment(name='TensorFlowMLflowDeployment', app='default'): EndpointInfo(route='/predict', app_is_cross_language=False)}.\n" - ] - } - ], + "outputs": [], "source": [ "import numpy as np\n", "import mlflow.tensorflow\n", @@ -750,23 +158,12 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "id": "e264af73-6634-412b-9cbc-86b79c18e775", "metadata": { "tags": [] }, - "outputs": [ - { - "data": { - "text/plain": [ - "{'prediction': [179.46218872070312]}" - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "json_data = {\"features\": [0.0380759, 0.0506801, 0.0616962, 0.0218724, -0.0442235, -0.0348208, -0.0434008, -0.00259226, 0.0199084, -0.0176461]}\n", "response = handle.remote(json_data)\n", @@ -790,7 +187,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.9" + "version": "3.12.10" } }, "nbformat": 4, From 7d7df31fd7af92578806c20c5e79edf65dba61c3 Mon Sep 17 00:00:00 2001 From: Vlado Djerek Date: Wed, 22 Oct 2025 16:09:59 +0200 Subject: [PATCH 08/14] update readme --- .../helm-chart/ai-starter-kit/README.md | 109 ++++-------------- 1 file changed, 22 insertions(+), 87 deletions(-) diff --git a/ai/ai-starter-kit/helm-chart/ai-starter-kit/README.md b/ai/ai-starter-kit/helm-chart/ai-starter-kit/README.md index 965a0636d..033a84916 100644 --- a/ai/ai-starter-kit/helm-chart/ai-starter-kit/README.md +++ b/ai/ai-starter-kit/helm-chart/ai-starter-kit/README.md @@ -9,7 +9,6 @@ The AI Starter Kit simplifies the deployment of AI infrastructure by providing: - **JupyterHub**: Multi-user notebook environment with pre-configured AI/ML libraries - **Model Serving**: Support for both Ollama and Ramalama model servers - **MLflow**: Experiment tracking and model management -- **GPU Support**: Configurations for GPU acceleration on GKE and 
macOS - **Model Caching**: Persistent storage for efficient model management - **Example Notebooks**: Pre-loaded notebooks to get you started immediately @@ -28,15 +27,6 @@ The AI Starter Kit simplifies the deployment of AI infrastructure by providing: - Minimum 4 CPU cores and 16GB RAM available - 40GB+ free disk space -#### GKE (Google Kubernetes Engine) -- Google Cloud CLI (`gcloud`) installed and configured -- Appropriate GCP permissions to create clusters - -#### macOS with GPU (Apple Silicon) -- macOS with Apple Silicon (M1/M2/M3/M4) -- minikube with krunkit driver -- 16GB+ RAM recommended - ## Installation ### Quick Start (Minikube) @@ -65,74 +55,7 @@ helm install ai-starter-kit . \ ```bash kubectl port-forward svc/ai-starter-kit-jupyterhub-proxy-public 8080:80 ``` -Navigate to http://localhost:8080 and login with any username and password `sneakypass` - -### GKE Deployment - -1. **Create a GKE Autopilot cluster:** -```bash -export REGION=us-central1 -export CLUSTER_NAME="ai-starter-cluster" -export PROJECT_ID=$(gcloud config get project) - -gcloud container clusters create-auto ${CLUSTER_NAME} \ - --project=${PROJECT_ID} \ - --region=${REGION} \ - --release-channel=rapid \ - --labels=created-by=ai-on-gke,guide=ai-starter-kit -``` - -2. **Get cluster credentials:** -```bash -gcloud container clusters get-credentials ${CLUSTER_NAME} --location=${REGION} -``` - -3. **Install the chart with GKE-specific values:** -```bash -helm install ai-starter-kit . \ - --set huggingface.token="YOUR_HF_TOKEN" \ - -f values.yaml \ - -f values-gke.yaml -``` - -### GKE with GPU (Ollama) - -For GPU-accelerated model serving with Ollama: - -```bash -helm install ai-starter-kit . \ - --set huggingface.token="YOUR_HF_TOKEN" \ - -f values-gke.yaml \ - -f values-ollama-gpu.yaml -``` - -### GKE with GPU (Ramalama) - -For GPU-accelerated model serving with Ramalama: - -```bash -helm install ai-starter-kit . \ - --set huggingface.token="YOUR_HF_TOKEN" \ - -f values-gke.yaml \ - -f values-ramalama-gpu.yaml -``` - -### macOS with Apple Silicon GPU - -1. **Start minikube with krunkit driver:** -```bash -minikube start --driver krunkit \ - --cpus 8 --memory 16000 --disk-size 40000mb \ - --mount --mount-string="/tmp/models-cache:/tmp/models-cache" -``` - -2. **Install with macOS GPU support:** -```bash -helm install ai-starter-kit . \ - --set huggingface.token="YOUR_HF_TOKEN" \ - -f values.yaml \ - -f values-macos.yaml -``` +Navigate to http://localhost:8080 and login with any username and password `password` ## Configuration @@ -152,9 +75,25 @@ helm install ai-starter-kit . \ The chart supports different storage configurations: - **Local Development**: Uses hostPath volumes with minikube mount -- **GKE**: Uses standard GKE storage classes (`standard-rwo`, `standard-rwx`) - **Custom**: Configure via `modelsCachePvc.storageClassName` +### Using GPUs + +In order to use GPUs for AI/ML workloads we need to add the necessary config to the services. Check the dependency charts documentation for the values. For example jupyterhub config would be: + + ```yaml + juypterhub: + ... + extraResource: + limits: + nvidia.com/gpu: 1 + guarantees: + nvidia.com/gpu: 1 + + nodeSelector: + cloud.google.com/gke-accelerator: nvidia-l4 + ``` + ### Model Servers #### Ollama @@ -170,13 +109,7 @@ Ramalama provides: - Support for CUDA and Metal (macOS) acceleration - Lightweight deployment option -You can run either Ollama or Ramalama, but not both simultaneously. 
Toggle using: -```yaml -ollama: - enabled: true/false -ramalama: - enabled: true/false -``` + ## Usage @@ -209,8 +142,10 @@ kubectl port-forward svc/ai-starter-kit-ramalama 8080:8080 ### Pre-loaded Example Notebooks The JupyterHub environment comes with pre-loaded example notebooks: +- `ray.ipynb`: Simple Ray nad MLflow example - `chat_bot.ipynb`: Simple chatbot interface using Ollama for conversational AI. -- `multi-agent-ollama.ipynb`: Multi-agent workflow demonstration using Ollama. +- `multi-agent.ipynb`:Multi-agent workflow demonstration using Ray. +- `multi-agent-ollama.ipynb`: Similar multi-agent workflow demonstration using Ollama. - `multi-agent-ramalama.ipynb`: Similar multi-agent workflow using RamaLama runtime for comparison. - `welcome.ipynb`: Introduction notebook with embedding model examples using Qwen models. From 6bf740d133a1ab6149d130bc35f5aef7b99c3a0f Mon Sep 17 00:00:00 2001 From: Vlado Djerek Date: Fri, 24 Oct 2025 18:46:56 +0200 Subject: [PATCH 09/14] add pre delete hook for singleuser env --- ai/ai-starter-kit/Makefile | 2 -- .../templates/delete-pods-job.yaml | 25 +++++++++++++++++++ 2 files changed, 25 insertions(+), 2 deletions(-) create mode 100644 ai/ai-starter-kit/helm-chart/ai-starter-kit/templates/delete-pods-job.yaml diff --git a/ai/ai-starter-kit/Makefile b/ai/ai-starter-kit/Makefile index 4fc7fd583..da5b7b2f8 100644 --- a/ai/ai-starter-kit/Makefile +++ b/ai/ai-starter-kit/Makefile @@ -35,8 +35,6 @@ start_gpu: uninstall: helm uninstall ai-starter-kit - kubectl delete pod jupyter-user - kubectl delete pvc ai-starter-kit-jupyterhub-hub-db-dir destroy: minikube delete diff --git a/ai/ai-starter-kit/helm-chart/ai-starter-kit/templates/delete-pods-job.yaml b/ai/ai-starter-kit/helm-chart/ai-starter-kit/templates/delete-pods-job.yaml new file mode 100644 index 000000000..d25283a4b --- /dev/null +++ b/ai/ai-starter-kit/helm-chart/ai-starter-kit/templates/delete-pods-job.yaml @@ -0,0 +1,25 @@ +apiVersion: batch/v1 +kind: Job +metadata: + name: "{{ include "ai-starter-kit.fullname" . }}-delete-jupyter-admin-pod" + labels: + {{- include "ai-starter-kit.labels" . | nindent 4 }} + annotations: + "helm.sh/hook": pre-delete + "helm.sh/hook-delete-policy": hook-succeeded +spec: + template: + metadata: + name: "{{ include "ai-starter-kit.fullname" . }}-delete-jupyter-admin-pod" + labels: + {{- include "ai-starter-kit.labels" . | nindent 8 }} + spec: + serviceAccountName: {{ include "ai-starter-kit.fullname" . }}-jupyterhub-hub + restartPolicy: OnFailure + containers: + - name: delete-jupyter-admin-pod + image: alpine/kubectl:1.33.4 + command: ["/bin/sh", "-c"] + args: + - | + kubectl delete pod -l app.kubernetes.io/component=singleuser-server -n {{ .Release.Namespace }} From ceb424ea352d0a687c67db9b1345e07f10b0143e Mon Sep 17 00:00:00 2001 From: Vlado Djerek Date: Fri, 24 Oct 2025 18:56:38 +0200 Subject: [PATCH 10/14] readme fixes, added hook to delete singleuser pod --- ai/ai-starter-kit/helm-chart/ai-starter-kit/README.md | 8 ++++++-- .../helm-chart/ai-starter-kit/files/welcome.ipynb | 10 +++++++++- .../helm-chart/ai-starter-kit/values.yaml | 2 +- 3 files changed, 16 insertions(+), 4 deletions(-) diff --git a/ai/ai-starter-kit/helm-chart/ai-starter-kit/README.md b/ai/ai-starter-kit/helm-chart/ai-starter-kit/README.md index 033a84916..8a24d0a96 100644 --- a/ai/ai-starter-kit/helm-chart/ai-starter-kit/README.md +++ b/ai/ai-starter-kit/helm-chart/ai-starter-kit/README.md @@ -43,6 +43,9 @@ minikube start --cpus 4 --memory 15000 \ ``` 3. 
**Install the chart:** + +Inside of the checked out git repository run: + ```bash cd ai/ai-starter-kit/helm-chart/ai-starter-kit helm dependency update @@ -69,6 +72,7 @@ Navigate to http://localhost:8080 and login with any username and password `pass | `modelsCachePvc.size` | Size of model cache storage | `10Gi` | | `jupyterhub.singleuser.defaultUrl` | Default notebook path | `/lab/tree/welcome.ipynb` | | `mlflow.enabled` | Enable MLflow tracking server | `true` | +| `ray-cluster.enabled` | Enable Ray operator and server | `false` | ### Storage Configuration @@ -120,7 +124,7 @@ Ramalama provides: # Port forward to access JupyterHub kubectl port-forward svc/ai-starter-kit-jupyterhub-proxy-public 8080:80 # Access at: http://localhost:8080 -# Default password: sneakypass +# Default password: password ``` #### MLflow @@ -144,7 +148,7 @@ kubectl port-forward svc/ai-starter-kit-ramalama 8080:8080 The JupyterHub environment comes with pre-loaded example notebooks: - `ray.ipynb`: Simple Ray nad MLflow example - `chat_bot.ipynb`: Simple chatbot interface using Ollama for conversational AI. -- `multi-agent.ipynb`:Multi-agent workflow demonstration using Ray. +- `multi-agent.ipynb`: Multi-agent workflow demonstration using Ray. - `multi-agent-ollama.ipynb`: Similar multi-agent workflow demonstration using Ollama. - `multi-agent-ramalama.ipynb`: Similar multi-agent workflow using RamaLama runtime for comparison. - `welcome.ipynb`: Introduction notebook with embedding model examples using Qwen models. diff --git a/ai/ai-starter-kit/helm-chart/ai-starter-kit/files/welcome.ipynb b/ai/ai-starter-kit/helm-chart/ai-starter-kit/files/welcome.ipynb index 19bc4bdb6..4d5f154bf 100644 --- a/ai/ai-starter-kit/helm-chart/ai-starter-kit/files/welcome.ipynb +++ b/ai/ai-starter-kit/helm-chart/ai-starter-kit/files/welcome.ipynb @@ -1,8 +1,16 @@ { "cells": [ + { + "cell_type": "markdown", + "id": "5af4f666", + "metadata": {}, + "source": [ + "Welcome Notebook - Notebook used to verify basic jupyterhub functionality and inference" + ] + }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "id": "8048aa56-4549-4afa-b8b0-d111cc7020c3", "metadata": {}, "outputs": [ diff --git a/ai/ai-starter-kit/helm-chart/ai-starter-kit/values.yaml b/ai/ai-starter-kit/helm-chart/ai-starter-kit/values.yaml index f708de297..6a91fcc26 100644 --- a/ai/ai-starter-kit/helm-chart/ai-starter-kit/values.yaml +++ b/ai/ai-starter-kit/helm-chart/ai-starter-kit/values.yaml @@ -104,7 +104,7 @@ jupyterhub: ray-cluster: enabled: false image: - tag: "2.41.0-py312-cpu-aarch64" + tag: "2.41.0-py312-cpu" head: serviceType: ClusterIP resources: From 42ca13874ee5edf64e95d1689547216a300f32a0 Mon Sep 17 00:00:00 2001 From: Aleksandar Stefanovic Date: Mon, 27 Oct 2025 21:42:36 +0100 Subject: [PATCH 11/14] Applying fixes to resolve PR comments --- .../files/multi-agent-ollama.ipynb | 109 +++++++++++--- .../files/multi-agent-ramalama.ipynb | 139 ++++++++++++++---- .../ai-starter-kit/files/welcome.ipynb | 17 ++- 3 files changed, 211 insertions(+), 54 deletions(-) diff --git a/ai/ai-starter-kit/helm-chart/ai-starter-kit/files/multi-agent-ollama.ipynb b/ai/ai-starter-kit/helm-chart/ai-starter-kit/files/multi-agent-ollama.ipynb index a25cfbe2a..a384bc8f0 100644 --- a/ai/ai-starter-kit/helm-chart/ai-starter-kit/files/multi-agent-ollama.ipynb +++ b/ai/ai-starter-kit/helm-chart/ai-starter-kit/files/multi-agent-ollama.ipynb @@ -228,36 +228,91 @@ "with open('/tmp/ollama_wrapper.py', 'w') as f:\n", " f.write(api_wrapper_code)\n", "\n", - "!pkill -f 
ollama_wrapper.py 2>/dev/null || true\n", - "\n", - "env_vars = f\"\"\"\n", - "export OLLAMA_HOST=\"{os.getenv('OLLAMA_HOST', 'http://ai-starter-kit-ollama:11434')}\"\n", - "export MODEL_NAME=\"qwen2.5:1.5b\"\n", - "export MLFLOW_TRACKING_URI=\"{os.getenv('MLFLOW_TRACKING_URI', 'http://ai-starter-kit-mlflow:5000')}\"\n", - "\"\"\"\n", - "\n", - "!echo '{env_vars}' > /tmp/env_vars.sh\n", - "!bash -c 'source /tmp/env_vars.sh && nohup python /tmp/ollama_wrapper.py > /tmp/wrapper.log 2>&1 &'\n", + "print(\"Checking if wrapper script was written...\")\n", + "if not os.path.exists('/tmp/ollama_wrapper.py'):\n", + " print(\"ERROR: Failed to write wrapper script!\")\n", + " raise Exception(\"Cannot proceed without wrapper script\")\n", + "print(\" Wrapper script created\")\n", "\n", - "print(\"Starting API wrapper...\")\n", + "print(\"\\nKilling existing wrapper processes...\")\n", + "!pkill -f ollama_wrapper.py 2>/dev/null || true\n", + "time.sleep(2)\n", + "\n", + "log_file = '/tmp/wrapper.log'\n", + "print(f\"\\nPreparing log file: {log_file}\")\n", + "!touch /tmp/wrapper.log\n", + "!chmod 666 /tmp/wrapper.log\n", + "\n", + "if not os.path.exists(log_file):\n", + " print(f\"ERROR: Could not create log file at {log_file}\")\n", + " raise Exception(\"Cannot create log file\")\n", + "print(\" Log file ready\")\n", + "\n", + "env_vars = {\n", + " 'OLLAMA_HOST': os.getenv('OLLAMA_HOST', 'http://ai-starter-kit-ollama:11434'),\n", + " 'MODEL_NAME': 'qwen2.5:1.5b',\n", + " 'MLFLOW_TRACKING_URI': os.getenv('MLFLOW_TRACKING_URI', 'http://ai-starter-kit-mlflow:5000')\n", + "}\n", + "\n", + "print(\"\\nEnvironment variables:\")\n", + "for k, v in env_vars.items():\n", + " print(f\" {k}={v}\")\n", + "\n", + "print(\"\\nStarting API wrapper...\")\n", + "with open(log_file, 'w') as log:\n", + " process = subprocess.Popen(\n", + " ['python', '/tmp/ollama_wrapper.py'],\n", + " stdout=log,\n", + " stderr=subprocess.STDOUT,\n", + " env={**os.environ, **env_vars},\n", + " start_new_session=True\n", + " )\n", + " \n", + "print(f\"Process started with PID: {process.pid}\")\n", + "\n", + "time.sleep(2)\n", + "if process.poll() is not None:\n", + " print(f\"\\nERROR: Process died immediately with exit code {process.poll()}\")\n", + " print(\"\\nLog contents:\")\n", + " !cat /tmp/wrapper.log\n", + " raise Exception(\"API wrapper failed to start\")\n", + "print(\" Process is running\")\n", + "\n", + "print(\"\\nWaiting for API to respond...\")\n", + "api_ready = False\n", "for i in range(30):\n", " time.sleep(1)\n", " try:\n", " r = requests.get(\"http://localhost:8000/v1/healthz\", timeout=1)\n", " if r.status_code == 200:\n", - " print(\"API Status:\", r.json())\n", + " print(f\"\\n API is ready! Response: {r.json()}\")\n", " print(f\"\\nOpenAI-compatible API running at: http://localhost:8000/v1\")\n", " print(f\"Health: http://localhost:8000/v1/healthz\")\n", " print(f\"Chat: http://localhost:8000/v1/chat/completions\")\n", + " api_ready = True\n", " break\n", - " except:\n", + " except requests.exceptions.ConnectionError:\n", " if i % 5 == 0:\n", - " print(f\" Waiting for API to start... ({i}s)\")\n", - " continue\n", - "else:\n", - " print(\"\\nAPI wrapper failed to start. Checking logs:\")\n", - " !tail -20 /tmp/wrapper.log\n", - " print(\"\\nYou can still use direct Ollama API in the next cells.\")" + " print(f\" Waiting for API... 
({i}s)\")\n", + " except Exception as e:\n", + " print(f\" Unexpected error: {e}\")\n", + "\n", + "if not api_ready:\n", + " print(\"\\nAPI wrapper failed to start within 30 seconds\")\n", + " print(\"\\nChecking if process is still alive...\")\n", + " if process.poll() is not None:\n", + " print(f\"Process died with exit code: {process.poll()}\")\n", + " else:\n", + " print(\"Process is still running but not responding\")\n", + " \n", + " print(\"\\nLast 50 lines of logs:\")\n", + " !tail -50 /tmp/wrapper.log\n", + " \n", + " print(\"\\nChecking if port 8000 is in use:\")\n", + " !netstat -tlnp 2>/dev/null | grep 8000 || echo \"No process on port 8000\"\n", + " \n", + " print(\"\\nChecking Python processes:\")\n", + " !ps aux | grep python | grep -v grep" ] }, { @@ -279,15 +334,23 @@ "source": [ "import os, time, requests, json\n", "\n", - "USE_WRAPPER = True\n", - "BASE_URL = \"http://localhost:8000/v1\" if USE_WRAPPER else os.getenv(\"OLLAMA_HOST\", \"http://ai-starter-kit-ollama:11434\")\n", + "BASE_URL = \"http://localhost:8000/v1\"\n", + "OLLAMA_DIRECT = os.getenv(\"OLLAMA_HOST\", \"http://ai-starter-kit-ollama:11434\")\n", + "\n", + "try:\n", + " r = requests.get(f\"{BASE_URL}/healthz\", timeout=2)\n", + " USE_WRAPPER = r.status_code == 200\n", + " print(\"✓ Using: OpenAI-compatible wrapper\")\n", + "except:\n", + " USE_WRAPPER = False\n", + " print(\"✓ Using: Direct Ollama API (wrapper not available)\")\n", "\n", "def health():\n", " if USE_WRAPPER:\n", " r = requests.get(f\"{BASE_URL}/healthz\", timeout=10)\n", " print(\"Health:\", r.status_code, r.json())\n", " else:\n", - " r = requests.get(f\"{BASE_URL}/api/tags\", timeout=10)\n", + " r = requests.get(f\"{OLLAMA_DIRECT}/api/tags\", timeout=10)\n", " print(\"Health:\", r.status_code, \"Models available:\", len(r.json().get('models', [])))\n", "\n", "def chat(prompt, temperature=0.4, max_tokens=220):\n", @@ -315,7 +378,7 @@ " \"num_predict\": max_tokens\n", " }\n", " }\n", - " endpoint = f\"{BASE_URL}/api/chat\"\n", + " endpoint = f\"{OLLAMA_DIRECT}/api/chat\"\n", " \n", " t0 = time.time()\n", " r = requests.post(endpoint, json=body, timeout=120)\n", diff --git a/ai/ai-starter-kit/helm-chart/ai-starter-kit/files/multi-agent-ramalama.ipynb b/ai/ai-starter-kit/helm-chart/ai-starter-kit/files/multi-agent-ramalama.ipynb index 07aff13cc..32a718556 100644 --- a/ai/ai-starter-kit/helm-chart/ai-starter-kit/files/multi-agent-ramalama.ipynb +++ b/ai/ai-starter-kit/helm-chart/ai-starter-kit/files/multi-agent-ramalama.ipynb @@ -59,7 +59,6 @@ "outputs": [], "source": [ "import requests, os, json\n", - "\n", "RAMALAMA_HOST = os.environ.get('RAMALAMA_HOST', 'http://ai-starter-kit-ramalama:8080')\n", "MODEL_NAME = \"qwen2.5:1.5b\"\n", "\n", @@ -73,14 +72,38 @@ "if check_ramalama():\n", " print(\"RamaLama service is running\")\n", " \n", + " print(f\"\\nChecking model {MODEL_NAME}...\")\n", " try:\n", " r = requests.get(f\"{RAMALAMA_HOST}/v1/models\")\n", " models = r.json().get('data', [])\n", - " model_exists = any(m.get('id') == MODEL_NAME for m in models) \n", - " if model_exists:\n", - " print(f\"Model {MODEL_NAME} already available\")\n", + " model_exists = any(m.get('id') == MODEL_NAME for m in models)\n", + " \n", + " if not model_exists:\n", + " print(f\"Pulling model {MODEL_NAME}...\")\n", + " \n", + " try:\n", + " test_body = {\n", + " \"model\": MODEL_NAME,\n", + " \"messages\": [{\"role\": \"user\", \"content\": \"test\"}],\n", + " \"max_tokens\": 1\n", + " }\n", + " r = requests.post(f\"{RAMALAMA_HOST}/v1/chat/completions\", 
json=test_body, timeout=300)\n", + " \n", + " if r.status_code == 200:\n", + " print(f\"Model {MODEL_NAME} loaded successfully\")\n", + " else:\n", + " print(f\"Failed to load model. Status: {r.status_code}\")\n", + " print(\"You may need to pull the model manually in the RamaLama deployment\")\n", + " \n", + " except requests.exceptions.Timeout:\n", + " print(\"Model pull timed out. Large models may take longer.\")\n", + " print(\"Check RamaLama logs to monitor progress\")\n", + " except Exception as e:\n", + " print(f\"Error pulling model: {e}\")\n", + " print(\"You may need to pull the model manually in the RamaLama deployment\")\n", " else:\n", - " print(f\"Model {MODEL_NAME} not found; ensure it's pulled in the deployment\")\n", + " print(f\"Model {MODEL_NAME} already available\")\n", + " \n", " except Exception as e:\n", " print(f\"Error checking model: {e}\")\n", "else:\n", @@ -107,7 +130,7 @@ "source": [ "!pip -q install fastapi uvicorn mlflow --disable-pip-version-check\n", "\n", - "import os, threading, time, json\n", + "import os, subprocess, time, json, requests\n", "from pathlib import Path\n", "\n", "api_wrapper_code = '''\n", @@ -128,7 +151,7 @@ " pass\n", "\n", "app = FastAPI()\n", - "RAMALAMA_HOST = os.getenv(\"RAMALAMA_HOST\", \"http://127.0.0.1:8080\")\n", + "RAMALAMA_HOST = os.getenv(\"RAMALAMA_HOST\", \"http://ai-starter-kit-ramalama:8080\")\n", "MODEL_NAME = os.getenv(\"MODEL_NAME\", \"qwen2.5:1.5b\")\n", "\n", "@app.get(\"/v1/healthz\")\n", @@ -206,32 +229,88 @@ "with open('/tmp/ramalama_wrapper.py', 'w') as f:\n", " f.write(api_wrapper_code)\n", "\n", - "def run_api():\n", - " subprocess.run([\"python\", \"/tmp/ramalama_wrapper.py\"], capture_output=True)\n", - "\n", - "import subprocess\n", - "api_process = subprocess.Popen(\n", - " [\"python\", \"/tmp/ramalama_wrapper.py\"],\n", - " env={**os.environ, \n", - " \"RAMALAMA_HOST\": os.getenv(\"RAMALAMA_HOST\", \"http://127.0.0.1:8080\"),\n", - " \"MODEL_NAME\": MODEL_NAME,\n", - " \"MLFLOW_TRACKING_URI\": MLFLOW_URI},\n", - " stdout=subprocess.DEVNULL,\n", - " stderr=subprocess.DEVNULL\n", - ")\n", + "print(\"Wrapper script created\")\n", + "\n", + "print(\"Killing existing wrapper processes...\")\n", + "!pkill -f ramalama_wrapper.py 2>/dev/null || true\n", + "time.sleep(2)\n", "\n", - "time.sleep(3)\n", + "log_file = '/tmp/ramalama_wrapper.log'\n", + "!touch /tmp/ramalama_wrapper.log\n", + "!chmod 666 /tmp/ramalama_wrapper.log\n", + "print(\"Log file ready\")\n", "\n", + "MODEL_NAME = \"qwen2.5:1.5b\"\n", + "MLFLOW_URI = os.getenv(\"MLFLOW_TRACKING_URI\", \"http://ai-starter-kit-mlflow:5000\")\n", + "RAMALAMA_HOST = os.getenv(\"RAMALAMA_HOST\", \"http://ai-starter-kit-ramalama:8080\")\n", + "\n", + "env_vars = {\n", + " 'RAMALAMA_HOST': RAMALAMA_HOST,\n", + " 'MODEL_NAME': MODEL_NAME,\n", + " 'MLFLOW_TRACKING_URI': MLFLOW_URI\n", + "}\n", + "\n", + "print(\"\\nEnvironment variables:\")\n", + "for k, v in env_vars.items():\n", + " print(f\" {k}={v}\")\n", + "\n", + "print(\"\\nStarting API wrapper...\")\n", + "with open(log_file, 'w') as log:\n", + " api_process = subprocess.Popen(\n", + " [\"python\", \"/tmp/ramalama_wrapper.py\"],\n", + " stdout=log,\n", + " stderr=subprocess.STDOUT,\n", + " env={**os.environ, **env_vars},\n", + " start_new_session=True\n", + " )\n", + "\n", + "print(f\"Process started with PID: {api_process.pid}\")\n", + "\n", + "time.sleep(2)\n", + "if api_process.poll() is not None:\n", + " print(f\"\\nERROR: Process died immediately with exit code {api_process.poll()}\")\n", + " print(\"\\nLog 
contents:\")\n", + " !cat /tmp/ramalama_wrapper.log\n", + " raise Exception(\"API wrapper failed to start\")\n", + "print(\"Process is running\")\n", + "\n", + "print(\"\\nWaiting for API to respond...\")\n", "API_URL = \"http://localhost:8000\"\n", - "try:\n", - " r = requests.get(f\"{API_URL}/v1/healthz\", timeout=5)\n", - " print(\"API Status:\", r.json())\n", - " print(f\"\\nOpenAI-compatible API running at: {API_URL}/v1\")\n", - " print(f\"Health: {API_URL}/v1/healthz\")\n", - " print(f\"Chat: {API_URL}/v1/chat/completions\")\n", - "except Exception as e:\n", - " print(f\"Warning: API wrapper not responding: {e}\")\n", - " print(\"You may need to run the wrapper manually\")" + "api_ready = False\n", + "\n", + "for i in range(30):\n", + " time.sleep(1)\n", + " try:\n", + " r = requests.get(f\"{API_URL}/v1/healthz\", timeout=1)\n", + " if r.status_code == 200:\n", + " print(f\"\\nAPI is ready! Response: {r.json()}\")\n", + " print(f\"\\nOpenAI-compatible API running at: {API_URL}/v1\")\n", + " print(f\"Health: {API_URL}/v1/healthz\")\n", + " print(f\"Chat: {API_URL}/v1/chat/completions\")\n", + " api_ready = True\n", + " break\n", + " except requests.exceptions.ConnectionError:\n", + " if i % 5 == 0:\n", + " print(f\" Waiting for API... ({i}s)\")\n", + " except Exception as e:\n", + " if i % 10 == 0:\n", + " print(f\" Unexpected error: {e}\")\n", + "\n", + "if not api_ready:\n", + " print(\"\\nAPI wrapper failed to start within 30 seconds\")\n", + " print(\"\\nChecking if process is still alive...\")\n", + " if api_process.poll() is not None:\n", + " print(f\"Process died with exit code: {api_process.poll()}\")\n", + " else:\n", + " print(\"Process is still running but not responding\")\n", + " \n", + " print(\"\\nLast 50 lines of logs:\")\n", + " !tail -50 /tmp/ramalama_wrapper.log\n", + " \n", + " print(\"\\nChecking if port 8000 is in use:\")\n", + " !netstat -tlnp 2>/dev/null | grep 8000 || echo \"No process on port 8000\"\n", + " \n", + " print(\"\\nNote: You can re-run this cell - the API might just need more time to start\")" ] }, { diff --git a/ai/ai-starter-kit/helm-chart/ai-starter-kit/files/welcome.ipynb b/ai/ai-starter-kit/helm-chart/ai-starter-kit/files/welcome.ipynb index 4d5f154bf..52ef576d0 100644 --- a/ai/ai-starter-kit/helm-chart/ai-starter-kit/files/welcome.ipynb +++ b/ai/ai-starter-kit/helm-chart/ai-starter-kit/files/welcome.ipynb @@ -5,7 +5,22 @@ "id": "5af4f666", "metadata": {}, "source": [ - "Welcome Notebook - Notebook used to verify basic jupyterhub functionality and inference" + "### Welcome Notebook - Notebook used to verify basic jupyterhub functionality and inference" + ] + }, + { + "cell_type": "markdown", + "id": "c85b6901", + "metadata": {}, + "source": [ + "**Purpose**: This notebook demonstrates semantic similarity search using the Qwen3-Embedding-0.6B model. It shows how to:\n", + "\n", + "1. Generate embeddings for search queries and documents.\n", + "2. Use instructed queries (queries with task descriptions) to improve retrieval quality.\n", + "3. Calculate similarity scores between queries and documents.\n", + "4. Identify which documents are most relevant to which queries.\n", + "\n", + "Use Case: Testing embedding model functionality in your JupyterHub environment. The example compares two queries (\"What is the capital of China?\" and \"Explain gravity\") against two documents to find the best matches. High scores (like 0.76) indicate strong semantic similarity." 
]
   },
   {

From 0672f8365018aebd9768a2900db78a2f17d08c0b Mon Sep 17 00:00:00 2001
From: Vlado Djerek
Date: Tue, 28 Oct 2025 16:31:26 +0100
Subject: [PATCH 12/14] remove mlflow enabled from doc

---
 ai/ai-starter-kit/helm-chart/ai-starter-kit/README.md | 1 -
 1 file changed, 1 deletion(-)

diff --git a/ai/ai-starter-kit/helm-chart/ai-starter-kit/README.md b/ai/ai-starter-kit/helm-chart/ai-starter-kit/README.md
index 8a24d0a96..0a9ede3ce 100644
--- a/ai/ai-starter-kit/helm-chart/ai-starter-kit/README.md
+++ b/ai/ai-starter-kit/helm-chart/ai-starter-kit/README.md
@@ -71,7 +71,6 @@ Navigate to http://localhost:8080 and login with any username and password `pass
 | `ramalama.enabled` | Enable Ramalama model server | `true` |
 | `modelsCachePvc.size` | Size of model cache storage | `10Gi` |
 | `jupyterhub.singleuser.defaultUrl` | Default notebook path | `/lab/tree/welcome.ipynb` |
-| `mlflow.enabled` | Enable MLflow tracking server | `true` |
 | `ray-cluster.enabled` | Enable Ray operator and server | `false` |

From ced46e9999467886b11d0cd46527c2fcf682285f Mon Sep 17 00:00:00 2001
From: Aleksandar Stefanovic
Date: Tue, 4 Nov 2025 15:31:14 +0100
Subject: [PATCH 13/14] Applying fixes to resolve PR comments

---
 ai/ai-starter-kit/helm-chart/ai-starter-kit/README.md | 7 +------
 .../helm-chart/ai-starter-kit/files/ray.ipynb | 2 +-
 2 files changed, 2 insertions(+), 7 deletions(-)

diff --git a/ai/ai-starter-kit/helm-chart/ai-starter-kit/README.md b/ai/ai-starter-kit/helm-chart/ai-starter-kit/README.md
index 0a9ede3ce..b1a5d9c05 100644
--- a/ai/ai-starter-kit/helm-chart/ai-starter-kit/README.md
+++ b/ai/ai-starter-kit/helm-chart/ai-starter-kit/README.md
@@ -82,7 +82,7 @@ The chart supports different storage configurations:
 
 ### Using GPUs
 
-In order to use GPUs for AI/ML workloads we need to add the necessary config to the services. Check the dependency charts documentation for the values. For example jupyterhub config would be:
+In order to use GPUs for AI/ML workloads we need to add the necessary config to the services. Check the dependency charts documentation for the values. The following example uses a GKE nodeSelector; the jupyterhub config, for instance, could look like this:
 
  ```yaml
  juypterhub:
@@ -177,11 +177,6 @@ kubectl delete pvc ai-starter-kit-jupyterhub-hub-db-dir
 ```
 
-### Delete GKE cluster
-```bash
-gcloud container clusters delete ${CLUSTER_NAME} --region=${REGION}
-```
-
 ### Stop minikube
 ```bash
 minikube stop
diff --git a/ai/ai-starter-kit/helm-chart/ai-starter-kit/files/ray.ipynb b/ai/ai-starter-kit/helm-chart/ai-starter-kit/files/ray.ipynb
index 387c320b7..885379c7e 100644
--- a/ai/ai-starter-kit/helm-chart/ai-starter-kit/files/ray.ipynb
+++ b/ai/ai-starter-kit/helm-chart/ai-starter-kit/files/ray.ipynb
@@ -21,7 +21,7 @@
   },
   "outputs": [],
   "source": [
-    "!pip install numpy mlflow tensorflow \"ray[serve,default,client]\""
+    "!pip install numpy mlflow tensorflow==2.20.0 \"ray[serve,default,client]\""
   ]
  },
  {

From 78a03d7ec29c911627a5c1f5d469bf0a4f72e631 Mon Sep 17 00:00:00 2001
From: Aleksandar Stefanovic
Date: Wed, 5 Nov 2025 10:58:00 +0100
Subject: [PATCH 14/14] Updating readme with makefile command description

---
 .../helm-chart/ai-starter-kit/README.md | 40 +++++++++++++++++++
 1 file changed, 40 insertions(+)

diff --git a/ai/ai-starter-kit/helm-chart/ai-starter-kit/README.md b/ai/ai-starter-kit/helm-chart/ai-starter-kit/README.md
index b1a5d9c05..bc961f2e3 100644
--- a/ai/ai-starter-kit/helm-chart/ai-starter-kit/README.md
+++ b/ai/ai-starter-kit/helm-chart/ai-starter-kit/README.md
@@ -60,6 +60,46 @@ kubectl port-forward svc/ai-starter-kit-jupyterhub-proxy-public 8080:80
 ```
 Navigate to http://localhost:8080 and login with any username and password `password`
 
+### Makefile Commands
+
+The project includes a Makefile with convenient commands for managing the deployment:
+
+#### Development & Testing
+
+```bash
+make start # Start minikube with mounted model cache (4 CPU, 15GB RAM)
+make start_gpu # Start minikube with GPU support using krunkit driver
+make lint # Run Helm chart linting to validate syntax
+make dep_update # Update Helm chart dependencies
+```
+
+#### Installation & Management
+
+```bash
+make install HF_TOKEN= # Install/upgrade the chart (required: HF_TOKEN env var)
+make uninstall # Uninstall the ai-starter-kit release
+make destroy # Delete the entire minikube cluster
+```
+
+#### Validation
+
+```bash
+make validate_jupyterhub # Verify JupyterHub deployment and run connectivity test
+make validate_ray # Verify Ray cluster deployment and submit test job
+```
+
+#### Distribution
+
+```bash
+make package_helm # Package chart into tarball (output: out/)
+make push_helm OCI_HELM_TARGET= # Push packaged chart to OCI registry
+```
+
+#### Required Environment Variables
+
+- `HF_TOKEN`: HuggingFace token (for `make install`)
+- `OCI_HELM_TARGET`: OCI registry path (for `make push_helm`)
+
 ## Configuration
 
 ### Key Configuration Options