Skip to content

Commit ca17053

Browse files
authored
[AQUA][GPT-OSS] Add Shape-Specific Env Config for GPT-OSS Models in AQUA Deployment Config Reader (#1244)
1 parent 68aa2e7 commit ca17053

File tree

7 files changed

+154
-60
lines changed

7 files changed

+154
-60
lines changed

ads/aqua/common/utils.py

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -997,6 +997,44 @@ def get_container_params_type(container_type_name: str) -> str:
997997
return UNKNOWN
998998

999999

1000+
def get_container_env_type(container_type_name: Optional[str]) -> str:
1001+
"""
1002+
Determine the container environment type based on the container type name.
1003+
1004+
This function matches the provided container type name against the known
1005+
values of `InferenceContainerType`. The check is case-insensitive and
1006+
allows for partial matches so that changes in container naming conventions
1007+
(e.g., prefixes or suffixes) will still be matched correctly.
1008+
1009+
Examples:
1010+
>>> get_container_env_type("odsc-vllm-serving")
1011+
'VLLM'
1012+
>>> get_container_env_type("ODSC-TGI-Serving")
1013+
'TGI'
1014+
>>> get_container_env_type("custom-unknown-container")
1015+
'UNKNOWN'
1016+
1017+
Args:
1018+
container_type_name (Optional[str]):
1019+
The deployment container type name (e.g., "odsc-vllm-serving").
1020+
1021+
Returns:
1022+
str:
1023+
- A matching `InferenceContainerType` value string (e.g., "VLLM", "TGI", "LLAMA-CPP").
1024+
- `"UNKNOWN"` if no match is found or the input is empty/None.
1025+
"""
1026+
if not container_type_name:
1027+
return UNKNOWN
1028+
1029+
needle = container_type_name.strip().casefold()
1030+
1031+
for container_type in InferenceContainerType.values():
1032+
if container_type and container_type.casefold() in needle:
1033+
return container_type.upper()
1034+
1035+
return UNKNOWN
1036+
1037+
10001038
def get_restricted_params_by_container(container_type_name: str) -> set:
10011039
"""The utility function accepts the deployment container type name and returns a set of restricted params
10021040
for that container.

ads/aqua/modeldeployment/config_loader.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,7 @@ class MultiModelConfig(Serializable):
8888
gpu_count (int, optional): Number of GPUs count to this model of this shape.
8989
parameters (Dict[str, str], optional): A dictionary of parameters (e.g., VLLM_PARAMS) to
9090
configure the behavior of a particular GPU shape.
91+
env (Dict[str, Dict[str, str]], optional): Environment variables grouped by namespace (e.g., "VLLM": {"VAR": "VAL"}).
9192
"""
9293

9394
gpu_count: Optional[int] = Field(
@@ -97,6 +98,10 @@ class MultiModelConfig(Serializable):
9798
default_factory=dict,
9899
description="Key-value pairs for GPU shape parameters (e.g., VLLM_PARAMS).",
99100
)
101+
env: Optional[Dict[str, Dict[str, str]]] = Field(
102+
default_factory=dict,
103+
description="Environment variables grouped by namespace",
104+
)
100105

101106
class Config:
102107
extra = "allow"
@@ -130,6 +135,7 @@ class ConfigurationItem(Serializable):
130135
configure the behavior of a particular GPU shape.
131136
multi_model_deployment (List[MultiModelConfig], optional): A list of multi model configuration details.
132137
shape_info (DeploymentShapeInfo, optional): The shape information to this model for specific CPU shape.
138+
env (Dict[str, Dict[str, str]], optional): Environment variables grouped by namespace (e.g., "VLLM": {"VAR": "VAL"}).
133139
"""
134140

135141
parameters: Optional[Dict[str, str]] = Field(
@@ -143,6 +149,10 @@ class ConfigurationItem(Serializable):
143149
default_factory=DeploymentShapeInfo,
144150
description="The shape information to this model for specific shape",
145151
)
152+
env: Optional[Dict[str, Dict[str, str]]] = Field(
153+
default_factory=dict,
154+
description="Environment variables grouped by namespace",
155+
)
146156

147157
class Config:
148158
extra = "allow"

ads/aqua/modeldeployment/deployment.py

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
build_pydantic_error_message,
2828
find_restricted_params,
2929
get_combined_params,
30+
get_container_env_type,
3031
get_container_params_type,
3132
get_ocid_substring,
3233
get_params_list,
@@ -1043,6 +1044,7 @@ def get_deployment_config(self, model_id: str) -> AquaDeploymentConfig:
10431044
config = self.get_config_from_metadata(
10441045
model_id, AquaModelMetadataKeys.DEPLOYMENT_CONFIGURATION
10451046
).config
1047+
10461048
if config:
10471049
logger.info(
10481050
f"Fetched {AquaModelMetadataKeys.DEPLOYMENT_CONFIGURATION} from defined metadata for model: {model_id}."
@@ -1127,7 +1129,7 @@ def get_deployment_default_params(
11271129
model_id: str,
11281130
instance_shape: str,
11291131
gpu_count: int = None,
1130-
) -> List[str]:
1132+
) -> Dict:
11311133
"""Gets the default params set in the deployment configs for the given model and instance shape.
11321134
11331135
Parameters
@@ -1149,6 +1151,7 @@ def get_deployment_default_params(
11491151
11501152
"""
11511153
default_params = []
1154+
default_envs = {}
11521155
config_params = {}
11531156
model = DataScienceModel.from_id(model_id)
11541157
try:
@@ -1158,16 +1161,15 @@ def get_deployment_default_params(
11581161
except ValueError:
11591162
container_type_key = UNKNOWN
11601163
logger.debug(
1161-
f"{AQUA_DEPLOYMENT_CONTAINER_METADATA_NAME} key is not available in the custom metadata field for model {model_id}."
1164+
f"{AQUA_DEPLOYMENT_CONTAINER_METADATA_NAME} key is not available in the "
1165+
f"custom metadata field for model {model_id}."
11621166
)
11631167

11641168
if container_type_key:
11651169
deployment_config = self.get_deployment_config(model_id)
1166-
11671170
instance_shape_config = deployment_config.configuration.get(
11681171
instance_shape, ConfigurationItem()
11691172
)
1170-
11711173
if instance_shape_config.multi_model_deployment and gpu_count:
11721174
gpu_params = instance_shape_config.multi_model_deployment
11731175

@@ -1176,12 +1178,18 @@ def get_deployment_default_params(
11761178
config_params = gpu_config.parameters.get(
11771179
get_container_params_type(container_type_key), UNKNOWN
11781180
)
1181+
default_envs = instance_shape_config.env.get(
1182+
get_container_env_type(container_type_key), {}
1183+
)
11791184
break
11801185

11811186
else:
11821187
config_params = instance_shape_config.parameters.get(
11831188
get_container_params_type(container_type_key), UNKNOWN
11841189
)
1190+
default_envs = instance_shape_config.env.get(
1191+
get_container_env_type(container_type_key), {}
1192+
)
11851193

11861194
if config_params:
11871195
params_list = get_params_list(config_params)
@@ -1194,7 +1202,7 @@ def get_deployment_default_params(
11941202
if params.split()[0] not in restricted_params_set:
11951203
default_params.append(params)
11961204

1197-
return default_params
1205+
return {"data": default_params, "env": default_envs}
11981206

11991207
def validate_deployment_params(
12001208
self,

tests/unitary/with_extras/aqua/test_data/deployment/aqua_multi_model_deployment_config.json

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,24 @@
11
{
22
"configuration": {
33
"BM.GPU.A100-v2.8": {
4+
"env": {},
45
"multi_model_deployment": [
56
{
7+
"env": {},
68
"gpu_count": 1,
79
"parameters": {
810
"VLLM_PARAMS": "--trust-remote-code --max-model-len 32000"
911
}
1012
},
1113
{
14+
"env": {},
1215
"gpu_count": 2,
1316
"parameters": {
1417
"VLLM_PARAMS": "--trust-remote-code --max-model-len 32000"
1518
}
1619
},
1720
{
21+
"env": {},
1822
"gpu_count": 8,
1923
"parameters": {
2024
"VLLM_PARAMS": "--trust-remote-code --max-model-len 32000"
@@ -26,6 +30,7 @@
2630
}
2731
},
2832
"BM.GPU.H100.8": {
33+
"env": {},
2934
"multi_model_deployment": [
3035
{
3136
"gpu_count": 1
@@ -44,6 +49,7 @@
4449
"VM.GPU.A10.2": {
4550
"multi_model_deployment": [
4651
{
52+
"env": {},
4753
"gpu_count": 2,
4854
"parameters": {
4955
"VLLM_PARAMS": "--trust-remote-code --max-model-len 32000"
@@ -52,8 +58,10 @@
5258
]
5359
},
5460
"VM.GPU.A10.4": {
61+
"env": {},
5562
"multi_model_deployment": [
5663
{
64+
"env": {},
5765
"gpu_count": 2,
5866
"parameters": {
5967
"VLLM_PARAMS": "--trust-remote-code --max-model-len 32000"

tests/unitary/with_extras/aqua/test_data/deployment/deployment_config.json

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,11 @@
11
{
22
"configuration": {
33
"VM.GPU.A10.4": {
4+
"env": {
5+
"VLLM": {
6+
"VLLM_ATTENTION_BACKEND": "TRITON_ATTN_VLLM_V1"
7+
}
8+
},
49
"parameters": {
510
"TGI_PARAMS": "--max-stop-sequences 6",
611
"VLLM_PARAMS": "--max-model-len 4096"
Lines changed: 53 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -1,43 +1,58 @@
11
{
2-
"shape": [
3-
"VM.GPU.A10.1",
4-
"VM.GPU.A10.2",
5-
"BM.GPU.A10.4",
6-
"BM.GPU.L40S-NC.4"
7-
],
8-
"configuration": {
9-
"VM.GPU.A10.2": {
10-
"parameters": {
11-
"VLLM_PARAMS": "--trust-remote-code --max-model-len 60000"
12-
},
13-
"multi_model_deployment": [
14-
{
15-
"gpu_count": 1
16-
}
17-
]
18-
},
19-
"BM.GPU.A10.4": {
20-
"parameters": {
21-
"VLLM_PARAMS": "--trust-remote-code --max-model-len 60000"
22-
},
23-
"multi_model_deployment": [
24-
{
25-
"gpu_count": 1
26-
},
27-
{
28-
"gpu_count": 2
29-
}
30-
]
2+
"configuration": {
3+
"BM.GPU.A10.4": {
4+
"env": {
5+
"VLLM": {
6+
"VLLM_ATTENTION_BACKEND": "TRITON_ATTN_VLLM_V1"
7+
}
8+
},
9+
"multi_model_deployment": [
10+
{
11+
"gpu_count": 1
3112
},
32-
"BM.GPU.L40S-NC.4": {
33-
"parameters": {
34-
"VLLM_PARAMS": "--trust-remote-code --max-model-len 60000"
35-
},
36-
"multi_model_deployment": [
37-
{
38-
"gpu_count": 2
39-
}
40-
]
13+
{
14+
"gpu_count": 2
15+
}
16+
],
17+
"parameters": {
18+
"VLLM_PARAMS": "--trust-remote-code --max-model-len 60000"
19+
}
20+
},
21+
"BM.GPU.L40S-NC.4": {
22+
"env": {
23+
"VLLM": {
24+
"VLLM_ATTENTION_BACKEND": "TRITON_ATTN_VLLM_V1"
25+
}
26+
},
27+
"multi_model_deployment": [
28+
{
29+
"gpu_count": 2
30+
}
31+
],
32+
"parameters": {
33+
"VLLM_PARAMS": "--trust-remote-code --max-model-len 60000"
34+
}
35+
},
36+
"VM.GPU.A10.2": {
37+
"env": {
38+
"VLLM": {
39+
"VLLM_ATTENTION_BACKEND": "TRITON_ATTN_VLLM_V1"
40+
}
41+
},
42+
"multi_model_deployment": [
43+
{
44+
"gpu_count": 1
4145
}
46+
],
47+
"parameters": {
48+
"VLLM_PARAMS": "--trust-remote-code --max-model-len 60000"
49+
}
4250
}
51+
},
52+
"shape": [
53+
"VM.GPU.A10.1",
54+
"VM.GPU.A10.2",
55+
"BM.GPU.A10.4",
56+
"BM.GPU.L40S-NC.4"
57+
]
4358
}

0 commit comments

Comments
 (0)