Skip to content

Commit ca17053

Browse files
authored
[AQUA][GPT-OSS] Add Shape-Specific Env Config for GPT-OSS Models in AQUA Deployment Config Reader (#1244)
1 parent 68aa2e7 commit ca17053

File tree

7 files changed

+154
-60
lines changed

7 files changed

+154
-60
lines changed

ads/aqua/common/utils.py

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -997,6 +997,44 @@ def get_container_params_type(container_type_name: str) -> str:
997997
return UNKNOWN
998998

999999

1000+
def get_container_env_type(container_type_name: Optional[str]) -> str:
1001+
"""
1002+
Determine the container environment type based on the container type name.
1003+
1004+
This function matches the provided container type name against the known
1005+
values of `InferenceContainerType`. The check is case-insensitive and
1006+
allows for partial matches so that changes in container naming conventions
1007+
(e.g., prefixes or suffixes) will still be matched correctly.
1008+
1009+
Examples:
1010+
>>> get_container_env_type("odsc-vllm-serving")
1011+
'VLLM'
1012+
>>> get_container_env_type("ODSC-TGI-Serving")
1013+
'TGI'
1014+
>>> get_container_env_type("custom-unknown-container")
1015+
'UNKNOWN'
1016+
1017+
Args:
1018+
container_type_name (Optional[str]):
1019+
The deployment container type name (e.g., "odsc-vllm-serving").
1020+
1021+
Returns:
1022+
str:
1023+
- A matching `InferenceContainerType` value string (e.g., "VLLM", "TGI", "LLAMA-CPP").
1024+
- `"UNKNOWN"` if no match is found or the input is empty/None.
1025+
"""
1026+
if not container_type_name:
1027+
return UNKNOWN
1028+
1029+
needle = container_type_name.strip().casefold()
1030+
1031+
for container_type in InferenceContainerType.values():
1032+
if container_type and container_type.casefold() in needle:
1033+
return container_type.upper()
1034+
1035+
return UNKNOWN
1036+
1037+
10001038
def get_restricted_params_by_container(container_type_name: str) -> set:
10011039
"""The utility function accepts the deployment container type name and returns a set of restricted params
10021040
for that container.

ads/aqua/modeldeployment/config_loader.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,7 @@ class MultiModelConfig(Serializable):
8888
gpu_count (int, optional): Number of GPUs count to this model of this shape.
8989
parameters (Dict[str, str], optional): A dictionary of parameters (e.g., VLLM_PARAMS) to
9090
configure the behavior of a particular GPU shape.
91+
env (Dict[str, Dict[str, str]], optional): Environment variables grouped by namespace (e.g., "VLLM": {"VAR": "VAL"}).
9192
"""
9293

9394
gpu_count: Optional[int] = Field(
@@ -97,6 +98,10 @@ class MultiModelConfig(Serializable):
9798
default_factory=dict,
9899
description="Key-value pairs for GPU shape parameters (e.g., VLLM_PARAMS).",
99100
)
101+
env: Optional[Dict[str, Dict[str, str]]] = Field(
102+
default_factory=dict,
103+
description="Environment variables grouped by namespace",
104+
)
100105

101106
class Config:
102107
extra = "allow"
@@ -130,6 +135,7 @@ class ConfigurationItem(Serializable):
130135
configure the behavior of a particular GPU shape.
131136
multi_model_deployment (List[MultiModelConfig], optional): A list of multi model configuration details.
132137
shape_info (DeploymentShapeInfo, optional): The shape information to this model for specific CPU shape.
138+
env (Dict[str, Dict[str, str]], optional): Environment variables grouped by namespace (e.g., "VLLM": {"VAR": "VAL"}).
133139
"""
134140

135141
parameters: Optional[Dict[str, str]] = Field(
@@ -143,6 +149,10 @@ class ConfigurationItem(Serializable):
143149
default_factory=DeploymentShapeInfo,
144150
description="The shape information to this model for specific shape",
145151
)
152+
env: Optional[Dict[str, Dict[str, str]]] = Field(
153+
default_factory=dict,
154+
description="Environment variables grouped by namespace",
155+
)
146156

147157
class Config:
148158
extra = "allow"

ads/aqua/modeldeployment/deployment.py

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
build_pydantic_error_message,
2828
find_restricted_params,
2929
get_combined_params,
30+
get_container_env_type,
3031
get_container_params_type,
3132
get_ocid_substring,
3233
get_params_list,
@@ -1043,6 +1044,7 @@ def get_deployment_config(self, model_id: str) -> AquaDeploymentConfig:
10431044
config = self.get_config_from_metadata(
10441045
model_id, AquaModelMetadataKeys.DEPLOYMENT_CONFIGURATION
10451046
).config
1047+
10461048
if config:
10471049
logger.info(
10481050
f"Fetched {AquaModelMetadataKeys.DEPLOYMENT_CONFIGURATION} from defined metadata for model: {model_id}."
@@ -1127,7 +1129,7 @@ def get_deployment_default_params(
11271129
model_id: str,
11281130
instance_shape: str,
11291131
gpu_count: int = None,
1130-
) -> List[str]:
1132+
) -> Dict:
11311133
"""Gets the default params set in the deployment configs for the given model and instance shape.
11321134
11331135
Parameters
@@ -1149,6 +1151,7 @@ def get_deployment_default_params(
11491151
11501152
"""
11511153
default_params = []
1154+
default_envs = {}
11521155
config_params = {}
11531156
model = DataScienceModel.from_id(model_id)
11541157
try:
@@ -1158,16 +1161,15 @@ def get_deployment_default_params(
11581161
except ValueError:
11591162
container_type_key = UNKNOWN
11601163
logger.debug(
1161-
f"{AQUA_DEPLOYMENT_CONTAINER_METADATA_NAME} key is not available in the custom metadata field for model {model_id}."
1164+
f"{AQUA_DEPLOYMENT_CONTAINER_METADATA_NAME} key is not available in the "
1165+
f"custom metadata field for model {model_id}."
11621166
)
11631167

11641168
if container_type_key:
11651169
deployment_config = self.get_deployment_config(model_id)
1166-
11671170
instance_shape_config = deployment_config.configuration.get(
11681171
instance_shape, ConfigurationItem()
11691172
)
1170-
11711173
if instance_shape_config.multi_model_deployment and gpu_count:
11721174
gpu_params = instance_shape_config.multi_model_deployment
11731175

@@ -1176,12 +1178,18 @@ def get_deployment_default_params(
11761178
config_params = gpu_config.parameters.get(
11771179
get_container_params_type(container_type_key), UNKNOWN
11781180
)
1181+
default_envs = instance_shape_config.env.get(
1182+
get_container_env_type(container_type_key), {}
1183+
)
11791184
break
11801185

11811186
else:
11821187
config_params = instance_shape_config.parameters.get(
11831188
get_container_params_type(container_type_key), UNKNOWN
11841189
)
1190+
default_envs = instance_shape_config.env.get(
1191+
get_container_env_type(container_type_key), {}
1192+
)
11851193

11861194
if config_params:
11871195
params_list = get_params_list(config_params)
@@ -1194,7 +1202,7 @@ def get_deployment_default_params(
11941202
if params.split()[0] not in restricted_params_set:
11951203
default_params.append(params)
11961204

1197-
return default_params
1205+
return {"data": default_params, "env": default_envs}
11981206

11991207
def validate_deployment_params(
12001208
self,

tests/unitary/with_extras/aqua/test_data/deployment/aqua_multi_model_deployment_config.json

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,24 @@
11
{
22
"configuration": {
33
"BM.GPU.A100-v2.8": {
4+
"env": {},
45
"multi_model_deployment": [
56
{
7+
"env": {},
68
"gpu_count": 1,
79
"parameters": {
810
"VLLM_PARAMS": "--trust-remote-code --max-model-len 32000"
911
}
1012
},
1113
{
14+
"env": {},
1215
"gpu_count": 2,
1316
"parameters": {
1417
"VLLM_PARAMS": "--trust-remote-code --max-model-len 32000"
1518
}
1619
},
1720
{
21+
"env": {},
1822
"gpu_count": 8,
1923
"parameters": {
2024
"VLLM_PARAMS": "--trust-remote-code --max-model-len 32000"
@@ -26,6 +30,7 @@
2630
}
2731
},
2832
"BM.GPU.H100.8": {
33+
"env": {},
2934
"multi_model_deployment": [
3035
{
3136
"gpu_count": 1
@@ -44,6 +49,7 @@
4449
"VM.GPU.A10.2": {
4550
"multi_model_deployment": [
4651
{
52+
"env": {},
4753
"gpu_count": 2,
4854
"parameters": {
4955
"VLLM_PARAMS": "--trust-remote-code --max-model-len 32000"
@@ -52,8 +58,10 @@
5258
]
5359
},
5460
"VM.GPU.A10.4": {
61+
"env": {},
5562
"multi_model_deployment": [
5663
{
64+
"env": {},
5765
"gpu_count": 2,
5866
"parameters": {
5967
"VLLM_PARAMS": "--trust-remote-code --max-model-len 32000"

tests/unitary/with_extras/aqua/test_data/deployment/deployment_config.json

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,11 @@
11
{
22
"configuration": {
33
"VM.GPU.A10.4": {
4+
"env": {
5+
"VLLM": {
6+
"VLLM_ATTENTION_BACKEND": "TRITON_ATTN_VLLM_V1"
7+
}
8+
},
49
"parameters": {
510
"TGI_PARAMS": "--max-stop-sequences 6",
611
"VLLM_PARAMS": "--max-model-len 4096"
Lines changed: 53 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -1,43 +1,58 @@
11
{
2-
"shape": [
3-
"VM.GPU.A10.1",
4-
"VM.GPU.A10.2",
5-
"BM.GPU.A10.4",
6-
"BM.GPU.L40S-NC.4"
7-
],
8-
"configuration": {
9-
"VM.GPU.A10.2": {
10-
"parameters": {
11-
"VLLM_PARAMS": "--trust-remote-code --max-model-len 60000"
12-
},
13-
"multi_model_deployment": [
14-
{
15-
"gpu_count": 1
16-
}
17-
]
18-
},
19-
"BM.GPU.A10.4": {
20-
"parameters": {
21-
"VLLM_PARAMS": "--trust-remote-code --max-model-len 60000"
22-
},
23-
"multi_model_deployment": [
24-
{
25-
"gpu_count": 1
26-
},
27-
{
28-
"gpu_count": 2
29-
}
30-
]
2+
"configuration": {
3+
"BM.GPU.A10.4": {
4+
"env": {
5+
"VLLM": {
6+
"VLLM_ATTENTION_BACKEND": "TRITON_ATTN_VLLM_V1"
7+
}
8+
},
9+
"multi_model_deployment": [
10+
{
11+
"gpu_count": 1
3112
},
32-
"BM.GPU.L40S-NC.4": {
33-
"parameters": {
34-
"VLLM_PARAMS": "--trust-remote-code --max-model-len 60000"
35-
},
36-
"multi_model_deployment": [
37-
{
38-
"gpu_count": 2
39-
}
40-
]
13+
{
14+
"gpu_count": 2
15+
}
16+
],
17+
"parameters": {
18+
"VLLM_PARAMS": "--trust-remote-code --max-model-len 60000"
19+
}
20+
},
21+
"BM.GPU.L40S-NC.4": {
22+
"env": {
23+
"VLLM": {
24+
"VLLM_ATTENTION_BACKEND": "TRITON_ATTN_VLLM_V1"
25+
}
26+
},
27+
"multi_model_deployment": [
28+
{
29+
"gpu_count": 2
30+
}
31+
],
32+
"parameters": {
33+
"VLLM_PARAMS": "--trust-remote-code --max-model-len 60000"
34+
}
35+
},
36+
"VM.GPU.A10.2": {
37+
"env": {
38+
"VLLM": {
39+
"VLLM_ATTENTION_BACKEND": "TRITON_ATTN_VLLM_V1"
40+
}
41+
},
42+
"multi_model_deployment": [
43+
{
44+
"gpu_count": 1
4145
}
46+
],
47+
"parameters": {
48+
"VLLM_PARAMS": "--trust-remote-code --max-model-len 60000"
49+
}
4250
}
51+
},
52+
"shape": [
53+
"VM.GPU.A10.1",
54+
"VM.GPU.A10.2",
55+
"BM.GPU.A10.4",
56+
"BM.GPU.L40S-NC.4"
57+
]
4358
}

0 commit comments

Comments
 (0)