Skip to content

Commit bcd84b1

Browse files
authored
Merge branch 'main' into feature/model_group
2 parents 8fcf6ac + 7ba99be commit bcd84b1

File tree

3 files changed

+99
-40
lines changed

3 files changed

+99
-40
lines changed

ads/aqua/modeldeployment/deployment.py

Lines changed: 75 additions & 30 deletions
Original file line number | Diff line number | Diff line change
@@ -37,6 +37,7 @@
3737
get_preferred_compatible_family,
3838
get_resource_name,
3939
get_restricted_params_by_container,
40+
is_valid_ocid,
4041
load_gpu_shapes_index,
4142
validate_cmd_var,
4243
)
@@ -140,6 +141,9 @@ class AquaDeploymentApp(AquaApp):
140141
the GPU allocations for all compatible shapes.
141142
list_shapes(self, **kwargs) -> List[Dict]:
142143
Lists the valid model deployment shapes.
144+
recommend_shape(self, **kwargs) -> ShapeRecommendationReport:
145+
Generates a recommendation report or table of valid GPU deployment shapes
146+
for the provided model and configuration.
143147
144148
Note:
145149
Use `ads aqua deployment <method_name> --help` to get more details on the parameters available.
@@ -1979,56 +1983,97 @@ def validate_deployment_params(
19791983
)
19801984
return {"valid": True}
19811985

1986+
@cached(cache=TTLCache(maxsize=1, ttl=timedelta(minutes=1), timer=datetime.now))
19821987
def recommend_shape(self, **kwargs) -> Union[Table, ShapeRecommendationReport]:
19831988
"""
1984-
For the CLI (set by default, generate_table = True), generates the table (in rich diff) with valid
1985-
GPU deployment shapes for the provided model and configuration.
1989+
Generates a recommendation report or table of valid GPU deployment shapes
1990+
for the provided model and configuration.
19861991
1987-
For the API (set generate_table = False), generates the JSON with valid
1988-
GPU deployment shapes for the provided model and configuration.
1992+
For CLI (default `generate_table=True`): generates a rich table.
1993+
For API (`generate_table=False`): returns a structured JSON report.
1994+
Example: ads aqua deployment recommend_shape --model-id meta-llama/Llama-3.3-70B-Instruct --generate_table false
19891995
1990-
Validates the input and determines whether recommendations are available.
1991-
1992-
Parameters
1993-
----------
1994-
**kwargs
1995-
model_ocid : str
1996-
(Required) The OCID of the model to recommend feasible compute shapes for.
1996+
Args:
1997+
model_id : str
1998+
(Required) The OCID or Hugging Face model ID to recommend compute shapes for.
19971999
generate_table : bool, optional
1998-
If True, generate and return a rich-diff table; if False, return a JSON response (default is False).
1999-
compartment_id : str, optional
2000-
The OCID of the user's compartment to use for the recommendation.
2000+
If True, generates and returns a table (default: True).
20012001
20022002
Returns
20032003
-------
2004-
Table (generate_table = True)
2005-
If `generate_table` is True, a table displaying the recommendation report with compatible deployment shapes,
2006-
or troubleshooting info if no shape is suitable.
2004+
Table
2005+
If `generate_table=True`, returns a table of shape recommendations.
20072006
2008-
ShapeRecommendationReport (generate_table = False)
2009-
If `generate_table` is False, a structured recommendation report with compatible deployment shapes,
2010-
or troubleshooting info and citing the largest shapes if no shape is suitable.
2007+
ShapeRecommendationReport
2008+
If `generate_table=False`, returns a structured recommendation report.
20112009
20122010
Raises
20132011
------
20142012
AquaValueError
2015-
If the model type is unsupported and no recommendation report can be generated.
2013+
If required parameters are missing or invalid.
20162014
"""
2017-
deployment_config = self.get_deployment_config(model_id=kwargs.get("model_id"))
2018-
kwargs["deployment_config"] = deployment_config
2015+
model_id = kwargs.pop("model_id", None)
2016+
if not model_id:
2017+
raise AquaValueError(
2018+
"The 'model_id' parameter is required to generate shape recommendations. "
2019+
"Please provide a valid OCID or Hugging Face model identifier."
2020+
)
2021+
2022+
logger.info(f"Starting shape recommendation for model_id: {model_id}")
2023+
2024+
self.telemetry.record_event_async(
2025+
category="aqua/deployment",
2026+
action="recommend_shape",
2027+
detail=get_ocid_substring(model_id, key_len=8)
2028+
if is_valid_ocid(ocid=model_id)
2029+
else model_id,
2030+
**kwargs,
2031+
)
2032+
2033+
if is_valid_ocid(ocid=model_id):
2034+
logger.debug(
2035+
f"Attempting to retrieve deployment configuration for model_id={model_id}"
2036+
)
2037+
try:
2038+
deployment_config = self.get_deployment_config(model_id=model_id)
2039+
kwargs["deployment_config"] = deployment_config
2040+
logger.debug(
2041+
f"Retrieved deployment configuration for model: {model_id}"
2042+
)
2043+
except Exception as e:
2044+
logger.warning(
2045+
f"Failed to retrieve deployment configuration for model_id={model_id}: {e}"
2046+
)
20192047

20202048
try:
2021-
request = RequestRecommend(**kwargs)
2049+
request = RequestRecommend(model_id=model_id, **kwargs)
20222050
except ValidationError as e:
20232051
custom_error = build_pydantic_error_message(e)
2024-
raise AquaValueError( # noqa: B904
2025-
f"Failed to request shape recommendation due to invalid input parameters: {custom_error}"
2052+
logger.error(
2053+
f"Validation failed for shape recommendation request: {custom_error}"
20262054
)
2055+
raise AquaValueError(
2056+
f"Invalid input parameters for shape recommendation: {custom_error}"
2057+
) from e
20272058

2028-
shape_recommend = AquaShapeRecommend()
2029-
shape_recommend_report = shape_recommend.which_shapes(request)
2030-
2031-
return shape_recommend_report
2059+
try:
2060+
shape_recommend = AquaShapeRecommend()
2061+
logger.info(
2062+
f"Running shape recommendation for model '{model_id}' "
2063+
f"with generate_table={getattr(request, 'generate_table', False)}"
2064+
)
2065+
shape_recommend_report = shape_recommend.which_shapes(request)
2066+
logger.info(f"Shape recommendation completed successfully for {model_id}")
2067+
return shape_recommend_report
2068+
except AquaValueError:
2069+
raise
2070+
except Exception as e:
2071+
logger.exception(
2072+
f"Unexpected error while generating shape recommendations: {e}"
2073+
)
2074+
raise AquaValueError(
2075+
f"An unexpected error occurred during shape recommendation: {e}"
2076+
) from e
20322077

20332078
@telemetry(entry_point="plugin=deployment&action=list_shapes", name="aqua")
20342079
@cached(cache=TTLCache(maxsize=1, ttl=timedelta(minutes=5), timer=datetime.now))

ads/aqua/shaperecommend/recommend.py

Lines changed: 8 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -3,7 +3,6 @@
33
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
44

55
import json
6-
import os
76
import re
87
import shutil
98
from typing import Dict, List, Optional, Tuple, Union
@@ -189,7 +188,7 @@ def _fetch_hf_config(self, model_id: str) -> Dict:
189188
"""
190189
try:
191190
config_path = hf_hub_download(repo_id=model_id, filename="config.json")
192-
with open(config_path, "r", encoding="utf-8") as f:
191+
with open(config_path, encoding="utf-8") as f:
193192
return json.load(f)
194193
except HfHubHTTPError as e:
195194
format_hf_custom_error_message(e)
@@ -281,6 +280,13 @@ def _rich_diff_table(shape_report: ShapeRecommendationReport) -> Table:
281280
if name
282281
else "Model Deployment Recommendations"
283282
)
283+
284+
header = (
285+
f"{header}\n"
286+
"Currently, only the VLLM container is supported. "
287+
"All shape and parameter recommendations will be generated for the VLLM container."
288+
)
289+
284290
logger.debug(f"Table header set to: {header!r}")
285291

286292
if shape_report.troubleshoot:

ads/aqua/shaperecommend/shape_report.py

Lines changed: 16 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -30,21 +30,29 @@ class RequestRecommend(BaseModel):
3030

3131
model_id: str = Field(
3232
...,
33-
description="The OCID or Hugging Face ID of the model to recommend feasible compute shapes.",
33+
description=(
34+
"The OCID or Hugging Face ID of the model for which to recommend feasible compute shapes."
35+
),
3436
)
35-
generate_table: Optional[bool] = (
36-
Field(
37-
True,
38-
description="True - to generate the rich diff Table, False - generate the JSON response",
37+
38+
generate_table: Optional[bool] = Field(
39+
True,
40+
description=(
41+
"If True, generate a rich formatted table as the response. "
42+
"If False, return the recommendation as a JSON structure."
3943
),
4044
)
45+
4146
compartment_id: Optional[str] = Field(
42-
COMPARTMENT_OCID, description="The OCID of user's compartment"
47+
COMPARTMENT_OCID,
48+
description="The OCID of the user's compartment.",
4349
)
4450

45-
deployment_config: Optional[AquaDeploymentConfig] = Field(
51+
deployment_config: Optional["AquaDeploymentConfig"] = Field(
4652
None,
47-
description="The deployment configuration for model (only available for service models).",
53+
description=(
54+
"The deployment configuration for the model (only available for service models)."
55+
),
4856
)
4957

5058
class Config:

0 commit comments

Comments
 (0)