|
37 | 37 | get_preferred_compatible_family, |
38 | 38 | get_resource_name, |
39 | 39 | get_restricted_params_by_container, |
| 40 | + is_valid_ocid, |
40 | 41 | load_gpu_shapes_index, |
41 | 42 | validate_cmd_var, |
42 | 43 | ) |
@@ -140,6 +141,9 @@ class AquaDeploymentApp(AquaApp): |
140 | 141 | the GPU allocations for all compatible shapes. |
141 | 142 | list_shapes(self, **kwargs) -> List[Dict]: |
142 | 143 | Lists the valid model deployment shapes. |
| 144 | + recommend_shape(self, **kwargs) -> Union[Table, ShapeRecommendationReport]: |
| 145 | + Generates a recommendation report or table of valid GPU deployment shapes |
| 146 | + for the provided model and configuration. |
143 | 147 |
|
144 | 148 | Note: |
145 | 149 | Use `ads aqua deployment <method_name> --help` to get more details on the parameters available. |
@@ -1979,56 +1983,97 @@ def validate_deployment_params( |
1979 | 1983 | ) |
1980 | 1984 | return {"valid": True} |
1981 | 1985 |
|
| 1986 | + @cached(cache=TTLCache(maxsize=1, ttl=timedelta(minutes=1), timer=datetime.now)) |
1982 | 1987 | def recommend_shape(self, **kwargs) -> Union[Table, ShapeRecommendationReport]: |
1983 | 1988 | """ |
1984 | | - For the CLI (set by default, generate_table = True), generates the table (in rich diff) with valid |
1985 | | - GPU deployment shapes for the provided model and configuration. |
| 1989 | + Generates a recommendation report or table of valid GPU deployment shapes |
| 1990 | + for the provided model and configuration. |
1986 | 1991 |
|
1987 | | - For the API (set generate_table = False), generates the JSON with valid |
1988 | | - GPU deployment shapes for the provided model and configuration. |
| 1992 | + For CLI (default `generate_table=True`): generates a rich table. |
| 1993 | + For API (`generate_table=False`): returns a structured JSON report. |
| 1994 | + Example: ads aqua deployment recommend_shape --model-id meta-llama/Llama-3.3-70B-Instruct --generate_table false |
1989 | 1995 |
|
1990 | | - Validates the input and determines whether recommendations are available. |
1991 | | -
|
1992 | | - Parameters |
1993 | | - ---------- |
1994 | | - **kwargs |
1995 | | - model_ocid : str |
1996 | | - (Required) The OCID of the model to recommend feasible compute shapes for. |
| 1996 | + Args: |
| 1997 | + model_id : str |
| 1998 | + (Required) The OCID or Hugging Face model ID to recommend compute shapes for. |
1997 | 1999 | generate_table : bool, optional |
1998 | | - If True, generate and return a rich-diff table; if False, return a JSON response (default is False). |
1999 | | - compartment_id : str, optional |
2000 | | - The OCID of the user's compartment to use for the recommendation. |
| 2000 | + If True, generates and returns a rich table; if False, returns a structured report (default: False; the CLI sets it to True). |
2001 | 2001 |
|
2002 | 2002 | Returns |
2003 | 2003 | ------- |
2004 | | - Table (generate_table = True) |
2005 | | - If `generate_table` is True, a table displaying the recommendation report with compatible deployment shapes, |
2006 | | - or troubleshooting info if no shape is suitable. |
| 2004 | + Table |
| 2005 | + If `generate_table=True`, returns a table of shape recommendations. |
2007 | 2006 |
|
2008 | | - ShapeRecommendationReport (generate_table = False) |
2009 | | - If `generate_table` is False, a structured recommendation report with compatible deployment shapes, |
2010 | | - or troubleshooting info and citing the largest shapes if no shape is suitable. |
| 2007 | + ShapeRecommendationReport |
| 2008 | + If `generate_table=False`, returns a structured recommendation report. |
2011 | 2009 |
|
2012 | 2010 | Raises |
2013 | 2011 | ------ |
2014 | 2012 | AquaValueError |
2015 | | - If the model type is unsupported and no recommendation report can be generated. |
| 2013 | + If required parameters are missing or invalid, or if an unexpected error occurs while generating the recommendation. |
2016 | 2014 | """ |
2017 | | - deployment_config = self.get_deployment_config(model_id=kwargs.get("model_id")) |
2018 | | - kwargs["deployment_config"] = deployment_config |
| 2015 | + model_id = kwargs.pop("model_id", None) |
| 2016 | + if not model_id: |
| 2017 | + raise AquaValueError( |
| 2018 | + "The 'model_id' parameter is required to generate shape recommendations. " |
| 2019 | + "Please provide a valid OCID or Hugging Face model identifier." |
| 2020 | + ) |
| 2021 | + |
| 2022 | + logger.info(f"Starting shape recommendation for model_id: {model_id}") |
| 2023 | + |
| 2024 | + self.telemetry.record_event_async( |
| 2025 | + category="aqua/deployment", |
| 2026 | + action="recommend_shape", |
| 2027 | + detail=get_ocid_substring(model_id, key_len=8) |
| 2028 | + if is_valid_ocid(ocid=model_id) |
| 2029 | + else model_id, |
| 2030 | + **kwargs, |
| 2031 | + ) |
| 2032 | + |
| 2033 | + if is_valid_ocid(ocid=model_id): |
| 2034 | + logger.debug( |
| 2035 | + f"Attempting to retrieve deployment configuration for model_id={model_id}" |
| 2036 | + ) |
| 2037 | + try: |
| 2038 | + deployment_config = self.get_deployment_config(model_id=model_id) |
| 2039 | + kwargs["deployment_config"] = deployment_config |
| 2040 | + logger.debug( |
| 2041 | + f"Retrieved deployment configuration for model: {model_id}" |
| 2042 | + ) |
| 2043 | + except Exception as e: |
| 2044 | + logger.warning( |
| 2045 | + f"Failed to retrieve deployment configuration for model_id={model_id}: {e}" |
| 2046 | + ) |
2019 | 2047 |
|
2020 | 2048 | try: |
2021 | | - request = RequestRecommend(**kwargs) |
| 2049 | + request = RequestRecommend(model_id=model_id, **kwargs) |
2022 | 2050 | except ValidationError as e: |
2023 | 2051 | custom_error = build_pydantic_error_message(e) |
2024 | | - raise AquaValueError( # noqa: B904 |
2025 | | - f"Failed to request shape recommendation due to invalid input parameters: {custom_error}" |
| 2052 | + logger.error( |
| 2053 | + f"Validation failed for shape recommendation request: {custom_error}" |
2026 | 2054 | ) |
| 2055 | + raise AquaValueError( |
| 2056 | + f"Invalid input parameters for shape recommendation: {custom_error}" |
| 2057 | + ) from e |
2027 | 2058 |
|
2028 | | - shape_recommend = AquaShapeRecommend() |
2029 | | - shape_recommend_report = shape_recommend.which_shapes(request) |
2030 | | - |
2031 | | - return shape_recommend_report |
| 2059 | + try: |
| 2060 | + shape_recommend = AquaShapeRecommend() |
| 2061 | + logger.info( |
| 2062 | + f"Running shape recommendation for model '{model_id}' " |
| 2063 | + f"with generate_table={getattr(request, 'generate_table', False)}" |
| 2064 | + ) |
| 2065 | + shape_recommend_report = shape_recommend.which_shapes(request) |
| 2066 | + logger.info(f"Shape recommendation completed successfully for {model_id}") |
| 2067 | + return shape_recommend_report |
| 2068 | + except AquaValueError: |
| 2069 | + raise |
| 2070 | + except Exception as e: |
| 2071 | + logger.exception( |
| 2072 | + f"Unexpected error while generating shape recommendations: {e}" |
| 2073 | + ) |
| 2074 | + raise AquaValueError( |
| 2075 | + f"An unexpected error occurred during shape recommendation: {e}" |
| 2076 | + ) from e |
2032 | 2077 |
|
2033 | 2078 | @telemetry(entry_point="plugin=deployment&action=list_shapes", name="aqua") |
2034 | 2079 | @cached(cache=TTLCache(maxsize=1, ttl=timedelta(minutes=5), timer=datetime.now)) |
|
0 commit comments