From a7f978ba54bd5dec7763b7eee9843ee996aef13d Mon Sep 17 00:00:00 2001
From: Vikas Pandey
Date: Wed, 5 Nov 2025 13:10:56 +0530
Subject: [PATCH] Enable model_list filtering for AUTO_SELECT_SERIES via
 MetaSelector (from spec.model_kwargs), add tests

---
 .../lowcode/forecast/meta_selector.py         | 30 ++++++-
 .../lowcode/forecast/model/base_model.py      |  2 +-
 .../lowcode/forecast/model/factory.py         | 16 +++-
 tests/operators/forecast/test_datasets.py     | 90 +++++++++++++++++++
 4 files changed, 133 insertions(+), 5 deletions(-)

diff --git a/ads/opctl/operator/lowcode/forecast/meta_selector.py b/ads/opctl/operator/lowcode/forecast/meta_selector.py
index 76390b279..116648f25 100644
--- a/ads/opctl/operator/lowcode/forecast/meta_selector.py
+++ b/ads/opctl/operator/lowcode/forecast/meta_selector.py
@@ -13,7 +13,7 @@ class MetaSelector:
     The rules are based on the meta-features calculated by the FFORMS approach.
     """
 
-    def __init__(self):
+    def __init__(self, allowed_models=None):
         """Initialize the MetaSelector with pre-learned meta rules"""
         # Pre-learned rules based on meta-features
         self._meta_rules = {
@@ -216,6 +216,25 @@ def __init__(self):
             },
         }
 
+        # Restrict the rule table to the caller-supplied models, if any. Names
+        # are matched case-insensitively, unknown names are dropped, and an
+        # allow-list without any known name leaves the rules untouched.
+        self._allowed_set = None
+        if allowed_models:
+            known = {"prophet", "arima", "neuralprophet", "automlx", "autots"}
+            if isinstance(allowed_models, (list, tuple, set, frozenset)):
+                requested = {str(m).strip().lower() for m in allowed_models}
+            else:
+                # Anything else (e.g. a bare string) counts as a single name
+                requested = {str(allowed_models).strip().lower()}
+            self._allowed_set = requested & known
+            if self._allowed_set:
+                self._meta_rules = {
+                    name: rule
+                    for name, rule in self._meta_rules.items()
+                    if rule.get("model") in self._allowed_set
+                }
+
     def _evaluate_condition(self, value, operator, threshold):
         """Evaluate a single condition based on pre-defined operators"""
         if pd.isna(value):
@@ -288,7 +307,14 @@ def select_best_model(self, meta_features_df):
                 series_info["matched_features"] = matched_features[best_rule]
             else:
                 best_rule = "default"
-                best_model = "prophet"  # Default to prophet if no rules match
+                # No rule matched: fall back to prophet unless an allow-list
+                # excludes it; then take the alphabetically first allowed
+                # model so the choice stays deterministic.
+                allowed = getattr(self, "_allowed_set", None)
+                if not allowed or "prophet" in allowed:
+                    best_model = "prophet"
+                else:
+                    best_model = min(allowed)
                 series_info["matched_features"] = []
 
             series_info["selected_model"] = best_model
diff --git a/ads/opctl/operator/lowcode/forecast/model/base_model.py b/ads/opctl/operator/lowcode/forecast/model/base_model.py
index db2c73507..7d25d7df2 100644
--- a/ads/opctl/operator/lowcode/forecast/model/base_model.py
+++ b/ads/opctl/operator/lowcode/forecast/model/base_model.py
@@ -47,11 +47,11 @@
     AUTO_SELECT,
     BACKTEST_REPORT_NAME,
     SUMMARY_METRICS_HORIZON_LIMIT,
+    TROUBLESHOOTING_GUIDE,
     ForecastOutputColumns,
     SpeedAccuracyMode,
     SupportedMetrics,
     SupportedModels,
-    TROUBLESHOOTING_GUIDE,
 )
 from ..operator_config import ForecastOperatorConfig, ForecastOperatorSpec
 from .forecast_datasets import ForecastDatasets, ForecastResults
diff --git a/ads/opctl/operator/lowcode/forecast/model/factory.py b/ads/opctl/operator/lowcode/forecast/model/factory.py
index 262fe5bbc..123b5a283 100644
--- a/ads/opctl/operator/lowcode/forecast/model/factory.py
+++ b/ads/opctl/operator/lowcode/forecast/model/factory.py
@@ -76,7 +76,9 @@ def get_model(
 
         if model_type == AUTO_SELECT_SERIES:
             # Initialize MetaSelector for series-specific model selection
-            selector = MetaSelector()
+            # Honor an optional allow-list passed as spec.model_kwargs["model_list"]
+            model_kwargs = getattr(operator_config.spec, "model_kwargs", None) or {}
+            selector = MetaSelector(allowed_models=model_kwargs.get("model_list"))
             # Create a Transformations instance
             transformer = Transformations(dataset_info=datasets.historical_data.spec)
 
@@ -89,7 +91,17 @@ def get_model(
                 )
             )
             # Get the most common model as default
-            model_type = meta_features['selected_model'].mode().iloc[0]
+            selected_str = str(meta_features["selected_model"].mode().iloc[0]).lower()
+            # Translate the selector's label into a SupportedModels member;
+            # labels outside this table fall back to Prophet.
+            str_to_enum = {
+                "prophet": SupportedModels.Prophet,
+                "arima": SupportedModels.Arima,
+                "neuralprophet": SupportedModels.NeuralProphet,
+                "automlx": SupportedModels.AutoMLX,
+                "autots": SupportedModels.AutoTS,
+            }
+            model_type = str_to_enum.get(selected_str, SupportedModels.Prophet)
             # Store the series-specific model selections in the config for later use
             operator_config.spec.meta_features = meta_features
             operator_config.spec.model_kwargs = {}
diff --git a/tests/operators/forecast/test_datasets.py b/tests/operators/forecast/test_datasets.py
index 8460bbea7..1128c7388 100644
--- a/tests/operators/forecast/test_datasets.py
+++ b/tests/operators/forecast/test_datasets.py
@@ -413,5 +413,95 @@ def run_operator(
     # generate_train_metrics = True
 
 
+@pytest.mark.parametrize(
+    "allowed",
+    [["prophet", "arima"], ["prophet"], ["arima"], ["automlx"], ["neuralprophet"]],
+)
+def test_auto_select_series_model_list_filter(allowed):
+    """Check that auto-select-series only reports models named in model_list."""
+    # Skip neuralprophet when running with NumPy 2.x due to upstream np.NaN usage
+    if "neuralprophet" in allowed:
+        try:
+            import numpy as np  # local import to avoid unused import in other tests
+
+            major = int(str(np.__version__).split(".")[0])
+        except Exception:
+            major = 0
+        if major >= 2:
+            pytest.skip(
+                "Skipping neuralprophet with NumPy >= 2.0 due to upstream "
+                "incompatibility (uses np.NaN)."
+            )
+
+    # Skip pure-arima case if pmdarima cannot be imported (e.g., binary
+    # incompatibility with current NumPy)
+    if [str(m).lower() for m in allowed] == ["arima"]:
+        try:
+            import pmdarima as pm  # noqa: F401
+        except Exception as e:
+            pytest.skip(f"Skipping arima due to pmdarima import error: {e}")
+
+    dataset_name = f"{DATASET_PREFIX}dataset1.csv"
+    dataset_i = pd.read_csv(dataset_name)
+    target = "Y"
+
+    with tempfile.TemporaryDirectory() as tmpdirname:
+        historical_data_path = f"{tmpdirname}/primary_data.csv"
+        test_data_path = f"{tmpdirname}/test_data.csv"
+        output_data_path = f"{tmpdirname}/results"
+        yaml_i = deepcopy(TEMPLATE_YAML)
+
+        # Train/Test split
+        dataset_i[[DATETIME_COL, target]][:-PERIODS].to_csv(
+            historical_data_path, index=False
+        )
+        dataset_i[[DATETIME_COL, target]][-PERIODS:].to_csv(test_data_path, index=False)
+
+        # Prepare YAML
+        yaml_i["spec"]["historical_data"]["url"] = historical_data_path
+        yaml_i["spec"]["test_data"] = {"url": test_data_path}
+        yaml_i["spec"]["output_directory"]["url"] = output_data_path
+        yaml_i["spec"]["model"] = "auto-select-series"
+        yaml_i["spec"]["target_column"] = target
+        yaml_i["spec"]["datetime_column"]["name"] = DATETIME_COL
+        yaml_i["spec"]["horizon"] = PERIODS
+        yaml_i["spec"]["generate_metrics"] = True
+        yaml_i["spec"]["model_kwargs"] = {"model_list": allowed}
+
+        # Run operator
+        run(yaml_i, backend="operator.local", debug=False)
+
+        # Collect per-model metrics produced by auto-select-series
+        result_files = os.listdir(output_data_path)
+        train_metrics_files = [
+            f for f in result_files if f.startswith("metrics_") and f.endswith(".csv")
+        ]
+        test_metrics_files = [
+            f
+            for f in result_files
+            if f.startswith("test_metrics_") and f.endswith(".csv")
+        ]
+
+        # Extract model names from filenames
+        found_models = set()
+        for f in train_metrics_files:
+            found_models.add(f[len("metrics_") : -len(".csv")])
+        for f in test_metrics_files:
+            found_models.add(f[len("test_metrics_") : -len(".csv")])
+
+        assert found_models, "No per-model metrics files were generated."
+        # Ensure only allowed models are present
+        assert found_models.issubset(set(allowed)), (
+            f"Found disallowed models in outputs: {found_models - set(allowed)}"
+        )
+
+        # Ensure disallowed models are absent
+        known_models = {"prophet", "arima", "neuralprophet", "automlx", "autots"}
+        disallowed = known_models - set(allowed)
+        assert found_models.isdisjoint(disallowed), (
+            f"Disallowed models present: {found_models & disallowed}"
+        )
+
+
 if __name__ == "__main__":
     pass