26 changes: 24 additions & 2 deletions ads/opctl/operator/lowcode/forecast/meta_selector.py
@@ -13,7 +13,7 @@ class MetaSelector:
The rules are based on the meta-features calculated by the FFORMS approach.
"""

def __init__(self):
def __init__(self, allowed_models=None):
"""Initialize the MetaSelector with pre-learned meta rules"""
# Pre-learned rules based on meta-features
self._meta_rules = {
@@ -216,6 +216,22 @@ def __init__(self):
},
}

# Normalize and apply allowed_models filter if provided
self._allowed_set = None
if allowed_models:
known = {"prophet", "arima", "neuralprophet", "automlx", "autots"}
if isinstance(allowed_models, (list, tuple, set)):
self._allowed_set = {str(m).lower() for m in allowed_models}
else:
self._allowed_set = {str(allowed_models).lower()}
self._allowed_set = {m for m in self._allowed_set if m in known}
if self._allowed_set:
self._meta_rules = {
name: rule
for name, rule in self._meta_rules.items()
if rule.get("model") in self._allowed_set
}

def _evaluate_condition(self, value, operator, threshold):
"""Evaluate a single condition based on pre-defined operators"""
if pd.isna(value):
@@ -288,7 +304,13 @@ def select_best_model(self, meta_features_df):
series_info["matched_features"] = matched_features[best_rule]
else:
best_rule = "default"
best_model = "prophet" # Default to prophet if no rules match
if getattr(self, "_allowed_set", None):
if "prophet" in self._allowed_set:
best_model = "prophet"
else:
best_model = sorted(self._allowed_set)[0]
else:
best_model = "prophet" # Default to prophet if no rules match
series_info["matched_features"] = []

series_info["selected_model"] = best_model
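For reviewers, a minimal standalone sketch of the behavior this file now has: the `allowed_models` normalization in `__init__` and the no-match fallback in `select_best_model`. The names `KNOWN_MODELS`, `normalize_allowed`, and `fallback_model` are illustrative only, not part of the patch:

KNOWN_MODELS = {"prophet", "arima", "neuralprophet", "automlx", "autots"}

def normalize_allowed(allowed_models):
    # Accept a single string or an iterable, lowercase every entry,
    # and drop names outside the known model set (as __init__ does).
    if not allowed_models:
        return None
    if isinstance(allowed_models, (list, tuple, set)):
        allowed = {str(m).lower() for m in allowed_models}
    else:
        allowed = {str(allowed_models).lower()}
    return {m for m in allowed if m in KNOWN_MODELS}

def fallback_model(allowed_set):
    # Mirror the default path: prophet when permitted, otherwise the
    # alphabetically first allowed model, so the choice is deterministic.
    if allowed_set:
        return "prophet" if "prophet" in allowed_set else sorted(allowed_set)[0]
    return "prophet"

print(normalize_allowed(["Prophet", "ARIMA", "bogus"]))  # {'prophet', 'arima'} (set order may vary)
print(fallback_model({"arima", "autots"}))  # arima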
2 changes: 1 addition & 1 deletion ads/opctl/operator/lowcode/forecast/model/base_model.py
@@ -47,11 +47,11 @@
AUTO_SELECT,
BACKTEST_REPORT_NAME,
SUMMARY_METRICS_HORIZON_LIMIT,
TROUBLESHOOTING_GUIDE,
ForecastOutputColumns,
SpeedAccuracyMode,
SupportedMetrics,
SupportedModels,
TROUBLESHOOTING_GUIDE,
)
from ..operator_config import ForecastOperatorConfig, ForecastOperatorSpec
from .forecast_datasets import ForecastDatasets, ForecastResults
13 changes: 11 additions & 2 deletions ads/opctl/operator/lowcode/forecast/model/factory.py
@@ -76,7 +76,8 @@ def get_model(

if model_type == AUTO_SELECT_SERIES:
# Initialize MetaSelector for series-specific model selection
selector = MetaSelector()
model_kwargs = getattr(operator_config.spec, "model_kwargs", None)
allowed = model_kwargs.get("model_list") if model_kwargs else None
selector = MetaSelector(allowed_models=allowed)
# Create a Transformations instance
transformer = Transformations(dataset_info=datasets.historical_data.spec)

@@ -89,7 +90,15 @@
)
)
# Get the most common model as default
model_type = meta_features['selected_model'].mode().iloc[0]
selected_str = str(meta_features['selected_model'].mode().iloc[0]).lower()
str_to_enum = {
"prophet": SupportedModels.Prophet,
"arima": SupportedModels.Arima,
"neuralprophet": SupportedModels.NeuralProphet,
"automlx": SupportedModels.AutoMLX,
"autots": SupportedModels.AutoTS,
}
model_type = str_to_enum.get(selected_str, SupportedModels.Prophet)
# Store the series-specific model selections in the config for later use
operator_config.spec.meta_features = meta_features
operator_config.spec.model_kwargs = {}
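The modal-selection step above collapses the per-series picks into one default model with `Series.mode()`. A hedged sketch of that reduction, using plain strings as stand-ins for the `SupportedModels` enum members:

import pandas as pd

# Stand-ins for SupportedModels members; the real mapping lives in factory.py.
STR_TO_ENUM = {
    "prophet": "Prophet",
    "arima": "Arima",
    "neuralprophet": "NeuralProphet",
    "automlx": "AutoMLX",
    "autots": "AutoTS",
}

selections = pd.Series(["prophet", "arima", "prophet", "autots"])
# mode() returns the most frequent value(s) in sorted order; iloc[0] breaks ties deterministically.
selected_str = str(selections.mode().iloc[0]).lower()
model_type = STR_TO_ENUM.get(selected_str, STR_TO_ENUM["prophet"])  # unknown -> prophet
print(selected_str, "->", model_type)  # prophet -> Prophet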
76 changes: 76 additions & 0 deletions tests/operators/forecast/test_datasets.py
@@ -413,5 +413,81 @@ def run_operator(
# generate_train_metrics = True


@pytest.mark.parametrize("allowed", [["prophet", "arima"], ["prophet"], ["arima"], ["automlx"], ["neuralprophet"]])
def test_auto_select_series_model_list_filter(allowed):
# Skip neuralprophet when running with NumPy 2.x due to upstream np.NaN usage
if "neuralprophet" in allowed:
try:
import numpy as np # local import to avoid unused import in other tests
major = int(str(np.__version__).split(".")[0])
except Exception:
major = 0
if major >= 2:
pytest.skip("Skipping neuralprophet with NumPy >= 2.0 due to upstream incompatibility (uses np.NaN).")

# Skip pure-arima case if pmdarima cannot be imported (e.g., binary incompatibility with current NumPy)
if [str(m).lower() for m in allowed] == ["arima"]:
try:
import pmdarima as pm # noqa: F401
except Exception as e:
pytest.skip(f"Skipping arima due to pmdarima import error: {e}")

dataset_name = f"{DATASET_PREFIX}dataset1.csv"
dataset_i = pd.read_csv(dataset_name)
target = "Y"

with tempfile.TemporaryDirectory() as tmpdirname:
historical_data_path = f"{tmpdirname}/primary_data.csv"
test_data_path = f"{tmpdirname}/test_data.csv"
output_data_path = f"{tmpdirname}/results"
yaml_i = deepcopy(TEMPLATE_YAML)

# Train/Test split
dataset_i[[DATETIME_COL, target]][:-PERIODS].to_csv(
historical_data_path, index=False
)
dataset_i[[DATETIME_COL, target]][-PERIODS:].to_csv(test_data_path, index=False)

# Prepare YAML
yaml_i["spec"]["historical_data"]["url"] = historical_data_path
yaml_i["spec"]["test_data"] = {"url": test_data_path}
yaml_i["spec"]["output_directory"]["url"] = output_data_path
yaml_i["spec"]["model"] = "auto-select-series"
yaml_i["spec"]["target_column"] = target
yaml_i["spec"]["datetime_column"]["name"] = DATETIME_COL
yaml_i["spec"]["horizon"] = PERIODS
yaml_i["spec"]["generate_metrics"] = True
yaml_i["spec"]["model_kwargs"] = {"model_list": allowed}

# Run operator
run(yaml_i, backend="operator.local", debug=False)

# Collect per-model metrics produced by auto-select-series
result_files = os.listdir(output_data_path)
train_metrics_files = [
f for f in result_files if f.startswith("metrics_") and f.endswith(".csv")
]
test_metrics_files = [
f
for f in result_files
if f.startswith("test_metrics_") and f.endswith(".csv")
]

# Extract model names from filenames
found_models = set()
for f in train_metrics_files:
found_models.add(f[len("metrics_") : -len(".csv")])
for f in test_metrics_files:
found_models.add(f[len("test_metrics_") : -len(".csv")])

assert found_models, "No per-model metrics files were generated."
# Ensure only allowed models are present
assert found_models.issubset(set(allowed)), f"Found disallowed models in outputs: {found_models - set(allowed)}"

# Ensure disallowed models are absent
known_models = {"prophet", "arima", "neuralprophet", "automlx", "autots"}
disallowed = known_models - set(allowed)
assert found_models.isdisjoint(disallowed), f"Disallowed models present: {found_models & disallowed}"

if __name__ == "__main__":
pass
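The assertions above recover model names by slicing the metrics filenames; a small self-contained sketch of that parsing (the helper name `model_from_metrics_filename` is hypothetical, for illustration only):

def model_from_metrics_filename(fname):
    # Strip the test_metrics_/metrics_ prefix and the .csv suffix,
    # matching the slicing used in the test assertions.
    for prefix in ("test_metrics_", "metrics_"):
        if fname.startswith(prefix) and fname.endswith(".csv"):
            return fname[len(prefix):-len(".csv")]
    return None

assert model_from_metrics_filename("metrics_prophet.csv") == "prophet"
assert model_from_metrics_filename("test_metrics_arima.csv") == "arima"
assert model_from_metrics_filename("report.html") is None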