|
4 | 4 | # Copyright (c) 2020, 2022 Oracle and/or its affiliates. |
5 | 5 | # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/ |
6 | 6 |
|
7 | | -from __future__ import print_function, absolute_import, division |
| 7 | +from __future__ import absolute_import, division, print_function |
8 | 8 |
|
| 9 | +import numpy as np |
| 10 | +import pandas as pd |
| 11 | +import scipy |
9 | 12 | import copy |
10 | | - |
| 13 | +from ads.common import logger, utils |
11 | 14 | from ads.common.model import ADSModel |
12 | | -from ads.common import logger |
13 | 15 | from ads.dataset import helper |
14 | | -from ads.dataset.dataset_with_target import ADSDatasetWithTarget |
15 | 16 | from ads.dataset.classification_dataset import ( |
16 | 17 | BinaryClassificationDataset, |
| 18 | + BinaryTextClassificationDataset, |
17 | 19 | MultiClassClassificationDataset, |
18 | 20 | MultiClassTextClassificationDataset, |
19 | | - BinaryTextClassificationDataset, |
20 | 21 | ) |
21 | | -from ads.dataset.regression_dataset import RegressionDataset |
22 | | - |
| 22 | +from ads.dataset.dataset_with_target import ADSDatasetWithTarget |
23 | 23 | from ads.dataset.pipeline import TransformerPipeline |
| 24 | +from ads.dataset.regression_dataset import RegressionDataset |
24 | 25 | from ads.type_discovery.type_discovery_driver import TypeDiscoveryDriver |
25 | 26 | from ads.type_discovery.typed_feature import ( |
26 | 27 | ContinuousTypedFeature, |
27 | 28 | DiscreteTypedFeature, |
28 | 29 | ) |
29 | | -from ads.common import utils |
30 | | - |
31 | 30 |
|
32 | 31 | dataset_task_map = { |
33 | 32 | BinaryClassificationDataset: utils.ml_task_types.BINARY_CLASSIFICATION, |
@@ -114,7 +113,7 @@ def __init__( |
114 | 113 | >>> olabs_automl = OracleAutoMLProvider() |
115 | 114 | >>> model, baseline = AutoML(train, provider=olabs_automl).train() |
116 | 115 | """ |
117 | | - from ads.automl.provider import OracleAutoMLProvider, BaselineAutoMLProvider |
| 116 | + from ads.automl.provider import BaselineAutoMLProvider, OracleAutoMLProvider |
118 | 117 |
|
119 | 118 | if hasattr(training_data, "transformer_pipeline"): |
120 | 119 | self.transformer_pipeline = training_data.transformer_pipeline |
@@ -154,7 +153,23 @@ def __init__( |
154 | 153 | or utils._is_dask_series(training_data.y) |
155 | 154 | else training_data.y |
156 | 155 | ) |
157 | | - self.target_name = y.name |
| 156 | + |
| 157 | + if isinstance(y, pd.DataFrame): |
| 158 | + if len(y.columns) != 1: |
| 159 | + raise ValueError("Data must be 1-dimensional.") |
| 160 | + y = y.squeeze() |
| 161 | + elif isinstance(y, np.ndarray): |
| 162 | + y = pd.Series(y) |
| 163 | + if y.name: |
| 164 | + self.target_name = str(y.name) |
| 165 | + else: |
| 166 | + y.name = str(0) |
| 167 | + self.target_name = str(0) |
| 168 | + |
| 169 | + if isinstance(X, np.ndarray): |
| 170 | + X = pd.DataFrame(X) |
| 171 | + elif isinstance(X, scipy.sparse.csr.csr_matrix): |
| 172 | + X = pd.DataFrame(X.todense()) |
158 | 173 | self.feature_names = X.columns.values |
159 | 174 | self.client = client |
160 | 175 | class_names = y.unique() |
@@ -256,8 +271,8 @@ def train(self, **kwargs): |
256 | 271 | avail_n_cores = utils.get_cpu_count() |
257 | 272 |
|
258 | 273 | warn_params = [ |
259 | | - (10 ** 5, 4, "VM.Standard.E2.4"), |
260 | | - (10 ** 6, 16, "VM.Standard.2.16"), |
| 274 | + (10**5, 4, "VM.Standard.E2.4"), |
| 275 | + (10**6, 16, "VM.Standard.2.16"), |
261 | 276 | ] |
262 | 277 |
|
263 | 278 | # train using automl and baseline |
|
0 commit comments