3131 DatasetDefaults ,
3232 deprecate_default_value ,
3333 deprecate_variable ,
34+ get_dataset ,
35+ infer_target_type ,
3436)
3537from ads .dataset .label_encoder import DataFrameLabelEncoder
3638from ads .dataset .pipeline import TransformerPipeline
@@ -223,7 +225,8 @@ def _head(self, n=5):
223225
224226 Examples
225227 --------
226- >>> ds = DatasetFactory.open("classfication_data.csv")
228+ >>> import pandas as pd
229+ >>> ds = ADSDataset.from_dataframe(pd.read_csv("classification_data.csv"))
227230 >>> ds.head()
228231 * displays the first 5 rows of the dataset, just as the traditional head() function would *
229232 """
@@ -298,7 +301,8 @@ def call(self, func, *args, sample_size=None, **kwargs):
298301
299302 Examples
300303 --------
301- >>> ds = DatasetFactory.open("classfication_data.csv")
304+ >>> import pandas as pd
305+ >>> ds = ADSDataset.from_dataframe(pd.read_csv("classification_data.csv"))
302306 >>> def f1(df):
303307 ... return df.sum(axis=0)
304308 >>> sum_ds = ds.call(f1)
@@ -340,20 +344,19 @@ def set_target(self, target, type_discovery=True, target_type=None):
340344
341345 Examples
342346 --------
343- >>> ds = DatasetFactory.open("classfication_data.csv")
347+ >>> import pandas as pd
348+ >>> ds = ADSDataset.from_dataframe(pd.read_csv("classification_data.csv"))
344349 >>> ds_with_target= ds.set_target("target_class")
345350 """
346- from ads .dataset .factory import DatasetFactory
347-
348351 if target_type :
349352 target_series = self .sampled_df [target ].astype (target_type )
350353 else :
351354 target_series = self .sampled_df [target ]
352- return DatasetFactory . _get_dataset (
355+ return get_dataset (
353356 self .df ,
354357 self .sampled_df ,
355358 target ,
356- DatasetFactory . infer_target_type (target , target_series , type_discovery ),
359+ infer_target_type (target , target_series , type_discovery ),
357360 self .shape ,
358361 ** self .init_kwargs ,
359362 )
@@ -396,7 +399,8 @@ def to_pandas(self, filter=None, frac=None, include_transformer_pipeline=False):
396399
397400 Examples
398401 --------
399- >>> ds = DatasetFactory.open("data.csv")
402+ >>> import pandas as pd
403+ >>> ds = ADSDataset.from_dataframe(pd.read_csv("data.csv"))
400404 >>> ds_as_df = ds.to_pandas()
401405
402406 Notes
@@ -462,7 +466,8 @@ def to_dask(
462466
463467 Examples
464468 --------
465- >>> ds = DatasetFactory.open("data.csv")
469+ >>> import pandas as pd
470+ >>> ds = ADSDataset.from_dataframe(pd.read_csv("data.csv"))
466471 >>> ds_dask = ds.to_dask()
467472
468473 Notes
@@ -521,7 +526,8 @@ def to_h2o(self, filter=None, frac=None, include_transformer_pipeline=False):
521526
522527 Examples
523528 --------
524- >>> ds = DatasetFactory.open("data.csv")
529+ >>> import pandas as pd
530+ >>> ds = ADSDataset.from_dataframe(pd.read_csv("data.csv"))
525531 >>> ds_as_h2o = ds.to_h2o()
526532
527533 Notes
@@ -578,7 +584,8 @@ def to_xgb(self, filter=None, frac=None, include_transformer_pipeline=False):
578584
579585 Examples
580586 --------
581- >>> ds = DatasetFactory.open("data.csv")
587+ >>> import pandas as pd
588+ >>> ds = ADSDataset.from_dataframe(pd.read_csv("data.csv"))
582589 >>> xgb_dmat = ds.to_xgb()
583590
584591 Notes
@@ -617,7 +624,8 @@ def sample(self, frac=None, random_state=utils.random_state):
617624
618625 Examples
619626 --------
620- >>> ds = DatasetFactory.open("data.csv")
627+ >>> import pandas as pd
628+ >>> ds = ADSDataset.from_dataframe(pd.read_csv("data.csv"))
621629 >>> ds_sample = ds.sample()
622630 """
623631 df = self .df .sample (frac = frac , random_state = random_state )
@@ -644,7 +652,8 @@ def drop_columns(self, columns):
644652
645653 Examples
646654 --------
647- >>> ds = DatasetFactory.open("data.csv")
655+ >>> import pandas as pd
656+ >>> ds = ADSDataset.from_dataframe(pd.read_csv("data.csv"))
648657 >>> ds_smaller = ds.drop_columns(['col1', 'col2'])
649658 """
650659 self ._validate_feature (columns )
@@ -671,7 +680,8 @@ def assign_column(self, column, arg):
671680
672681 Examples
673682 --------
674- >>> ds = DatasetFactory.open("data.csv")
683+ >>> import pandas as pd
684+ >>> ds = ADSDataset.from_dataframe(pd.read_csv("data.csv"))
675685 >>> ds_same_size = ds.assign_column('target', lambda x: x > 15 if x is not None else None)
676686 >>> ds_bigger = ds.assign_column('new_col', np.arange(ds.shape[0]))
677687 """
@@ -746,7 +756,8 @@ def rename_columns(self, columns):
746756
747757 Examples
748758 --------
749- >>> ds = DatasetFactory.open("data.csv")
759+ >>> import pandas as pd
760+ >>> ds = ADSDataset.from_dataframe(pd.read_csv("data.csv"))
750761 >>> ds_renamed = ds.rename_columns({'col1': 'target'})
751762 """
752763 if isinstance (columns , list ):
@@ -770,7 +781,8 @@ def set_name(self, name):
770781
771782 Examples
772783 --------
773- >>> ds = DatasetFactory.open("data1.csv")
784+ >>> import pandas as pd
785+ >>> ds = ADSDataset.from_dataframe(pd.read_csv("data1.csv"))
774786 >>> ds_renamed = ds.set_name("dataset1")
775787 """
776788 self .name = name
@@ -788,7 +800,8 @@ def set_description(self, description):
788800
789801 Examples
790802 --------
791- >>> ds = DatasetFactory.open("data1.csv")
803+ >>> import pandas as pd
804+ >>> ds = ADSDataset.from_dataframe(pd.read_csv("data1.csv"))
792805 >>> ds_renamed = ds.set_description("dataset1 is from 'data1.csv'")
793806 """
794807 self .description = description
@@ -821,7 +834,8 @@ def snapshot(self, snapshot_dir=None, name="", storage_options=None):
821834
822835 Examples
823836 --------
824- >>> ds = DatasetFactory.open("data.csv")
837+ >>> import pandas as pd
838+ >>> ds = ADSDataset.from_dataframe(pd.read_csv("data.csv"))
825839 >>> ds_uri = ds.snapshot()
826840 """
827841 if snapshot_dir is None :
@@ -873,7 +887,8 @@ def to_csv(self, path, storage_options=None, **kwargs):
873887
874888 Examples
875889 --------
876- >>> ds = DatasetFactory.open("data.csv")
890+ >>> import pandas as pd
891+ >>> ds = ADSDataset.from_dataframe(pd.read_csv("data.csv"))
877892 >>> [ds_link] = ds.to_csv("my/path.csv")
878893 """
879894 if storage_options is None :
@@ -900,7 +915,8 @@ def to_parquet(self, path, storage_options=None, **kwargs):
900915
901916 Examples
902917 --------
903- >>> ds = DatasetFactory.open("data.csv")
918+ >>> import pandas as pd
919+ >>> ds = ADSDataset.from_dataframe(pd.read_csv("data.csv"))
904920 >>> ds.to_parquet("my/path")
905921 """
906922 if storage_options is None :
@@ -927,7 +943,8 @@ def to_json(self, path, storage_options=None, **kwargs):
927943
928944 Examples
929945 --------
930- >>> ds = DatasetFactory.open("data.csv")
946+ >>> import pandas as pd
947+ >>> ds = ADSDataset.from_dataframe(pd.read_csv("data.csv"))
931948 >>> ds.to_json("my/path.json")
932949 """
933950 if storage_options is None :
@@ -962,7 +979,8 @@ def to_hdf(
962979
963980 Examples
964981 --------
965- >>> ds = DatasetFactory.open("data.csv")
982+ >>> import pandas as pd
983+ >>> ds = ADSDataset.from_dataframe(pd.read_csv("data.csv"))
966984 >>> ds.to_hdf(path="my/path.h5", key="df")
967985 """
968986 if storage_options is None :
@@ -1035,7 +1053,13 @@ def to_avro(self, path, schema=None, storage_options=None, **kwargs):
10351053
10361054 Examples
10371055 --------
1038- >>> ds = DatasetFactory.open("data.avro")
1056+ >>> import pandas
1057+ >>> import fastavro
1058+ >>> with open("data.avro", "rb") as fp:
1059+ ...     reader = fastavro.reader(fp)
1060+ ...     records = [r for r in reader]
1061+ >>> df = pandas.DataFrame.from_records(records)
1062+ >>> ds = ADSDataset.from_dataframe(df)
10391063 >>> ds.to_avro("my/path.avro")
10401064 """
10411065 # Get the row by row formatting
@@ -1101,7 +1125,8 @@ def astype(self, types):
11011125
11021126 Examples
11031127 --------
1104- >>> ds = DatasetFactory.open("data.csv")
1128+ >>> import pandas as pd
1129+ >>> ds = ADSDataset.from_dataframe(pd.read_csv("data.csv"))
11051130 >>> ds_reformatted = ds.astype({"target": "categorical"})
11061131 """
11071132 return self .__getattr__ ("astype" )(helper .map_types (types ))
@@ -1119,8 +1144,10 @@ def merge(self, data, **kwargs):
11191144
11201145 Examples
11211146 --------
1122- >>> ds1 = DatasetFactory.open("data1.csv")
1123- >>> ds2 = DatasetFactory.open("data2.csv")
1147+ >>> import pandas as pd
1148+ >>> df1 = pd.read_csv("data1.csv")
1149+ >>> df2 = pd.read_csv("data2.csv")
1150+ >>> ds1, ds2 = ADSDataset.from_dataframe(df1), ADSDataset.from_dataframe(df2)
11241151 >>> ds_12 = ds1.merge(ds2)
11251152 """
11261153 assert isinstance (data , pd .DataFrame ) or isinstance (
@@ -1275,9 +1302,8 @@ def _build_new_dataset(
12751302 if progress :
12761303 progress .update ("Building new dataset" )
12771304 target_type = self .target .type if target_type is None else target_type
1278- from ads .dataset .factory import DatasetFactory
12791305
1280- new_ds = DatasetFactory . _get_dataset (
1306+ new_ds = get_dataset (
12811307 df ,
12821308 sampled_df ,
12831309 target ,
0 commit comments