From e3a0a1eb2ed89a8e69e9df1b8cf1d72079c8dc07 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Sun, 2 Nov 2025 11:02:48 -0800 Subject: [PATCH 1/3] Reduce deep copies in the test suite --- pandas/conftest.py | 15 ++++++--------- pandas/tests/arrays/interval/test_interval.py | 8 +------- pandas/tests/copy_view/test_methods.py | 2 +- 3 files changed, 8 insertions(+), 17 deletions(-) diff --git a/pandas/conftest.py b/pandas/conftest.py index 82501cae4634d..7321bf1d1088b 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -720,7 +720,7 @@ def index(request): - ... """ # copy to avoid mutation, e.g. setting .name - return indices_dict[request.param].copy() + return indices_dict[request.param].copy(deep=False) @pytest.fixture( @@ -733,7 +733,7 @@ def index_flat(request): index fixture, but excluding MultiIndex cases. """ key = request.param - return indices_dict[key].copy() + return indices_dict[key].copy(deep=False) @pytest.fixture( @@ -756,11 +756,7 @@ def index_with_missing(request): MultiIndex is excluded because isna() is not defined for MultiIndex. """ - - # GH 35538. Use deep copy to avoid illusive bug on np-dev - # GHA pipeline that writes into indices_dict despite copy - ind = indices_dict[request.param].copy(deep=True) - vals = ind.values.copy() + ind = indices_dict[request.param] if request.param in ["tuples", "mi-with-dt64tz-level", "multi"]: # For setting missing values in the top level of MultiIndex vals = ind.tolist() @@ -768,6 +764,7 @@ def index_with_missing(request): vals[-1] = (None,) + vals[-1][1:] return MultiIndex.from_tuples(vals) else: + vals = ind.values.copy() vals[0] = None vals[-1] = None return type(ind)(vals) @@ -848,7 +845,7 @@ def index_or_series_obj(request): Fixture for tests on indexes, series and series with a narrow dtype copy to avoid mutation, e.g. setting .name """ - return _index_or_series_objs[request.param].copy(deep=True) + return _index_or_series_objs[request.param].copy(deep=False) _typ_objects_series = { @@ -871,7 +868,7 @@ def index_or_series_memory_obj(request): series with empty objects type copy to avoid mutation, e.g. setting .name """ - return _index_or_series_memory_objs[request.param].copy(deep=True) + return _index_or_series_memory_objs[request.param].copy(deep=False) # ---------------------------------------------------------------- diff --git a/pandas/tests/arrays/interval/test_interval.py b/pandas/tests/arrays/interval/test_interval.py index 8e13dcf25ceba..82b9907b3dffc 100644 --- a/pandas/tests/arrays/interval/test_interval.py +++ b/pandas/tests/arrays/interval/test_interval.py @@ -115,9 +115,7 @@ def test_shift_datetime(self): class TestSetitem: def test_set_na(self, left_right_dtypes): left, right = left_right_dtypes - left = left.copy(deep=True) - right = right.copy(deep=True) - result = IntervalArray.from_arrays(left, right) + result = IntervalArray.from_arrays(left, right, copy=True) if result.dtype.subtype.kind not in ["m", "M"]: msg = "'value' should be an interval type, got <.*NaTType'> instead." @@ -168,8 +166,6 @@ def test_setitem_mismatched_closed(self): class TestReductions: def test_min_max_invalid_axis(self, left_right_dtypes): left, right = left_right_dtypes - left = left.copy(deep=True) - right = right.copy(deep=True) arr = IntervalArray.from_arrays(left, right) msg = "`axis` must be fewer than the number of dimensions" @@ -188,8 +184,6 @@ def test_min_max_invalid_axis(self, left_right_dtypes): def test_min_max(self, left_right_dtypes, index_or_series_or_array): # GH#44746 left, right = left_right_dtypes - left = left.copy(deep=True) - right = right.copy(deep=True) arr = IntervalArray.from_arrays(left, right) # The expected results below are only valid if monotonic diff --git a/pandas/tests/copy_view/test_methods.py b/pandas/tests/copy_view/test_methods.py index 8abecd13c7038..49ce689e5f517 100644 --- a/pandas/tests/copy_view/test_methods.py +++ b/pandas/tests/copy_view/test_methods.py @@ -560,7 +560,7 @@ def test_to_frame(): ], ids=["shallow-copy", "reset_index", "rename", "select_dtypes"], ) -def test_chained_methods(request, method, idx): +def test_chained_methods(method, idx): df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]}) df_orig = df.copy() From d6b08fd93222bf10ba9085c7346d40448ceb130c Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Mon, 3 Nov 2025 13:08:19 -0800 Subject: [PATCH 2/3] Move the copies to the tests --- pandas/tests/base/test_fillna.py | 1 + pandas/tests/base/test_unique.py | 2 ++ pandas/tests/base/test_value_counts.py | 6 +++--- pandas/tests/generic/test_to_xarray.py | 4 ++-- pandas/tests/indexes/test_common.py | 4 ++-- 5 files changed, 10 insertions(+), 7 deletions(-) diff --git a/pandas/tests/base/test_fillna.py b/pandas/tests/base/test_fillna.py index 8c56bcc169d8e..9a10338776d88 100644 --- a/pandas/tests/base/test_fillna.py +++ b/pandas/tests/base/test_fillna.py @@ -44,6 +44,7 @@ def test_fillna_null(null_obj, index_or_series_obj): elif isinstance(obj, MultiIndex): pytest.skip(f"MultiIndex can't hold '{null_obj}'") + obj = obj.copy(deep=True) values = obj._values fill_value = values[0] expected = values.copy() diff --git a/pandas/tests/base/test_unique.py b/pandas/tests/base/test_unique.py index 6e55531bbce8f..e588ad8e5f283 100644 --- a/pandas/tests/base/test_unique.py +++ b/pandas/tests/base/test_unique.py @@ -46,6 +46,7 @@ def test_unique_null(null_obj, index_or_series_obj, using_nan_is_na): ): pytest.skip("NaN is not a valid NA for this dtype.") + obj = obj.copy(deep=True) values = obj._values values[0:2] = null_obj @@ -87,6 +88,7 @@ def test_nunique_null(null_obj, index_or_series_obj): elif isinstance(obj, pd.MultiIndex): pytest.skip(f"MultiIndex can't hold '{null_obj}'") + obj = obj.copy(deep=True) values = obj._values values[0:2] = null_obj diff --git a/pandas/tests/base/test_value_counts.py b/pandas/tests/base/test_value_counts.py index f642d26c32f5d..d792c36bd8da5 100644 --- a/pandas/tests/base/test_value_counts.py +++ b/pandas/tests/base/test_value_counts.py @@ -55,15 +55,15 @@ def test_value_counts(index_or_series_obj): @pytest.mark.filterwarnings(r"ignore:PeriodDtype\[B\] is deprecated:FutureWarning") def test_value_counts_null(null_obj, index_or_series_obj): orig = index_or_series_obj - obj = orig.copy() - if not allow_na_ops(obj): + if not allow_na_ops(orig): pytest.skip("type doesn't allow for NA operations") - elif len(obj) < 1: + elif len(orig) < 1: pytest.skip("Test doesn't make sense on empty data") elif isinstance(orig, MultiIndex): pytest.skip(f"MultiIndex can't hold '{null_obj}'") + obj = orig.copy(deep=True) values = obj._values values[0:2] = null_obj diff --git a/pandas/tests/generic/test_to_xarray.py b/pandas/tests/generic/test_to_xarray.py index ea1cc2e1e0e2a..f368c0bebf979 100644 --- a/pandas/tests/generic/test_to_xarray.py +++ b/pandas/tests/generic/test_to_xarray.py @@ -31,7 +31,7 @@ def df(self): ) def test_to_xarray_index_types(self, index_flat, df, request): - index = index_flat + index = index_flat.copy(deep=False) # MultiIndex is tested in test_to_xarray_with_multiindex if len(index) == 0: pytest.skip("Test doesn't make sense for empty index") @@ -84,7 +84,7 @@ def test_to_xarray_with_multiindex(self, df, using_infer_string): class TestSeriesToXArray: def test_to_xarray_index_types(self, index_flat, request): # MultiIndex is tested in test_to_xarray_with_multiindex - index = index_flat + index = index_flat.copy(deep=False) ser = Series(range(len(index)), index=index, dtype="int64") ser.index.name = "foo" diff --git a/pandas/tests/indexes/test_common.py b/pandas/tests/indexes/test_common.py index a842d174a4894..77caade83280e 100644 --- a/pandas/tests/indexes/test_common.py +++ b/pandas/tests/indexes/test_common.py @@ -106,7 +106,7 @@ def test_to_flat_index(self, index_flat): def test_set_name_methods(self, index_flat): # MultiIndex tested separately - index = index_flat + index = index_flat.copy(deep=False) new_name = "This is the new name for this index" original_name = index.name @@ -230,7 +230,7 @@ def test_unique(self, index_flat): if not index._can_hold_na: pytest.skip("Skip na-check if index cannot hold na") - vals = index._values[[0] * 5] + vals = index._values.copy()[[0] * 5] vals[0] = np.nan vals_unique = vals[:2] From cbab3f5f234010873348583b171efb1ca6351ddd Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Tue, 4 Nov 2025 10:07:41 -0800 Subject: [PATCH 3/3] Remove possibly unneeded shallow copies in tests --- pandas/tests/generic/test_to_xarray.py | 4 ++-- pandas/tests/indexes/test_common.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/tests/generic/test_to_xarray.py b/pandas/tests/generic/test_to_xarray.py index f368c0bebf979..ea1cc2e1e0e2a 100644 --- a/pandas/tests/generic/test_to_xarray.py +++ b/pandas/tests/generic/test_to_xarray.py @@ -31,7 +31,7 @@ def df(self): ) def test_to_xarray_index_types(self, index_flat, df, request): - index = index_flat.copy(deep=False) + index = index_flat # MultiIndex is tested in test_to_xarray_with_multiindex if len(index) == 0: pytest.skip("Test doesn't make sense for empty index") @@ -84,7 +84,7 @@ def test_to_xarray_with_multiindex(self, df, using_infer_string): class TestSeriesToXArray: def test_to_xarray_index_types(self, index_flat, request): # MultiIndex is tested in test_to_xarray_with_multiindex - index = index_flat.copy(deep=False) + index = index_flat ser = Series(range(len(index)), index=index, dtype="int64") ser.index.name = "foo" diff --git a/pandas/tests/indexes/test_common.py b/pandas/tests/indexes/test_common.py index 77caade83280e..42425f4fcdcc2 100644 --- a/pandas/tests/indexes/test_common.py +++ b/pandas/tests/indexes/test_common.py @@ -106,7 +106,7 @@ def test_to_flat_index(self, index_flat): def test_set_name_methods(self, index_flat): # MultiIndex tested separately - index = index_flat.copy(deep=False) + index = index_flat new_name = "This is the new name for this index" original_name = index.name