Skip to content

Commit a56544b

Browse files
committed
Use sanitize_array consistently
1 parent 3f2932c commit a56544b

File tree

10 files changed

+100
-42
lines changed

10 files changed

+100
-42
lines changed

doc/source/whatsnew/v3.0.0.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -674,7 +674,7 @@ Other API changes
674674
- :class:`Series` "flex" methods like :meth:`Series.add` no longer allow passing a :class:`DataFrame` for ``other``; use the DataFrame reversed method instead (:issue:`46179`)
675675
- :meth:`CategoricalIndex.append` no longer attempts to cast different-dtype indexes to the caller's dtype (:issue:`41626`)
676676
- :meth:`ExtensionDtype.construct_array_type` is now a regular method instead of a ``classmethod`` (:issue:`58663`)
677-
- Arithmetic operations between a :class:`Series`, :class:`Index`, or :class:`ExtensionArray` with a ``list`` now consistently wrap that list with ``np.array``. To do any other kind of type inference or casting, do so explicitly before operating (:issue:`62552`)
677+
- Arithmetic operations between a :class:`Series`, :class:`Index`, or :class:`ExtensionArray` with a ``list`` now consistently convert that list to an array equivalent to ``Series(my_list).array``. To do any other kind of type inference or casting, do so explicitly before operating (:issue:`62552`)
678678
- Comparison operations between :class:`Index` and :class:`Series` now consistently return :class:`Series` regardless of which object is on the left or right (:issue:`36759`)
679679
- Numpy functions like ``np.isinf`` that return a bool dtype when called on a :class:`Index` object now return a bool-dtype :class:`Index` instead of ``np.ndarray`` (:issue:`52676`)
680680

pandas/core/indexes/base.py

Lines changed: 1 addition & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -7179,19 +7179,7 @@ def _cmp_method(self, other, op):
71797179
else:
71807180
other = np.asarray(other)
71817181

7182-
if is_object_dtype(self.dtype) and isinstance(other, ExtensionArray):
7183-
# e.g. PeriodArray, Categorical
7184-
result = op(self._values, other)
7185-
7186-
elif isinstance(self._values, ExtensionArray):
7187-
result = op(self._values, other)
7188-
7189-
elif is_object_dtype(self.dtype) and not isinstance(self, ABCMultiIndex):
7190-
# don't pass MultiIndex
7191-
result = ops.comp_method_OBJECT_ARRAY(op, self._values, other)
7192-
7193-
else:
7194-
result = ops.comparison_op(self._values, other, op)
7182+
result = ops.comparison_op(self._values, other, op)
71957183

71967184
return result
71977185

pandas/core/ops/array_ops.py

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,10 @@
5353

5454
from pandas.core import roperator
5555
from pandas.core.computation import expressions
56-
from pandas.core.construction import ensure_wrapped_if_datetimelike
56+
from pandas.core.construction import (
57+
ensure_wrapped_if_datetimelike,
58+
sanitize_array,
59+
)
5760
from pandas.core.ops import missing
5861
from pandas.core.ops.dispatch import should_extension_dispatch
5962
from pandas.core.ops.invalid import invalid_comparison
@@ -263,7 +266,7 @@ def arithmetic_op(left: ArrayLike, right: Any, op):
263266
# casts integer dtypes to timedelta64 when operating with timedelta64 - GH#22390)
264267
if isinstance(right, list):
265268
# GH#62423
266-
right = np.array(right)
269+
right = sanitize_array(right, None)
267270
right = ensure_wrapped_if_datetimelike(right)
268271

269272
if (
@@ -314,8 +317,8 @@ def comparison_op(left: ArrayLike, right: Any, op) -> ArrayLike:
314317
if isinstance(rvalues, list):
315318
# We don't catch tuple here bc we may be comparing e.g. MultiIndex
316319
# to a tuple that represents a single entry, see test_compare_tuple_strs
317-
rvalues = np.asarray(rvalues)
318-
rvalues = ensure_wrapped_if_datetimelike(rvalues)
320+
rvalues = sanitize_array(rvalues, None)
321+
rvalues = ensure_wrapped_if_datetimelike(rvalues)
319322

320323
if isinstance(rvalues, (np.ndarray, ABCExtensionArray)):
321324
# TODO: make this treatment consistent across ops and classes.

pandas/core/ops/common.py

Lines changed: 14 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,6 @@
77
from functools import wraps
88
from typing import TYPE_CHECKING
99

10-
import numpy as np
11-
1210
from pandas._libs.lib import item_from_zerodim
1311
from pandas._libs.missing import is_matching_na
1412

@@ -18,6 +16,11 @@
1816
ABCSeries,
1917
)
2018

19+
from pandas.core.construction import (
20+
ensure_wrapped_if_datetimelike,
21+
sanitize_array,
22+
)
23+
2124
if TYPE_CHECKING:
2225
from collections.abc import Callable
2326

@@ -59,6 +62,7 @@ def _unpack_zerodim_and_defer(method: F, name: str) -> F:
5962
-------
6063
method
6164
"""
65+
is_logical = name.strip("_") in ["or", "xor", "and", "ror", "rxor", "rand"]
6266

6367
@wraps(method)
6468
def new_method(self, other):
@@ -69,15 +73,14 @@ def new_method(self, other):
6973
return NotImplemented
7074

7175
other = item_from_zerodim(other)
72-
73-
if isinstance(self, ABCExtensionArray):
74-
if isinstance(other, list):
75-
# See GH#62423
76-
other = np.array(other)
77-
if other.dtype.kind in "mM":
78-
from pandas.core.construction import ensure_wrapped_if_datetimelike
79-
80-
other = ensure_wrapped_if_datetimelike(other)
76+
if (
77+
isinstance(self, ABCExtensionArray)
78+
and isinstance(other, list)
79+
and not is_logical
80+
):
81+
# See GH#62423
82+
other = sanitize_array(other, None)
83+
other = ensure_wrapped_if_datetimelike(other)
8184

8285
return method(self, other)
8386

pandas/tests/arithmetic/test_numeric.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -151,6 +151,23 @@ def test_numeric_cmp_string_numexpr_path(self, box_with_array, monkeypatch):
151151

152152

153153
class TestNumericArraylikeArithmeticWithDatetimeLike:
154+
def test_mul_timedelta_list(self, box_with_array):
155+
# GH#62524
156+
box = box_with_array
157+
left = np.array([3, 4])
158+
left = tm.box_expected(left, box)
159+
160+
right = [Timedelta(days=1), Timedelta(days=2)]
161+
162+
result = left * right
163+
164+
expected = TimedeltaIndex([Timedelta(days=3), Timedelta(days=8)])
165+
expected = tm.box_expected(expected, box)
166+
tm.assert_equal(result, expected)
167+
168+
result2 = right * left
169+
tm.assert_equal(result2, expected)
170+
154171
@pytest.mark.parametrize("box_cls", [np.array, Index, Series])
155172
@pytest.mark.parametrize(
156173
"left", lefts, ids=lambda x: type(x).__name__ + str(x.dtype)

pandas/tests/arithmetic/test_timedelta64.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -274,6 +274,23 @@ class TestTimedelta64ArithmeticUnsorted:
274274
# Tests moved from type-specific test files but not
275275
# yet sorted/parametrized/de-duplicated
276276

277+
def test_td64_op_with_list(self, box_with_array):
278+
# GH#62353
279+
box = box_with_array
280+
281+
left = TimedeltaIndex(["2D", "4D"])
282+
left = tm.box_expected(left, box)
283+
284+
right = [Timestamp("2016-01-01"), Timestamp("2016-02-01")]
285+
286+
result = left + right
287+
expected = DatetimeIndex(["2016-01-03", "2016-02-05"], dtype="M8[ns]")
288+
expected = tm.box_expected(expected, box)
289+
tm.assert_equal(result, expected)
290+
291+
result2 = right + left
292+
tm.assert_equal(result2, expected)
293+
277294
def test_ufunc_coercions(self):
278295
# normal ops are also tested in tseries/test_timedeltas.py
279296
idx = TimedeltaIndex(["2h", "4h", "6h", "8h", "10h"], freq="2h", name="x")

pandas/tests/arrays/string_/test_string.py

Lines changed: 29 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -218,7 +218,15 @@ def test_add_2d(dtype, request):
218218
s + b
219219

220220

221-
def test_add_sequence(dtype):
221+
def test_add_sequence(dtype, request):
222+
if dtype.storage == "python" and dtype.na_value is np.nan:
223+
mark = pytest.mark.xfail(
224+
reason="As of GH#62522, the list gets wrapped with sanitize_array, "
225+
"which casts to a higher-priority StringArray, so we get "
226+
"NotImplemented."
227+
)
228+
request.applymarker(mark)
229+
222230
a = pd.array(["a", "b", None, None], dtype=dtype)
223231
other = ["x", None, "y", None]
224232

@@ -391,10 +399,18 @@ def test_comparison_methods_array_arrow_extension(comparison_op, dtype2):
391399
tm.assert_extension_array_equal(result, expected)
392400

393401

394-
def test_comparison_methods_list(comparison_op, dtype):
402+
@pytest.mark.parametrize("box", [pd.array, pd.Index, pd.Series])
403+
def test_comparison_methods_list(comparison_op, dtype, box, request):
404+
if box is pd.array and dtype.na_value is np.nan:
405+
mark = pytest.mark.xfail(
406+
reason="After wrapping list, op returns NotImplemented, see GH#62522"
407+
)
408+
request.applymarker(mark)
409+
395410
op_name = f"__{comparison_op.__name__}__"
396411

397-
a = pd.array(["a", None, "c"], dtype=dtype)
412+
a = box(pd.array(["a", None, "c"], dtype=dtype))
413+
item = "c"
398414
other = [None, None, "c"]
399415
result = comparison_op(a, other)
400416

@@ -407,15 +423,21 @@ def test_comparison_methods_list(comparison_op, dtype):
407423
expected = np.array([True, True, False])
408424
else:
409425
expected = np.array([False, False, False])
410-
expected[-1] = getattr(other[-1], op_name)(a[-1])
411-
tm.assert_numpy_array_equal(result, expected)
426+
expected[-1] = getattr(other[-1], op_name)(item)
427+
if box is not pd.Index:
428+
# if GH#62766 is addressed this check can be removed
429+
expected = tm.box_expected(expected, box)
430+
tm.assert_equal(result, expected)
412431

413432
else:
414433
expected_dtype = "boolean[pyarrow]" if dtype.storage == "pyarrow" else "boolean"
415434
expected = np.full(len(a), fill_value=None, dtype="object")
416-
expected[-1] = getattr(other[-1], op_name)(a[-1])
435+
expected[-1] = getattr(other[-1], op_name)(item)
417436
expected = pd.array(expected, dtype=expected_dtype)
418-
tm.assert_extension_array_equal(result, expected)
437+
if box is not pd.Index:
438+
# if GH#62766 is addressed this check can be removed
439+
expected = tm.box_expected(expected, box)
440+
tm.assert_equal(result, expected)
419441

420442

421443
def test_constructor_raises(cls):

pandas/tests/indexes/categorical/test_category.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -326,10 +326,9 @@ def test_disallow_addsub_ops(self, func, op_name):
326326
cat_or_list = "'(Categorical|list)' and '(Categorical|list)'"
327327
msg = "|".join(
328328
[
329-
f"cannot perform {op_name} with this index type: CategoricalIndex",
330-
"can only concatenate list",
331329
rf"unsupported operand type\(s\) for [\+-]: {cat_or_list}",
332-
"Object with dtype category cannot perform the numpy op ",
330+
"operation 'r?(add|sub)' not supported for dtype 'str' "
331+
"with dtype 'category'",
333332
]
334333
)
335334
with pytest.raises(TypeError, match=msg):

pandas/tests/series/test_arithmetic.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -358,10 +358,10 @@ def test_mask_div_propagate_na_for_non_na_dtype(self):
358358

359359
@pytest.mark.parametrize("val", [3, 3.5])
360360
def test_add_list_to_masked_array(self, val):
361-
# GH#22962
361+
# GH#22962, behavior changed by GH#62552
362362
ser = Series([1, None, 3], dtype="Int64")
363363
result = ser + [1, None, val]
364-
expected = Series([2, pd.NA, 3 + val], dtype=object)
364+
expected = Series([2, pd.NA, 3 + val], dtype="Float64")
365365
tm.assert_series_equal(result, expected)
366366

367367
result = [1, None, val] + ser

pandas/tests/series/test_constructors.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2216,7 +2216,16 @@ def test_np_string_array_object_cast(self, data):
22162216
arr = np.array(data, dtype=StringDType())
22172217
res = Series(arr)
22182218
assert res.dtype == np.object_
2219-
assert (res == data).all()
2219+
2220+
if data[-1] is np.nan:
2221+
# as of GH#62522 the comparison op for `res==data` casts data
2222+
# using sanitize_array, which casts to 'str' dtype, which does not
2223+
# consider string 'nan' to be equal to np.nan,
2224+
# (whereas numpy apparently does treat them as equal).
2225+
assert (res.iloc[:-1] == data[:-1]).all()
2226+
assert res.iloc[-1] == "nan"
2227+
else:
2228+
assert (res == data).all()
22202229

22212230

22222231
class TestSeriesConstructorInternals:

0 commit comments

Comments
 (0)