Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v3.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -676,6 +676,7 @@ Other API changes
- :class:`Series` "flex" methods like :meth:`Series.add` no longer allow passing a :class:`DataFrame` for ``other``; use the DataFrame reversed method instead (:issue:`46179`)
- :meth:`CategoricalIndex.append` no longer attempts to cast different-dtype indexes to the caller's dtype (:issue:`41626`)
- :meth:`ExtensionDtype.construct_array_type` is now a regular method instead of a ``classmethod`` (:issue:`58663`)
- Arithmetic operations between a :class:`Series`, :class:`Index`, or :class:`ExtensionArray` and a ``list`` now consistently wrap that list in an array equivalent to ``Series(my_list).array``. To apply any other kind of type inference or casting, do so explicitly before operating (:issue:`62552`)
- Comparison operations between :class:`Index` and :class:`Series` now consistently return :class:`Series` regardless of which object is on the left or right (:issue:`36759`)
- NumPy functions like ``np.isinf`` that return a bool dtype when called on an :class:`Index` object now return a bool-dtype :class:`Index` instead of ``np.ndarray`` (:issue:`52676`)

Expand Down
3 changes: 3 additions & 0 deletions pandas/core/arrays/masked.py
Original file line number Diff line number Diff line change
Expand Up @@ -935,6 +935,9 @@ def _maybe_mask_result(

return IntegerArray(result, mask, copy=False)

elif result.dtype == object:
result[mask] = self.dtype.na_value
return result
else:
result[mask] = np.nan
return result
Expand Down
14 changes: 1 addition & 13 deletions pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -7272,19 +7272,7 @@ def _cmp_method(self, other, op):
else:
other = np.asarray(other)

if is_object_dtype(self.dtype) and isinstance(other, ExtensionArray):
# e.g. PeriodArray, Categorical
result = op(self._values, other)

elif isinstance(self._values, ExtensionArray):
result = op(self._values, other)

elif is_object_dtype(self.dtype) and not isinstance(self, ABCMultiIndex):
# don't pass MultiIndex
result = ops.comp_method_OBJECT_ARRAY(op, self._values, other)

else:
result = ops.comparison_op(self._values, other, op)
result = ops.comparison_op(self._values, other, op)

return result

Expand Down
12 changes: 10 additions & 2 deletions pandas/core/ops/array_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,10 @@

from pandas.core import roperator
from pandas.core.computation import expressions
from pandas.core.construction import ensure_wrapped_if_datetimelike
from pandas.core.construction import (
ensure_wrapped_if_datetimelike,
sanitize_array,
)
from pandas.core.ops import missing
from pandas.core.ops.dispatch import should_extension_dispatch
from pandas.core.ops.invalid import invalid_comparison
Expand Down Expand Up @@ -261,6 +264,10 @@ def arithmetic_op(left: ArrayLike, right: Any, op):
# and `maybe_prepare_scalar_for_op` has already been called on `right`
# We need to special-case datetime64/timedelta64 dtypes (e.g. because numpy
# casts integer dtypes to timedelta64 when operating with timedelta64 - GH#22390)
if isinstance(right, list):
# GH#62423
right = sanitize_array(right, None)
right = ensure_wrapped_if_datetimelike(right)

if (
should_extension_dispatch(left, right)
Expand Down Expand Up @@ -310,7 +317,8 @@ def comparison_op(left: ArrayLike, right: Any, op) -> ArrayLike:
if isinstance(rvalues, list):
# We don't catch tuple here bc we may be comparing e.g. MultiIndex
# to a tuple that represents a single entry, see test_compare_tuple_strs
rvalues = np.asarray(rvalues)
rvalues = sanitize_array(rvalues, None)
rvalues = ensure_wrapped_if_datetimelike(rvalues)

if isinstance(rvalues, (np.ndarray, ABCExtensionArray)):
# TODO: make this treatment consistent across ops and classes.
Expand Down
15 changes: 15 additions & 0 deletions pandas/core/ops/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,16 @@
from pandas._libs.missing import is_matching_na

from pandas.core.dtypes.generic import (
ABCExtensionArray,
ABCIndex,
ABCSeries,
)

from pandas.core.construction import (
ensure_wrapped_if_datetimelike,
sanitize_array,
)

if TYPE_CHECKING:
from collections.abc import Callable

Expand Down Expand Up @@ -56,6 +62,7 @@ def _unpack_zerodim_and_defer(method: F, name: str) -> F:
-------
method
"""
is_logical = name.strip("_") in ["or", "xor", "and", "ror", "rxor", "rand"]

@wraps(method)
def new_method(self, other):
Expand All @@ -66,6 +73,14 @@ def new_method(self, other):
return NotImplemented

other = item_from_zerodim(other)
if (
isinstance(self, ABCExtensionArray)
and isinstance(other, list)
and not is_logical
):
# See GH#62423
other = sanitize_array(other, None)
other = ensure_wrapped_if_datetimelike(other)

return method(self, other)

Expand Down
17 changes: 17 additions & 0 deletions pandas/tests/arithmetic/test_numeric.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,23 @@ def test_numeric_cmp_string_numexpr_path(self, box_with_array, monkeypatch):


class TestNumericArraylikeArithmeticWithDatetimeLike:
def test_mul_timedelta_list(self, box_with_array):
# Multiplying a boxed integer array by a plain Python list of Timedelta
# objects should produce timedelta results, in either operand order.
# `box_with_array` is a fixture defined outside this view; it is used
# below as the container passed to tm.box_expected.
# GH#62524
box = box_with_array
left = np.array([3, 4])
left = tm.box_expected(left, box)

# Deliberately a bare list, not an array/Index: the list operand is
# the case under test.
right = [Timedelta(days=1), Timedelta(days=2)]

result = left * right

# Elementwise products: 3 * 1 day = 3 days, 4 * 2 days = 8 days.
expected = TimedeltaIndex([Timedelta(days=3), Timedelta(days=8)])
expected = tm.box_expected(expected, box)
tm.assert_equal(result, expected)

# Reflected form (list on the left) must give the same result.
result2 = right * left
tm.assert_equal(result2, expected)

@pytest.mark.parametrize("box_cls", [np.array, Index, Series])
@pytest.mark.parametrize(
"left", lefts, ids=lambda x: type(x).__name__ + str(x.dtype)
Expand Down
48 changes: 38 additions & 10 deletions pandas/tests/arithmetic/test_string.py
Original file line number Diff line number Diff line change
Expand Up @@ -213,9 +213,22 @@ def test_add_2d(any_string_dtype, request):
s + b


def test_add_sequence(any_string_dtype, request):
def test_add_sequence(any_string_dtype, request, using_infer_string):
dtype = any_string_dtype
if dtype == np.dtype(object):
if (
dtype != object
and dtype.storage == "python"
and dtype.na_value is np.nan
and HAS_PYARROW
and using_infer_string
):
mark = pytest.mark.xfail(
reason="As of GH#62522, the list gets wrapped with sanitize_array, "
"which casts to a higher-priority StringArray, so we get "
"NotImplemented."
)
request.applymarker(mark)
if dtype == np.dtype(object) and using_infer_string:
mark = pytest.mark.xfail(reason="Cannot broadcast list")
request.applymarker(mark)

Expand Down Expand Up @@ -415,30 +428,45 @@ def test_comparison_methods_array_arrow_extension(comparison_op, any_string_dtyp
tm.assert_extension_array_equal(result, expected)


def test_comparison_methods_list(comparison_op, any_string_dtype):
@pytest.mark.parametrize("box", [pd.array, pd.Index, Series])
def test_comparison_methods_list(comparison_op, any_string_dtype, box, request):
dtype = any_string_dtype

if box is pd.array and dtype != object and dtype.na_value is np.nan:
mark = pytest.mark.xfail(
reason="After wrapping list, op returns NotImplemented, see GH#62522"
)
request.applymarker(mark)

op_name = f"__{comparison_op.__name__}__"

a = pd.array(["a", None, "c"], dtype=dtype)
a = box(pd.array(["a", None, "c"], dtype=dtype))
item = "c"
other = [None, None, "c"]
result = comparison_op(a, other)

# ensure operation is commutative
result2 = comparison_op(other, a)
tm.assert_equal(result, result2)

if dtype == object or dtype.na_value is np.nan:
if dtype == np.dtype(object) or dtype.na_value is np.nan:
if operator.ne == comparison_op:
expected = np.array([True, True, False])
else:
expected = np.array([False, False, False])
expected[-1] = getattr(other[-1], op_name)(a[-1])
result = extract_array(result, extract_numpy=True)
tm.assert_numpy_array_equal(result, expected)
expected[-1] = getattr(item, op_name)(item)
if box is not pd.Index:
# if GH#62766 is addressed this check can be removed
expected = box(expected, dtype=expected.dtype)
tm.assert_equal(result, expected)

else:
expected_dtype = "boolean[pyarrow]" if dtype.storage == "pyarrow" else "boolean"
expected = np.full(len(a), fill_value=None, dtype="object")
expected[-1] = getattr(other[-1], op_name)(a[-1])
expected[-1] = getattr(item, op_name)(item)
expected = pd.array(expected, dtype=expected_dtype)
tm.assert_extension_array_equal(result, expected)
expected = extract_array(expected, extract_numpy=True)
if box is not pd.Index:
# if GH#62766 is addressed this check can be removed
expected = tm.box_expected(expected, box)
tm.assert_equal(result, expected)
17 changes: 17 additions & 0 deletions pandas/tests/arithmetic/test_timedelta64.py
Original file line number Diff line number Diff line change
Expand Up @@ -274,6 +274,23 @@ class TestTimedelta64ArithmeticUnsorted:
# Tests moved from type-specific test files but not
# yet sorted/parametrized/de-duplicated

def test_td64_op_with_list(self, box_with_array):
# Adding a plain Python list of Timestamp objects to a boxed
# timedelta64 container should produce datetime64 results, in either
# operand order.
# `box_with_array` is a fixture defined outside this view; it is used
# below as the container passed to tm.box_expected.
# GH#62353
box = box_with_array

left = TimedeltaIndex(["2D", "4D"])
left = tm.box_expected(left, box)

# Deliberately a bare list, not a DatetimeIndex: the list operand is
# the case under test.
right = [Timestamp("2016-01-01"), Timestamp("2016-02-01")]

result = left + right
# 2016-01-01 + 2 days and 2016-02-01 + 4 days.
expected = DatetimeIndex(["2016-01-03", "2016-02-05"], dtype="M8[ns]")
expected = tm.box_expected(expected, box)
tm.assert_equal(result, expected)

# Reflected form (list on the left) must give the same result.
result2 = right + left
tm.assert_equal(result2, expected)

def test_ufunc_coercions(self):
# normal ops are also tested in tseries/test_timedeltas.py
idx = TimedeltaIndex(["2h", "4h", "6h", "8h", "10h"], freq="2h", name="x")
Expand Down
4 changes: 0 additions & 4 deletions pandas/tests/arrays/integer/test_arithmetic.py
Original file line number Diff line number Diff line change
Expand Up @@ -201,10 +201,6 @@ def test_error_invalid_values(data, all_arithmetic_operators):
]: # (data[~data.isna()] >= 0).all():
res = ops(str_ser)
expected = pd.Series(["foo" * x for x in data], index=s.index)
expected = expected.fillna(np.nan)
# TODO: doing this fillna to keep tests passing as we make
# assert_almost_equal stricter, but the expected with pd.NA seems
# more-correct than np.nan here.
tm.assert_series_equal(res, expected)
else:
with tm.external_error_raised(TypeError):
Expand Down
5 changes: 3 additions & 2 deletions pandas/tests/indexes/categorical/test_category.py
Original file line number Diff line number Diff line change
Expand Up @@ -326,9 +326,10 @@ def test_disallow_addsub_ops(self, func, op_name):
cat_or_list = "'(Categorical|list)' and '(Categorical|list)'"
msg = "|".join(
[
f"cannot perform {op_name} with this index type: CategoricalIndex",
"can only concatenate list",
rf"unsupported operand type\(s\) for [\+-]: {cat_or_list}",
"Object with dtype category cannot perform the numpy op (add|subtract)",
"operation 'r?(add|sub)' not supported for dtype 'str' "
"with dtype 'category'",
]
)
with pytest.raises(TypeError, match=msg):
Expand Down
29 changes: 8 additions & 21 deletions pandas/tests/series/test_arithmetic.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@
import pytest

from pandas._libs import lib
from pandas.compat._optional import import_optional_dependency

import pandas as pd
from pandas import (
Expand All @@ -30,7 +29,6 @@
import pandas._testing as tm
from pandas.core import ops
from pandas.core.computation import expressions as expr
from pandas.util.version import Version


@pytest.fixture(autouse=True, params=[0, 1000000], ids=["numexpr", "python"])
Expand Down Expand Up @@ -380,36 +378,25 @@ def test_mask_div_propagate_na_for_non_na_dtype(self):
result = ser2 / ser1
tm.assert_series_equal(result, expected)

@pytest.mark.parametrize("val, dtype", [(3, "Int64"), (3.5, "Float64")])
def test_add_list_to_masked_array(self, val, dtype):
# GH#22962
@pytest.mark.parametrize("val", [3, 3.5])
def test_add_list_to_masked_array(self, val):
# GH#22962, behavior changed by GH#62552
ser = Series([1, None, 3], dtype="Int64")
result = ser + [1, None, val]
expected = Series([2, None, 3 + val], dtype=dtype)
expected = Series([2, pd.NA, 3 + val], dtype="Float64")
tm.assert_series_equal(result, expected)

result = [1, None, val] + ser
tm.assert_series_equal(result, expected)

def test_add_list_to_masked_array_boolean(self, request):
def test_add_list_to_masked_array_boolean(self):
# GH#22962
ne = import_optional_dependency("numexpr", errors="ignore")
warning = (
UserWarning
if request.node.callspec.id == "numexpr"
and ne
and Version(ne.__version__) < Version("2.13.1")
else None
)
ser = Series([True, None, False], dtype="boolean")
msg = "operator is not supported by numexpr for the bool dtype"
with tm.assert_produces_warning(warning, match=msg):
result = ser + [True, None, True]
expected = Series([True, None, True], dtype="boolean")
result = ser + [True, None, True]
expected = Series([2, pd.NA, 1], dtype=object)
tm.assert_series_equal(result, expected)

with tm.assert_produces_warning(warning, match=msg):
result = [True, None, True] + ser
result = [True, None, True] + ser
tm.assert_series_equal(result, expected)


Expand Down
11 changes: 10 additions & 1 deletion pandas/tests/series/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -2216,7 +2216,16 @@ def test_np_string_array_object_cast(self, data):
arr = np.array(data, dtype=StringDType())
res = Series(arr)
assert res.dtype == np.object_
assert (res == data).all()

if data[-1] is np.nan:
# As of GH#62522, the comparison `res == data` wraps the list `data`
# with sanitize_array, which casts it to 'str' dtype. That dtype does
# not consider the string 'nan' to be equal to np.nan, so the last
# element is checked separately below.
# (numpy's own comparison apparently does treat them as equal.)
assert (res.iloc[:-1] == data[:-1]).all()
assert res.iloc[-1] == "nan"
else:
assert (res == data).all()


class TestSeriesConstructorInternals:
Expand Down