Skip to content

Commit f0d7359

Browse files
authored
BUG: Fix sorting behavior of DataFrame.join on list arguments (#62954)
1 parent ede9a18 commit f0d7359

File tree

4 files changed

+53
-25
lines changed

4 files changed

+53
-25
lines changed

doc/source/whatsnew/v3.0.0.rst

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1200,6 +1200,7 @@ Reshaping
12001200
- Bug in :meth:`DataFrame.combine` with non-unique columns incorrectly raising (:issue:`51340`)
12011201
- Bug in :meth:`DataFrame.explode` producing incorrect result for :class:`pyarrow.large_list` type (:issue:`61091`)
12021202
- Bug in :meth:`DataFrame.join` inconsistently setting result index name (:issue:`55815`)
1203+
- Bug in :meth:`DataFrame.join` not producing the correct row order when joining with a list of Series/DataFrames (:issue:`62954`)
12031204
- Bug in :meth:`DataFrame.join` when a :class:`DataFrame` with a :class:`MultiIndex` would raise an ``AssertionError`` when :attr:`MultiIndex.names` contained ``None``. (:issue:`58721`)
12041205
- Bug in :meth:`DataFrame.merge` where merging on a column containing only ``NaN`` values resulted in an out-of-bounds array access (:issue:`59421`)
12051206
- Bug in :meth:`Series.combine_first` incorrectly replacing ``None`` entries with ``NaN`` (:issue:`58977`)

pandas/core/frame.py

Lines changed: 9 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -11408,12 +11408,18 @@ def join(
1140811408

1140911409
# join indexes only using concat
1141011410
if can_concat:
11411-
if how == "left":
11411+
if how == "left" or how == "right":
1141211412
res = concat(
1141311413
frames, axis=1, join="outer", verify_integrity=True, sort=sort
1141411414
)
11415-
return res.reindex(self.index)
11415+
index = self.index if how == "left" else frames[-1].index
11416+
if sort:
11417+
index = index.sort_values()
11418+
result = res.reindex(index)
11419+
return result
1141611420
else:
11421+
if how == "outer":
11422+
sort = True
1141711423
return concat(
1141811424
frames, axis=1, join=how, verify_integrity=True, sort=sort
1141911425
)
@@ -11424,6 +11430,7 @@ def join(
1142411430
joined = merge(
1142511431
joined,
1142611432
frame,
11433+
sort=sort,
1142711434
how=how,
1142811435
left_index=True,
1142911436
right_index=True,

pandas/tests/frame/methods/test_join.py

Lines changed: 0 additions & 23 deletions
Original file line number | Diff line number | Diff line change
@@ -395,29 +395,6 @@ def test_join_list_series(float_frame):
395395
tm.assert_frame_equal(result, float_frame)
396396

397397

398-
def test_suppress_future_warning_with_sort_kw(sort):
399-
sort_kw = sort
400-
a = DataFrame({"col1": [1, 2]}, index=["c", "a"])
401-
402-
b = DataFrame({"col2": [4, 5]}, index=["b", "a"])
403-
404-
c = DataFrame({"col3": [7, 8]}, index=["a", "b"])
405-
406-
expected = DataFrame(
407-
{
408-
"col1": {"a": 2.0, "b": float("nan"), "c": 1.0},
409-
"col2": {"a": 5.0, "b": 4.0, "c": float("nan")},
410-
"col3": {"a": 7.0, "b": 8.0, "c": float("nan")},
411-
}
412-
)
413-
if sort_kw is False:
414-
expected = expected.reindex(index=["c", "a", "b"])
415-
416-
with tm.assert_produces_warning(None):
417-
result = a.join([b, c], how="outer", sort=sort_kw)
418-
tm.assert_frame_equal(result, expected)
419-
420-
421398
class TestDataFrameJoin:
422399
def test_join(self, multiindex_dataframe_random_data):
423400
frame = multiindex_dataframe_random_data

pandas/tests/reshape/merge/test_join.py

Lines changed: 43 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -671,6 +671,49 @@ def _check_diff_index(df_list, result, exp_index):
671671
with pytest.raises(ValueError, match=msg):
672672
df_list[0].join(df_list[1:], on="a")
673673

674+
@pytest.mark.parametrize("how", ["left", "right", "inner", "outer"])
675+
def test_join_many_sort_unique(self, how, sort):
676+
# https://github.com/pandas-dev/pandas/pull/62954
677+
df = DataFrame({"a": [1, 2, 3]}, index=[1, 0, 2])
678+
df2 = DataFrame({"b": [4, 5, 6]}, index=[2, 0, 1])
679+
if how == "right":
680+
expected = DataFrame({"a": [3, 2, 1], "b": [4, 5, 6]}, index=[2, 0, 1])
681+
else:
682+
expected = DataFrame({"a": [1, 2, 3], "b": [6, 5, 4]}, index=[1, 0, 2])
683+
if how == "outer" or sort:
684+
# outer always sorts.
685+
expected = expected.sort_index()
686+
result = df.join([df2], how=how, sort=sort)
687+
tm.assert_frame_equal(result, expected)
688+
689+
@pytest.mark.parametrize("how", ["left", "right", "inner", "outer"])
690+
def test_join_many_sort_nonunique(self, how, sort):
691+
# https://github.com/pandas-dev/pandas/pull/62954
692+
df = DataFrame({"a": [1, 2, 3]}, index=[3, 0, 0])
693+
df2 = DataFrame({"b": [4, 5, 6]}, index=[2, 0, 1])
694+
if how == "inner":
695+
expected = DataFrame({"a": [2, 3], "b": [5, 5]}, index=[0, 0])
696+
elif how == "left":
697+
expected = DataFrame(
698+
{"a": [1, 2, 3], "b": [np.nan, 5.0, 5.0]}, index=[3, 0, 0]
699+
)
700+
elif how == "right":
701+
expected = DataFrame(
702+
{"a": [np.nan, 2.0, 3.0, np.nan], "b": [4, 5, 5, 6]}, index=[2, 0, 0, 1]
703+
)
704+
else:
705+
expected = DataFrame(
706+
{
707+
"a": [2.0, 3.0, np.nan, np.nan, 1.0],
708+
"b": [5.0, 5.0, 6.0, 4.0, np.nan],
709+
},
710+
index=[0, 0, 1, 2, 3],
711+
)
712+
if sort:
713+
expected = expected.sort_index()
714+
result = df.join([df2], how=how, sort=sort)
715+
tm.assert_frame_equal(result, expected)
716+
674717
def test_join_many_mixed(self):
675718
df = DataFrame(
676719
np.random.default_rng(2).standard_normal((8, 4)),

0 commit comments

Comments
 (0)