diff --git a/doc/source/reference/indexing.rst b/doc/source/reference/indexing.rst
index 93f88db0843dc..e9035a1a9a1e0 100644
--- a/doc/source/reference/indexing.rst
+++ b/doc/source/reference/indexing.rst
@@ -294,6 +294,7 @@ MultiIndex components
    MultiIndex.copy
    MultiIndex.append
    MultiIndex.truncate
+   MultiIndex.insert_level
 
 MultiIndex selecting
 ~~~~~~~~~~~~~~~~~~~~
diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
index 6b78f63f92988..43016d5fa956a 100644
--- a/doc/source/whatsnew/v3.0.0.rst
+++ b/doc/source/whatsnew/v3.0.0.rst
@@ -217,6 +217,7 @@ Other enhancements
 - :meth:`pandas.concat` will raise a ``ValueError`` when ``ignore_index=True`` and ``keys`` is not ``None`` (:issue:`59274`)
 - :py:class:`frozenset` elements in pandas objects are now natively printed (:issue:`60690`)
 - Add ``"delete_rows"`` option to ``if_exists`` argument in :meth:`DataFrame.to_sql` deleting all records of the table before inserting data (:issue:`37210`).
+- Added :meth:`MultiIndex.insert_level` to insert new levels at specified positions in a MultiIndex (:issue:`62558`)
 - Added half-year offset classes :class:`HalfYearBegin`, :class:`HalfYearEnd`, :class:`BHalfYearBegin` and :class:`BHalfYearEnd` (:issue:`60928`)
 - Added support for ``axis=1`` with ``dict`` or :class:`Series` arguments into :meth:`DataFrame.fillna` (:issue:`4514`)
 - Added support to read and write from and to Apache Iceberg tables with the new :func:`read_iceberg` and :meth:`DataFrame.to_iceberg` functions (:issue:`61383`)
diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py
index 1cc1928136da1..451464fd8392c 100644
--- a/pandas/core/indexes/multi.py
+++ b/pandas/core/indexes/multi.py
@@ -92,6 +92,7 @@
     ExtensionArray,
 )
 from pandas.core.arrays.categorical import (
+    factorize_from_iterable,
     factorize_from_iterables,
     recode_for_categories,
 )
@@ -2811,6 +2812,87 @@ def reorder_levels(self, order) -> MultiIndex:
         result = self._reorder_ilevels(order)
         return result
 
+    def insert_level(
+        self, position: int, value, name: Hashable = lib.no_default
+    ) -> MultiIndex:
+        """
+        Insert a new level at the specified position in the MultiIndex.
+
+        Parameters
+        ----------
+        position : int
+            The position at which to insert the new level (0-based).
+            Must be between 0 and nlevels (inclusive). NumPy integer
+            scalars are accepted as well as Python ints.
+        value : array-like
+            Values to use for the new level. Length must match the length of the index.
+        name : Hashable, optional
+            Name for the new level. If not provided, the new level will have no name.
+
+        Returns
+        -------
+        MultiIndex
+            New MultiIndex with the inserted level.
+
+        Raises
+        ------
+        TypeError
+            If ``position`` is not an integer.
+        ValueError
+            If ``position`` is not between 0 and nlevels, or if ``value`` is
+            not list-like with the same length as the index.
+
+        See Also
+        --------
+        MultiIndex.droplevel : Remove levels from the MultiIndex.
+        MultiIndex.swaplevel : Swap two levels in the MultiIndex.
+        MultiIndex.reorder_levels : Reorder levels using specified order.
+
+        Examples
+        --------
+        >>> idx = pd.MultiIndex.from_tuples([("A", 1), ("B", 2)])
+        >>> idx.insert_level(0, ["new_value", "new_value"])
+        MultiIndex([('new_value', 'A', 1), ('new_value', 'B', 2)], ...)
+
+        >>> idx.insert_level(1, ["X", "Y"])
+        MultiIndex([('A', 'X', 1), ('B', 'Y', 2)], ...)
+        """
+        # NumPy integer scalars are valid positions too, so do not rely on
+        # isinstance(position, int) alone.
+        if not (isinstance(position, int) or lib.is_integer(position)):
+            raise TypeError("position must be an integer")
+
+        if position < 0 or position > self.nlevels:
+            raise ValueError(f"position must be between 0 and {self.nlevels}")
+
+        if name is lib.no_default:
+            name = None
+
+        if not (is_list_like(value) and len(value) == len(self)):
+            raise ValueError(
+                "value must be an array-like object of the same length as self"
+            )
+
+        if all(val is None for val in value):
+            # All-None input: empty object level, every code is -1 (missing).
+            new_level = Index([], dtype="object")
+            new_codes = np.full(len(value), -1, dtype=np.intp)
+        else:
+            new_codes, new_level = factorize_from_iterable(value)
+
+        new_levels = self.levels[:position] + [new_level] + self.levels[position:]
+        new_codes_list = self.codes[:position] + [new_codes] + self.codes[position:]
+        new_names = self.names[:position] + [name] + self.names[position:]
+
+        # The new level and codes are consistent by construction; existing
+        # ones are reused unchanged, so the integrity check is skipped.
+        return MultiIndex(
+            levels=new_levels,
+            codes=new_codes_list,
+            names=new_names,
+            verify_integrity=False,
+        )
+
     def _reorder_ilevels(self, order) -> MultiIndex:
         if len(order) != self.nlevels:
             raise AssertionError(
diff --git a/pandas/tests/indexes/multi/test_insert_level.py b/pandas/tests/indexes/multi/test_insert_level.py
new file mode 100644
index 0000000000000..7b231d1d87ad7
--- /dev/null
+++ b/pandas/tests/indexes/multi/test_insert_level.py
@@ -0,0 +1,183 @@
+import numpy as np
+import pytest
+
+import pandas as pd
+import pandas._testing as tm
+
+
+@pytest.mark.parametrize(
+    "position, value, name, expected_tuples, expected_names",
+    [
+        (
+            0,
+            ["new_value"] * 3,
+            None,
+            [("new_value", "A", 1), ("new_value", "B", 2), ("new_value", "C", 3)],
+            [None, "level1", "level2"],
+        ),
+        (
+            1,
+            ["middle"] * 3,
+            None,
+            [("A", "middle", 1), ("B", "middle", 2), ("C", "middle", 3)],
+            ["level1", None, "level2"],
+        ),
+        (
+            0,
+            ["new_val"] * 3,
+            "new_level",
+            [("new_val", "A", 1), ("new_val", "B", 2), ("new_val", "C", 3)],
+            ["new_level", "level1", "level2"],
+        ),
+        (
+            1,
+            ["middle"] * 3,
+            "custom_name",
+            [("A", "middle", 1), ("B", "middle", 2), ("C", "middle", 3)],
+            ["level1", "custom_name", "level2"],
+        ),
+        (
+            0,
+            ["start"] * 3,
+            None,
+            [("start", "A", 1), ("start", "B", 2), ("start", "C", 3)],
+            [None, "level1", "level2"],
+        ),
+        (
+            2,
+            ["end"] * 3,
+            None,
+            [("A", 1, "end"), ("B", 2, "end"), ("C", 3, "end")],
+            ["level1", "level2", None],
+        ),
+        (
+            1,
+            [100, 100, 100],
+            None,
+            [("A", 100, 1), ("B", 100, 2), ("C", 100, 3)],
+            ["level1", None, "level2"],
+        ),
+        (
+            1,
+            [1.5, 1.5, 1.5],
+            None,
+            [("A", 1.5, 1), ("B", 1.5, 2), ("C", 1.5, 3)],
+            ["level1", None, "level2"],
+        ),
+        (
+            1,
+            [None, None, None],
+            None,
+            [("A", None, 1), ("B", None, 2), ("C", None, 3)],
+            ["level1", None, "level2"],
+        ),
+        (
+            1,
+            ["X", "Y", "Z"],
+            None,
+            [("A", "X", 1), ("B", "Y", 2), ("C", "Z", 3)],
+            ["level1", None, "level2"],
+        ),
+        (
+            0,
+            [""] * 3,
+            "empty_string",
+            [("", "A", 1), ("", "B", 2), ("", "C", 3)],
+            ["empty_string", "level1", "level2"],
+        ),
+        (
+            1,
+            [True, True, True],
+            None,
+            [("A", True, 1), ("B", True, 2), ("C", True, 3)],
+            ["level1", None, "level2"],
+        ),
+    ],
+)
+def test_insert_level_basic(position, value, name, expected_tuples, expected_names):
+    simple_idx = pd.MultiIndex.from_tuples(
+        [("A", 1), ("B", 2), ("C", 3)], names=["level1", "level2"]
+    )
+
+    result = simple_idx.insert_level(position, value, name=name)
+    expected = pd.MultiIndex.from_tuples(expected_tuples, names=expected_names)
+    tm.assert_index_equal(result, expected)
+
+
+@pytest.mark.parametrize(
+    "position, value, expected_error",
+    [
+        (5, ["invalid"] * 3, "position must be between"),
+        (-1, ["invalid"] * 3, "position must be between"),
+        (
+            1,
+            ["too", "few"],
+            "value must be an array-like object of the same length as self",
+        ),
+        (3, ["value"] * 3, "position must be between"),
+        (
+            0,
+            "scalar_value",
+            "value must be an array-like object of the same length as self",
+        ),
+    ],
+)
+def test_insert_level_error_cases(position, value, expected_error):
+    simple_idx = pd.MultiIndex.from_tuples(
+        [("A", 1), ("B", 2), ("C", 3)], names=["level1", "level2"]
+    )
+
+    with pytest.raises(ValueError, match=expected_error):
+        simple_idx.insert_level(position, value)
+
+
+def test_insert_level_preserves_original():
+    simple_idx = pd.MultiIndex.from_tuples(
+        [("A", 1), ("B", 2), ("C", 3)], names=["level1", "level2"]
+    )
+
+    original = simple_idx.copy()
+    simple_idx.insert_level(1, ["temp"] * 3)
+
+    tm.assert_index_equal(original, simple_idx)
+
+
+def test_insert_level_empty_index():
+    empty_idx = pd.MultiIndex.from_tuples([], names=["level1", "level2"])
+
+    result = empty_idx.insert_level(0, [])
+    expected = pd.MultiIndex.from_tuples([], names=[None, "level1", "level2"])
+    tm.assert_index_equal(result, expected)
+
+
+def test_insert_level_single_element():
+    single_idx = pd.MultiIndex.from_tuples([("A", 1)], names=["level1", "level2"])
+
+    result = single_idx.insert_level(1, ["middle"])
+    expected = pd.MultiIndex.from_tuples(
+        [("A", "middle", 1)], names=["level1", None, "level2"]
+    )
+    tm.assert_index_equal(result, expected)
+
+
+def test_insert_level_numpy_integer_position():
+    simple_idx = pd.MultiIndex.from_tuples(
+        [("A", 1), ("B", 2), ("C", 3)], names=["level1", "level2"]
+    )
+
+    result = simple_idx.insert_level(np.int64(1), ["X", "Y", "Z"])
+    expected = pd.MultiIndex.from_tuples(
+        [("A", "X", 1), ("B", "Y", 2), ("C", "Z", 3)],
+        names=["level1", None, "level2"],
+    )
+    tm.assert_index_equal(result, expected)
+
+
+@pytest.mark.parametrize("position", [1.0, "1", None])
+def test_insert_level_position_type_error(position):
+    simple_idx = pd.MultiIndex.from_tuples(
+        [("A", 1), ("B", 2), ("C", 3)], names=["level1", "level2"]
+    )
+
+    with pytest.raises(TypeError, match="position must be an integer"):
+        simple_idx.insert_level(position, ["X", "Y", "Z"])