From e3d6970e92720dd30cf1009a0d8c52100100b398 Mon Sep 17 00:00:00 2001 From: cloudboat <15851404+cloudboat111@user.noreply.gitee.com> Date: Tue, 7 Oct 2025 20:43:01 +0800 Subject: [PATCH 01/18] FEAT: Add insert_level method to MultiIndex - Implement insert_level method for MultiIndex to insert new levels at specified positions - Add comprehensive test cases for the new functionality - Fix level names handling to match expected behavior Resolves: MultiIndex level insertion feature request --- fastparquet | Bin 0 -> 1604 bytes pandas/core/indexes/multi.py | 66 ++++++++++ .../tests/indexes/multi/test_constructors.py | 11 ++ .../tests/indexes/multi/test_insert_level.py | 118 ++++++++++++++++++ pyarrow | Bin 0 -> 1604 bytes 5 files changed, 195 insertions(+) create mode 100644 fastparquet create mode 100644 pandas/tests/indexes/multi/test_insert_level.py create mode 100644 pyarrow diff --git a/fastparquet b/fastparquet new file mode 100644 index 0000000000000000000000000000000000000000..90e1311582647a65c92628f6d4c82eceb7c8141d GIT binary patch literal 1604 zcmb7F%Z}nk6fGXHSY$DZVgec#$eN%;p*zMvXBf?z-TZp1QsTS9n}u(=5-v=(7xqK11mF z7YO~9NTwM*_3PF6=Rbab&LZ}6wonHA89YoOmr0>`dv3tKtv$ z+!B0XalYc323Ia}uHqQra3kA>RS+tE2uq&YD-X;nH4S1`9)5&H%y&!4jRWl|%bjJ| z`t7MNmUSEMJd%!!TuWQ&v4Y6)c1IuxeIm;-LPb|X1!C_x!-ZJOHOa`_=N`a&Nc{+`66{~K=T`1L;Rqg@hKE>nlTj^EYQn|mF zZ^;nM7RGo44;kNMT*FNhw=p(x%cQ<$^90`z=bIybg#jjO7?g$LBE}m6z)VOkMS_t) z++V`f{7!W2NbXE(Q(?GPyOU6L2i9AwsT#YUuSoT!xLBE@+ncM7Zj08&6rK89mEMt< zNSC%jzlANWI-YCp2z+>RSg*B~cp+X60z&Dl@C>FEo!~Q}5$IASnhJ4ld3wwl z&7_J=VpVq%s4bFD_GD?hoC;6m?X*FAdq1s?>sp6s!j>-8J>Xx;*CNRc8Y>%P(%_A0 zlk$r0kk>4Vh4OSfhyDEQ8nku;OP1;jRTz7!qfxHOWZdcBnAY7#y0sm|w?nAi2Tj-- z#rYgXngnvYaXg2(d}Gm?JfRip%MTYLiol(CRf# zHOjW;4n0+ c^UmmXEgr68F}|qzLIHkD|NaD_EBOET4 self.nlevels: + raise ValueError(f"position must be between 0 and {self.nlevels}") + + # 处理value参数 + from pandas.core.construction import extract_array + from pandas.core.indexes.base import ensure_index + + if not hasattr(value, '__iter__') or isinstance(value, str): + value = [value] * len(self) + else: + value = list(value) + if len(value) != len(self): + raise ValueError("Length of values must match length of index") + + # 简单实现:通过重建MultiIndex来插入level + tuples = list(self) + + # 在每个tuple的指定位置插入新值 + new_tuples = [] + for i, tup in enumerate(tuples): + if isinstance(tup, tuple): + new_tuple = list(tup) + new_tuple.insert(position, value[i]) + new_tuples.append(tuple(new_tuple)) + else: + # 如果当前不是tuple(单层索引的情况) + new_tuple = [tup] + new_tuple.insert(position, value[i]) + new_tuples.append(tuple(new_tuple)) + + # 修复:正确处理层级名称 + # 新插入的层级使用传入的name参数,如果没有传入则为None + # 原有层级的名称保持不变 + + if self.names is not None: + new_names = list(self.names) + else: + new_names = [None] * self.nlevels + + # 插入新层级的名称 - 使用传入的name参数 + new_names.insert(position, name) # 这里name可能是None,这正是测试期望的 + + # 创建新的MultiIndex,明确传递names参数 + from pandas import MultiIndex + return MultiIndex.from_tuples(new_tuples, names=new_names) + + + + + + + + + + + def __new__( cls, levels=None, @@ -4383,3 +4447,5 @@ def cartesian_product(X: list[np.ndarray]) -> list[np.ndarray]: ) for i, x in enumerate(X) ] + + diff --git a/pandas/tests/indexes/multi/test_constructors.py b/pandas/tests/indexes/multi/test_constructors.py index c134e44681122..a795097626d2e 100644 --- a/pandas/tests/indexes/multi/test_constructors.py +++ b/pandas/tests/indexes/multi/test_constructors.py @@ -870,3 +870,14 @@ def test_dtype_representation(using_infer_string): dtype=object, ) tm.assert_series_equal(result, expected) + + +def test_insert_level_integration(): + idx = pd.MultiIndex.from_tuples([('A', 1), ('B', 2)]) + + df = pd.DataFrame({'data': [10, 20]}, index=idx) + new_idx = idx.insert_level(0, 'group1') + df_new = df.set_index(new_idx) + + assert df_new.index.nlevels == 3 + assert len(df_new) == 2 \ No newline at end of file diff --git a/pandas/tests/indexes/multi/test_insert_level.py b/pandas/tests/indexes/multi/test_insert_level.py new file mode 100644 index 0000000000000..9c69608e2b2aa --- /dev/null +++ b/pandas/tests/indexes/multi/test_insert_level.py @@ -0,0 +1,118 @@ +import pytest +import pandas as pd +import numpy as np +import pandas._testing as tm + + + +class TestMultiIndexInsertLevel: + """测试MultiIndex.insert_level方法""" + + def setup_method(self): + """测试前置准备""" + # 创建基础测试数据 + self.simple_idx = pd.MultiIndex.from_tuples( + [('A', 1), ('B', 2), ('C', 3)], names=['level1', 'level2'] + ) + self.empty_idx = pd.MultiIndex.from_tuples([], names=['level1', 'level2']) + + def test_insert_level_basic(self): + """测试基本功能""" + # 在位置0插入新层级 + result = self.simple_idx.insert_level(0, 'new_value') + expected = pd.MultiIndex.from_tuples( + [('new_value', 'A', 1), ('new_value', 'B', 2), ('new_value', 'C', 3)], + names=[None, 'level1', 'level2'] # 新插入的层级名称为None + ) + tm.assert_index_equal(result, expected) + + # 在位置1插入新层级 + result = self.simple_idx.insert_level(1, 'middle') + expected = pd.MultiIndex.from_tuples( + [('A', 'middle', 1), ('B', 'middle', 2), ('C', 'middle', 3)], + names=['level1', None, 'level2'] # 新插入的层级名称为None + ) + tm.assert_index_equal(result, expected) + + def test_insert_level_with_different_values(self): + """测试插入不同值的层级""" + new_values = ['X', 'Y', 'Z'] + result = self.simple_idx.insert_level(1, new_values) + expected = pd.MultiIndex.from_tuples( + [('A', 'X', 1), ('B', 'Y', 2), ('C', 'Z', 3)], + names=['level1', None, 'level2'] # 新插入的层级名称为None + ) + tm.assert_index_equal(result, expected) + + def test_insert_level_with_name(self): + """测试指定层级名称""" + result = self.simple_idx.insert_level(0, 'new_val', name='new_level') + assert result.names[0] == 'new_level' + + def test_insert_level_edge_positions(self): + """测试边界位置插入""" + # 在开始位置插入 + result_start = self.simple_idx.insert_level(0, 'start') + assert result_start.nlevels == 3 + + # 在结束位置插入 + result_end = self.simple_idx.insert_level(2, 'end') + assert result_end.nlevels == 3 + + def test_insert_level_error_cases(self): + """测试错误情况""" + # 位置超出范围 + with pytest.raises(ValueError, match="position must be between"): + self.simple_idx.insert_level(5, 'invalid') + + # 位置为负数 + with pytest.raises(ValueError, match="position must be between"): + self.simple_idx.insert_level(-1, 'invalid') + + # 值长度不匹配 + with pytest.raises(ValueError, match="Length of values must match"): + self.simple_idx.insert_level(1, ['too', 'few']) + + def test_insert_level_with_different_data_types(self): + """测试不同数据类型""" + # 整数 + result_int = self.simple_idx.insert_level(1, 100) + + # 浮点数 + result_float = self.simple_idx.insert_level(1, 1.5) + + # None值 + result_none = self.simple_idx.insert_level(1, None) + + # 确保都能正常创建 + assert result_int.nlevels == 3 + assert result_float.nlevels == 3 + assert result_none.nlevels == 3 + + def test_insert_level_preserves_original(self): + """测试原索引不被修改""" + original = self.simple_idx.copy() + result = self.simple_idx.insert_level(1, 'temp') + + # 原索引应保持不变 + tm.assert_index_equal(original, self.simple_idx) + # 新索引应有更多层级 + assert result.nlevels == original.nlevels + 1 + + def test_debug_names(): + """调试层级名称问题""" + idx = pd.MultiIndex.from_tuples( + [('A', 1), ('B', 2), ('C', 3)], + names=['level1', 'level2'] + ) + print("Original names:", idx.names) + + result = idx.insert_level(0, 'new_value') + print("Result names:", result.names) + + # 手动创建期望的结果 + expected = pd.MultiIndex.from_tuples( + [('new_value', 'A', 1), ('new_value', 'B', 2), ('new_value', 'C', 3)], + names=[None, 'level1', 'level2'] # 注意:新插入的层级名称应该是None + ) + print("Expected names:", expected.names) \ No newline at end of file diff --git a/pyarrow b/pyarrow new file mode 100644 index 0000000000000000000000000000000000000000..90e1311582647a65c92628f6d4c82eceb7c8141d GIT binary patch literal 1604 zcmb7F%Z}nk6fGXHSY$DZVgec#$eN%;p*zMvXBf?z-TZp1QsTS9n}u(=5-v=(7xqK11mF z7YO~9NTwM*_3PF6=Rbab&LZ}6wonHA89YoOmr0>`dv3tKtv$ z+!B0XalYc323Ia}uHqQra3kA>RS+tE2uq&YD-X;nH4S1`9)5&H%y&!4jRWl|%bjJ| z`t7MNmUSEMJd%!!TuWQ&v4Y6)c1IuxeIm;-LPb|X1!C_x!-ZJOHOa`_=N`a&Nc{+`66{~K=T`1L;Rqg@hKE>nlTj^EYQn|mF zZ^;nM7RGo44;kNMT*FNhw=p(x%cQ<$^90`z=bIybg#jjO7?g$LBE}m6z)VOkMS_t) z++V`f{7!W2NbXE(Q(?GPyOU6L2i9AwsT#YUuSoT!xLBE@+ncM7Zj08&6rK89mEMt< zNSC%jzlANWI-YCp2z+>RSg*B~cp+X60z&Dl@C>FEo!~Q}5$IASnhJ4ld3wwl z&7_J=VpVq%s4bFD_GD?hoC;6m?X*FAdq1s?>sp6s!j>-8J>Xx;*CNRc8Y>%P(%_A0 zlk$r0kk>4Vh4OSfhyDEQ8nku;OP1;jRTz7!qfxHOWZdcBnAY7#y0sm|w?nAi2Tj-- z#rYgXngnvYaXg2(d}Gm?JfRip%MTYLiol(CRf# zHOjW;4n0+ c^UmmXEgr68F}|qzLIHkD|NaD_EBOET4 Date: Tue, 7 Oct 2025 20:58:33 +0800 Subject: [PATCH 02/18] FEAT: Add insert_level method to MultiIndex - Implement insert_level method for MultiIndex to insert new levels at specified positions - Add comprehensive test cases for the new functionality - Fix level names handling to match expected behavior Resolves: MultiIndex level insertion feature request --- pandas/core/indexes/multi.py | 106 +++++++----------- .../tests/indexes/multi/test_insert_level.py | 35 +----- 2 files changed, 47 insertions(+), 94 deletions(-) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index e6a62ec66a32d..fe279f4060d16 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -299,70 +299,6 @@ class MultiIndex(Index): # -------------------------------------------------------------------- # Constructors - def insert_level(self, position: int, value, name=None): - """ - Insert a new level at the specified position in the MultiIndex. - """ - # 参数验证 - if not isinstance(position, int): - raise TypeError("position must be an integer") - - if position < 0 or position > self.nlevels: - raise ValueError(f"position must be between 0 and {self.nlevels}") - - # 处理value参数 - from pandas.core.construction import extract_array - from pandas.core.indexes.base import ensure_index - - if not hasattr(value, '__iter__') or isinstance(value, str): - value = [value] * len(self) - else: - value = list(value) - if len(value) != len(self): - raise ValueError("Length of values must match length of index") - - # 简单实现:通过重建MultiIndex来插入level - tuples = list(self) - - # 在每个tuple的指定位置插入新值 - new_tuples = [] - for i, tup in enumerate(tuples): - if isinstance(tup, tuple): - new_tuple = list(tup) - new_tuple.insert(position, value[i]) - new_tuples.append(tuple(new_tuple)) - else: - # 如果当前不是tuple(单层索引的情况) - new_tuple = [tup] - new_tuple.insert(position, value[i]) - new_tuples.append(tuple(new_tuple)) - - # 修复:正确处理层级名称 - # 新插入的层级使用传入的name参数,如果没有传入则为None - # 原有层级的名称保持不变 - - if self.names is not None: - new_names = list(self.names) - else: - new_names = [None] * self.nlevels - - # 插入新层级的名称 - 使用传入的name参数 - new_names.insert(position, name) # 这里name可能是None,这正是测试期望的 - - # 创建新的MultiIndex,明确传递names参数 - from pandas import MultiIndex - return MultiIndex.from_tuples(new_tuples, names=new_names) - - - - - - - - - - - def __new__( cls, levels=None, @@ -4448,4 +4384,46 @@ def cartesian_product(X: list[np.ndarray]) -> list[np.ndarray]: for i, x in enumerate(X) ] + def insert_level(self, position: int, value, name=None): + """ + Insert a new level at the specified position in the MultiIndex. + """ + if not isinstance(position, int): + raise TypeError("position must be an integer") + + if position < 0 or position > self.nlevels: + raise ValueError(f"position must be between 0 and {self.nlevels}") + + from pandas.core.construction import extract_array + from pandas.core.indexes.base import ensure_index + + if not hasattr(value, '__iter__') or isinstance(value, str): + value = [value] * len(self) + else: + value = list(value) + if len(value) != len(self): + raise ValueError("Length of values must match length of index") + + tuples = list(self) + + new_tuples = [] + for i, tup in enumerate(tuples): + if isinstance(tup, tuple): + new_tuple = list(tup) + new_tuple.insert(position, value[i]) + new_tuples.append(tuple(new_tuple)) + else: + new_tuple = [tup] + new_tuple.insert(position, value[i]) + new_tuples.append(tuple(new_tuple)) + + if self.names is not None: + new_names = list(self.names) + else: + new_names = [None] * self.nlevels + + new_names.insert(position, name) + + from pandas import MultiIndex + return MultiIndex.from_tuples(new_tuples, names=new_names) diff --git a/pandas/tests/indexes/multi/test_insert_level.py b/pandas/tests/indexes/multi/test_insert_level.py index 9c69608e2b2aa..6c9855bb13946 100644 --- a/pandas/tests/indexes/multi/test_insert_level.py +++ b/pandas/tests/indexes/multi/test_insert_level.py @@ -6,101 +6,77 @@ class TestMultiIndexInsertLevel: - """测试MultiIndex.insert_level方法""" def setup_method(self): - """测试前置准备""" - # 创建基础测试数据 self.simple_idx = pd.MultiIndex.from_tuples( [('A', 1), ('B', 2), ('C', 3)], names=['level1', 'level2'] ) self.empty_idx = pd.MultiIndex.from_tuples([], names=['level1', 'level2']) def test_insert_level_basic(self): - """测试基本功能""" - # 在位置0插入新层级 result = self.simple_idx.insert_level(0, 'new_value') expected = pd.MultiIndex.from_tuples( [('new_value', 'A', 1), ('new_value', 'B', 2), ('new_value', 'C', 3)], - names=[None, 'level1', 'level2'] # 新插入的层级名称为None + names=[None, 'level1', 'level2'] ) tm.assert_index_equal(result, expected) - # 在位置1插入新层级 result = self.simple_idx.insert_level(1, 'middle') expected = pd.MultiIndex.from_tuples( [('A', 'middle', 1), ('B', 'middle', 2), ('C', 'middle', 3)], - names=['level1', None, 'level2'] # 新插入的层级名称为None - ) + names=['level1', None, 'level2'] tm.assert_index_equal(result, expected) def test_insert_level_with_different_values(self): - """测试插入不同值的层级""" new_values = ['X', 'Y', 'Z'] result = self.simple_idx.insert_level(1, new_values) expected = pd.MultiIndex.from_tuples( [('A', 'X', 1), ('B', 'Y', 2), ('C', 'Z', 3)], - names=['level1', None, 'level2'] # 新插入的层级名称为None + names=['level1', None, 'level2'] ) tm.assert_index_equal(result, expected) def test_insert_level_with_name(self): - """测试指定层级名称""" result = self.simple_idx.insert_level(0, 'new_val', name='new_level') assert result.names[0] == 'new_level' def test_insert_level_edge_positions(self): - """测试边界位置插入""" - # 在开始位置插入 result_start = self.simple_idx.insert_level(0, 'start') assert result_start.nlevels == 3 - # 在结束位置插入 result_end = self.simple_idx.insert_level(2, 'end') assert result_end.nlevels == 3 def test_insert_level_error_cases(self): - """测试错误情况""" - # 位置超出范围 with pytest.raises(ValueError, match="position must be between"): self.simple_idx.insert_level(5, 'invalid') - # 位置为负数 with pytest.raises(ValueError, match="position must be between"): self.simple_idx.insert_level(-1, 'invalid') - # 值长度不匹配 with pytest.raises(ValueError, match="Length of values must match"): self.simple_idx.insert_level(1, ['too', 'few']) def test_insert_level_with_different_data_types(self): - """测试不同数据类型""" - # 整数 result_int = self.simple_idx.insert_level(1, 100) - # 浮点数 result_float = self.simple_idx.insert_level(1, 1.5) - # None值 result_none = self.simple_idx.insert_level(1, None) - # 确保都能正常创建 assert result_int.nlevels == 3 assert result_float.nlevels == 3 assert result_none.nlevels == 3 def test_insert_level_preserves_original(self): - """测试原索引不被修改""" original = self.simple_idx.copy() result = self.simple_idx.insert_level(1, 'temp') - # 原索引应保持不变 tm.assert_index_equal(original, self.simple_idx) - # 新索引应有更多层级 + assert result.nlevels == original.nlevels + 1 def test_debug_names(): - """调试层级名称问题""" idx = pd.MultiIndex.from_tuples( [('A', 1), ('B', 2), ('C', 3)], names=['level1', 'level2'] @@ -110,9 +86,8 @@ def test_debug_names(): result = idx.insert_level(0, 'new_value') print("Result names:", result.names) - # 手动创建期望的结果 expected = pd.MultiIndex.from_tuples( [('new_value', 'A', 1), ('new_value', 'B', 2), ('new_value', 'C', 3)], - names=[None, 'level1', 'level2'] # 注意:新插入的层级名称应该是None + names=[None, 'level1', 'level2'] ) print("Expected names:", expected.names) \ No newline at end of file From 5b76304a718af8cb638e68a32089e0842aa5049b Mon Sep 17 00:00:00 2001 From: cloudboat <15851404+cloudboat111@user.noreply.gitee.com> Date: Tue, 7 Oct 2025 21:57:51 +0800 Subject: [PATCH 03/18] FEAT: Add insert_level method to MultiIndex - Implement insert_level method for MultiIndex to insert new levels at specified positions - Add comprehensive test cases for the new functionality - Fix level names handling to match expected behavior Resolves: MultiIndex level insertion feature request --- pandas/core/indexes/multi.py | 68 +++++++++++++++++++ .../tests/indexes/multi/test_insert_level.py | 1 + 2 files changed, 69 insertions(+) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index fe279f4060d16..452238f2f7e7f 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -2707,6 +2707,74 @@ def reorder_levels(self, order) -> MultiIndex: result = self._reorder_ilevels(order) return result + def insert_level(self, position: int, value, name=None) -> MultiIndex: + """ + Insert a new level at the specified position and return a new MultiIndex. + + Parameters + ---------- + position : int + The integer position where the new level should be inserted. + Must be between 0 and ``self.nlevels`` (inclusive). + value : scalar or sequence + Values for the inserted level. If a scalar is provided, it is + broadcast to the length of the index. If a sequence is provided, + it must be the same length as the index. + name : Hashable, default None + Name of the inserted level. If not provided, the inserted level + name will be ``None``. + + Returns + ------- + MultiIndex + A new ``MultiIndex`` with the inserted level. + + Examples + -------- + >>> idx = pd.MultiIndex.from_tuples([('A', 1), ('B', 2)], names=['x', 'y']) + >>> idx.insert_level(0, 'grp') + MultiIndex([('grp', 'A', 1), ('grp', 'B', 2)], + names=[None, 'x', 'y']) + >>> idx.insert_level(1, ['L1', 'L2'], name='z') + MultiIndex([('A', 'L1', 1), ('B', 'L2', 2)], + names=['x', 'z', 'y']) + """ + if not isinstance(position, int): + raise TypeError("position must be an integer") + + if position < 0 or position > self.nlevels: + raise ValueError(f"position must be between 0 and {self.nlevels}") + + if not hasattr(value, "__iter__") or isinstance(value, str): + value = [value] * len(self) + else: + value = list(value) + if len(value) != len(self): + raise ValueError("Length of values must match length of index") + + tuples = list(self) + + new_tuples = [] + for i, tup in enumerate(tuples): + if isinstance(tup, tuple): + new_tuple = list(tup) + new_tuple.insert(position, value[i]) + new_tuples.append(tuple(new_tuple)) + else: + new_tuple = [tup] + new_tuple.insert(position, value[i]) + new_tuples.append(tuple(new_tuple)) + + if self.names is not None: + new_names = list(self.names) + else: + new_names = [None] * self.nlevels + + new_names.insert(position, name) + + from pandas import MultiIndex + return MultiIndex.from_tuples(new_tuples, names=new_names) + def _reorder_ilevels(self, order) -> MultiIndex: if len(order) != self.nlevels: raise AssertionError( diff --git a/pandas/tests/indexes/multi/test_insert_level.py b/pandas/tests/indexes/multi/test_insert_level.py index 6c9855bb13946..6921efe454c4e 100644 --- a/pandas/tests/indexes/multi/test_insert_level.py +++ b/pandas/tests/indexes/multi/test_insert_level.py @@ -25,6 +25,7 @@ def test_insert_level_basic(self): expected = pd.MultiIndex.from_tuples( [('A', 'middle', 1), ('B', 'middle', 2), ('C', 'middle', 3)], names=['level1', None, 'level2'] + ) tm.assert_index_equal(result, expected) def test_insert_level_with_different_values(self): From 5f0caf01e2b0a08bd6933618fb4d1cd0421291ea Mon Sep 17 00:00:00 2001 From: cloudboat <15851404+cloudboat111@user.noreply.gitee.com> Date: Tue, 7 Oct 2025 22:22:49 +0800 Subject: [PATCH 04/18] ENH: Add insert_level method to MultiIndex with formatting fixes --- pandas/core/indexes/multi.py | 42 --------------- .../tests/indexes/multi/test_insert_level.py | 53 +++++++++---------- 2 files changed, 26 insertions(+), 69 deletions(-) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 452238f2f7e7f..fec245935c49e 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -4452,46 +4452,4 @@ def cartesian_product(X: list[np.ndarray]) -> list[np.ndarray]: for i, x in enumerate(X) ] - def insert_level(self, position: int, value, name=None): - """ - Insert a new level at the specified position in the MultiIndex. - """ - if not isinstance(position, int): - raise TypeError("position must be an integer") - - if position < 0 or position > self.nlevels: - raise ValueError(f"position must be between 0 and {self.nlevels}") - - from pandas.core.construction import extract_array - from pandas.core.indexes.base import ensure_index - - if not hasattr(value, '__iter__') or isinstance(value, str): - value = [value] * len(self) - else: - value = list(value) - if len(value) != len(self): - raise ValueError("Length of values must match length of index") - - tuples = list(self) - - new_tuples = [] - for i, tup in enumerate(tuples): - if isinstance(tup, tuple): - new_tuple = list(tup) - new_tuple.insert(position, value[i]) - new_tuples.append(tuple(new_tuple)) - else: - new_tuple = [tup] - new_tuple.insert(position, value[i]) - new_tuples.append(tuple(new_tuple)) - - if self.names is not None: - new_names = list(self.names) - else: - new_names = [None] * self.nlevels - - new_names.insert(position, name) - - from pandas import MultiIndex - return MultiIndex.from_tuples(new_tuples, names=new_names) diff --git a/pandas/tests/indexes/multi/test_insert_level.py b/pandas/tests/indexes/multi/test_insert_level.py index 6921efe454c4e..8169a32e8049b 100644 --- a/pandas/tests/indexes/multi/test_insert_level.py +++ b/pandas/tests/indexes/multi/test_insert_level.py @@ -1,62 +1,61 @@ import pytest + import pandas as pd -import numpy as np import pandas._testing as tm - class TestMultiIndexInsertLevel: def setup_method(self): self.simple_idx = pd.MultiIndex.from_tuples( - [('A', 1), ('B', 2), ('C', 3)], names=['level1', 'level2'] + [("A", 1), ("B", 2), ("C", 3)], names=["level1", "level2"] ) - self.empty_idx = pd.MultiIndex.from_tuples([], names=['level1', 'level2']) + self.empty_idx = pd.MultiIndex.from_tuples([], names=["level1", "level2"]) def test_insert_level_basic(self): - result = self.simple_idx.insert_level(0, 'new_value') + result = self.simple_idx.insert_level(0, "new_value") expected = pd.MultiIndex.from_tuples( - [('new_value', 'A', 1), ('new_value', 'B', 2), ('new_value', 'C', 3)], - names=[None, 'level1', 'level2'] + [("new_value", "A", 1), ("new_value", "B", 2), ("new_value", "C", 3)], + names=[None, "level1", "level2"] ) tm.assert_index_equal(result, expected) - result = self.simple_idx.insert_level(1, 'middle') + result = self.simple_idx.insert_level(1, "middle") expected = pd.MultiIndex.from_tuples( - [('A', 'middle', 1), ('B', 'middle', 2), ('C', 'middle', 3)], - names=['level1', None, 'level2'] + [("A", "middle", 1), ("B", "middle", 2), ("C", "middle", 3)], + names=["level1", None, "level2"] ) tm.assert_index_equal(result, expected) def test_insert_level_with_different_values(self): - new_values = ['X', 'Y', 'Z'] + new_values = ["X", "Y", "Z"] result = self.simple_idx.insert_level(1, new_values) expected = pd.MultiIndex.from_tuples( - [('A', 'X', 1), ('B', 'Y', 2), ('C', 'Z', 3)], - names=['level1', None, 'level2'] + [("A", "X", 1), ("B", "Y", 2), ("C", "Z", 3)], + names=["level1", None, "level2"] ) tm.assert_index_equal(result, expected) def test_insert_level_with_name(self): - result = self.simple_idx.insert_level(0, 'new_val', name='new_level') - assert result.names[0] == 'new_level' + result = self.simple_idx.insert_level(0, "new_val", name="new_level") + assert result.names[0] == "new_level" def test_insert_level_edge_positions(self): - result_start = self.simple_idx.insert_level(0, 'start') + result_start = self.simple_idx.insert_level(0, "start") assert result_start.nlevels == 3 - result_end = self.simple_idx.insert_level(2, 'end') + result_end = self.simple_idx.insert_level(2, "end") assert result_end.nlevels == 3 def test_insert_level_error_cases(self): with pytest.raises(ValueError, match="position must be between"): - self.simple_idx.insert_level(5, 'invalid') + self.simple_idx.insert_level(5, "invalid") with pytest.raises(ValueError, match="position must be between"): - self.simple_idx.insert_level(-1, 'invalid') + self.simple_idx.insert_level(-1, "invalid") with pytest.raises(ValueError, match="Length of values must match"): - self.simple_idx.insert_level(1, ['too', 'few']) + self.simple_idx.insert_level(1, ["too", "few"]) def test_insert_level_with_different_data_types(self): result_int = self.simple_idx.insert_level(1, 100) @@ -71,7 +70,7 @@ def test_insert_level_with_different_data_types(self): def test_insert_level_preserves_original(self): original = self.simple_idx.copy() - result = self.simple_idx.insert_level(1, 'temp') + result = self.simple_idx.insert_level(1, "temp") tm.assert_index_equal(original, self.simple_idx) @@ -79,16 +78,16 @@ def test_insert_level_preserves_original(self): def test_debug_names(): idx = pd.MultiIndex.from_tuples( - [('A', 1), ('B', 2), ('C', 3)], - names=['level1', 'level2'] + [("A", 1), ("B", 2), ("C", 3)], + names=["level1", "level2"] ) print("Original names:", idx.names) - result = idx.insert_level(0, 'new_value') + result = idx.insert_level(0, "new_value") print("Result names:", result.names) expected = pd.MultiIndex.from_tuples( - [('new_value', 'A', 1), ('new_value', 'B', 2), ('new_value', 'C', 3)], - names=[None, 'level1', 'level2'] + [("new_value", "A", 1), ("new_value", "B", 2), ("new_value", "C", 3)], + names=[None, "level1", "level2"] ) - print("Expected names:", expected.names) \ No newline at end of file + print("Expected names:", expected.names) From 97a98e579b30f9bbbd02fa6ad9551bf7d18d0583 Mon Sep 17 00:00:00 2001 From: cloudboat <15851404+cloudboat111@user.noreply.gitee.com> Date: Tue, 7 Oct 2025 22:29:25 +0800 Subject: [PATCH 05/18] STYLE: Format code with ruff --- pandas/core/indexes/multi.py | 9 ++++----- pandas/tests/indexes/multi/test_insert_level.py | 12 +++++------- 2 files changed, 9 insertions(+), 12 deletions(-) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index fec245935c49e..f70fae0e8053d 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -2731,11 +2731,11 @@ def insert_level(self, position: int, value, name=None) -> MultiIndex: Examples -------- - >>> idx = pd.MultiIndex.from_tuples([('A', 1), ('B', 2)], names=['x', 'y']) - >>> idx.insert_level(0, 'grp') + >>> idx = pd.MultiIndex.from_tuples([("A", 1), ("B", 2)], names=["x", "y"]) + >>> idx.insert_level(0, "grp") MultiIndex([('grp', 'A', 1), ('grp', 'B', 2)], names=[None, 'x', 'y']) - >>> idx.insert_level(1, ['L1', 'L2'], name='z') + >>> idx.insert_level(1, ["L1", "L2"], name="z") MultiIndex([('A', 'L1', 1), ('B', 'L2', 2)], names=['x', 'z', 'y']) """ @@ -2773,6 +2773,7 @@ def insert_level(self, position: int, value, name=None) -> MultiIndex: new_names.insert(position, name) from pandas import MultiIndex + return MultiIndex.from_tuples(new_tuples, names=new_names) def _reorder_ilevels(self, order) -> MultiIndex: @@ -4451,5 +4452,3 @@ def cartesian_product(X: list[np.ndarray]) -> list[np.ndarray]: ) for i, x in enumerate(X) ] - - diff --git a/pandas/tests/indexes/multi/test_insert_level.py b/pandas/tests/indexes/multi/test_insert_level.py index 8169a32e8049b..9b9a270aaeebb 100644 --- a/pandas/tests/indexes/multi/test_insert_level.py +++ b/pandas/tests/indexes/multi/test_insert_level.py @@ -5,7 +5,6 @@ class TestMultiIndexInsertLevel: - def setup_method(self): self.simple_idx = pd.MultiIndex.from_tuples( [("A", 1), ("B", 2), ("C", 3)], names=["level1", "level2"] @@ -16,14 +15,14 @@ def test_insert_level_basic(self): result = self.simple_idx.insert_level(0, "new_value") expected = pd.MultiIndex.from_tuples( [("new_value", "A", 1), ("new_value", "B", 2), ("new_value", "C", 3)], - names=[None, "level1", "level2"] + names=[None, "level1", "level2"], ) tm.assert_index_equal(result, expected) result = self.simple_idx.insert_level(1, "middle") expected = pd.MultiIndex.from_tuples( [("A", "middle", 1), ("B", "middle", 2), ("C", "middle", 3)], - names=["level1", None, "level2"] + names=["level1", None, "level2"], ) tm.assert_index_equal(result, expected) @@ -32,7 +31,7 @@ def test_insert_level_with_different_values(self): result = self.simple_idx.insert_level(1, new_values) expected = pd.MultiIndex.from_tuples( [("A", "X", 1), ("B", "Y", 2), ("C", "Z", 3)], - names=["level1", None, "level2"] + names=["level1", None, "level2"], ) tm.assert_index_equal(result, expected) @@ -78,8 +77,7 @@ def test_insert_level_preserves_original(self): def test_debug_names(): idx = pd.MultiIndex.from_tuples( - [("A", 1), ("B", 2), ("C", 3)], - names=["level1", "level2"] + [("A", 1), ("B", 2), ("C", 3)], names=["level1", "level2"] ) print("Original names:", idx.names) @@ -88,6 +86,6 @@ def test_debug_names(): expected = pd.MultiIndex.from_tuples( [("new_value", "A", 1), ("new_value", "B", 2), ("new_value", "C", 3)], - names=[None, "level1", "level2"] + names=[None, "level1", "level2"], ) print("Expected names:", expected.names) From 1a9ddc59762b7771eff7b42b191de0159a0a8902 Mon Sep 17 00:00:00 2001 From: cloudboat <15851404+cloudboat111@user.noreply.gitee.com> Date: Tue, 7 Oct 2025 23:35:07 +0800 Subject: [PATCH 06/18] FIX: Remove undefined pd reference --- pandas/tests/indexes/multi/test_constructors.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/tests/indexes/multi/test_constructors.py b/pandas/tests/indexes/multi/test_constructors.py index a795097626d2e..b8ce4fb8d013b 100644 --- a/pandas/tests/indexes/multi/test_constructors.py +++ b/pandas/tests/indexes/multi/test_constructors.py @@ -873,11 +873,11 @@ def test_dtype_representation(using_infer_string): def test_insert_level_integration(): - idx = pd.MultiIndex.from_tuples([('A', 1), ('B', 2)]) + idx = MultiIndex.from_tuples([("A", 1), ("B", 2)]) - df = pd.DataFrame({'data': [10, 20]}, index=idx) - new_idx = idx.insert_level(0, 'group1') + df = pd.DataFrame({"data": [10, 20]}, index=idx) + new_idx = idx.insert_level(0, "group1") df_new = df.set_index(new_idx) assert df_new.index.nlevels == 3 - assert len(df_new) == 2 \ No newline at end of file + assert len(df_new) == 2 From 9e8676dd084f4d5454eabc06d673c19cb7a952f3 Mon Sep 17 00:00:00 2001 From: chi <1362186537@qq.com> Date: Sun, 12 Oct 2025 22:54:38 +0800 Subject: [PATCH 07/18] Update pandas/core/indexes/multi.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Álvaro Kothe --- pandas/core/indexes/multi.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index f70fae0e8053d..e42be2282e081 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -2772,7 +2772,6 @@ def insert_level(self, position: int, value, name=None) -> MultiIndex: new_names.insert(position, name) - from pandas import MultiIndex return MultiIndex.from_tuples(new_tuples, names=new_names) From 77f3af87b307239c87c21c8a0df00802a67f1d86 Mon Sep 17 00:00:00 2001 From: cloudboat <15851404+cloudboat111@user.noreply.gitee.com> Date: Tue, 14 Oct 2025 21:26:21 +0800 Subject: [PATCH 08/18] DOC: Add whatsnew entry for MultiIndex.insert_level --- doc/source/whatsnew/v3.0.0.rst | 3 +- pandas/core/indexes/multi.py | 1 - pandas/tests/frame/test_query_eval.py | 20 +-- .../tests/indexes/multi/test_insert_level.py | 164 +++++++++++------- 4 files changed, 105 insertions(+), 83 deletions(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 448ceffdaa1eb..ccf677ff45407 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -214,6 +214,7 @@ Other enhancements - :meth:`pandas.concat` will raise a ``ValueError`` when ``ignore_index=True`` and ``keys`` is not ``None`` (:issue:`59274`) - :py:class:`frozenset` elements in pandas objects are now natively printed (:issue:`60690`) - Add ``"delete_rows"`` option to ``if_exists`` argument in :meth:`DataFrame.to_sql` deleting all records of the table before inserting data (:issue:`37210`). +- Added :meth:`MultiIndex.insert_level` to insert new levels at specified positions in a MultiIndex (:issue:`62558`) - Added half-year offset classes :class:`HalfYearBegin`, :class:`HalfYearEnd`, :class:`BHalfYearBegin` and :class:`BHalfYearEnd` (:issue:`60928`) - Added support for ``axis=1`` with ``dict`` or :class:`Series` arguments into :meth:`DataFrame.fillna` (:issue:`4514`) - Added support to read and write from and to Apache Iceberg tables with the new :func:`read_iceberg` and :meth:`DataFrame.to_iceberg` functions (:issue:`61383`) @@ -228,7 +229,7 @@ Other enhancements - Support reading Stata 102-format (Stata 1) dta files (:issue:`58978`) - Support reading Stata 110-format (Stata 7) dta files (:issue:`47176`) - Switched wheel upload to **PyPI Trusted Publishing** (OIDC) for release-tag pushes in ``wheels.yml``. (:issue:`61718`) -- + .. --------------------------------------------------------------------------- .. _whatsnew_300.notable_bug_fixes: diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index b9044f5cd1e86..dce1ee750d51e 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -2775,7 +2775,6 @@ def insert_level(self, position: int, value, name=None) -> MultiIndex: new_names.insert(position, name) - return MultiIndex.from_tuples(new_tuples, names=new_names) def _reorder_ilevels(self, order) -> MultiIndex: diff --git a/pandas/tests/frame/test_query_eval.py b/pandas/tests/frame/test_query_eval.py index b599be5d042fe..b31e8529b238b 100644 --- a/pandas/tests/frame/test_query_eval.py +++ b/pandas/tests/frame/test_query_eval.py @@ -160,21 +160,13 @@ def test_query_empty_string(self): df.query("") def test_query_duplicate_column_name(self, engine, parser): - df = DataFrame( - { - "A": range(3), - "B": range(3), - "C": range(3) - } - ).rename(columns={"B": "A"}) + df = DataFrame({"A": range(3), "B": range(3), "C": range(3)}).rename( + columns={"B": "A"} + ) res = df.query("C == 1", engine=engine, parser=parser) - expect = DataFrame( - [[1, 1, 1]], - columns=["A", "A", "C"], - index=[1] - ) + expect = DataFrame([[1, 1, 1]], columns=["A", "A", "C"], index=[1]) tm.assert_frame_equal(res, expect) @@ -1140,9 +1132,7 @@ def test_query_with_nested_special_character(self, parser, engine): [">=", operator.ge], ], ) - def test_query_lex_compare_strings( - self, parser, engine, op, func - ): + def test_query_lex_compare_strings(self, parser, engine, op, func): a = Series(np.random.default_rng(2).choice(list("abcde"), 20)) b = Series(np.arange(a.size)) df = DataFrame({"X": a, "Y": b}) diff --git a/pandas/tests/indexes/multi/test_insert_level.py b/pandas/tests/indexes/multi/test_insert_level.py index 9b9a270aaeebb..ec4c47763aa1a 100644 --- a/pandas/tests/indexes/multi/test_insert_level.py +++ b/pandas/tests/indexes/multi/test_insert_level.py @@ -5,87 +5,119 @@ class TestMultiIndexInsertLevel: - def setup_method(self): - self.simple_idx = pd.MultiIndex.from_tuples( + @pytest.mark.parametrize( + "position, value, name, expected_tuples, expected_names", + [ + ( + 0, + "new_value", + None, + [("new_value", "A", 1), ("new_value", "B", 2), ("new_value", "C", 3)], + [None, "level1", "level2"], + ), + ( + 1, + "middle", + None, + [("A", "middle", 1), ("B", "middle", 2), ("C", "middle", 3)], + ["level1", None, "level2"], + ), + ( + 0, + "new_val", + "new_level", + [("new_val", "A", 1), ("new_val", "B", 2), ("new_val", "C", 3)], + ["new_level", "level1", "level2"], + ), + ( + 1, + "middle", + "custom_name", + [("A", "middle", 1), ("B", "middle", 2), ("C", "middle", 3)], + ["level1", "custom_name", "level2"], + ), + ], + ) + def test_insert_level_basic( + self, position, value, name, expected_tuples, expected_names + ): + simple_idx = pd.MultiIndex.from_tuples( [("A", 1), ("B", 2), ("C", 3)], names=["level1", "level2"] ) - self.empty_idx = pd.MultiIndex.from_tuples([], names=["level1", "level2"]) - def test_insert_level_basic(self): - result = self.simple_idx.insert_level(0, "new_value") - expected = pd.MultiIndex.from_tuples( - [("new_value", "A", 1), ("new_value", "B", 2), ("new_value", "C", 3)], - names=[None, "level1", "level2"], - ) + result = simple_idx.insert_level(position, value, name=name) + expected = pd.MultiIndex.from_tuples(expected_tuples, names=expected_names) tm.assert_index_equal(result, expected) - result = self.simple_idx.insert_level(1, "middle") - expected = pd.MultiIndex.from_tuples( - [("A", "middle", 1), ("B", "middle", 2), ("C", "middle", 3)], - names=["level1", None, "level2"], + @pytest.mark.parametrize( + "position, value", + [ + (0, "start"), + (2, "end"), + ], + ) + def test_insert_level_edge_positions(self, position, value): + simple_idx = pd.MultiIndex.from_tuples( + [("A", 1), ("B", 2), ("C", 3)], names=["level1", "level2"] ) - tm.assert_index_equal(result, expected) - def test_insert_level_with_different_values(self): - new_values = ["X", "Y", "Z"] - result = self.simple_idx.insert_level(1, new_values) - expected = pd.MultiIndex.from_tuples( - [("A", "X", 1), ("B", "Y", 2), ("C", "Z", 3)], - names=["level1", None, "level2"], + result = simple_idx.insert_level(position, value) + assert result.nlevels == 3 + + @pytest.mark.parametrize( + "position, value, expected_error", + [ + (5, "invalid", "position must be between"), + (-1, "invalid", "position must be between"), + (1, ["too", "few"], "Length of values must match"), + ], + ) + def test_insert_level_error_cases(self, position, value, expected_error): + simple_idx = pd.MultiIndex.from_tuples( + [("A", 1), ("B", 2), ("C", 3)], names=["level1", "level2"] ) - tm.assert_index_equal(result, expected) - - def test_insert_level_with_name(self): - result = self.simple_idx.insert_level(0, "new_val", name="new_level") - assert result.names[0] == "new_level" - - def test_insert_level_edge_positions(self): - result_start = self.simple_idx.insert_level(0, "start") - assert result_start.nlevels == 3 - - result_end = self.simple_idx.insert_level(2, "end") - assert result_end.nlevels == 3 - - def test_insert_level_error_cases(self): - with pytest.raises(ValueError, match="position must be between"): - self.simple_idx.insert_level(5, "invalid") - with pytest.raises(ValueError, match="position must be between"): - self.simple_idx.insert_level(-1, "invalid") + with pytest.raises(ValueError, match=expected_error): + simple_idx.insert_level(position, value) - with pytest.raises(ValueError, match="Length of values must match"): - self.simple_idx.insert_level(1, ["too", "few"]) - - def test_insert_level_with_different_data_types(self): - result_int = self.simple_idx.insert_level(1, 100) - - result_float = self.simple_idx.insert_level(1, 1.5) - - result_none = self.simple_idx.insert_level(1, None) + @pytest.mark.parametrize( + "value", + [100, 1.5, None], + ) + def test_insert_level_with_different_data_types(self, value): + simple_idx = pd.MultiIndex.from_tuples( + [("A", 1), ("B", 2), ("C", 3)], names=["level1", "level2"] + ) - assert result_int.nlevels == 3 - assert result_float.nlevels == 3 - assert result_none.nlevels == 3 + result = simple_idx.insert_level(1, value) + assert result.nlevels == 3 def test_insert_level_preserves_original(self): - original = self.simple_idx.copy() - result = self.simple_idx.insert_level(1, "temp") + simple_idx = pd.MultiIndex.from_tuples( + [("A", 1), ("B", 2), ("C", 3)], names=["level1", "level2"] + ) - tm.assert_index_equal(original, self.simple_idx) + original = simple_idx.copy() + simple_idx.insert_level(1, "temp") - assert result.nlevels == original.nlevels + 1 + tm.assert_index_equal(original, simple_idx) - def test_debug_names(): - idx = pd.MultiIndex.from_tuples( - [("A", 1), ("B", 2), ("C", 3)], names=["level1", "level2"] - ) - print("Original names:", idx.names) + def test_insert_level_empty_index(self): + empty_idx = pd.MultiIndex.from_tuples([], names=["level1", "level2"]) - result = idx.insert_level(0, "new_value") - print("Result names:", result.names) + result = empty_idx.insert_level(0, []) + assert result.nlevels == 3 + assert len(result) == 0 - expected = pd.MultiIndex.from_tuples( - [("new_value", "A", 1), ("new_value", "B", 2), ("new_value", "C", 3)], - names=[None, "level1", "level2"], - ) - print("Expected names:", expected.names) + def test_insert_level_with_different_values(self): + simple_idx = pd.MultiIndex.from_tuples( + [("A", 1), ("B", 2), ("C", 3)], names=["level1", "level2"] + ) + + new_values = ["X", "Y", "Z"] + result = simple_idx.insert_level(1, new_values) + expected = pd.MultiIndex.from_tuples( + [("A", "X", 1), ("B", "Y", 2), ("C", "Z", 3)], + names=["level1", None, "level2"], + ) + tm.assert_index_equal(result, expected) From 7bf306766a8e5b83d396bd1df9cbf33a35e89d2f Mon Sep 17 00:00:00 2001 From: cloudboat <15851404+cloudboat111@user.noreply.gitee.com> Date: Wed, 15 Oct 2025 10:58:15 +0800 Subject: [PATCH 09/18] TEST: Comprehensive consolidation of all test cases into parametrized test --- .../tests/indexes/multi/test_insert_level.py | 100 +++++++++++------- 1 file changed, 63 insertions(+), 37 deletions(-) diff --git a/pandas/tests/indexes/multi/test_insert_level.py b/pandas/tests/indexes/multi/test_insert_level.py index ec4c47763aa1a..f03b85f917698 100644 --- a/pandas/tests/indexes/multi/test_insert_level.py +++ b/pandas/tests/indexes/multi/test_insert_level.py @@ -36,6 +36,62 @@ class TestMultiIndexInsertLevel: [("A", "middle", 1), ("B", "middle", 2), ("C", "middle", 3)], ["level1", "custom_name", "level2"], ), + ( + 0, + "start", + None, + [("start", "A", 1), ("start", "B", 2), ("start", "C", 3)], + [None, "level1", "level2"], + ), + ( + 2, + "end", + None, + [("A", 1, "end"), ("B", 2, "end"), ("C", 3, "end")], + ["level1", "level2", None], + ), + ( + 1, + 100, + None, + [("A", 100, 1), ("B", 100, 2), ("C", 100, 3)], + ["level1", None, "level2"], + ), + ( + 1, + 1.5, + None, + [("A", 1.5, 1), ("B", 1.5, 2), ("C", 1.5, 3)], + ["level1", None, "level2"], + ), + ( + 1, + None, + None, + [("A", None, 1), ("B", None, 2), ("C", None, 3)], + ["level1", None, "level2"], + ), + ( + 1, + ["X", "Y", "Z"], + None, + [("A", "X", 1), ("B", "Y", 2), ("C", "Z", 3)], + ["level1", None, "level2"], + ), + ( + 0, + "", + "empty_string", + [("", "A", 1), ("", "B", 2), ("", "C", 3)], + ["empty_string", "level1", "level2"], + ), + ( + 1, + True, + None, + [("A", True, 1), ("B", True, 2), ("C", True, 3)], + ["level1", None, "level2"], + ), ], ) def test_insert_level_basic( @@ -49,27 +105,13 @@ def test_insert_level_basic( expected = pd.MultiIndex.from_tuples(expected_tuples, names=expected_names) tm.assert_index_equal(result, expected) - @pytest.mark.parametrize( - "position, value", - [ - (0, "start"), - (2, "end"), - ], - ) - def test_insert_level_edge_positions(self, position, value): - simple_idx = pd.MultiIndex.from_tuples( - [("A", 1), ("B", 2), ("C", 3)], names=["level1", "level2"] - ) - - result = simple_idx.insert_level(position, value) - assert result.nlevels == 3 - @pytest.mark.parametrize( "position, value, expected_error", [ (5, "invalid", "position must be between"), (-1, "invalid", "position must be between"), (1, ["too", "few"], "Length of values must match"), + (3, "value", "position must be between"), ], ) def test_insert_level_error_cases(self, position, value, expected_error): @@ -80,18 +122,6 @@ def test_insert_level_error_cases(self, position, value, expected_error): with pytest.raises(ValueError, match=expected_error): simple_idx.insert_level(position, value) - @pytest.mark.parametrize( - "value", - [100, 1.5, None], - ) - def test_insert_level_with_different_data_types(self, value): - simple_idx = pd.MultiIndex.from_tuples( - [("A", 1), ("B", 2), ("C", 3)], names=["level1", "level2"] - ) - - result = simple_idx.insert_level(1, value) - assert result.nlevels == 3 - def test_insert_level_preserves_original(self): simple_idx = pd.MultiIndex.from_tuples( [("A", 1), ("B", 2), ("C", 3)], names=["level1", "level2"] @@ -106,18 +136,14 @@ def test_insert_level_empty_index(self): empty_idx = pd.MultiIndex.from_tuples([], names=["level1", "level2"]) result = empty_idx.insert_level(0, []) - assert result.nlevels == 3 - assert len(result) == 0 + expected = pd.MultiIndex.from_tuples([], names=[None, "level1", "level2"]) + tm.assert_index_equal(result, expected) - def test_insert_level_with_different_values(self): - simple_idx = pd.MultiIndex.from_tuples( - [("A", 1), ("B", 2), ("C", 3)], names=["level1", "level2"] - ) + def test_insert_level_single_element(self): + single_idx = pd.MultiIndex.from_tuples([("A", 1)], names=["level1", "level2"]) - new_values = ["X", "Y", "Z"] - result = simple_idx.insert_level(1, new_values) + result = single_idx.insert_level(1, "middle") expected = pd.MultiIndex.from_tuples( - [("A", "X", 1), ("B", "Y", 2), ("C", "Z", 3)], - names=["level1", None, "level2"], + [("A", "middle", 1)], names=["level1", None, "level2"] ) tm.assert_index_equal(result, expected) From c4ecf7a6294446cf89de498ed7230477eada45fc Mon Sep 17 00:00:00 2001 From: cloudboat <15851404+cloudboat111@user.noreply.gitee.com> Date: Thu, 16 Oct 2025 00:15:37 +0800 Subject: [PATCH 10/18] FIX: Remove accidental binary file and update whatsnew --- doc/source/whatsnew/v3.0.0.rst | 2 +- fastparquet | Bin 1604 -> 0 bytes 2 files changed, 1 insertion(+), 1 deletion(-) delete mode 100644 fastparquet diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 6494948d28c18..df52706a1c4e3 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -229,7 +229,7 @@ Other enhancements - Support reading Stata 102-format (Stata 1) dta files (:issue:`58978`) - Support reading Stata 110-format (Stata 7) dta files (:issue:`47176`) - Switched wheel upload to **PyPI Trusted Publishing** (OIDC) for release-tag pushes in ``wheels.yml``. (:issue:`61718`) - +- .. --------------------------------------------------------------------------- .. _whatsnew_300.notable_bug_fixes: diff --git a/fastparquet b/fastparquet deleted file mode 100644 index 90e1311582647a65c92628f6d4c82eceb7c8141d..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 1604 zcmb7F%Z}nk6fGXHSY$DZVgec#$eN%;p*zMvXBf?z-TZp1QsTS9n}u(=5-v=(7xqK11mF z7YO~9NTwM*_3PF6=Rbab&LZ}6wonHA89YoOmr0>`dv3tKtv$ z+!B0XalYc323Ia}uHqQra3kA>RS+tE2uq&YD-X;nH4S1`9)5&H%y&!4jRWl|%bjJ| z`t7MNmUSEMJd%!!TuWQ&v4Y6)c1IuxeIm;-LPb|X1!C_x!-ZJOHOa`_=N`a&Nc{+`66{~K=T`1L;Rqg@hKE>nlTj^EYQn|mF zZ^;nM7RGo44;kNMT*FNhw=p(x%cQ<$^90`z=bIybg#jjO7?g$LBE}m6z)VOkMS_t) z++V`f{7!W2NbXE(Q(?GPyOU6L2i9AwsT#YUuSoT!xLBE@+ncM7Zj08&6rK89mEMt< zNSC%jzlANWI-YCp2z+>RSg*B~cp+X60z&Dl@C>FEo!~Q}5$IASnhJ4ld3wwl z&7_J=VpVq%s4bFD_GD?hoC;6m?X*FAdq1s?>sp6s!j>-8J>Xx;*CNRc8Y>%P(%_A0 zlk$r0kk>4Vh4OSfhyDEQ8nku;OP1;jRTz7!qfxHOWZdcBnAY7#y0sm|w?nAi2Tj-- z#rYgXngnvYaXg2(d}Gm?JfRip%MTYLiol(CRf# zHOjW;4n0+ c^UmmXEgr68F}|qzLIHkD|NaD_EBOET4 Date: Thu, 16 Oct 2025 00:28:31 +0800 Subject: [PATCH 11/18] FIX: Revert accidental changes to test_query_eval.py --- pandas/tests/frame/test_query_eval.py | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/pandas/tests/frame/test_query_eval.py b/pandas/tests/frame/test_query_eval.py index b31e8529b238b..b599be5d042fe 100644 --- a/pandas/tests/frame/test_query_eval.py +++ b/pandas/tests/frame/test_query_eval.py @@ -160,13 +160,21 @@ def test_query_empty_string(self): df.query("") def test_query_duplicate_column_name(self, engine, parser): - df = DataFrame({"A": range(3), "B": range(3), "C": range(3)}).rename( - columns={"B": "A"} - ) + df = DataFrame( + { + "A": range(3), + "B": range(3), + "C": range(3) + } + ).rename(columns={"B": "A"}) res = df.query("C == 1", engine=engine, parser=parser) - expect = DataFrame([[1, 1, 1]], columns=["A", "A", "C"], index=[1]) + expect = DataFrame( + [[1, 1, 1]], + columns=["A", "A", "C"], + index=[1] + ) tm.assert_frame_equal(res, expect) @@ -1132,7 +1140,9 @@ def test_query_with_nested_special_character(self, parser, engine): [">=", operator.ge], ], ) - def test_query_lex_compare_strings(self, parser, engine, op, func): + def test_query_lex_compare_strings( + self, parser, engine, op, func + ): a = Series(np.random.default_rng(2).choice(list("abcde"), 20)) b = Series(np.arange(a.size)) df = DataFrame({"X": a, "Y": b}) From 79e04b6769bef34b514b76dd66b4524a0d3be776 Mon Sep 17 00:00:00 2001 From: cloudboat <15851404+cloudboat111@user.noreply.gitee.com> Date: Mon, 20 Oct 2025 00:45:45 +0800 Subject: [PATCH 12/18] PERF: Optimize MultiIndex.insert_level to avoid unnecessary type conversions & REF: Remove unnecessary else branch in MultiIndex.insert_level & REF: Simplify names handling in MultiIndex.insert_level --- pandas/core/indexes/multi.py | 57 ++++++----------------------------- pyarrow | Bin 1604 -> 0 bytes 2 files changed, 9 insertions(+), 48 deletions(-) delete mode 100644 pyarrow diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index f25c81168826f..cdfeb7c9ad796 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -2710,38 +2710,7 @@ def reorder_levels(self, order) -> MultiIndex: result = self._reorder_ilevels(order) return result - def insert_level(self, position: int, value, name=None) -> MultiIndex: - """ - Insert a new level at the specified position and return a new MultiIndex. - - Parameters - ---------- - position : int - The integer position where the new level should be inserted. - Must be between 0 and ``self.nlevels`` (inclusive). - value : scalar or sequence - Values for the inserted level. If a scalar is provided, it is - broadcast to the length of the index. If a sequence is provided, - it must be the same length as the index. - name : Hashable, default None - Name of the inserted level. If not provided, the inserted level - name will be ``None``. - - Returns - ------- - MultiIndex - A new ``MultiIndex`` with the inserted level. - - Examples - -------- - >>> idx = pd.MultiIndex.from_tuples([("A", 1), ("B", 2)], names=["x", "y"]) - >>> idx.insert_level(0, "grp") - MultiIndex([('grp', 'A', 1), ('grp', 'B', 2)], - names=[None, 'x', 'y']) - >>> idx.insert_level(1, ["L1", "L2"], name="z") - MultiIndex([('A', 'L1', 1), ('B', 'L2', 2)], - names=['x', 'z', 'y']) - """ + def insert_level(self, position: int, value, name=None): if not isinstance(position, int): raise TypeError("position must be an integer") @@ -2755,25 +2724,17 @@ def insert_level(self, position: int, value, name=None) -> MultiIndex: if len(value) != len(self): raise ValueError("Length of values must match length of index") - tuples = list(self) - new_tuples = [] - for i, tup in enumerate(tuples): - if isinstance(tup, tuple): - new_tuple = list(tup) - new_tuple.insert(position, value[i]) - new_tuples.append(tuple(new_tuple)) + for i, tup in enumerate(self): + if position == 0: + new_tuple = (value[i],) + tup + elif position == len(tup): + new_tuple = tup + (value[i],) else: - new_tuple = [tup] - new_tuple.insert(position, value[i]) - new_tuples.append(tuple(new_tuple)) - - if self.names is not None: - new_names = list(self.names) - else: - new_names = [None] * self.nlevels + new_tuple = tup[:position] + (value[i],) + tup[position:] + new_tuples.append(new_tuple) - new_names.insert(position, name) + new_names = self.names[:position] + [name] + self.names[position:] return MultiIndex.from_tuples(new_tuples, names=new_names) diff --git a/pyarrow b/pyarrow deleted file mode 100644 index 90e1311582647a65c92628f6d4c82eceb7c8141d..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 1604 zcmb7F%Z}nk6fGXHSY$DZVgec#$eN%;p*zMvXBf?z-TZp1QsTS9n}u(=5-v=(7xqK11mF z7YO~9NTwM*_3PF6=Rbab&LZ}6wonHA89YoOmr0>`dv3tKtv$ z+!B0XalYc323Ia}uHqQra3kA>RS+tE2uq&YD-X;nH4S1`9)5&H%y&!4jRWl|%bjJ| z`t7MNmUSEMJd%!!TuWQ&v4Y6)c1IuxeIm;-LPb|X1!C_x!-ZJOHOa`_=N`a&Nc{+`66{~K=T`1L;Rqg@hKE>nlTj^EYQn|mF zZ^;nM7RGo44;kNMT*FNhw=p(x%cQ<$^90`z=bIybg#jjO7?g$LBE}m6z)VOkMS_t) z++V`f{7!W2NbXE(Q(?GPyOU6L2i9AwsT#YUuSoT!xLBE@+ncM7Zj08&6rK89mEMt< zNSC%jzlANWI-YCp2z+>RSg*B~cp+X60z&Dl@C>FEo!~Q}5$IASnhJ4ld3wwl z&7_J=VpVq%s4bFD_GD?hoC;6m?X*FAdq1s?>sp6s!j>-8J>Xx;*CNRc8Y>%P(%_A0 zlk$r0kk>4Vh4OSfhyDEQ8nku;OP1;jRTz7!qfxHOWZdcBnAY7#y0sm|w?nAi2Tj-- z#rYgXngnvYaXg2(d}Gm?JfRip%MTYLiol(CRf# zHOjW;4n0+ c^UmmXEgr68F}|qzLIHkD|NaD_EBOET4 Date: Mon, 20 Oct 2025 01:10:15 +0800 Subject: [PATCH 13/18] REF: Simplify tuple construction in MultiIndex.insert_level --- pandas/core/indexes/multi.py | 38 ++++++++++++++++++++++++++++++------ 1 file changed, 32 insertions(+), 6 deletions(-) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index cdfeb7c9ad796..8650f89541326 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -2711,6 +2711,36 @@ def reorder_levels(self, order) -> MultiIndex: return result def insert_level(self, position: int, value, name=None): + """ + Insert a new level at the specified position in the MultiIndex. + + Parameters + ---------- + position : int + The position at which to insert the new level (0-based). + value : scalar or array-like + Value(s) to use for the new level. If scalar, broadcast to all items. + If array-like, length must match the length of the index. + name : object, optional + Name for the new level. + + Returns + ------- + MultiIndex + New MultiIndex with the inserted level. + + Examples + -------- + >>> idx = pd.MultiIndex.from_tuples([("A", 1), ("B", 2)]) + >>> idx.insert_level(0, "new_value") + MultiIndex([('new_value', 'A', 1), ('new_value', 'B', 2)], ...) + + >>> idx.insert_level(1, ["X", "Y"]) + MultiIndex([('A', 'X', 1), ('B', 'Y', 2)], ...) + + >>> idx.insert_level(0, "new_val", name="new_level") + MultiIndex([('new_val', 'A', 1), ('new_val', 'B', 2)], ...) + """ if not isinstance(position, int): raise TypeError("position must be an integer") @@ -2725,13 +2755,9 @@ def insert_level(self, position: int, value, name=None): raise ValueError("Length of values must match length of index") new_tuples = [] + for i, tup in enumerate(self): - if position == 0: - new_tuple = (value[i],) + tup - elif position == len(tup): - new_tuple = tup + (value[i],) - else: - new_tuple = tup[:position] + (value[i],) + tup[position:] + new_tuple = tup[:position] + (value[i],) + tup[position:] new_tuples.append(new_tuple) new_names = self.names[:position] + [name] + self.names[position:] From eb320fbf4e8df2731a772e71d9c8e3b8b708a195 Mon Sep 17 00:00:00 2001 From: cloudboat <15851404+cloudboat111@user.noreply.gitee.com> Date: Wed, 22 Oct 2025 20:53:24 +0800 Subject: [PATCH 14/18] Add API reference documentation & Implement insert_level using levels/codes operations --- doc/source/reference/indexing.rst | 1 + pandas/core/indexes/multi.py | 45 +-- .../tests/indexes/multi/test_constructors.py | 11 - .../tests/indexes/multi/test_insert_level.py | 268 +++++++++--------- 4 files changed, 161 insertions(+), 164 deletions(-) diff --git a/doc/source/reference/indexing.rst b/doc/source/reference/indexing.rst index 93f88db0843dc..e9035a1a9a1e0 100644 --- a/doc/source/reference/indexing.rst +++ b/doc/source/reference/indexing.rst @@ -294,6 +294,7 @@ MultiIndex components MultiIndex.copy MultiIndex.append MultiIndex.truncate + MultiIndex.insert_level MultiIndex selecting ~~~~~~~~~~~~~~~~~~~~ diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 8650f89541326..7f0d2e31c93fd 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -2710,7 +2710,9 @@ def reorder_levels(self, order) -> MultiIndex: result = self._reorder_ilevels(order) return result - def insert_level(self, position: int, value, name=None): + def insert_level( + self, position: int, value, name: Hashable = lib.no_default + ) -> MultiIndex: """ Insert a new level at the specified position in the MultiIndex. @@ -2718,11 +2720,11 @@ def insert_level(self, position: int, value, name=None): ---------- position : int The position at which to insert the new level (0-based). - value : scalar or array-like - Value(s) to use for the new level. If scalar, broadcast to all items. - If array-like, length must match the length of the index. - name : object, optional - Name for the new level. + Must be between 0 and nlevels (inclusive). + value : array-like + Values to use for the new level. Length must match the length of the index. + name : Hashable, default lib.no_default + Name for the new level. If not provided, the new level will have no name. Returns ------- @@ -2732,14 +2734,11 @@ def insert_level(self, position: int, value, name=None): Examples -------- >>> idx = pd.MultiIndex.from_tuples([("A", 1), ("B", 2)]) - >>> idx.insert_level(0, "new_value") + >>> idx.insert_level(0, ["new_value", "new_value"]) MultiIndex([('new_value', 'A', 1), ('new_value', 'B', 2)], ...) >>> idx.insert_level(1, ["X", "Y"]) MultiIndex([('A', 'X', 1), ('B', 'Y', 2)], ...) - - >>> idx.insert_level(0, "new_val", name="new_level") - MultiIndex([('new_val', 'A', 1), ('new_val', 'B', 2)], ...) """ if not isinstance(position, int): raise TypeError("position must be an integer") @@ -2747,22 +2746,28 @@ def insert_level(self, position: int, value, name=None): if position < 0 or position > self.nlevels: raise ValueError(f"position must be between 0 and {self.nlevels}") + if name is lib.no_default: + name = None + if not hasattr(value, "__iter__") or isinstance(value, str): - value = [value] * len(self) - else: - value = list(value) - if len(value) != len(self): - raise ValueError("Length of values must match length of index") + raise TypeError("value must be an array-like object") - new_tuples = [] + value = list(value) + if len(value) != len(self): + raise ValueError("Length of values must match length of index") - for i, tup in enumerate(self): - new_tuple = tup[:position] + (value[i],) + tup[position:] - new_tuples.append(new_tuple) + new_level = Index(value) + new_codes_for_level = new_level.get_indexer(value) + new_levels = self.levels[:position] + [new_level] + self.levels[position:] + new_codes = ( + self.codes[:position] + [new_codes_for_level] + self.codes[position:] + ) new_names = self.names[:position] + [name] + self.names[position:] - return MultiIndex.from_tuples(new_tuples, names=new_names) + return MultiIndex( + levels=new_levels, codes=new_codes, names=new_names, verify_integrity=False + ) def _reorder_ilevels(self, order) -> MultiIndex: if len(order) != self.nlevels: diff --git a/pandas/tests/indexes/multi/test_constructors.py b/pandas/tests/indexes/multi/test_constructors.py index d72c499ee4b39..6b461fcf3920d 100644 --- a/pandas/tests/indexes/multi/test_constructors.py +++ b/pandas/tests/indexes/multi/test_constructors.py @@ -870,14 +870,3 @@ def test_dtype_representation(using_infer_string): dtype=object, ) tm.assert_series_equal(result, expected) - - -def test_insert_level_integration(): - idx = MultiIndex.from_tuples([("A", 1), ("B", 2)]) - - df = pd.DataFrame({"data": [10, 20]}, index=idx) - new_idx = idx.insert_level(0, "group1") - df_new = df.set_index(new_idx) - - assert df_new.index.nlevels == 3 - assert len(df_new) == 2 diff --git a/pandas/tests/indexes/multi/test_insert_level.py b/pandas/tests/indexes/multi/test_insert_level.py index f03b85f917698..b0faed8638210 100644 --- a/pandas/tests/indexes/multi/test_insert_level.py +++ b/pandas/tests/indexes/multi/test_insert_level.py @@ -4,146 +4,148 @@ import pandas._testing as tm -class TestMultiIndexInsertLevel: - @pytest.mark.parametrize( - "position, value, name, expected_tuples, expected_names", - [ - ( - 0, - "new_value", - None, - [("new_value", "A", 1), ("new_value", "B", 2), ("new_value", "C", 3)], - [None, "level1", "level2"], - ), - ( - 1, - "middle", - None, - [("A", "middle", 1), ("B", "middle", 2), ("C", "middle", 3)], - ["level1", None, "level2"], - ), - ( - 0, - "new_val", - "new_level", - [("new_val", "A", 1), ("new_val", "B", 2), ("new_val", "C", 3)], - ["new_level", "level1", "level2"], - ), - ( - 1, - "middle", - "custom_name", - [("A", "middle", 1), ("B", "middle", 2), ("C", "middle", 3)], - ["level1", "custom_name", "level2"], - ), - ( - 0, - "start", - None, - [("start", "A", 1), ("start", "B", 2), ("start", "C", 3)], - [None, "level1", "level2"], - ), - ( - 2, - "end", - None, - [("A", 1, "end"), ("B", 2, "end"), ("C", 3, "end")], - ["level1", "level2", None], - ), - ( - 1, - 100, - None, - [("A", 100, 1), ("B", 100, 2), ("C", 100, 3)], - ["level1", None, "level2"], - ), - ( - 1, - 1.5, - None, - [("A", 1.5, 1), ("B", 1.5, 2), ("C", 1.5, 3)], - ["level1", None, "level2"], - ), - ( - 1, - None, - None, - [("A", None, 1), ("B", None, 2), ("C", None, 3)], - ["level1", None, "level2"], - ), - ( - 1, - ["X", "Y", "Z"], - None, - [("A", "X", 1), ("B", "Y", 2), ("C", "Z", 3)], - ["level1", None, "level2"], - ), - ( - 0, - "", - "empty_string", - [("", "A", 1), ("", "B", 2), ("", "C", 3)], - ["empty_string", "level1", "level2"], - ), - ( - 1, - True, - None, - [("A", True, 1), ("B", True, 2), ("C", True, 3)], - ["level1", None, "level2"], - ), - ], +@pytest.mark.parametrize( + "position, value, name, expected_tuples, expected_names", + [ + ( + 0, + ["new_value"] * 3, + None, + [("new_value", "A", 1), ("new_value", "B", 2), ("new_value", "C", 3)], + [None, "level1", "level2"], + ), + ( + 1, + ["middle"] * 3, + None, + [("A", "middle", 1), ("B", "middle", 2), ("C", "middle", 3)], + ["level1", None, "level2"], + ), + ( + 0, + ["new_val"] * 3, + "new_level", + [("new_val", "A", 1), ("new_val", "B", 2), ("new_val", "C", 3)], + ["new_level", "level1", "level2"], + ), + ( + 1, + ["middle"] * 3, + "custom_name", + [("A", "middle", 1), ("B", "middle", 2), ("C", "middle", 3)], + ["level1", "custom_name", "level2"], + ), + ( + 0, + ["start"] * 3, + None, + [("start", "A", 1), ("start", "B", 2), ("start", "C", 3)], + [None, "level1", "level2"], + ), + ( + 2, + ["end"] * 3, + None, + [("A", 1, "end"), ("B", 2, "end"), ("C", 3, "end")], + ["level1", "level2", None], + ), + ( + 1, + [100, 100, 100], + None, + [("A", 100, 1), ("B", 100, 2), ("C", 100, 3)], + ["level1", None, "level2"], + ), + ( + 1, + [1.5, 1.5, 1.5], + None, + [("A", 1.5, 1), ("B", 1.5, 2), ("C", 1.5, 3)], + ["level1", None, "level2"], + ), + ( + 1, + [None, None, None], + None, + [("A", None, 1), ("B", None, 2), ("C", None, 3)], + ["level1", None, "level2"], + ), + ( + 1, + ["X", "Y", "Z"], + None, + [("A", "X", 1), ("B", "Y", 2), ("C", "Z", 3)], + ["level1", None, "level2"], + ), + ( + 0, + [""] * 3, + "empty_string", + [("", "A", 1), ("", "B", 2), ("", "C", 3)], + ["empty_string", "level1", "level2"], + ), + ( + 1, + [True, True, True], + None, + [("A", True, 1), ("B", True, 2), ("C", True, 3)], + ["level1", None, "level2"], + ), + ], +) +def test_insert_level_basic(position, value, name, expected_tuples, expected_names): + simple_idx = pd.MultiIndex.from_tuples( + [("A", 1), ("B", 2), ("C", 3)], names=["level1", "level2"] ) - def test_insert_level_basic( - self, position, value, name, expected_tuples, expected_names - ): - simple_idx = pd.MultiIndex.from_tuples( - [("A", 1), ("B", 2), ("C", 3)], names=["level1", "level2"] - ) - - result = simple_idx.insert_level(position, value, name=name) - expected = pd.MultiIndex.from_tuples(expected_tuples, names=expected_names) - tm.assert_index_equal(result, expected) - - @pytest.mark.parametrize( - "position, value, expected_error", - [ - (5, "invalid", "position must be between"), - (-1, "invalid", "position must be between"), - (1, ["too", "few"], "Length of values must match"), - (3, "value", "position must be between"), - ], + + result = simple_idx.insert_level(position, value, name=name) + expected = pd.MultiIndex.from_tuples(expected_tuples, names=expected_names) + tm.assert_index_equal(result, expected) + + +@pytest.mark.parametrize( + "position, value, expected_error", + [ + (5, ["invalid"] * 3, "position must be between"), + (-1, ["invalid"] * 3, "position must be between"), + (1, ["too", "few"], "Length of values must match"), + (3, ["value"] * 3, "position must be between"), + (0, "scalar_value", "value must be an array-like object"), + ], +) +def test_insert_level_error_cases(position, value, expected_error): + simple_idx = pd.MultiIndex.from_tuples( + [("A", 1), ("B", 2), ("C", 3)], names=["level1", "level2"] + ) + + with pytest.raises(ValueError, match=expected_error): + simple_idx.insert_level(position, value) + + +def test_insert_level_preserves_original(): + simple_idx = pd.MultiIndex.from_tuples( + [("A", 1), ("B", 2), ("C", 3)], names=["level1", "level2"] ) - def test_insert_level_error_cases(self, position, value, expected_error): - simple_idx = pd.MultiIndex.from_tuples( - [("A", 1), ("B", 2), ("C", 3)], names=["level1", "level2"] - ) - with pytest.raises(ValueError, match=expected_error): - simple_idx.insert_level(position, value) + original = simple_idx.copy() + simple_idx.insert_level(1, ["temp"] * 3) - def test_insert_level_preserves_original(self): - simple_idx = pd.MultiIndex.from_tuples( - [("A", 1), ("B", 2), ("C", 3)], names=["level1", "level2"] - ) + tm.assert_index_equal(original, simple_idx) - original = simple_idx.copy() - simple_idx.insert_level(1, "temp") - tm.assert_index_equal(original, simple_idx) +def test_insert_level_empty_index(): + empty_idx = pd.MultiIndex.from_tuples([], names=["level1", "level2"]) - def test_insert_level_empty_index(self): - empty_idx = pd.MultiIndex.from_tuples([], names=["level1", "level2"]) + result = empty_idx.insert_level(0, []) + expected = pd.MultiIndex.from_tuples([], names=[None, "level1", "level2"]) + tm.assert_index_equal(result, expected) - result = empty_idx.insert_level(0, []) - expected = pd.MultiIndex.from_tuples([], names=[None, "level1", "level2"]) - tm.assert_index_equal(result, expected) - def test_insert_level_single_element(self): - single_idx = pd.MultiIndex.from_tuples([("A", 1)], names=["level1", "level2"]) +def test_insert_level_single_element(): + single_idx = pd.MultiIndex.from_tuples([("A", 1)], names=["level1", "level2"]) - result = single_idx.insert_level(1, "middle") - expected = pd.MultiIndex.from_tuples( - [("A", "middle", 1)], names=["level1", None, "level2"] - ) - tm.assert_index_equal(result, expected) + result = single_idx.insert_level(1, ["middle"]) + expected = pd.MultiIndex.from_tuples( + [("A", "middle", 1)], names=["level1", None, "level2"] + ) + tm.assert_index_equal(result, expected) From 9f80ad12e3b6da9677bda19d5954afff8364849c Mon Sep 17 00:00:00 2001 From: cloudboat <15851404+cloudboat111@user.noreply.gitee.com> Date: Wed, 22 Oct 2025 23:51:32 +0800 Subject: [PATCH 15/18] all changes without levels/codes operations --- pandas/core/indexes/multi.py | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 7f0d2e31c93fd..5943c90b8d969 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -2750,24 +2750,21 @@ def insert_level( name = None if not hasattr(value, "__iter__") or isinstance(value, str): - raise TypeError("value must be an array-like object") + raise ValueError("value must be an array-like object") value = list(value) if len(value) != len(self): raise ValueError("Length of values must match length of index") - new_level = Index(value) - new_codes_for_level = new_level.get_indexer(value) + # 简洁可靠的实现 + new_tuples = [] + for i, tup in enumerate(self): + new_tuple = tup[:position] + (value[i],) + tup[position:] + new_tuples.append(new_tuple) - new_levels = self.levels[:position] + [new_level] + self.levels[position:] - new_codes = ( - self.codes[:position] + [new_codes_for_level] + self.codes[position:] - ) new_names = self.names[:position] + [name] + self.names[position:] - return MultiIndex( - levels=new_levels, codes=new_codes, names=new_names, verify_integrity=False - ) + return MultiIndex.from_tuples(new_tuples, names=new_names) def _reorder_ilevels(self, order) -> MultiIndex: if len(order) != self.nlevels: From a8d626a819f28628edd201ff811398f46e308e2e Mon Sep 17 00:00:00 2001 From: cloudboat <15851404+cloudboat111@user.noreply.gitee.com> Date: Thu, 23 Oct 2025 13:22:55 +0800 Subject: [PATCH 16/18] add see also --- pandas/core/indexes/multi.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 5943c90b8d969..f08efa8418625 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -2731,6 +2731,12 @@ def insert_level( MultiIndex New MultiIndex with the inserted level. + See Also + -------- + MultiIndex.droplevel : Remove levels from the MultiIndex. + MultiIndex.swaplevel : Swap two levels in the MultiIndex. + MultiIndex.reorder_levels : Reorder levels using specified order. + Examples -------- >>> idx = pd.MultiIndex.from_tuples([("A", 1), ("B", 2)]) From 10ecc1f099214a7762c33c8ee37d38b127582a0d Mon Sep 17 00:00:00 2001 From: cloudboat <15851404+cloudboat111@user.noreply.gitee.com> Date: Tue, 28 Oct 2025 20:30:06 +0800 Subject: [PATCH 17/18] Change from_tuple to factorize_from_iterable --- pandas/core/indexes/multi.py | 30 +++++++++++-------- .../tests/indexes/multi/test_insert_level.py | 12 ++++++-- 2 files changed, 28 insertions(+), 14 deletions(-) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index f08efa8418625..c3e458c662ed6 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -93,6 +93,7 @@ ExtensionArray, ) from pandas.core.arrays.categorical import ( + factorize_from_iterable, factorize_from_iterables, recode_for_categories, ) @@ -2755,22 +2756,27 @@ def insert_level( if name is lib.no_default: name = None - if not hasattr(value, "__iter__") or isinstance(value, str): - raise ValueError("value must be an array-like object") - - value = list(value) - if len(value) != len(self): - raise ValueError("Length of values must match length of index") + if not (is_list_like(value) and len(value) == len(self)): + raise ValueError( + "value must be an array-like object of the same length as self" + ) - # 简洁可靠的实现 - new_tuples = [] - for i, tup in enumerate(self): - new_tuple = tup[:position] + (value[i],) + tup[position:] - new_tuples.append(new_tuple) + if all(val is None for val in value): + new_level = Index([], dtype="object") + new_codes = [-1] * len(value) + else: + new_codes, new_level = factorize_from_iterable(value) + new_levels = self.levels[:position] + [new_level] + self.levels[position:] + new_codes_list = self.codes[:position] + [new_codes] + self.codes[position:] new_names = self.names[:position] + [name] + self.names[position:] - return MultiIndex.from_tuples(new_tuples, names=new_names) + return MultiIndex( + levels=new_levels, + codes=new_codes_list, + names=new_names, + verify_integrity=False, + ) def _reorder_ilevels(self, order) -> MultiIndex: if len(order) != self.nlevels: diff --git a/pandas/tests/indexes/multi/test_insert_level.py b/pandas/tests/indexes/multi/test_insert_level.py index b0faed8638210..7b231d1d87ad7 100644 --- a/pandas/tests/indexes/multi/test_insert_level.py +++ b/pandas/tests/indexes/multi/test_insert_level.py @@ -108,9 +108,17 @@ def test_insert_level_basic(position, value, name, expected_tuples, expected_nam [ (5, ["invalid"] * 3, "position must be between"), (-1, ["invalid"] * 3, "position must be between"), - (1, ["too", "few"], "Length of values must match"), + ( + 1, + ["too", "few"], + "value must be an array-like object of the same length as self", + ), (3, ["value"] * 3, "position must be between"), - (0, "scalar_value", "value must be an array-like object"), + ( + 0, + "scalar_value", + "value must be an array-like object of the same length as self", + ), ], ) def test_insert_level_error_cases(position, value, expected_error): From 4b2bb505e754d0b4fc7bb56200f6e275cb04cd81 Mon Sep 17 00:00:00 2001 From: cloudboat <15851404+cloudboat111@user.noreply.gitee.com> Date: Tue, 28 Oct 2025 21:17:58 +0800 Subject: [PATCH 18/18] for type annotation and mypy fixes --- pandas/core/indexes/multi.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index f2bbd8f8ab0af..451464fd8392c 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -2864,7 +2864,7 @@ def insert_level( if all(val is None for val in value): new_level = Index([], dtype="object") - new_codes = [-1] * len(value) + new_codes = np.full(len(value), -1, dtype=np.intp) else: new_codes, new_level = factorize_from_iterable(value)