From e21b8d54f822b80bcc4b07e373ecb187e1a34faa Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Sun, 2 Nov 2025 19:29:11 -0800 Subject: [PATCH 1/3] Make _merge_blocks lazy --- pandas/core/internals/managers.py | 31 +++++++++++-------------------- 1 file changed, 11 insertions(+), 20 deletions(-) diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 40ba74fed49d1..8d94606660639 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -1938,7 +1938,7 @@ def _consolidate_check(self) -> None: def _consolidate_inplace(self) -> None: if not self.is_consolidated(): - self.blocks = _consolidate(self.blocks) + self.blocks = tuple(_consolidate(self.blocks)) self._is_consolidated = True self._known_consolidated = True self._rebuild_blknos_and_blklocs() @@ -2430,30 +2430,21 @@ def _stack_arrays(tuples, dtype: np.dtype): return stacked, placement -def _consolidate(blocks: tuple[Block, ...]) -> tuple[Block, ...]: +def _consolidate(blocks: tuple[Block, ...]) -> Generator[Block]: """ Merge blocks having same dtype, exclude non-consolidating blocks """ # sort by _can_consolidate, dtype gkey = lambda x: x._consolidate_key grouper = itertools.groupby(sorted(blocks, key=gkey), gkey) - - new_blocks: list[Block] = [] - for (_can_consolidate, dtype), group_blocks in grouper: - merged_blocks, _ = _merge_blocks( - list(group_blocks), dtype=dtype, can_consolidate=_can_consolidate - ) - new_blocks = extend_blocks(merged_blocks, new_blocks) - return tuple(new_blocks) + for (_can_consolidate, _), group_blocks in grouper: + yield from _merge_blocks(tuple(group_blocks), can_consolidate=_can_consolidate) -def _merge_blocks( - blocks: list[Block], dtype: DtypeObj, can_consolidate: bool -) -> tuple[list[Block], bool]: +def _merge_blocks(blocks: tuple[Block], can_consolidate: bool) -> Generator[Block]: if len(blocks) == 1: - return blocks, False - - if can_consolidate: + yield from blocks + elif can_consolidate: # TODO: optimization potential in case all mgrs contain slices and # combination of those slices is a slice, too. new_mgr_locs = np.concatenate([b.mgr_locs.as_array for b in blocks]) @@ -2476,10 +2467,10 @@ def _merge_blocks( new_mgr_locs = new_mgr_locs[argsort] bp = BlockPlacement(new_mgr_locs) - return [new_block_2d(new_values, placement=bp)], True - - # can't consolidate --> no merge - return blocks, False + yield new_block_2d(new_values, placement=bp) + else: + # can't consolidate --> no merge + yield from blocks def _preprocess_slice_or_indexer( From beb6649913e44bb9185d33d60b9cfaf535f98a3c Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Mon, 3 Nov 2025 12:08:03 -0800 Subject: [PATCH 2/3] Fix typing checks --- pandas/core/internals/managers.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 8d94606660639..5cfc3d4782a95 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -2441,7 +2441,7 @@ def _consolidate(blocks: tuple[Block, ...]) -> Generator[Block]: yield from _merge_blocks(tuple(group_blocks), can_consolidate=_can_consolidate) -def _merge_blocks(blocks: tuple[Block], can_consolidate: bool) -> Generator[Block]: +def _merge_blocks(blocks: tuple[Block, ...], can_consolidate: bool) -> Generator[Block]: if len(blocks) == 1: yield from blocks elif can_consolidate: @@ -2452,11 +2452,7 @@ def _merge_blocks(blocks: tuple[Block], can_consolidate: bool) -> Generator[Bloc new_values: ArrayLike if isinstance(blocks[0].dtype, np.dtype): - # error: List comprehension has incompatible type List[Union[ndarray, - # ExtensionArray]]; expected List[Union[complex, generic, - # Sequence[Union[int, float, complex, str, bytes, generic]], - # Sequence[Sequence[Any]], SupportsArray]] - new_values = np.vstack([b.values for b in blocks]) # type: ignore[misc] + new_values = np.vstack([b.values for b in blocks]) else: bvals = [blk.values for blk in blocks] bvals2 = cast(Sequence[NDArrayBackedExtensionArray], bvals) From 96e5d3f1f24e9365c62162037a8ee74539b3f8d6 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Mon, 3 Nov 2025 13:12:46 -0800 Subject: [PATCH 3/3] Add back type ignore --- pandas/core/internals/managers.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 5cfc3d4782a95..a347478413ec7 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -2452,7 +2452,11 @@ def _merge_blocks(blocks: tuple[Block, ...], can_consolidate: bool) -> Generator new_values: ArrayLike if isinstance(blocks[0].dtype, np.dtype): - new_values = np.vstack([b.values for b in blocks]) + # error: List comprehension has incompatible type List[Union[ndarray, + # ExtensionArray]]; expected List[Union[complex, generic, + # Sequence[Union[int, float, complex, str, bytes, generic]], + # Sequence[Sequence[Any]], SupportsArray]] + new_values = np.vstack([b.values for b in blocks]) # type: ignore[misc] else: bvals = [blk.values for blk in blocks] bvals2 = cast(Sequence[NDArrayBackedExtensionArray], bvals)