Address reviewer feedback on autofilter PR

antznette1 · antznette1 · commit 59f2bc46350d · 2025-11-02T02:37:57.000+01:00
- Remove duplicate to_excel function code in generic.py
- Raise NotImplementedError in the 'odf' (odfpy) engine when autofilter=True
- Remove broad exception handling from autofilter implementations
- Add comprehensive tests for nonzero startrow/startcol
- Add tests for MultiIndex columns with merge_cells=True and False
- Improve tests to verify each column has autofilter
- Remove redundant test_to_excel test
- Remove redundant pytest.importorskip from test functions
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
@@ -2313,149 +2313,6 @@ def to_excel(
         if not isinstance(excel_writer, ExcelWriter):
             # we need to close the writer if we created it
             excel_writer.close()
-    ) -> None:
-        """
-        Write {klass} to an Excel sheet.
-
-        To write a single {klass} to an Excel .xlsx file it is only necessary to
-        specify a target file name. To write to multiple sheets it is necessary to
-        create an `ExcelWriter` object with a target file name, and specify a sheet
-        in the file to write to.
-
-        Multiple sheets may be written to by specifying unique `sheet_name`.
-        With all data written to the file it is necessary to save the changes.
-        Note that creating an `ExcelWriter` object with a file name that already
-        exists will result in the contents of the existing file being erased.
-
-        Parameters
-        ----------
-        excel_writer : path-like, file-like, or ExcelWriter object
-            File path or existing ExcelWriter.
-        sheet_name : str, default 'Sheet1'
-            Name of sheet which will contain DataFrame.
-        na_rep : str, default ''
-            Missing data representation.
-        float_format : str, optional
-            Format string for floating point numbers. For example
-            ``float_format="%.2f"`` will format 0.1234 to 0.12.
-        columns : sequence or list of str, optional
-            Columns to write.
-        header : bool or list of str, default True
-            Write out the column names. If a list of string is given it is
-            assumed to be aliases for the column names.
-        index : bool, default True
-            Write row names (index).
-        index_label : str or sequence, optional
-            Column label for index column(s) if desired. If not specified, and
-            `header` and `index` are True, then the index names are used. A
-            sequence should be given if the DataFrame uses MultiIndex.
-        startrow : int, default 0
-            Upper left cell row to dump data frame.
-        startcol : int, default 0
-            Upper left cell column to dump data frame.
-        engine : str, optional
-            Write engine to use, 'openpyxl' or 'xlsxwriter'. You can also set this
-            via the options ``io.excel.xlsx.writer`` or
-            ``io.excel.xlsm.writer``.
-
-        merge_cells : bool or 'columns', default False
-            If True, write MultiIndex index and columns as merged cells.
-            If 'columns', merge MultiIndex column cells only.
-        {encoding_parameter}
-        inf_rep : str, default 'inf'
-            Representation for infinity (there is no native representation for
-            infinity in Excel).
-        {verbose_parameter}
-        freeze_panes : tuple of int (length 2), optional
-            Specifies the one-based bottommost row and rightmost column that
-            is to be frozen.
-        {storage_options}
-
-            .. versionadded:: {storage_options_versionadded}
-        {extra_parameters}
-        See Also
-        --------
-        to_csv : Write DataFrame to a comma-separated values (csv) file.
-        ExcelWriter : Class for writing DataFrame objects into excel sheets.
-        read_excel : Read an Excel file into a pandas DataFrame.
-        read_csv : Read a comma-separated values (csv) file into DataFrame.
-        io.formats.style.Styler.to_excel : Add styles to Excel sheet.
-
-        Notes
-        -----
-        For compatibility with :meth:`~DataFrame.to_csv`,
-        to_excel serializes lists and dicts to strings before writing.
-
-        Once a workbook has been saved it is not possible to write further
-        data without rewriting the whole workbook.
-
-        pandas will check the number of rows, columns,
-        and cell character count does not exceed Excel's limitations.
-        All other limitations must be checked by the user.
-
-        Examples
-        --------
-
-        Create, write to and save a workbook:
-
-        >>> df1 = pd.DataFrame(
-        ...     [["a", "b"], ["c", "d"]],
-        ...     index=["row 1", "row 2"],
-        ...     columns=["col 1", "col 2"],
-        ... )
-        >>> df1.to_excel("output.xlsx")  # doctest: +SKIP
-
-        To specify the sheet name:
-
-        >>> df1.to_excel("output.xlsx", sheet_name="Sheet_name_1")  # doctest: +SKIP
-
-        If you wish to write to more than one sheet in the workbook, it is
-        necessary to specify an ExcelWriter object:
-
-        >>> df2 = df1.copy()
-        >>> with pd.ExcelWriter("output.xlsx") as writer:  # doctest: +SKIP
-        ...     df1.to_excel(writer, sheet_name="Sheet_name_1")
-        ...     df2.to_excel(writer, sheet_name="Sheet_name_2")
-
-        ExcelWriter can also be used to append to an existing Excel file:
-
-        >>> with pd.ExcelWriter("output.xlsx", mode="a") as writer:  # doctest: +SKIP
-        ...     df1.to_excel(writer, sheet_name="Sheet_name_3")
-
-        To set the library that is used to write the Excel file,
-        you can pass the `engine` keyword (the default engine is
-        automatically chosen depending on the file extension):
-
-        >>> df1.to_excel("output1.xlsx", engine="xlsxwriter")  # doctest: +SKIP
-        """
-        if engine_kwargs is None:
-            engine_kwargs = {}
-
-        df = self if isinstance(self, ABCDataFrame) else self.to_frame()
-
-        from pandas.io.formats.excel import ExcelFormatter
-
-        formatter = ExcelFormatter(
-            df,
-            na_rep=na_rep,
-            cols=columns,
-            header=header,
-            float_format=float_format,
-            index=index,
-            index_label=index_label,
-            merge_cells=merge_cells,
-            inf_rep=inf_rep,
-        )
-        formatter.write(
-            excel_writer,
-            sheet_name=sheet_name,
-            startrow=startrow,
-            startcol=startcol,
-            freeze_panes=freeze_panes,
-            engine=engine,
-            storage_options=storage_options,
-            engine_kwargs=engine_kwargs,
-        )
 
     @final
     @doc(
diff --git a/pandas/io/excel/_odswriter.py b/pandas/io/excel/_odswriter.py
@@ -99,10 +99,17 @@ def _write_cells(
         startrow: int = 0,
         startcol: int = 0,
         freeze_panes: tuple[int, int] | None = None,
+        autofilter: bool = False,
     ) -> None:
         """
         Write the frame cells using odf
         """
+        if autofilter:
+            raise NotImplementedError(
+                "Autofilter is not supported with the 'odf' engine. "
+                "Please use 'openpyxl' or 'xlsxwriter' engine instead."
+            )
+
         from odf.table import (
             Table,
             TableCell,
diff --git a/pandas/io/excel/_openpyxl.py b/pandas/io/excel/_openpyxl.py
@@ -552,15 +552,12 @@ def _write_cells(
                                 setattr(xcell, k, v)
 
         if autofilter and min_r is not None and min_c is not None and max_r is not None and max_c is not None:
-            try:
-                # Convert numeric bounds to Excel-style range e.g. A1:D10
-                from openpyxl.utils import get_column_letter
-
-                start_ref = f"{get_column_letter(min_c)}{min_r}"
-                end_ref = f"{get_column_letter(max_c)}{max_r}"
-                wks.auto_filter.ref = f"{start_ref}:{end_ref}"
-            except Exception:
-                pass
+            # Convert numeric bounds to Excel-style range e.g. A1:D10
+            from openpyxl.utils import get_column_letter
+
+            start_ref = f"{get_column_letter(min_c)}{min_r}"
+            end_ref = f"{get_column_letter(max_c)}{max_r}"
+            wks.auto_filter.ref = f"{start_ref}:{end_ref}"
 
 
 class OpenpyxlReader(BaseExcelReader["Workbook"]):
diff --git a/pandas/io/excel/_xlsxwriter.py b/pandas/io/excel/_xlsxwriter.py
@@ -304,8 +304,4 @@ def _write_cells(
 
         if autofilter and min_r is not None and min_c is not None and max_r is not None and max_c is not None:
             # Apply autofilter over the used range. xlsxwriter uses 0-based indices.
-            try:
-                wks.autofilter(min_r, min_c, max_r, max_c)
-            except Exception:
-                # Be resilient if engine version doesn't support or range invalid
-                pass
+            wks.autofilter(min_r, min_c, max_r, max_c)
diff --git a/pandas/tests/io/excel/test_odswriter.py b/pandas/tests/io/excel/test_odswriter.py
@@ -104,3 +104,13 @@ def test_cell_value_type(
         cell = sheet_cells[0]
         assert cell.attributes.get((OFFICENS, "value-type")) == cell_value_type
         assert cell.attributes.get((OFFICENS, cell_value_attribute)) == cell_value
+
+
+def test_to_excel_autofilter_odfpy_raises(tmp_excel):
+    # Test that autofilter=True raises NotImplementedError with the 'odf' engine
+    from pandas import DataFrame
+
+    df = DataFrame({"A": [1, 2], "B": [3, 4]})
+    msg = "Autofilter is not supported with the 'odf' engine"
+    with pytest.raises(NotImplementedError, match=msg):
+        df.to_excel(tmp_excel, engine="odf", autofilter=True)
diff --git a/pandas/tests/io/excel/test_openpyxl.py b/pandas/tests/io/excel/test_openpyxl.py
@@ -165,6 +165,74 @@ def test_to_excel_autofilter_openpyxl(tmp_excel):
         # Expect filter over the full range, e.g. A1:B3 (header + 2 rows)
         assert ws.auto_filter is not None
         assert ws.auto_filter.ref is not None
+        # Verify filter covers all columns (A and B)
+        assert "A" in ws.auto_filter.ref
+        assert "B" in ws.auto_filter.ref
+
+
+def test_to_excel_autofilter_startrow_startcol_openpyxl(tmp_excel):
+    # Test autofilter with nonzero startrow and startcol
+    df = DataFrame({"A": [1, 2], "B": [3, 4]})
+    df.to_excel(
+        tmp_excel,
+        engine="openpyxl",
+        index=False,
+        autofilter=True,
+        startrow=2,
+        startcol=1,
+    )
+
+    with contextlib.closing(openpyxl.load_workbook(tmp_excel)) as wb:
+        ws = wb[wb.sheetnames[0]]
+        assert ws.auto_filter is not None
+        assert ws.auto_filter.ref is not None
+        # Filter should be offset by startrow=2 and startcol=1 (B3:C5)
+        assert ws.auto_filter.ref.startswith("B")
+        assert "3" in ws.auto_filter.ref
+
+
+def test_to_excel_autofilter_multiindex_merge_cells_openpyxl(tmp_excel):
+    # Test autofilter with MultiIndex columns and merge_cells=True
+    df = DataFrame(
+        [[1, 2, 3, 4], [5, 6, 7, 8]],
+        columns=pd.MultiIndex.from_tuples(
+            [("A", "a"), ("A", "b"), ("B", "a"), ("B", "b")]
+        ),
+    )
+    df.to_excel(
+        tmp_excel,
+        engine="openpyxl",
+        index=False,
+        autofilter=True,
+        merge_cells=True,
+    )
+
+    with contextlib.closing(openpyxl.load_workbook(tmp_excel)) as wb:
+        ws = wb[wb.sheetnames[0]]
+        assert ws.auto_filter is not None
+        assert ws.auto_filter.ref is not None
+
+
+def test_to_excel_autofilter_multiindex_no_merge_openpyxl(tmp_excel):
+    # Test autofilter with MultiIndex columns and merge_cells=False
+    df = DataFrame(
+        [[1, 2, 3, 4], [5, 6, 7, 8]],
+        columns=pd.MultiIndex.from_tuples(
+            [("A", "a"), ("A", "b"), ("B", "a"), ("B", "b")]
+        ),
+    )
+    df.to_excel(
+        tmp_excel,
+        engine="openpyxl",
+        index=False,
+        autofilter=True,
+        merge_cells=False,
+    )
+
+    with contextlib.closing(openpyxl.load_workbook(tmp_excel)) as wb:
+        ws = wb[wb.sheetnames[0]]
+        assert ws.auto_filter is not None
+        assert ws.auto_filter.ref is not None
 
 
 @pytest.mark.parametrize("kwarg_name", ["read_only", "data_only"])
diff --git a/pandas/tests/io/excel/test_xlsxwriter.py b/pandas/tests/io/excel/test_xlsxwriter.py
@@ -86,18 +86,93 @@ def test_book_and_sheets_consistent(tmp_excel):
         assert writer.sheets == {"test_name": sheet}
 
 
-def test_to_excel(tmp_excel):
-    DataFrame([[1, 2]]).to_excel(tmp_excel)
-
-
 def test_to_excel_autofilter_xlsxwriter(tmp_excel):
-    pytest.importorskip("xlsxwriter")
     openpyxl = pytest.importorskip("openpyxl")
 
     df = DataFrame({"A": [1, 2], "B": [3, 4]})
     # Write with xlsxwriter, verify via openpyxl that an autofilter exists
     df.to_excel(tmp_excel, engine="xlsxwriter", index=False, autofilter=True)
 
+    wb = openpyxl.load_workbook(tmp_excel)
+    try:
+        ws = wb[wb.sheetnames[0]]
+        assert ws.auto_filter is not None
+        assert ws.auto_filter.ref is not None
+        # Verify filter covers all columns (A and B)
+        assert "A" in ws.auto_filter.ref
+        assert "B" in ws.auto_filter.ref
+    finally:
+        wb.close()
+
+
+def test_to_excel_autofilter_startrow_startcol_xlsxwriter(tmp_excel):
+    openpyxl = pytest.importorskip("openpyxl")
+
+    df = DataFrame({"A": [1, 2], "B": [3, 4]})
+    df.to_excel(
+        tmp_excel,
+        engine="xlsxwriter",
+        index=False,
+        autofilter=True,
+        startrow=2,
+        startcol=1,
+    )
+
+    wb = openpyxl.load_workbook(tmp_excel)
+    try:
+        ws = wb[wb.sheetnames[0]]
+        assert ws.auto_filter is not None
+        assert ws.auto_filter.ref is not None
+        # Filter should be offset by startrow=2 and startcol=1 (B3:C5)
+        assert ws.auto_filter.ref.startswith("B")
+        assert "3" in ws.auto_filter.ref
+    finally:
+        wb.close()
+
+
+def test_to_excel_autofilter_multiindex_merge_cells_xlsxwriter(tmp_excel):
+    openpyxl = pytest.importorskip("openpyxl")
+
+    df = DataFrame(
+        [[1, 2, 3, 4], [5, 6, 7, 8]],
+        columns=pd.MultiIndex.from_tuples(
+            [("A", "a"), ("A", "b"), ("B", "a"), ("B", "b")]
+        ),
+    )
+    df.to_excel(
+        tmp_excel,
+        engine="xlsxwriter",
+        index=False,
+        autofilter=True,
+        merge_cells=True,
+    )
+
+    wb = openpyxl.load_workbook(tmp_excel)
+    try:
+        ws = wb[wb.sheetnames[0]]
+        assert ws.auto_filter is not None
+        assert ws.auto_filter.ref is not None
+    finally:
+        wb.close()
+
+
+def test_to_excel_autofilter_multiindex_no_merge_xlsxwriter(tmp_excel):
+    openpyxl = pytest.importorskip("openpyxl")
+
+    df = DataFrame(
+        [[1, 2, 3, 4], [5, 6, 7, 8]],
+        columns=pd.MultiIndex.from_tuples(
+            [("A", "a"), ("A", "b"), ("B", "a"), ("B", "b")]
+        ),
+    )
+    df.to_excel(
+        tmp_excel,
+        engine="xlsxwriter",
+        index=False,
+        autofilter=True,
+        merge_cells=False,
+    )
+
     wb = openpyxl.load_workbook(tmp_excel)
     try:
         ws = wb[wb.sheetnames[0]]