TST: Improve runtime of some unit tests (#62968)

mroeschke · web-flow · commit 930b66d6b50c · 2025-11-05T14:30:02.000-08:00
diff --git a/pandas/tests/computation/test_eval.py b/pandas/tests/computation/test_eval.py
@@ -104,17 +104,21 @@ def _eval_single_bin(lhs, cmp1, rhs, engine):
     ids=["DataFrame", "Series", "SeriesNaN", "DataFrameNaN", "float"],
 )
 def lhs(request):
-    nan_df1 = DataFrame(np.random.default_rng(2).standard_normal((10, 5)))
-    nan_df1[nan_df1 > 0.5] = np.nan
-
-    opts = (
-        DataFrame(np.random.default_rng(2).standard_normal((10, 5))),
-        Series(np.random.default_rng(2).standard_normal(5)),
-        Series([1, 2, np.nan, np.nan, 5]),
-        nan_df1,
-        np.random.default_rng(2).standard_normal(),
-    )
-    return opts[request.param]
+    rng = np.random.default_rng(2)
+    if request.param == 0:
+        return DataFrame(rng.standard_normal((10, 5)))
+    elif request.param == 1:
+        return Series(rng.standard_normal(5))
+    elif request.param == 2:
+        return Series([1, 2, np.nan, np.nan, 5])
+    elif request.param == 3:
+        nan_df1 = DataFrame(rng.standard_normal((10, 5)))
+        nan_df1[nan_df1 > 0.5] = np.nan
+        return nan_df1
+    elif request.param == 4:
+        return rng.standard_normal()
+    else:
+        raise ValueError(f"{request.param}")
 
 
 rhs = lhs
diff --git a/pandas/tests/indexing/multiindex/test_indexing_slow.py b/pandas/tests/indexing/multiindex/test_indexing_slow.py
@@ -71,6 +71,7 @@ def b(df, cols):
     return df.drop_duplicates(subset=cols[:-1])
 
 
+@pytest.mark.slow
 @pytest.mark.filterwarnings("ignore::pandas.errors.PerformanceWarning")
 @pytest.mark.parametrize("lexsort_depth", list(range(5)))
 @pytest.mark.parametrize("frame_fixture", ["a", "b"])
diff --git a/pandas/tests/io/parser/common/test_chunksize.py b/pandas/tests/io/parser/common/test_chunksize.py
@@ -229,17 +229,21 @@ def test_chunks_have_consistent_numerical_type(all_parsers, monkeypatch):
     assert result.a.dtype == float
 
 
-def test_warn_if_chunks_have_mismatched_type(all_parsers, using_infer_string):
+def test_warn_if_chunks_have_mismatched_type(
+    all_parsers, using_infer_string, monkeypatch
+):
     warning_type = None
     parser = all_parsers
-    size = 10000
+    heuristic = 2**3
+    size = 10
 
     # see gh-3866: if chunks are different types and can't
     # be coerced using numerical types, then issue warning.
     if parser.engine == "c" and parser.low_memory:
         warning_type = DtypeWarning
-        # Use larger size to hit warning path
-        size = 499999
+        # Pick a size relative to DEFAULT_BUFFER_HEURISTIC (monkeypatched below)
+        # so that the mixed-type warning path is hit
+        size = heuristic - 1
 
     integers = [str(i) for i in range(size)]
     data = "a\n" + "\n".join(integers + ["a", "b"] + integers)
@@ -251,12 +255,14 @@ def test_warn_if_chunks_have_mismatched_type(all_parsers, using_infer_string):
             buf,
         )
     else:
-        df = parser.read_csv_check_warnings(
-            warning_type,
-            r"Columns \(0: a\) have mixed types. "
-            "Specify dtype option on import or set low_memory=False.",
-            buf,
-        )
+        with monkeypatch.context() as m:
+            m.setattr(libparsers, "DEFAULT_BUFFER_HEURISTIC", heuristic)
+            df = parser.read_csv_check_warnings(
+                warning_type,
+                r"Columns \(0: a\) have mixed types. "
+                "Specify dtype option on import or set low_memory=False.",
+                buf,
+            )
     if parser.engine == "c" and parser.low_memory:
         assert df.a.dtype == object
     elif using_infer_string:
@@ -295,30 +301,6 @@ def test_empty_with_nrows_chunksize(all_parsers, iterator):
     tm.assert_frame_equal(result, expected)
 
 
-def test_read_csv_memory_growth_chunksize(temp_file, all_parsers):
-    # see gh-24805
-    #
-    # Let's just make sure that we don't crash
-    # as we iteratively process all chunks.
-    parser = all_parsers
-
-    with open(temp_file, "w", encoding="utf-8") as f:
-        for i in range(1000):
-            f.write(str(i) + "\n")
-
-    if parser.engine == "pyarrow":
-        msg = "The 'chunksize' option is not supported with the 'pyarrow' engine"
-        with pytest.raises(ValueError, match=msg):
-            with parser.read_csv(temp_file, chunksize=20) as result:
-                for _ in result:
-                    pass
-        return
-
-    with parser.read_csv(temp_file, chunksize=20) as result:
-        for _ in result:
-            pass
-
-
 def test_chunksize_with_usecols_second_block_shorter(all_parsers):
     # GH#21211
     parser = all_parsers
diff --git a/pandas/tests/io/parser/test_parse_dates.py b/pandas/tests/io/parser/test_parse_dates.py
@@ -265,12 +265,11 @@ def test_bad_date_parse(all_parsers, cache, value):
     )
 
 
-@pytest.mark.parametrize("value", ["0"])
-def test_bad_date_parse_with_warning(all_parsers, cache, value):
+def test_bad_date_parse_with_warning(all_parsers, cache):
     # if we have an invalid date make sure that we handle this with
     # and w/o the cache properly.
     parser = all_parsers
-    s = StringIO((f"{value},\n") * 50000)
+    s = StringIO(("0,\n") * (start_caching_at + 1))
 
     if parser.engine == "pyarrow":
         # pyarrow reads "0" as 0 (of type int64), and so
diff --git a/pandas/tests/libs/test_hashtable.py b/pandas/tests/libs/test_hashtable.py
@@ -247,7 +247,7 @@ def test_get_state(self, table_type, dtype):
         assert "n_buckets" in state
         assert "upper_bound" in state
 
-    @pytest.mark.parametrize("N", range(1, 110))
+    @pytest.mark.parametrize("N", range(1, 110, 4))
     def test_no_reallocation(self, table_type, dtype, N):
         keys = np.arange(N).astype(dtype)
         preallocated_table = table_type(N)
@@ -517,7 +517,7 @@ def test_tracemalloc_for_empty_StringHashTable():
         assert get_allocated_khash_memory() == 0
 
 
-@pytest.mark.parametrize("N", range(1, 110))
+@pytest.mark.parametrize("N", range(1, 110, 4))
 def test_no_reallocation_StringHashTable(N):
     keys = np.arange(N).astype(np.str_).astype(np.object_)
     preallocated_table = ht.StringHashTable(N)
diff --git a/pandas/tests/plotting/frame/test_frame.py b/pandas/tests/plotting/frame/test_frame.py
@@ -4,11 +4,9 @@
     date,
     datetime,
 )
-import gc
 import itertools
 import re
 import string
-import weakref
 
 import numpy as np
 import pytest
@@ -2173,15 +2171,15 @@ def test_memory_leak(self, kind):
                 index=date_range("2000-01-01", periods=10, freq="B"),
             )
 
-        # Use a weakref so we can see if the object gets collected without
-        # also preventing it from being collected
-        ref = weakref.ref(df.plot(kind=kind, **args))
-
-        # have matplotlib delete all the figures
-        plt.close("all")
-        # force a garbage collection
-        gc.collect()
-        assert ref() is None
+        ax = df.plot(kind=kind, **args)
+        # https://github.com/pandas-dev/pandas/issues/9003#issuecomment-70544889
+        if kind in ["line", "area"]:
+            for i, (cached_data, _, _) in enumerate(ax._plot_data):
+                ser = df.iloc[:, i]
+                assert not tm.shares_memory(ser, cached_data)
+                tm.assert_numpy_array_equal(ser._values, cached_data._values)
+        else:
+            assert not hasattr(ax, "_plot_data")
 
     def test_df_gridspec_patterns_vert_horiz(self):
         # GH 10819
diff --git a/pandas/tests/resample/test_datetime_index.py b/pandas/tests/resample/test_datetime_index.py
@@ -526,7 +526,7 @@ def test_nearest_upsample_with_limit(tz_aware_fixture, freq, rule, unit):
 
 
 def test_resample_ohlc(unit):
-    index = date_range(datetime(2005, 1, 1), datetime(2005, 1, 10), freq="Min")
+    index = date_range(datetime(2005, 1, 1), datetime(2005, 1, 2), freq="Min")
     s = Series(range(len(index)), index=index)
     s.index.name = "index"
     s.index = s.index.as_unit(unit)
@@ -1842,7 +1842,7 @@ def test_resample_equivalent_offsets(n1, freq1, n2, freq2, k, unit):
     # GH 24127
     n1_ = n1 * k
     n2_ = n2 * k
-    dti = date_range("1991-09-05", "1991-09-12", freq=freq1).as_unit(unit)
+    dti = date_range("1991-09-05", "1991-09-06", freq=freq1).as_unit(unit)
     ser = Series(range(len(dti)), index=dti)
 
     result1 = ser.resample(str(n1_) + freq1).mean()
diff --git a/pandas/tests/resample/test_period_index.py b/pandas/tests/resample/test_period_index.py
@@ -130,7 +130,7 @@ def test_selection(self, freq, kwargs):
     def test_annual_upsample_cases(
         self, offset, period, conv, meth, month, simple_period_range_series
     ):
-        ts = simple_period_range_series("1/1/1990", "12/31/1991", freq=f"Y-{month}")
+        ts = simple_period_range_series("1/1/1990", "12/31/1990", freq=f"Y-{month}")
         warn = FutureWarning if period == "B" else None
         msg = r"PeriodDtype\[B\] is deprecated"
         with tm.assert_produces_warning(warn, match=msg):
@@ -214,7 +214,7 @@ def test_quarterly_upsample(
         self, month, offset, period, convention, simple_period_range_series
     ):
         freq = f"Q-{month}"
-        ts = simple_period_range_series("1/1/1990", "12/31/1995", freq=freq)
+        ts = simple_period_range_series("1/1/1990", "12/31/1991", freq=freq)
         warn = FutureWarning if period == "B" else None
         msg = r"PeriodDtype\[B\] is deprecated"
         with tm.assert_produces_warning(warn, match=msg):
@@ -396,8 +396,7 @@ def test_fill_method_and_how_upsample(self):
     @pytest.mark.parametrize("convention", ["start", "end"])
     def test_weekly_upsample(self, day, target, convention, simple_period_range_series):
         freq = f"W-{day}"
-        ts = simple_period_range_series("1/1/1990", "12/31/1995", freq=freq)
-
+        ts = simple_period_range_series("1/1/1990", "07/31/1990", freq=freq)
         warn = None if target == "D" else FutureWarning
         msg = r"PeriodDtype\[B\] is deprecated"
         with tm.assert_produces_warning(warn, match=msg):