STY: Add strict=True (or an explicit strict=False where lengths may differ) to zip() calls in pandas/core, pandas/tests and scripts (#62984)

invain01 · web-flow · commit 842a2c61eff5 · 2025-11-05T10:10:16.000-08:00
diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py
@@ -1248,7 +1248,7 @@ def _maybe_restore_index_levels(self, result: DataFrame) -> None:
         """
         names_to_restore = []
         for name, left_key, right_key in zip(
-            self.join_names, self.left_on, self.right_on
+            self.join_names, self.left_on, self.right_on, strict=True
         ):
             if (
                 # Argument 1 to "_is_level_reference" of "NDFrame" has incompatible
@@ -1281,7 +1281,7 @@ def _maybe_add_join_keys(
 
         assert all(isinstance(x, _known) for x in self.left_join_keys)
 
-        keys = zip(self.join_names, self.left_on, self.right_on)
+        keys = zip(self.join_names, self.left_on, self.right_on, strict=True)
         for i, (name, lname, rname) in enumerate(keys):
             if not _should_fill(lname, rname):
                 continue
@@ -1590,7 +1590,7 @@ def _get_merge_keys(
 
         # ugh, spaghetti re #733
         if _any(self.left_on) and _any(self.right_on):
-            for lk, rk in zip(self.left_on, self.right_on):
+            for lk, rk in zip(self.left_on, self.right_on, strict=True):
                 lk = extract_array(lk, extract_numpy=True)
                 rk = extract_array(rk, extract_numpy=True)
                 if is_lkey(lk):
@@ -1653,7 +1653,7 @@ def _get_merge_keys(
                 right_keys = [
                     lev._values.take(lev_codes)
                     for lev, lev_codes in zip(
-                        self.right.index.levels, self.right.index.codes
+                        self.right.index.levels, self.right.index.codes, strict=True
                     )
                 ]
             else:
@@ -1675,7 +1675,7 @@ def _get_merge_keys(
                 left_keys = [
                     lev._values.take(lev_codes)
                     for lev, lev_codes in zip(
-                        self.left.index.levels, self.left.index.codes
+                        self.left.index.levels, self.left.index.codes, strict=True
                     )
                 ]
             else:
@@ -1692,7 +1692,7 @@ def _maybe_coerce_merge_keys(self) -> None:
         # or if we have object and integer dtypes
 
         for lk, rk, name in zip(
-            self.left_join_keys, self.right_join_keys, self.join_names
+            self.left_join_keys, self.right_join_keys, self.join_names, strict=True
         ):
             if (len(lk) and not len(rk)) or (not len(lk) and len(rk)):
                 continue
@@ -2084,7 +2084,7 @@ def get_join_indexers(
             _factorize_keys(left_keys[n], right_keys[n], sort=sort)
             for n in range(len(left_keys))
         )
-        zipped = zip(*mapped)
+        zipped = zip(*mapped, strict=True)
         llab, rlab, shape = (list(x) for x in zipped)
 
         # get flat i8 keys from label lists
@@ -2469,7 +2469,7 @@ def _check_dtype_match(left: ArrayLike, right: ArrayLike, i: int) -> None:
                 raise MergeError(msg)
 
         # validate index types are the same
-        for i, (lk, rk) in enumerate(zip(left_join_keys, right_join_keys)):
+        for i, (lk, rk) in enumerate(zip(left_join_keys, right_join_keys, strict=True)):
             _check_dtype_match(lk, rk, i)
 
         if self.left_index:
@@ -2654,7 +2654,7 @@ def _get_multiindex_indexer(
         _factorize_keys(index.levels[n]._values, join_keys[n], sort=sort)
         for n in range(index.nlevels)
     )
-    zipped = zip(*mapped)
+    zipped = zip(*mapped, strict=True)
     rcodes, lcodes, shape = (list(x) for x in zipped)
     if sort:
         rcodes = list(map(np.take, rcodes, index.codes))
diff --git a/pandas/tests/plotting/frame/test_frame.py b/pandas/tests/plotting/frame/test_frame.py
@@ -183,7 +183,7 @@ def test_plot_title(self):
 
     @pytest.mark.slow
     def test_plot_multiindex(self):
-        tuples = zip(string.ascii_letters[:10], range(10))
+        tuples = zip(string.ascii_letters[:10], range(10), strict=True)
         df = DataFrame(
             np.random.default_rng(2).random((10, 3)),
             index=MultiIndex.from_tuples(tuples),
@@ -513,7 +513,7 @@ def test_negative_log(self):
 
     def _compare_stacked_y_cood(self, normal_lines, stacked_lines):
         base = np.zeros(len(normal_lines[0].get_data()[1]))
-        for nl, sl in zip(normal_lines, stacked_lines):
+        for nl, sl in zip(normal_lines, stacked_lines, strict=True):
             base += nl.get_data()[1]  # get y coordinates
             sy = sl.get_data()[1]
             tm.assert_numpy_array_equal(base, sy)
@@ -920,7 +920,10 @@ def test_scatterplot_color_by_categorical(self, ordered, categories):
 
         expected_yticklabels = categories
         result_yticklabels = [i.get_text() for i in colorbar.ax.get_ymajorticklabels()]
-        assert all(i == j for i, j in zip(result_yticklabels, expected_yticklabels))
+        assert all(
+            i == j
+            for i, j in zip(result_yticklabels, expected_yticklabels, strict=True)
+        )
 
     @pytest.mark.parametrize("x, y", [("x", "y"), ("y", "x"), ("y", "y")])
     def test_plot_scatter_with_categorical_data(self, x, y):
@@ -1131,7 +1134,7 @@ def test_boxplot_vertical_subplots(self, hist_df):
         )
         _check_axes_shape(axes, axes_num=3, layout=(1, 3))
         _check_ax_scales(axes, xaxis="log")
-        for ax, label in zip(axes, labels):
+        for ax, label in zip(axes, labels, strict=True):
             _check_text_labels(ax.get_yticklabels(), [label])
             assert len(ax.lines) == 7
 
@@ -1258,7 +1261,13 @@ def test_hist_weights(self, weight_shape):
         # GH 33173
         weights = 0.1 * np.ones(shape=weight_shape)
         df = DataFrame(
-            dict(zip(["A", "B"], np.random.default_rng(2).standard_normal((2, 100))))
+            dict(
+                zip(
+                    ["A", "B"],
+                    np.random.default_rng(2).standard_normal((2, 100)),
+                    strict=True,
+                )
+            )
         )
 
         ax1 = _check_plot_works(df.plot, kind="hist", weights=weights)
@@ -1679,7 +1688,7 @@ def test_pie_df_subplots(self):
         assert len(axes) == len(df.columns)
         for ax in axes:
             _check_text_labels(ax.texts, df.index)
-        for ax, ylabel in zip(axes, df.columns):
+        for ax, ylabel in zip(axes, df.columns, strict=True):
             assert ax.get_ylabel() == ""
 
     def test_pie_df_labels_colors(self):
@@ -2381,7 +2390,7 @@ def test_x_string_values_ticks(self):
         ax = df.plot.area(x="day")
         ax.set_xlim(-1, 3)
         xticklabels = [t.get_text() for t in ax.get_xticklabels()]
-        labels_position = dict(zip(xticklabels, ax.get_xticks()))
+        labels_position = dict(zip(xticklabels, ax.get_xticks(), strict=False))
         # Testing if the label stayed at the right position
         assert labels_position["Monday"] == 0.0
         assert labels_position["Tuesday"] == 1.0
@@ -2399,7 +2408,7 @@ def test_x_multiindex_values_ticks(self):
         ax = df.plot()
         ax.set_xlim(-1, 4)
         xticklabels = [t.get_text() for t in ax.get_xticklabels()]
-        labels_position = dict(zip(xticklabels, ax.get_xticks()))
+        labels_position = dict(zip(xticklabels, ax.get_xticks(), strict=False))
         # Testing if the label stayed at the right position
         assert labels_position["(2012, 1)"] == 0.0
         assert labels_position["(2012, 2)"] == 1.0
@@ -2475,7 +2484,7 @@ def test_group_subplot(self, kind):
         assert len(axes) == 3  # 2 groups + single column a
 
         expected_labels = (["b", "e"], ["c", "d"], ["a"])
-        for ax, labels in zip(axes, expected_labels):
+        for ax, labels in zip(axes, expected_labels, strict=True):
             if kind != "pie":
                 _check_legend_labels(ax, labels=labels)
             if kind == "line":
diff --git a/pandas/tests/reshape/merge/test_merge_asof.py b/pandas/tests/reshape/merge/test_merge_asof.py
@@ -3249,14 +3249,14 @@ def test_timedelta_tolerance_nearest(self, unit):
             )
 
         left = pd.DataFrame(
-            list(zip([0, 5, 10, 15, 20, 25], [0, 1, 2, 3, 4, 5])),
+            list(zip([0, 5, 10, 15, 20, 25], [0, 1, 2, 3, 4, 5], strict=True)),
             columns=["time", "left"],
         )
 
         left["time"] = pd.to_timedelta(left["time"], "ms").astype(f"m8[{unit}]")
 
         right = pd.DataFrame(
-            list(zip([0, 3, 9, 12, 15, 18], [0, 1, 2, 3, 4, 5])),
+            list(zip([0, 3, 9, 12, 15, 18], [0, 1, 2, 3, 4, 5], strict=True)),
             columns=["time", "right"],
         )
 
@@ -3268,6 +3268,7 @@ def test_timedelta_tolerance_nearest(self, unit):
                     [0, 5, 10, 15, 20, 25],
                     [0, 1, 2, 3, 4, 5],
                     [0, np.nan, 2, 4, np.nan, np.nan],
+                    strict=True,
                 )
             ),
             columns=["time", "left", "right"],
diff --git a/scripts/validate_unwanted_patterns.py b/scripts/validate_unwanted_patterns.py
@@ -266,7 +266,9 @@ def has_wrong_whitespace(first_line: str, second_line: str) -> bool:
 
     tokens: list = list(tokenize.generate_tokens(file_obj.readline))
 
-    for first_token, second_token, third_token in zip(tokens, tokens[1:], tokens[2:]):
+    for first_token, second_token, third_token in zip(
+        tokens, tokens[1:], tokens[2:], strict=False
+    ):
         # Checking if we are in a block of concated string
         if (
             first_token.type == third_token.type == token.STRING

Original file line number	Diff line number	Diff line change
`@@ -3249,14 +3249,14 @@ def test_timedelta_tolerance_nearest(self, unit):`
`3249`	`3249`	`)`
`3250`	`3250`
`3251`	`3251`	`left = pd.DataFrame(`
`3252`		`- list(zip([0, 5, 10, 15, 20, 25], [0, 1, 2, 3, 4, 5])),`
	`3252`	`+ list(zip([0, 5, 10, 15, 20, 25], [0, 1, 2, 3, 4, 5], strict=True)),`
`3253`	`3253`	`columns=["time", "left"],`
`3254`	`3254`	`)`
`3255`	`3255`
`3256`	`3256`	`left["time"] = pd.to_timedelta(left["time"], "ms").astype(f"m8[{unit}]")`
`3257`	`3257`
`3258`	`3258`	`right = pd.DataFrame(`
`3259`		`- list(zip([0, 3, 9, 12, 15, 18], [0, 1, 2, 3, 4, 5])),`
	`3259`	`+ list(zip([0, 3, 9, 12, 15, 18], [0, 1, 2, 3, 4, 5], strict=True)),`
`3260`	`3260`	`columns=["time", "right"],`
`3261`	`3261`	`)`
`3262`	`3262`
`@@ -3268,6 +3268,7 @@ def test_timedelta_tolerance_nearest(self, unit):`
`3268`	`3268`	`[0, 5, 10, 15, 20, 25],`
`3269`	`3269`	`[0, 1, 2, 3, 4, 5],`
`3270`	`3270`	`[0, np.nan, 2, 4, np.nan, np.nan],`
	`3271`	`+ strict=True,`
`3271`	`3272`	`)`
`3272`	`3273`	`),`
`3273`	`3274`	`columns=["time", "left", "right"],`