Skip to content

Commit 2619e36

Browse files
Refactor: Move non-ISO date format deprecation to .loc-specific path due to other test failures
1 parent: 42c889a; commit: 2619e36

File tree

6 files changed, with 61 additions and 62 deletions.

pandas/core/indexes/datetimes.py

Lines changed: 0 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@
22

33
import datetime as dt
44
import operator
5-
import re
65
from typing import (
76
TYPE_CHECKING,
87
Self,
@@ -111,29 +110,6 @@ def _new_DatetimeIndex(cls, d):
111110
return result
112111

113112

114-
def _is_iso_format_string(date_str: str) -> bool:
115-
"""
116-
Check if a date string follows ISO8601 format.
117-
118-
ISO format must start with a 4-digit year (YYYY), optionally followed by
119-
hyphen-separated month and day or 'T' for time component.
120-
121-
Examples of ISO format (True):
122-
- 2024
123-
- 2024-01
124-
- 2024-01-10
125-
- 2024-01-10T00:00:00
126-
127-
Examples of non-ISO format (False):
128-
- 2024/01/10 (/ separator)
129-
- 2024 01 10 (space separator)
130-
- 01/10/2024 (MM/DD/YYYY)
131-
- 10/01/2024 (DD/MM/YYYY)
132-
- 01-10-2024 (MM-DD-YYYY)
133-
"""
134-
return re.match(r"^\d{4}(?:-|T|$)", date_str) is not None
135-
136-
137113
@inherit_names(
138114
DatetimeArray._field_ops
139115
+ [
@@ -637,14 +613,6 @@ def get_loc(self, key):
637613
parsed, reso = self._parse_with_reso(key)
638614
except ValueError as err:
639615
raise KeyError(key) from err
640-
# GH#58302 - Deprecate non-ISO string formats in .loc indexing
641-
if not _is_iso_format_string(key):
642-
msg = (
643-
"Parsing non-ISO datetime strings in .loc is deprecated "
644-
"and will be removed in a future version. Use ISO format "
645-
f"(YYYY-MM-DD) instead. Got '{key}'."
646-
)
647-
warnings.warn(msg, Pandas4Warning, stacklevel=find_stack_level())
648616
self._disallow_mismatched_indexing(parsed)
649617

650618
if self._can_partial_date_slice(reso):
@@ -720,23 +688,6 @@ def slice_indexer(self, start=None, end=None, step=None):
720688
def check_str_or_none(point) -> bool:
721689
return point is not None and not isinstance(point, str)
722690

723-
# GH#58302 - Deprecate non-ISO string formats in .loc indexing
724-
if isinstance(start, str) and not _is_iso_format_string(start):
725-
msg = (
726-
"Parsing non-ISO datetime strings in .loc is deprecated "
727-
"and will be removed in a future version. Use ISO format "
728-
f"(YYYY-MM-DD) instead. Got '{start}'."
729-
)
730-
warnings.warn(msg, Pandas4Warning, stacklevel=find_stack_level())
731-
732-
if isinstance(end, str) and not _is_iso_format_string(end):
733-
msg = (
734-
"Parsing non-ISO datetime strings in .loc is deprecated "
735-
"and will be removed in a future version. Use ISO format "
736-
f"(YYYY-MM-DD) instead. Got '{end}'."
737-
)
738-
warnings.warn(msg, Pandas4Warning, stacklevel=find_stack_level())
739-
740691
# GH#33146 if start and end are combinations of str and None and Index is not
741692
# monotonic, we can not use Index.slice_indexer because it does not honor the
742693
# actual elements, is only searching for start and end
@@ -756,7 +707,6 @@ def check_str_or_none(point) -> bool:
756707

757708
if end is not None:
758709
end_casted = self._maybe_cast_slice_bound(end, "right")
759-
760710
mask = (self <= end_casted) & mask
761711
in_index &= (end_casted == self).any()
762712

pandas/core/indexing.py

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
from __future__ import annotations
22

33
from contextlib import suppress
4+
import re
45
import sys
56
from typing import (
67
TYPE_CHECKING,
@@ -26,11 +27,13 @@
2627
IndexingError,
2728
InvalidIndexError,
2829
LossySetitemError,
30+
Pandas4Warning,
2931
)
3032
from pandas.errors.cow import _chained_assignment_msg
3133
from pandas.util._decorators import (
3234
doc,
3335
)
36+
from pandas.util._exceptions import find_stack_level
3437

3538
from pandas.core.dtypes.cast import (
3639
can_hold_element,
@@ -1249,6 +1252,31 @@ class _LocIndexer(_LocationIndexer):
12491252
"index is integers), listlike of labels, boolean"
12501253
)
12511254

1255+
# -------------------------------------------------------------------
1256+
# Helpers
1257+
1258+
@staticmethod
1259+
def _is_iso_format_string(date_str: str) -> bool:
1260+
"""
1261+
Check if a date string follows ISO 8601 format.
1262+
1263+
ISO format must start with a 4-digit year (YYYY), optionally followed by
1264+
hyphen or 'T' for time component.
1265+
1266+
Examples of ISO format (True):
1267+
- "2024"
1268+
- "2024-01"
1269+
- "2024-01-10"
1270+
- "2024-01-10T00:00:00"
1271+
1272+
Examples of non-ISO format (False):
1273+
- "2024/01/10" (/ separator)
1274+
- "2024 01 10" (space separator)
1275+
- "01/10/2024" (MM/DD/YYYY)
1276+
- "1Q01" (quarter format)
1277+
"""
1278+
return re.match(r"^\d{4}(?:-|T|$)", date_str) is not None
1279+
12521280
# -------------------------------------------------------------------
12531281
# Key Checks
12541282

@@ -1459,6 +1487,19 @@ def _getitem_axis(self, key, axis: AxisInt):
14591487

14601488
# fall thru to straight lookup
14611489
self._validate_key(key, axis)
1490+
1491+
# GH#58302 - Deprecate non-ISO string formats in .loc direct access
1492+
# Only warn for DatetimeIndex to avoid false positives with other index types
1493+
labels = self.obj._get_axis(axis)
1494+
if type(labels).__name__ == "DatetimeIndex":
1495+
if isinstance(key, str) and not self._is_iso_format_string(key):
1496+
msg = (
1497+
"Parsing non-ISO datetime strings in .loc is deprecated "
1498+
"and will be removed in a future version. Use ISO format "
1499+
f"(YYYY-MM-DD) instead. Got '{key}'."
1500+
)
1501+
warnings.warn(msg, Pandas4Warning, stacklevel=find_stack_level())
1502+
14621503
return self._get_label(key, axis=axis)
14631504

14641505
def _get_slice_axis(self, slice_obj: slice, axis: AxisInt):
@@ -1471,6 +1512,19 @@ def _get_slice_axis(self, slice_obj: slice, axis: AxisInt):
14711512
return obj.copy(deep=False)
14721513

14731514
labels = obj._get_axis(axis)
1515+
1516+
# GH#58302 - Deprecate non-ISO string formats in .loc slicing
1517+
# Only warn for DatetimeIndex to avoid false positives with other index types
1518+
if type(labels).__name__ == "DatetimeIndex":
1519+
for key in [slice_obj.start, slice_obj.stop]:
1520+
if isinstance(key, str) and not self._is_iso_format_string(key):
1521+
msg = (
1522+
"Parsing non-ISO datetime strings in .loc is deprecated "
1523+
"and will be removed in a future version. Use ISO format "
1524+
f"(YYYY-MM-DD) instead. Got '{key}'."
1525+
)
1526+
warnings.warn(msg, Pandas4Warning, stacklevel=find_stack_level())
1527+
14741528
indexer = labels.slice_indexer(slice_obj.start, slice_obj.stop, slice_obj.step)
14751529

14761530
if isinstance(indexer, slice):

pandas/tests/groupby/test_groupby.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2851,9 +2851,6 @@ def test_groupby_with_Time_Grouper(unit):
28512851
tm.assert_frame_equal(result, expected_output)
28522852

28532853

2854-
@pytest.mark.filterwarnings(
2855-
"ignore:Parsing non-ISO datetime strings:pandas.errors.Pandas4Warning"
2856-
)
28572854
def test_groupby_series_with_datetimeindex_month_name():
28582855
# GH 48509
28592856
s = Series([0, 1, 0], index=date_range("2022-01-01", periods=3), name="jan")

pandas/tests/indexes/datetimes/test_partial_slicing.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,10 +21,9 @@
2121

2222

2323
class TestSlicing:
24-
pytestmark = pytest.mark.filterwarnings(
24+
@pytest.mark.filterwarnings(
2525
"ignore:Parsing non-ISO datetime strings:pandas.errors.Pandas4Warning"
2626
)
27-
2827
def test_string_index_series_name_converted(self):
2928
# GH#1644
3029
df = DataFrame(
@@ -159,6 +158,9 @@ def test_slice_end_of_period_resolution(self, partial_dtime):
159158
expected = ser.iloc[:5]
160159
tm.assert_series_equal(result, expected)
161160

161+
@pytest.mark.filterwarnings(
162+
"ignore:Parsing non-ISO datetime strings:pandas.errors.Pandas4Warning"
163+
)
162164
def test_slice_quarter(self):
163165
dti = date_range(freq="D", start=datetime(2000, 6, 1), periods=500)
164166

pandas/tests/indexes/period/test_partial_slicing.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -12,10 +12,6 @@
1212

1313

1414
class TestPeriodIndex:
15-
pytestmark = pytest.mark.filterwarnings(
16-
"ignore:Parsing non-ISO datetime strings:pandas.errors.Pandas4Warning"
17-
)
18-
1915
def test_getitem_periodindex_duplicates_string_slice(self):
2016
# monotonic
2117
idx = PeriodIndex([2000, 2007, 2007, 2009, 2009], freq="Y-JUN")
@@ -127,6 +123,9 @@ def test_range_slice_outofbounds(self, make_range):
127123
tm.assert_frame_equal(df["2013-11":"2013-12"], empty)
128124

129125
@pytest.mark.parametrize("make_range", [date_range, period_range])
126+
@pytest.mark.filterwarnings(
127+
"ignore:Parsing non-ISO datetime strings:pandas.errors.Pandas4Warning"
128+
)
130129
def test_maybe_cast_slice_bound(self, make_range, frame_or_series):
131130
idx = make_range(start="2013/10/01", freq="D", periods=10)
132131

pandas/tests/series/indexing/test_getitem.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -236,9 +236,6 @@ def test_getitem_partial_str_slice_with_datetimeindex(self):
236236

237237
tm.assert_series_equal(result, expected)
238238

239-
@pytest.mark.filterwarnings(
240-
"ignore:Parsing non-ISO datetime strings:pandas.errors.Pandas4Warning"
241-
)
242239
def test_getitem_slice_strings_with_datetimeindex(self):
243240
idx = DatetimeIndex(
244241
["1/1/2000", "1/2/2000", "1/2/2000", "1/3/2000", "1/4/2000"]

0 commit comments

Comments
 (0)