Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions doc/source/user_guide/10min.rst
Original file line number Diff line number Diff line change
Expand Up @@ -203,7 +203,7 @@ For a :class:`DataFrame`, passing a slice ``:`` selects matching rows:
.. ipython:: python

df[0:3]
df["20130102":"20130104"]
df["2013-01-02":"2013-01-04"]

Selection by label
~~~~~~~~~~~~~~~~~~
Expand All @@ -226,7 +226,7 @@ For label slicing, both endpoints are *included*:

.. ipython:: python

df.loc["20130102":"20130104", ["A", "B"]]
df.loc["2013-01-02":"2013-01-04", ["A", "B"]]

Selecting a single row and column label returns a scalar:

Expand Down
2 changes: 1 addition & 1 deletion doc/source/user_guide/indexing.rst
Original file line number Diff line number Diff line change
Expand Up @@ -314,7 +314,7 @@ Selection by label

.. ipython:: python

dfl.loc['20130102':'20130104']
dfl.loc['2013-01-02':'2013-01-04']

pandas provides a suite of methods in order to have **purely label based indexing**. This is a strict inclusion based protocol.
Every label asked for must be in the index, or a ``KeyError`` will be raised.
Expand Down
1 change: 1 addition & 0 deletions doc/source/whatsnew/v3.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -739,6 +739,7 @@ Other Deprecations
- Deprecated allowing ``fill_value`` that cannot be held in the original dtype (excepting NA values for integer and bool dtypes) in :meth:`Series.shift` and :meth:`DataFrame.shift` (:issue:`53802`)
- Deprecated allowing strings representing full dates in :meth:`DataFrame.at_time` and :meth:`Series.at_time` (:issue:`50839`)
- Deprecated backward-compatibility behavior for :meth:`DataFrame.select_dtypes` matching "str" dtype when ``np.object_`` is specified (:issue:`61916`)
- Deprecated non-ISO 8601 date strings as labels when indexing a :class:`DatetimeIndex` (e.g. ``ser.loc["1/10/2024"]`` or ``ser.loc["1/10/2024":"1/12/2024"]``). Use ISO 8601 format (``YYYY-MM-DD``) instead. (:issue:`58302`)
- Deprecated option "future.no_silent_downcasting", as it is no longer used. In a future version accessing this option will raise (:issue:`59502`)
- Deprecated passing non-Index types to :meth:`Index.join`; explicitly convert to Index first (:issue:`62897`)
- Deprecated silent casting of non-datetime 'other' to datetime in :meth:`Series.combine_first` (:issue:`62931`)
Expand Down
49 changes: 49 additions & 0 deletions pandas/core/indexes/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import datetime as dt
import operator
import re
from typing import (
TYPE_CHECKING,
Self,
Expand Down Expand Up @@ -110,6 +111,29 @@ def _new_DatetimeIndex(cls, d):
return result


def _is_iso_format_string(date_str: str) -> bool:
"""
Check if a date string follows ISO8601 format.

ISO format must start with a 4-digit year (YYYY), optionally followed by
hyphen-separated month and day or 'T' for time component.

Examples of ISO format (True):
- 2024
- 2024-01
- 2024-01-10
- 2024-01-10T00:00:00

Examples of non-ISO format (False):
- 2024/01/10 (/ separator)
- 2024 01 10 (space separator)
- 01/10/2024 (MM/DD/YYYY)
- 10/01/2024 (DD/MM/YYYY)
- 01-10-2024 (MM-DD-YYYY)
"""
return re.match(r"^\d{4}(?:-|T|$)", date_str) is not None


@inherit_names(
DatetimeArray._field_ops
+ [
Expand Down Expand Up @@ -613,6 +637,14 @@ def get_loc(self, key):
parsed, reso = self._parse_with_reso(key)
except ValueError as err:
raise KeyError(key) from err
# GH#58302 - Deprecate non-ISO string formats in .loc indexing
if not _is_iso_format_string(key):
msg = (
"Parsing non-ISO datetime strings in .loc is deprecated "
"and will be removed in a future version. Use ISO format "
f"(YYYY-MM-DD) instead. Got '{key}'."
)
warnings.warn(msg, Pandas4Warning, stacklevel=find_stack_level())
self._disallow_mismatched_indexing(parsed)

if self._can_partial_date_slice(reso):
Expand Down Expand Up @@ -688,6 +720,23 @@ def slice_indexer(self, start=None, end=None, step=None):
def check_str_or_none(point) -> bool:
return point is not None and not isinstance(point, str)

# GH#58302 - Deprecate non-ISO string formats in .loc indexing
if isinstance(start, str) and not _is_iso_format_string(start):
msg = (
"Parsing non-ISO datetime strings in .loc is deprecated "
"and will be removed in a future version. Use ISO format "
f"(YYYY-MM-DD) instead. Got '{start}'."
)
warnings.warn(msg, Pandas4Warning, stacklevel=find_stack_level())

if isinstance(end, str) and not _is_iso_format_string(end):
msg = (
"Parsing non-ISO datetime strings in .loc is deprecated "
"and will be removed in a future version. Use ISO format "
f"(YYYY-MM-DD) instead. Got '{end}'."
)
warnings.warn(msg, Pandas4Warning, stacklevel=find_stack_level())

# GH#33146 if start and end are combinations of str and None and Index is not
# monotonic, we can not use Index.slice_indexer because it does not honor the
# actual elements, is only searching for start and end
Expand Down
3 changes: 3 additions & 0 deletions pandas/tests/groupby/methods/test_value_counts.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,9 @@ def seed_df(seed_nans, n, m):
@pytest.mark.parametrize("bins", [None, [0, 5]], ids=repr)
@pytest.mark.parametrize("isort", [True, False])
@pytest.mark.parametrize("normalize, name", [(True, "proportion"), (False, "count")])
@pytest.mark.filterwarnings(
"ignore:Parsing non-ISO datetime strings:pandas.errors.Pandas4Warning"
)
def test_series_groupby_value_counts(
seed_nans,
num_rows,
Expand Down
3 changes: 3 additions & 0 deletions pandas/tests/groupby/test_groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -2851,6 +2851,9 @@ def test_groupby_with_Time_Grouper(unit):
tm.assert_frame_equal(result, expected_output)


@pytest.mark.filterwarnings(
"ignore:Parsing non-ISO datetime strings:pandas.errors.Pandas4Warning"
)
def test_groupby_series_with_datetimeindex_month_name():
# GH 48509
s = Series([0, 1, 0], index=date_range("2022-01-01", periods=3), name="jan")
Expand Down
3 changes: 3 additions & 0 deletions pandas/tests/indexes/datetimes/test_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -489,6 +489,9 @@ def test_get_loc_timedelta_invalid_key(self, key):
with pytest.raises(TypeError, match=msg):
dti.get_loc(key)

@pytest.mark.filterwarnings(
"ignore:Parsing non-ISO datetime strings:pandas.errors.Pandas4Warning"
)
def test_get_loc_reasonable_key_error(self):
# GH#1062
index = DatetimeIndex(["1/3/2000"])
Expand Down
97 changes: 97 additions & 0 deletions pandas/tests/indexes/datetimes/test_partial_slicing.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
import numpy as np
import pytest

from pandas.errors import Pandas4Warning

from pandas import (
DataFrame,
DatetimeIndex,
Expand All @@ -19,6 +21,10 @@


class TestSlicing:
pytestmark = pytest.mark.filterwarnings(
"ignore:Parsing non-ISO datetime strings:pandas.errors.Pandas4Warning"
)

def test_string_index_series_name_converted(self):
# GH#1644
df = DataFrame(
Expand Down Expand Up @@ -464,3 +470,94 @@ def test_slice_reduce_to_series(self):
)
result = df.loc["2000", "A"]
tm.assert_series_equal(result, expected)


class TestDatetimeIndexNonISODeprecation:
    """Tests for deprecation of non-ISO string formats in .loc indexing. GH#58302"""

    # Leading part of the deprecation message; every warning check matches on it.
    msg = "Parsing non-ISO datetime strings in .loc is deprecated"

    @pytest.fixture
    def ser_daily(self):
        """Series of 0..14 indexed by the 15 days 2024-01-01 .. 2024-01-15."""
        # date_range already returns a DatetimeIndex; no extra wrapping needed.
        return Series(
            range(15),
            index=date_range(start="2024-01-01", freq="D", periods=15),
        )

    @pytest.mark.parametrize(
        "date_string",
        [
            "1/10/2024",  # M/DD/YYYY format
            "01/10/2024",  # MM/DD/YYYY format with leading zero
        ],
    )
    def test_loc_indexing_non_iso_single_key_deprecation(self, ser_daily, date_string):
        # GH#58302 - a non-ISO scalar key warns but still resolves to Jan 10
        with tm.assert_produces_warning(Pandas4Warning, match=self.msg):
            result = ser_daily.loc[date_string]
        assert result == 9

    @pytest.mark.parametrize(
        "date_string,expected",
        [
            ("2024-01-10", 9),  # YYYY-MM-DD (ISO format)
        ],
    )
    def test_loc_indexing_iso_format_no_warning(self, ser_daily, date_string, expected):
        # GH#58302 - ISO format (YYYY-MM-DD) should NOT warn
        with tm.assert_produces_warning(None):
            result = ser_daily.loc[date_string]
        assert result == expected

    @pytest.mark.parametrize(
        "start_string",
        [
            "1/10/2024",  # M/DD/YYYY format
            "01/10/2024",  # MM/DD/YYYY format with leading zero
        ],
    )
    def test_loc_slicing_non_iso_start_deprecation(self, ser_daily, start_string):
        # GH#58302 - Non-ISO start in slice should warn
        with tm.assert_produces_warning(Pandas4Warning, match=self.msg):
            result = ser_daily.loc[start_string:"2024-01-15"]
        # Jan 10 .. Jan 15 inclusive
        assert result.tolist() == list(range(9, 15))

    @pytest.mark.parametrize(
        "end_string",
        [
            # NOTE: with pandas' default month-first parsing these are read
            # as May 1, 2024 (not Jan 5), i.e. past the end of the index.
            "5-01-2024",  # M-DD-YYYY format
            "05-01-2024",  # MM-DD-YYYY format with leading zero
        ],
    )
    def test_loc_slicing_non_iso_end_deprecation(self, ser_daily, end_string):
        # GH#58302 - Non-ISO end in slice should warn
        with tm.assert_produces_warning(Pandas4Warning, match=self.msg):
            result = ser_daily.loc["2024-01-01":end_string]
        # The end bound lies after the last label, so every row is selected.
        assert result.tolist() == list(range(15))

    def test_loc_slicing_both_non_iso_deprecation(self, ser_daily):
        # GH#58302 - Both endpoints non-ISO. This only checks that a matching
        # warning is emitted; it does not count how many are raised.
        with tm.assert_produces_warning(
            Pandas4Warning, match=self.msg, check_stacklevel=False
        ):
            result = ser_daily.loc["1/10/2024":"5-01-2024"]
        # Jan 10 through the end of the index (end bound parses as May 1).
        assert result.tolist() == list(range(9, 15))

    def test_loc_slicing_iso_formats_no_warning(self, ser_daily):
        # GH#58302 - ISO slice formats should NOT warn
        with tm.assert_produces_warning(None):
            result = ser_daily.loc["2024-01-05":"2024-01-10"]
        assert result.tolist() == list(range(4, 10))

    def test_loc_non_string_keys_no_warning(self, ser_daily):
        # GH#58302 - Non-string keys should not warn
        with tm.assert_produces_warning(None):
            result = ser_daily.loc[Timestamp("2024-01-10")]
        assert result == 9
6 changes: 6 additions & 0 deletions pandas/tests/indexes/period/test_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,9 @@ def test_getitem_list_periods(self):
tm.assert_series_equal(ts[[Period("2012-01-02", freq="D")]], exp)

@pytest.mark.arm_slow
@pytest.mark.filterwarnings(
"ignore:Parsing non-ISO datetime strings:pandas.errors.Pandas4Warning"
)
def test_getitem_seconds(self):
# GH#6716
didx = date_range(start="2013/01/01 09:00:00", freq="s", periods=4000)
Expand Down Expand Up @@ -206,6 +209,9 @@ def test_getitem_seconds(self):
period_range,
],
)
@pytest.mark.filterwarnings(
"ignore:Parsing non-ISO datetime strings:pandas.errors.Pandas4Warning"
)
def test_getitem_day(self, idx_range):
# GH#6716
# Confirm DatetimeIndex and PeriodIndex works identically
Expand Down
4 changes: 4 additions & 0 deletions pandas/tests/indexes/period/test_partial_slicing.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,10 @@


class TestPeriodIndex:
pytestmark = pytest.mark.filterwarnings(
"ignore:Parsing non-ISO datetime strings:pandas.errors.Pandas4Warning"
)

def test_getitem_periodindex_duplicates_string_slice(self):
# monotonic
idx = PeriodIndex([2000, 2007, 2007, 2009, 2009], freq="Y-JUN")
Expand Down
3 changes: 3 additions & 0 deletions pandas/tests/indexing/multiindex/test_slice.py
Original file line number Diff line number Diff line change
Expand Up @@ -308,6 +308,9 @@ def test_multiindex_slicers_datetimelike(self):
]
tm.assert_frame_equal(result, expected)

@pytest.mark.filterwarnings(
"ignore:Parsing non-ISO datetime strings:pandas.errors.Pandas4Warning"
)
def test_multiindex_slicers_edges(self):
# GH 8132
# various edge cases
Expand Down
9 changes: 9 additions & 0 deletions pandas/tests/indexing/test_loc.py
Original file line number Diff line number Diff line change
Expand Up @@ -261,6 +261,9 @@ def test_loc_getitem_single_boolean_arg(self, obj, key, exp):

class TestLocBaseIndependent:
# Tests for loc that do not depend on subclassing Base
@pytest.mark.filterwarnings(
"ignore:Parsing non-ISO datetime strings:pandas.errors.Pandas4Warning"
)
def test_loc_npstr(self):
# GH#45580
df = DataFrame(index=date_range("2021", "2022"))
Expand Down Expand Up @@ -1262,6 +1265,9 @@ def test_loc_setitem_str_to_small_float_conversion_type(self, using_infer_string
expected = DataFrame(col_data, columns=["A"], dtype=float)
tm.assert_frame_equal(result, expected)

@pytest.mark.filterwarnings(
"ignore:Parsing non-ISO datetime strings:pandas.errors.Pandas4Warning"
)
def test_loc_getitem_time_object(self, frame_or_series):
rng = date_range("1/1/2000", "1/5/2000", freq="5min")
mask = (rng.hour == 9) & (rng.minute == 30)
Expand Down Expand Up @@ -2415,6 +2421,9 @@ def test_loc_getitem_partial_slice_non_monotonicity(


class TestLabelSlicing:
@pytest.mark.filterwarnings(
"ignore:Parsing non-ISO datetime strings:pandas.errors.Pandas4Warning"
)
def test_loc_getitem_slicing_datetimes_frame(self):
# GH#7523

Expand Down
12 changes: 12 additions & 0 deletions pandas/tests/resample/test_datetime_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -327,6 +327,9 @@ def test_resample_rounding(unit):
tm.assert_frame_equal(result, expected)


@pytest.mark.filterwarnings(
"ignore:Parsing non-ISO datetime strings:pandas.errors.Pandas4Warning"
)
def test_resample_basic_from_daily(unit):
# from daily
dti = date_range(
Expand Down Expand Up @@ -551,6 +554,9 @@ def test_resample_ohlc(unit):
assert xs["close"] == s.iloc[4]


@pytest.mark.filterwarnings(
"ignore:Parsing non-ISO datetime strings:pandas.errors.Pandas4Warning"
)
def test_resample_ohlc_result(unit):
# GH 12332
index = date_range("1-1-2000", "2-15-2000", freq="h").as_unit(unit)
Expand Down Expand Up @@ -662,6 +668,9 @@ def test_resample_timestamp_to_period(
tm.assert_series_equal(result, expected)


@pytest.mark.filterwarnings(
"ignore:Parsing non-ISO datetime strings:pandas.errors.Pandas4Warning"
)
def test_ohlc_5min(unit):
def _ohlc(group):
if isna(group).all():
Expand Down Expand Up @@ -1576,6 +1585,9 @@ def test_resample_dst_anchor(unit):
)


@pytest.mark.filterwarnings(
"ignore:Parsing non-ISO datetime strings:pandas.errors.Pandas4Warning"
)
def test_resample_dst_anchor2(unit):
dti = date_range(
"2013-09-30", "2013-11-02", freq="30Min", tz="Europe/Paris"
Expand Down
3 changes: 3 additions & 0 deletions pandas/tests/resample/test_resampler_grouper.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,9 @@ def test_groupby_resample_on_api_with_getitem():
tm.assert_series_equal(result, exp)


@pytest.mark.filterwarnings(
"ignore:Parsing non-ISO datetime strings:pandas.errors.Pandas4Warning"
)
def test_groupby_with_origin():
# GH 31809

Expand Down
6 changes: 6 additions & 0 deletions pandas/tests/series/indexing/test_datetime.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,9 @@
import pandas._testing as tm


@pytest.mark.filterwarnings(
"ignore:Parsing non-ISO datetime strings:pandas.errors.Pandas4Warning"
)
def test_fancy_getitem():
dti = date_range(
freq="WOM-1FRI", start=datetime(2005, 1, 1), end=datetime(2010, 1, 1)
Expand All @@ -46,6 +49,9 @@ def test_fancy_getitem():
)


@pytest.mark.filterwarnings(
"ignore:Parsing non-ISO datetime strings:pandas.errors.Pandas4Warning"
)
def test_fancy_setitem():
dti = date_range(
freq="WOM-1FRI", start=datetime(2005, 1, 1), end=datetime(2010, 1, 1)
Expand Down
Loading
Loading