Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v3.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1274,6 +1274,7 @@ Other
- Fixed bug in the :meth:`Series.rank` with object dtype and extremely small float values (:issue:`62036`)
- Fixed bug where the :class:`DataFrame` constructor misclassified array-like objects with a ``.name`` attribute as :class:`Series` or :class:`Index` (:issue:`61443`)
- Fixed regression in :meth:`DataFrame.from_records` not initializing subclasses properly (:issue:`57008`)
- Fixed bug in :func:`_guess_datetime_format_for_array` where the datetime format was inferred from only the first non-null element instead of from all elements of the array

.. ***DO NOT USE THIS SECTION***

Expand Down
25 changes: 0 additions & 25 deletions pandas/_libs/tslibs/parsing.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -1049,7 +1049,6 @@ def guess_datetime_format(dt_str: str, bint dayfirst=False) -> str | None:
# rebuild string, capturing any inferred padding
dt_str = "".join(tokens)
if parsed_datetime.strftime(guessed_format) == dt_str:
_maybe_warn_about_dayfirst(guessed_format, dayfirst)
return guessed_format
else:
return None
Expand All @@ -1072,30 +1071,6 @@ cdef str _fill_token(token: str, padding: int):
return token_filled


cdef void _maybe_warn_about_dayfirst(format: str, bint dayfirst) noexcept:
    """
    Warn if guessed datetime format doesn't respect dayfirst argument.

    Parameters
    ----------
    format : str
        Guessed format string; it is searched for the ``%d`` and ``%m``
        directives.
    dayfirst : bint
        The ``dayfirst`` preference that the guessed format is compared against.

    Notes
    -----
    A ``UserWarning`` is emitted only when both ``%d`` and ``%m`` occur in
    ``format`` and their relative order disagrees with ``dayfirst``. Nothing
    is returned.
    """
    cdef:
        # str.find yields -1 when the directive does not occur in ``format``.
        int day_index = format.find("%d")
        int month_index = format.find("%m")

    # The day/month order can only be judged when both directives are present.
    if (day_index != -1) and (month_index != -1):
        # ``%m`` precedes ``%d`` although day-first parsing was requested.
        if (day_index > month_index) and dayfirst:
            warnings.warn(
                f"Parsing dates in {format} format when dayfirst=True was specified. "
                "Pass `dayfirst=False` or specify a format to silence this warning.",
                UserWarning,
                stacklevel=find_stack_level(),
            )
        # ``%d`` precedes ``%m`` although ``dayfirst`` is False.
        if (day_index < month_index) and not dayfirst:
            warnings.warn(
                f"Parsing dates in {format} format when dayfirst=False (the default) "
                "was specified. "
                "Pass `dayfirst=True` or specify a format to silence this warning.",
                UserWarning,
                stacklevel=find_stack_level(),
            )

Comment on lines -1075 to -1097
Copy link
Author

@Ben-Cutler Ben-Cutler Nov 3, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wasn't sure what to do with this code. Are these kinds of warnings helpful? They were causing my tests to fail on pytest teardown, and were being emitted in tests whose input strings used a different format (so if I specified `dayfirst=True` on a month-first input it would raise the warning, and if I specified `dayfirst=False` on a day-first test case it would fail).

Happy to revert this or make some other change


cpdef str get_rule_month(str source):
"""
Return starting month of given freq, default is December.
Expand Down
52 changes: 30 additions & 22 deletions pandas/core/tools/datetimes.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
from __future__ import annotations

from collections import abc
from datetime import date
from datetime import (
date,
datetime,
)
from functools import partial
from itertools import islice
from typing import (
Expand All @@ -12,7 +15,6 @@
cast,
overload,
)
import warnings

import numpy as np

Expand Down Expand Up @@ -42,7 +44,6 @@
DateTimeErrorChoices,
)
from pandas.util._decorators import set_module
from pandas.util._exceptions import find_stack_level

from pandas.core.dtypes.common import (
ensure_object,
Expand Down Expand Up @@ -84,7 +85,6 @@
Callable,
Hashable,
)

from pandas._libs.tslibs.nattype import NaTType
from pandas._libs.tslibs.timedeltas import UnitChoices

Expand Down Expand Up @@ -129,26 +129,34 @@ class FulldatetimeDict(YearMonthDayDict, total=False):
# ---------------------------------------------------------------------


def _guess_datetime_format_for_array(arr, dayfirst: bool | None = False) -> str | None:
def _guess_datetime_format_for_array(
arr: np.ndarray, dayfirst: bool | None = False
) -> str | None:
# Try to guess the format based on the first non-NaN element, return None if can't
if (first_non_null := tslib.first_non_null(arr)) != -1:
if type(first_non_nan_element := arr[first_non_null]) is str:
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wanted to replace the `type(x) is str` check with `isinstance(x, str)` so that we could also consume `np.str_` values

# GH#32264 np.str_ object
guessed_format = guess_datetime_format(
first_non_nan_element, dayfirst=dayfirst
)
search_start = 0
allowed_formats = set()
while not search_start >= len(arr):
non_null_offset = tslib.first_non_null(arr[search_start:])
if non_null_offset == -1:
break
idx = search_start + non_null_offset
element = arr[idx]
if isinstance(element, str):
guessed_format = guess_datetime_format(str(element), dayfirst=dayfirst)
if guessed_format is not None:
return guessed_format
# If there are multiple non-null elements, warn about
# how parsing might not be consistent
if tslib.first_non_null(arr[first_non_null + 1 :]) != -1:
warnings.warn(
"Could not infer format, so each element will be parsed "
"individually, falling back to `dateutil`. To ensure parsing is "
"consistent and as-expected, please specify a format.",
UserWarning,
stacklevel=find_stack_level(),
)
allowed_formats.add(guessed_format)
search_start = idx + 1
# Look through the formats and see if one satisfies each item in the array
for fmt in list(allowed_formats):
try:
[
datetime.strptime(date_string, fmt)
for date_string in arr
if date_string and isinstance(date_string, str)
]
return fmt
except ValueError:
pass
return None


Expand Down
16 changes: 5 additions & 11 deletions pandas/tests/tools/test_to_datetime.py
Original file line number Diff line number Diff line change
Expand Up @@ -1429,10 +1429,7 @@ def test_datetime_invalid_index(self, values, format):
else:
warn = None

with tm.assert_produces_warning(
warn, match="Could not infer format", raise_on_extra_warnings=False
):
res = to_datetime(values, errors="coerce", format=format)
res = to_datetime(values, errors="coerce", format=format)
tm.assert_index_equal(res, DatetimeIndex([NaT] * len(values)))

msg = "|".join(
Expand Down Expand Up @@ -1628,13 +1625,10 @@ def test_to_datetime_malformed_raise(self):
ValueError,
match=msg,
):
with tm.assert_produces_warning(
UserWarning, match="Could not infer format"
):
to_datetime(
ts_strings,
errors="raise",
)
to_datetime(
ts_strings,
errors="raise",
)

def test_iso_8601_strings_with_same_offset(self):
# GH 17697, 11736
Expand Down
81 changes: 56 additions & 25 deletions pandas/tests/tslibs/test_parsing.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import pytest

from pandas._libs.tslibs import (
NaT,
parsing,
strptime,
)
Expand All @@ -29,6 +30,7 @@
option_context,
)
import pandas._testing as tm
from pandas.core.tools.datetimes import _guess_datetime_format_for_array


@pytest.mark.skipif(WASM, reason="tzset is not available on WASM")
Expand Down Expand Up @@ -230,10 +232,7 @@ def test_parsers_month_freq(date_str, expected):
],
)
def test_guess_datetime_format_with_parseable_formats(string, fmt):
with tm.maybe_produces_warning(
UserWarning, fmt is not None and re.search(r"%d.*%m", fmt)
):
result = parsing.guess_datetime_format(string)
result = parsing.guess_datetime_format(string)
assert result == fmt


Expand Down Expand Up @@ -290,31 +289,25 @@ def test_guess_datetime_format_wrong_type_inputs(invalid_type_dt):


@pytest.mark.parametrize(
"string,fmt,dayfirst,warning",
"string,fmt,dayfirst",
[
("2011-1-1", "%Y-%m-%d", False, None),
("2011-1-1", "%Y-%d-%m", True, None),
("1/1/2011", "%m/%d/%Y", False, None),
("1/1/2011", "%d/%m/%Y", True, None),
("30-1-2011", "%d-%m-%Y", False, UserWarning),
("30-1-2011", "%d-%m-%Y", True, None),
("2011-1-1 0:0:0", "%Y-%m-%d %H:%M:%S", False, None),
("2011-1-1 0:0:0", "%Y-%d-%m %H:%M:%S", True, None),
("2011-1-3T00:00:0", "%Y-%m-%dT%H:%M:%S", False, None),
("2011-1-3T00:00:0", "%Y-%d-%mT%H:%M:%S", True, None),
("2011-1-1 00:00:00", "%Y-%m-%d %H:%M:%S", False, None),
("2011-1-1 00:00:00", "%Y-%d-%m %H:%M:%S", True, None),
("2011-1-1", "%Y-%m-%d", False),
("2011-1-1", "%Y-%d-%m", True),
("1/1/2011", "%m/%d/%Y", False),
("1/1/2011", "%d/%m/%Y", True),
("30-1-2011", "%d-%m-%Y", False),
("30-1-2011", "%d-%m-%Y", True),
("2011-1-1 0:0:0", "%Y-%m-%d %H:%M:%S", False),
("2011-1-1 0:0:0", "%Y-%d-%m %H:%M:%S", True),
("2011-1-3T00:00:0", "%Y-%m-%dT%H:%M:%S", False),
("2011-1-3T00:00:0", "%Y-%d-%mT%H:%M:%S", True),
("2011-1-1 00:00:00", "%Y-%m-%d %H:%M:%S", False),
("2011-1-1 00:00:00", "%Y-%d-%m %H:%M:%S", True),
],
)
def test_guess_datetime_format_no_padding(string, fmt, dayfirst, warning):
def test_guess_datetime_format_no_padding(string, fmt, dayfirst):
# see gh-11142
msg = (
rf"Parsing dates in {fmt} format when dayfirst=False \(the default\) "
"was specified. "
"Pass `dayfirst=True` or specify a format to silence this warning."
)
with tm.assert_produces_warning(warning, match=msg):
result = parsing.guess_datetime_format(string, dayfirst=dayfirst)
result = parsing.guess_datetime_format(string, dayfirst=dayfirst)
assert result == fmt


Expand Down Expand Up @@ -424,3 +417,41 @@ def test_parse_datetime_string_with_reso_yearfirst(yearfirst, input):
)
assert except_out_dateutil == except_in_dateutil
assert result[0] == expected


@pytest.mark.parametrize(
    "expected_format, array",
    [
        ("%d/%m/%Y", np.array(["01/02/2025", "30/07/2025"])),
        ("%Y-%m-%d", np.array(["2025-08-09", "2025-08-13", None])),
        ("%m/%d/%Y", np.array(["02/01/2025", "12/31/2025"])),
        ("%d-%m-%Y", np.array(["01-02-2025", "30-07-2025"])),
        ("%d.%m.%Y", np.array(["01.02.2025", "30.07.2025"])),
        ("%Y/%m/%d", np.array(["2025/08/09", "2025/12/01"])),
        ("%b %d, %Y", np.array(["Feb 01, 2025", "Jul 30, 2025"])),
        ("%B %d, %Y", np.array(["February 01, 2025", "July 30, 2025"])),
        ("%d %b %Y", np.array(["01 Feb 2025", "30 Jul 2025"])),
        ("%d-%b-%Y", np.array(["01-Feb-2025", "30-Jul-2025"])),
        ("%Y%m%d", np.array(["20250201", "20250730"])),
        # Two-digit years: no format is expected to be guessed.
        (None, np.array(["02/01/25", "12/31/25"])),
        ("%Y-%m-%d %H:%M:%S", np.array(["2025-08-09 14:30:00", "2025-12-01 00:00:00"])),
        ("%Y-%m-%dT%H:%M:%S", np.array(["2025-08-09T14:30:00", "2025-12-01T00:00:00"])),
        (
            "%Y-%m-%dT%H:%M:%S.%f",
            np.array(["2025-08-09T14:30:00.123456", "2025-12-01T00:00:00.5"]),
        ),
        (
            "%Y-%m-%d %H:%M:%S%z",
            np.array(["2025-08-09 14:30:00+0000", "2025-12-01 09:15:00-0500"]),
        ),
        # A null entry between two strings must not stop the guess.
        ("%Y-%m-%d", np.array(["2025-08-09", None, "2025-12-01"])),
        # Entries that are not parseable dates: no format is expected.
        (None, np.array(["2025/13/01", "not-a-date", "", NaT])),
        # Elements written in two different formats: no single format is expected.
        (
            None,
            np.array(["01/02/2025", "2025-02-01", np.nan]),
        ),
    ],
)
def test_guess_datetime_format_for_array(
    expected_format: str | None, array: np.ndarray
) -> None:
    """
    Check the format guessed for a whole array with ``dayfirst=False``.

    ``expected_format`` is ``None`` for the cases where no format is expected
    to be returned.
    """
    fmt = _guess_datetime_format_for_array(array, dayfirst=False)
    assert fmt == expected_format, f"{fmt} does not match {expected_format}"
Loading