diff --git a/pandas/io/parquet.py b/pandas/io/parquet.py
index 09539089b3904..459f518d076db 100644
--- a/pandas/io/parquet.py
+++ b/pandas/io/parquet.py
@@ -27,6 +27,8 @@
 )
 from pandas.util._validators import check_dtype_backend
 
+from pandas.core.dtypes.common import is_file_like
+
 from pandas import (
     DataFrame,
     get_option,
@@ -658,6 +660,13 @@ def read_parquet(
     0    3    8
     1    4    9
     """
+    # gh-62922: validate path type early to match documented API expectations
+    # and provide a consistent, clear user error immediately.
+    if not (isinstance(path, (str, os.PathLike)) or is_file_like(path)):
+        raise TypeError(
+            f"read_parquet expected str/os.PathLike or file-like object, "
+            f"got {type(path).__name__} type"
+        )
 
     impl = get_engine(engine)
     check_dtype_backend(dtype_backend)
diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py
index 2927b24624026..eff6763e807e1 100644
--- a/pandas/tests/io/test_parquet.py
+++ b/pandas/tests/io/test_parquet.py
@@ -248,6 +248,47 @@ def check_partition_names(path, expected):
     assert dataset.partitioning.schema.names == expected
 
 
+@pytest.mark.parametrize(
+    "make_bad_path",
+    [
+        lambda p: [p],
+        lambda p: (p,),
+        lambda p: p.encode(),
+    ],
+    ids=["list", "tuple", "bytes"],
+)
+def test_read_parquet_invalid_path_types(tmp_path, engine, make_bad_path):
+    # GH#62922: every unsupported input type is its own test case, so one
+    # failing type cannot mask the others.
+    df = pd.DataFrame({"a": [1]})
+    path = tmp_path / "test_read_parquet.parquet"
+    df.to_parquet(path, engine=engine)
+
+    bad = make_bad_path(str(path))
+    msg = (
+        "read_parquet expected str/os.PathLike or file-like object, "
+        f"got {type(bad).__name__} type"
+    )
+    with pytest.raises(TypeError, match=msg):
+        read_parquet(bad, engine=engine)
+
+
+def test_read_parquet_valid_path_types(tmp_path, engine):
+    # GH#62922: str, os.PathLike and file-like inputs must pass validation.
+    df = pd.DataFrame({"a": [1]})
+    path = tmp_path / "test_read_parquet.parquet"
+    df.to_parquet(path, engine=engine)
+    # str
+    read_parquet(str(path), engine=engine)
+    # os.PathLike (tmp_path / name is already a pathlib.Path)
+    read_parquet(path, engine=engine)
+    # file-like object
+    buf = BytesIO()
+    df.to_parquet(buf, engine=engine)
+    buf.seek(0)
+    read_parquet(buf, engine=engine)
+
+
 def test_invalid_engine(df_compat, temp_file):
     msg = "engine must be one of 'pyarrow', 'fastparquet'"
     with pytest.raises(ValueError, match=msg):