From 2bda5bd2ae312f5d880cb7bd28cdfb1f46f462c4 Mon Sep 17 00:00:00 2001 From: nail Date: Tue, 4 Nov 2025 22:22:01 -0800 Subject: [PATCH 1/3] DOC: Add Google Colab data loading guide Adds comprehensive guide for loading data in Google Colab environment. Covers Google Drive, local uploads, URLs, and Google Sheets with working code examples for each method. Closes #62708 --- doc/source/user_guide/io.rst | 85 ++++++++++++++++++++++++++++++++++++ 1 file changed, 85 insertions(+) diff --git a/doc/source/user_guide/io.rst b/doc/source/user_guide/io.rst index 7092a0f8fa8d8..c128161993139 100644 --- a/doc/source/user_guide/io.rst +++ b/doc/source/user_guide/io.rst @@ -6316,6 +6316,91 @@ More information about the SAV and ZSAV file formats is available here_. .. _here: https://www.ibm.com/docs/en/spss-statistics/22.0.0 + +.. _io.colab: + +Loading Data in Google Colab +----------------------------- + +Google Colab is a popular cloud-based Jupyter notebook environment. pandas works seamlessly in Colab, and there are several ways to load data: + +From Google Drive +~~~~~~~~~~~~~~~~~ + +To access files stored in your Google Drive: + +.. code-block:: python + + from google.colab import drive + import pandas as pd + + # Mount your Google Drive + drive.mount('/content/drive') + + # Read file from Drive + df = pd.read_csv('/content/drive/MyDrive/your_file.csv') + +From Local Computer +~~~~~~~~~~~~~~~~~~~ + +To upload files from your local machine: + +.. code-block:: python + + from google.colab import files + import pandas as pd + import io + + # Upload file (opens file picker dialog) + uploaded = files.upload() + + # Read the uploaded file + for filename in uploaded.keys(): + df = pd.read_csv(io.BytesIO(uploaded[filename])) + +From URL +~~~~~~~~ + +Direct URL loading works the same as in standard pandas: + +.. 
code-block:: python + + import pandas as pd + + # Read from any public URL + url = 'https://raw.githubusercontent.com/example/repo/main/data.csv' + df = pd.read_csv(url) + +From Google Sheets +~~~~~~~~~~~~~~~~~~ + +To read data from Google Sheets: + +.. code-block:: python + + import pandas as pd + + # Option 1: Export as CSV (sheet must be publicly accessible) + sheet_id = 'your-spreadsheet-id' + sheet_name = 'Sheet1' + url = f'https://docs.google.com/spreadsheets/d/{sheet_id}/gviz/tq?tqx=out:csv&sheet={sheet_name}' + df = pd.read_csv(url) + + # Option 2: Using authentication for private sheets + from google.colab import auth + import gspread + from google.auth import default + + auth.authenticate_user() + creds, _ = default() + gc = gspread.authorize(creds) + + worksheet = gc.open('Your Spreadsheet Name').sheet1 + data = worksheet.get_all_values() + df = pd.DataFrame(data[1:], columns=data[0]) + +For more details on Colab-specific I/O operations, see the `official Google Colab I/O guide <https://colab.research.google.com/notebooks/io.ipynb>`_. + .. 
_io.other: Other file formats From a46c4bab58d7393c9f0a90bcbec0a486f8acf515 Mon Sep 17 00:00:00 2001 From: nail Date: Wed, 5 Nov 2025 14:44:28 -0800 Subject: [PATCH 2/3] docs: Add gspread installation note for Google Sheets authentication --- doc/source/user_guide/io.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/user_guide/io.rst b/doc/source/user_guide/io.rst index c128161993139..42c684cf92307 100644 --- a/doc/source/user_guide/io.rst +++ b/doc/source/user_guide/io.rst @@ -6387,6 +6387,7 @@ To read data from Google Sheets: df = pd.read_csv(url) # Option 2: Using authentication for private sheets + # Note: Requires gspread library (pip install gspread) from google.colab import auth import gspread from google.auth import default From 305720ffab79feac3527f3503968e0d4152697d5 Mon Sep 17 00:00:00 2001 From: nail Date: Wed, 5 Nov 2025 20:52:50 -0800 Subject: [PATCH 3/3] DOC: Remove trailing whitespace from io.rst --- doc/source/user_guide/io.rst | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/doc/source/user_guide/io.rst b/doc/source/user_guide/io.rst index 42c684cf92307..64303d15a3a62 100644 --- a/doc/source/user_guide/io.rst +++ b/doc/source/user_guide/io.rst @@ -6333,10 +6333,10 @@ To access files stored in your Google Drive: from google.colab import drive import pandas as pd - + # Mount your Google Drive drive.mount('/content/drive') - + # Read file from Drive df = pd.read_csv('/content/drive/MyDrive/your_file.csv') @@ -6350,10 +6350,10 @@ To upload files from your local machine: from google.colab import files import pandas as pd import io - + # Upload file (opens file picker dialog) uploaded = files.upload() - + # Read the uploaded file for filename in uploaded.keys(): df = pd.read_csv(io.BytesIO(uploaded[filename])) @@ -6366,7 +6366,7 @@ Direct URL loading works the same as in standard pandas: .. 
code-block:: python import pandas as pd - + # Read from any public URL url = 'https://raw.githubusercontent.com/example/repo/main/data.csv' df = pd.read_csv(url) @@ -6379,23 +6379,23 @@ To read data from Google Sheets: .. code-block:: python import pandas as pd - + # Option 1: Export as CSV (sheet must be publicly accessible) sheet_id = 'your-spreadsheet-id' sheet_name = 'Sheet1' url = f'https://docs.google.com/spreadsheets/d/{sheet_id}/gviz/tq?tqx=out:csv&sheet={sheet_name}' df = pd.read_csv(url) - + # Option 2: Using authentication for private sheets # Note: Requires gspread library (pip install gspread) from google.colab import auth import gspread from google.auth import default - + auth.authenticate_user() creds, _ = default() gc = gspread.authorize(creds) - + worksheet = gc.open('Your Spreadsheet Name').sheet1 data = worksheet.get_all_values() df = pd.DataFrame(data[1:], columns=data[0])