diff --git a/docs/examples/data_connectors/GoogleSheetsDemo.ipynb b/docs/examples/data_connectors/GoogleSheetsDemo.ipynb
new file mode 100644
index 00000000000000..89358490b8c98d
--- /dev/null
+++ b/docs/examples/data_connectors/GoogleSheetsDemo.ipynb
@@ -0,0 +1,684 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# **Google Sheets Reader**\n",
+ "Demonstrates Google Sheets Reader in LlamaIndex\n",
+ "\n",
+ "\n",
+ "* Make sure you have a token.json or credentials.json file in your environment. More on that [here](https://developers.google.com/workspace/guides/create-credentials).\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from llama_index.readers.google import GoogleSheetsReader"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Load sheets as a list of pandas DataFrames"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "list_of_sheets = [\"1ZF5iIeLLqROHbHsb1vOeRaLWKIgLU7rDDTSOZaqjpk0\"]\n",
+ "sheets = GoogleSheetsReader()\n",
+ "dataframes = sheets.load_data_in_pandas(list_of_sheets)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "application/vnd.google.colaboratory.intrinsic+json": {
+ "type": "dataframe"
+ },
+ "text/html": [
+ "\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " spotify_id | \n",
+ " name | \n",
+ " artists | \n",
+ " daily_rank | \n",
+ " daily_movement | \n",
+ " weekly_movement | \n",
+ " country | \n",
+ " snapshot_date | \n",
+ " popularity | \n",
+ " is_explicit | \n",
+ " ... | \n",
+ " key | \n",
+ " loudness | \n",
+ " mode | \n",
+ " speechiness | \n",
+ " acousticness | \n",
+ " instrumentalness | \n",
+ " liveness | \n",
+ " valence | \n",
+ " tempo | \n",
+ " time_signature | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 2HafqoJbgXdtjwCOvNEF14 | \n",
+ " Si No Estás | \n",
+ " iñigo quintero | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 3 | \n",
+ " | \n",
+ " 2023-10-27 | \n",
+ " 97 | \n",
+ " FALSE | \n",
+ " ... | \n",
+ " 5 | \n",
+ " -8.72 | \n",
+ " 1 | \n",
+ " 0.0285 | \n",
+ " 0.827 | \n",
+ " 0 | \n",
+ " 0.138 | \n",
+ " 0.524 | \n",
+ " 98.224 | \n",
+ " 4 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 7x9aauaA9cu6tyfpHnqDLo | \n",
+ " Seven (feat. Latto) (Explicit Ver.) | \n",
+ " Jung Kook, Latto | \n",
+ " 2 | \n",
+ " 4 | \n",
+ " 0 | \n",
+ " | \n",
+ " 2023-10-27 | \n",
+ " 97 | \n",
+ " TRUE | \n",
+ " ... | \n",
+ " 11 | \n",
+ " -4.107 | \n",
+ " 1 | \n",
+ " 0.0434 | \n",
+ " 0.311 | \n",
+ " 0 | \n",
+ " 0.0815 | \n",
+ " 0.89 | \n",
+ " 124.997 | \n",
+ " 4 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 3rUGC1vUpkDG9CZFHMur1t | \n",
+ " greedy | \n",
+ " Tate McRae | \n",
+ " 3 | \n",
+ " -1 | \n",
+ " 2 | \n",
+ " | \n",
+ " 2023-10-27 | \n",
+ " 99 | \n",
+ " TRUE | \n",
+ " ... | \n",
+ " 6 | \n",
+ " -3.18 | \n",
+ " 0 | \n",
+ " 0.0319 | \n",
+ " 0.256 | \n",
+ " 0 | \n",
+ " 0.114 | \n",
+ " 0.844 | \n",
+ " 111.018 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 4MjDJD8cW7iVeWInc2Bdyj | \n",
+ " MONACO | \n",
+ " Bad Bunny | \n",
+ " 4 | \n",
+ " -1 | \n",
+ " -3 | \n",
+ " | \n",
+ " 2023-10-27 | \n",
+ " 96 | \n",
+ " TRUE | \n",
+ " ... | \n",
+ " 4 | \n",
+ " -5.009 | \n",
+ " 0 | \n",
+ " 0.068 | \n",
+ " 0.15 | \n",
+ " 0.000402 | \n",
+ " 0.58 | \n",
+ " 0.13 | \n",
+ " 139.056 | \n",
+ " 4 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 7iQXYTyuG13aoeHxGG28Nh | \n",
+ " PERRO NEGRO | \n",
+ " Bad Bunny, Feid | \n",
+ " 5 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " | \n",
+ " 2023-10-27 | \n",
+ " 94 | \n",
+ " TRUE | \n",
+ " ... | \n",
+ " 5 | \n",
+ " -2.248 | \n",
+ " 1 | \n",
+ " 0.262 | \n",
+ " 0.0887 | \n",
+ " 2.16E-05 | \n",
+ " 0.179 | \n",
+ " 0.345 | \n",
+ " 96.057 | \n",
+ " 4 | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 36395 | \n",
+ " 0AYt6NMyyLd0rLuvr0UkMH | \n",
+ " Slime You Out (feat. SZA) | \n",
+ " Drake, SZA | \n",
+ " 46 | \n",
+ " 4 | \n",
+ " 0 | \n",
+ " AE | \n",
+ " 2023-10-18 | \n",
+ " 84 | \n",
+ " TRUE | \n",
+ " ... | \n",
+ " 5 | \n",
+ " -9.243 | \n",
+ " 0 | \n",
+ " 0.0502 | \n",
+ " 0.508 | \n",
+ " 0 | \n",
+ " 0.259 | \n",
+ " 0.105 | \n",
+ " 88.88 | \n",
+ " 3 | \n",
+ "
\n",
+ " \n",
+ " 36396 | \n",
+ " 2Gk6fi0dqt91NKvlzGsmm7 | \n",
+ " SAY MY GRACE (feat. Travis Scott) | \n",
+ " Offset, Travis Scott | \n",
+ " 47 | \n",
+ " 3 | \n",
+ " 0 | \n",
+ " AE | \n",
+ " 2023-10-18 | \n",
+ " 80 | \n",
+ " TRUE | \n",
+ " ... | \n",
+ " 10 | \n",
+ " -5.06 | \n",
+ " 1 | \n",
+ " 0.0452 | \n",
+ " 0.0585 | \n",
+ " 0 | \n",
+ " 0.132 | \n",
+ " 0.476 | \n",
+ " 121.879 | \n",
+ " 4 | \n",
+ "
\n",
+ " \n",
+ " 36397 | \n",
+ " 26b3oVLrRUaaybJulow9kz | \n",
+ " People | \n",
+ " Libianca | \n",
+ " 48 | \n",
+ " 2 | \n",
+ " 0 | \n",
+ " AE | \n",
+ " 2023-10-18 | \n",
+ " 88 | \n",
+ " FALSE | \n",
+ " ... | \n",
+ " 10 | \n",
+ " -7.621 | \n",
+ " 0 | \n",
+ " 0.0678 | \n",
+ " 0.551 | \n",
+ " 1.31E-05 | \n",
+ " 0.102 | \n",
+ " 0.693 | \n",
+ " 124.357 | \n",
+ " 5 | \n",
+ "
\n",
+ " \n",
+ " 36398 | \n",
+ " 5ydjxBSUIDn26MFzU3asP4 | \n",
+ " Rainy Days | \n",
+ " V | \n",
+ " 49 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " AE | \n",
+ " 2023-10-18 | \n",
+ " 88 | \n",
+ " FALSE | \n",
+ " ... | \n",
+ " 9 | \n",
+ " -8.016 | \n",
+ " 0 | \n",
+ " 0.0875 | \n",
+ " 0.739 | \n",
+ " 0 | \n",
+ " 0.148 | \n",
+ " 0.282 | \n",
+ " 74.828 | \n",
+ " 4 | \n",
+ "
\n",
+ " \n",
+ " 36399 | \n",
+ " 59NraMJsLaMCVtwXTSia8i | \n",
+ " Prada | \n",
+ " cassö, RAYE, D-Block Europe | \n",
+ " 50 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " AE | \n",
+ " 2023-10-18 | \n",
+ " 94 | \n",
+ " TRUE | \n",
+ " ... | \n",
+ " 8 | \n",
+ " -5.804 | \n",
+ " 1 | \n",
+ " 0.0375 | \n",
+ " 0.001 | \n",
+ " 1.79E-06 | \n",
+ " 0.113 | \n",
+ " 0.422 | \n",
+ " 141.904 | \n",
+ " 4 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
36400 rows × 25 columns
\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ],
+ "text/plain": [
+ " spotify_id name \\\n",
+ "0 2HafqoJbgXdtjwCOvNEF14 Si No Estás \n",
+ "1 7x9aauaA9cu6tyfpHnqDLo Seven (feat. Latto) (Explicit Ver.) \n",
+ "2 3rUGC1vUpkDG9CZFHMur1t greedy \n",
+ "3 4MjDJD8cW7iVeWInc2Bdyj MONACO \n",
+ "4 7iQXYTyuG13aoeHxGG28Nh PERRO NEGRO \n",
+ "... ... ... \n",
+ "36395 0AYt6NMyyLd0rLuvr0UkMH Slime You Out (feat. SZA) \n",
+ "36396 2Gk6fi0dqt91NKvlzGsmm7 SAY MY GRACE (feat. Travis Scott) \n",
+ "36397 26b3oVLrRUaaybJulow9kz People \n",
+ "36398 5ydjxBSUIDn26MFzU3asP4 Rainy Days \n",
+ "36399 59NraMJsLaMCVtwXTSia8i Prada \n",
+ "\n",
+ " artists daily_rank daily_movement weekly_movement \\\n",
+ "0 iñigo quintero 1 0 3 \n",
+ "1 Jung Kook, Latto 2 4 0 \n",
+ "2 Tate McRae 3 -1 2 \n",
+ "3 Bad Bunny 4 -1 -3 \n",
+ "4 Bad Bunny, Feid 5 0 1 \n",
+ "... ... ... ... ... \n",
+ "36395 Drake, SZA 46 4 0 \n",
+ "36396 Offset, Travis Scott 47 3 0 \n",
+ "36397 Libianca 48 2 0 \n",
+ "36398 V 49 1 0 \n",
+ "36399 cassö, RAYE, D-Block Europe 50 0 0 \n",
+ "\n",
+ " country snapshot_date popularity is_explicit ... key loudness mode \\\n",
+ "0 2023-10-27 97 FALSE ... 5 -8.72 1 \n",
+ "1 2023-10-27 97 TRUE ... 11 -4.107 1 \n",
+ "2 2023-10-27 99 TRUE ... 6 -3.18 0 \n",
+ "3 2023-10-27 96 TRUE ... 4 -5.009 0 \n",
+ "4 2023-10-27 94 TRUE ... 5 -2.248 1 \n",
+ "... ... ... ... ... ... .. ... ... \n",
+ "36395 AE 2023-10-18 84 TRUE ... 5 -9.243 0 \n",
+ "36396 AE 2023-10-18 80 TRUE ... 10 -5.06 1 \n",
+ "36397 AE 2023-10-18 88 FALSE ... 10 -7.621 0 \n",
+ "36398 AE 2023-10-18 88 FALSE ... 9 -8.016 0 \n",
+ "36399 AE 2023-10-18 94 TRUE ... 8 -5.804 1 \n",
+ "\n",
+ " speechiness acousticness instrumentalness liveness valence tempo \\\n",
+ "0 0.0285 0.827 0 0.138 0.524 98.224 \n",
+ "1 0.0434 0.311 0 0.0815 0.89 124.997 \n",
+ "2 0.0319 0.256 0 0.114 0.844 111.018 \n",
+ "3 0.068 0.15 0.000402 0.58 0.13 139.056 \n",
+ "4 0.262 0.0887 2.16E-05 0.179 0.345 96.057 \n",
+ "... ... ... ... ... ... ... \n",
+ "36395 0.0502 0.508 0 0.259 0.105 88.88 \n",
+ "36396 0.0452 0.0585 0 0.132 0.476 121.879 \n",
+ "36397 0.0678 0.551 1.31E-05 0.102 0.693 124.357 \n",
+ "36398 0.0875 0.739 0 0.148 0.282 74.828 \n",
+ "36399 0.0375 0.001 1.79E-06 0.113 0.422 141.904 \n",
+ "\n",
+ " time_signature \n",
+ "0 4 \n",
+ "1 4 \n",
+ "2 1 \n",
+ "3 4 \n",
+ "4 4 \n",
+ "... ... \n",
+ "36395 3 \n",
+ "36396 4 \n",
+ "36397 5 \n",
+ "36398 4 \n",
+ "36399 4 \n",
+ "\n",
+ "[36400 rows x 25 columns]"
+ ]
+ },
+ "execution_count": null,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "dataframes[0]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Or Load Sheets as a List of Document Objects"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "documents = sheets.load_data(list_of_sheets)"
+ ]
+ }
+ ],
+ "metadata": {
+ "colab": {
+ "provenance": []
+ },
+ "kernelspec": {
+ "display_name": "Python 3",
+ "name": "python3"
+ },
+ "language_info": {
+ "name": "python"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
+}
diff --git a/llama-index-integrations/readers/llama-index-readers-google/README.md b/llama-index-integrations/readers/llama-index-readers-google/README.md
index 1e86347f51ba7b..bc2095634da9df 100644
--- a/llama-index-integrations/readers/llama-index-readers-google/README.md
+++ b/llama-index-integrations/readers/llama-index-readers-google/README.md
@@ -1 +1,37 @@
-# LlamaIndex Readers Integration: Google
+# LlamaIndex Integration: Google Readers
+
+Effortlessly incorporate Google-based data loaders into your Python workflow using LlamaIndex. Unlock the potential of various readers to enhance your data loading capabilities. Below are examples of integrating Google Docs and Google Sheets readers:
+
+### Google Docs Reader
+
+```python
+from llama_index.readers.google import GoogleDocsReader
+
+# Specify the document IDs you want to load
+document_ids = ["<document_id>"]
+
+# Load data from Google Docs
+documents = GoogleDocsReader().load_data(document_ids=document_ids)
+```
+
+### Google Sheets Reader (Documents and Dataframes)
+
+```python
+from llama_index.readers.google import GoogleSheetsReader
+
+# Specify the list of sheet IDs you want to load
+list_of_sheets = ["spreadsheet_id"]
+
+# Create a Google Sheets Reader instance
+sheets_reader = GoogleSheetsReader()
+
+# Load the sheets as Documents or as pandas DataFrames, whichever you need
+documents = sheets_reader.load_data(list_of_sheets)
+dataframes = sheets_reader.load_data_in_pandas(list_of_sheets)
+```
+
+Integrate these readers seamlessly to efficiently manage and process your data within your Python environment, providing a robust foundation for your data-driven workflows with LlamaIndex.
+
+### Note
+
+Make sure you have a "token.json" or a "credentials.json" file in your environment to authenticate with Google Cloud Platform.
diff --git a/llama-index-integrations/readers/llama-index-readers-google/llama_index/readers/google/sheets/base.py b/llama-index-integrations/readers/llama-index-readers-google/llama_index/readers/google/sheets/base.py
index f95d877155805f..abf3a25a2f2d1e 100644
--- a/llama-index-integrations/readers/llama-index-readers-google/llama_index/readers/google/sheets/base.py
+++ b/llama-index-integrations/readers/llama-index-readers-google/llama_index/readers/google/sheets/base.py
@@ -2,6 +2,7 @@
import logging
import os
+import pandas as pd
from typing import Any, List
import googleapiclient.discovery as discovery
@@ -79,6 +80,21 @@ def load_data(self, spreadsheet_ids: List[str]) -> List[Document]:
)
return results
+ def load_data_in_pandas(self, spreadsheet_ids: List[str]) -> List[pd.DataFrame]:
+ """Load data from the input directory.
+
+ Args:
+ spreadsheet_ids (List[str]): a list of document ids.
+ """
+ if spreadsheet_ids is None:
+ raise ValueError('Must specify a "spreadsheet_ids" in `load_kwargs`.')
+
+ results = []
+ for spreadsheet_id in spreadsheet_ids:
+ dataframes = self._load_sheet_in_pandas(spreadsheet_id)
+ results.extend(dataframes)
+ return results
+
def _load_sheet(self, spreadsheet_id: str) -> str:
"""Load a sheet from Google Sheets.
@@ -115,6 +131,43 @@ def _load_sheet(self, spreadsheet_id: str) -> str:
)
return sheet_text
+ def _load_sheet_in_pandas(self, spreadsheet_id: str) -> List[pd.DataFrame]:
+ """Load a sheet from Google Sheets.
+
+ Args:
+ spreadsheet_id: the sheet id.
+ sheet_name: the sheet name.
+
+ Returns:
+ The sheet data.
+ """
+ credentials = self._get_credentials()
+ sheets_service = discovery.build("sheets", "v4", credentials=credentials)
+ sheet = sheets_service.spreadsheets()
+ spreadsheet_data = sheet.get(spreadsheetId=spreadsheet_id).execute()
+ sheets = spreadsheet_data.get("sheets")
+ dataframes = []
+ for sheet in sheets:
+ properties = sheet.get("properties")
+ title = properties.get("title")
+ grid_props = properties.get("gridProperties")
+ rows = grid_props.get("rowCount")
+ cols = grid_props.get("columnCount")
+ range_pattern = f"{title}!R1C1:R{rows}C{cols}"
+ response = (
+ sheets_service.spreadsheets()
+ .values()
+ .get(spreadsheetId=spreadsheet_id, range=range_pattern)
+ .execute()
+ )
+ values = response.get("values", [])
+ if not values:
+ print(f"No data found in {title}")
+ else:
+ df = pd.DataFrame(values[1:], columns=values[0])
+ dataframes.append(df)
+ return dataframes
+
def _get_credentials(self) -> Any:
"""Get valid user credentials from storage.
diff --git a/llama-index-integrations/readers/llama-index-readers-google/pyproject.toml b/llama-index-integrations/readers/llama-index-readers-google/pyproject.toml
index 79a58c41596465..c962ed793746bd 100644
--- a/llama-index-integrations/readers/llama-index-readers-google/pyproject.toml
+++ b/llama-index-integrations/readers/llama-index-readers-google/pyproject.toml
@@ -34,7 +34,7 @@ license = "MIT"
maintainers = ["bbornsztein", "jerryjliu", "ong", "piroz", "pycui", "ravi03071991"]
name = "llama-index-readers-google"
readme = "README.md"
-version = "0.1.4"
+version = "0.1.5"
[tool.poetry.dependencies]
python = ">=3.10,<4.0"