From 6a08a96270b5ef0ddf829fa2d94835d0f91ae2cf Mon Sep 17 00:00:00 2001 From: Jonathan Liu <81734282+jonathanhliu21@users.noreply.github.com> Date: Wed, 26 Jun 2024 13:51:30 -0700 Subject: [PATCH] Creates Data Loader for Google Chat (#14397) --- .../data_connectors/GoogleChatDemo.ipynb | 471 ++++++++++++++++++ .../llama-index-readers-google/README.md | 15 + .../llama_index/readers/google/__init__.py | 2 + .../llama_index/readers/google/chat/BUILD | 1 + .../llama_index/readers/google/chat/README.md | 42 ++ .../readers/google/chat/__init__.py | 0 .../llama_index/readers/google/chat/base.py | 271 ++++++++++ .../llama-index-readers-google/pyproject.toml | 3 +- 8 files changed, 804 insertions(+), 1 deletion(-) create mode 100644 docs/docs/examples/data_connectors/GoogleChatDemo.ipynb create mode 100644 llama-index-integrations/readers/llama-index-readers-google/llama_index/readers/google/chat/BUILD create mode 100644 llama-index-integrations/readers/llama-index-readers-google/llama_index/readers/google/chat/README.md create mode 100644 llama-index-integrations/readers/llama-index-readers-google/llama_index/readers/google/chat/__init__.py create mode 100644 llama-index-integrations/readers/llama-index-readers-google/llama_index/readers/google/chat/base.py diff --git a/docs/docs/examples/data_connectors/GoogleChatDemo.ipynb b/docs/docs/examples/data_connectors/GoogleChatDemo.ipynb new file mode 100644 index 0000000000000..08d67c5a98f6e --- /dev/null +++ b/docs/docs/examples/data_connectors/GoogleChatDemo.ipynb @@ -0,0 +1,471 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Google Chat Reader Test" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Demonstrates our Google Chat data connector.\n", + "\n", + "If you're opening this Notebook on colab, you will probably need to install LlamaIndex 🦙." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Requirement already satisfied: llama-index in /Users/jonathanliu/Library/Caches/pypoetry/virtualenvs/llama-index-RvIdVF4_-py3.11/lib/python3.11/site-packages (0.10.47)\n", + "Requirement already satisfied: llama-index-readers-google in /Users/jonathanliu/Library/Caches/pypoetry/virtualenvs/llama-index-RvIdVF4_-py3.11/lib/python3.11/site-packages (0.2.8)\n", + "Requirement already satisfied: llama-index-agent-openai<0.3.0,>=0.1.4 in /Users/jonathanliu/Library/Caches/pypoetry/virtualenvs/llama-index-RvIdVF4_-py3.11/lib/python3.11/site-packages (from llama-index) (0.2.7)\n", + "Requirement already satisfied: llama-index-cli<0.2.0,>=0.1.2 in /Users/jonathanliu/Library/Caches/pypoetry/virtualenvs/llama-index-RvIdVF4_-py3.11/lib/python3.11/site-packages (from llama-index) (0.1.12)\n", + "Requirement already satisfied: llama-index-core==0.10.47 in /Users/jonathanliu/Library/Caches/pypoetry/virtualenvs/llama-index-RvIdVF4_-py3.11/lib/python3.11/site-packages (from llama-index) (0.10.47)\n", + "Requirement already satisfied: llama-index-embeddings-openai<0.2.0,>=0.1.5 in /Users/jonathanliu/Library/Caches/pypoetry/virtualenvs/llama-index-RvIdVF4_-py3.11/lib/python3.11/site-packages (from llama-index) (0.1.10)\n", + "Requirement already satisfied: llama-index-indices-managed-llama-cloud<0.2.0,>=0.1.2 in /Users/jonathanliu/Library/Caches/pypoetry/virtualenvs/llama-index-RvIdVF4_-py3.11/lib/python3.11/site-packages (from llama-index) (0.1.6)\n", + "Requirement already satisfied: llama-index-legacy<0.10.0,>=0.9.48 in /Users/jonathanliu/Library/Caches/pypoetry/virtualenvs/llama-index-RvIdVF4_-py3.11/lib/python3.11/site-packages (from llama-index) (0.9.48)\n", + "Requirement already satisfied: llama-index-llms-openai<0.2.0,>=0.1.13 in /Users/jonathanliu/Library/Caches/pypoetry/virtualenvs/llama-index-RvIdVF4_-py3.11/lib/python3.11/site-packages (from llama-index) (0.1.22)\n", + "Requirement already satisfied: llama-index-multi-modal-llms-openai<0.2.0,>=0.1.3 in /Users/jonathanliu/Library/Caches/pypoetry/virtualenvs/llama-index-RvIdVF4_-py3.11/lib/python3.11/site-packages (from llama-index) (0.1.6)\n", + "Requirement already satisfied: llama-index-program-openai<0.2.0,>=0.1.3 in /Users/jonathanliu/Library/Caches/pypoetry/virtualenvs/llama-index-RvIdVF4_-py3.11/lib/python3.11/site-packages (from llama-index) (0.1.6)\n", + "Requirement already satisfied: llama-index-question-gen-openai<0.2.0,>=0.1.2 in /Users/jonathanliu/Library/Caches/pypoetry/virtualenvs/llama-index-RvIdVF4_-py3.11/lib/python3.11/site-packages (from llama-index) (0.1.3)\n", + "Requirement already satisfied: llama-index-readers-file<0.2.0,>=0.1.4 in /Users/jonathanliu/Library/Caches/pypoetry/virtualenvs/llama-index-RvIdVF4_-py3.11/lib/python3.11/site-packages (from llama-index) (0.1.25)\n", + "Requirement already satisfied: llama-index-readers-llama-parse<0.2.0,>=0.1.2 in /Users/jonathanliu/Library/Caches/pypoetry/virtualenvs/llama-index-RvIdVF4_-py3.11/lib/python3.11/site-packages (from llama-index) (0.1.4)\n", + "Requirement already satisfied: PyYAML>=6.0.1 in /Users/jonathanliu/Library/Caches/pypoetry/virtualenvs/llama-index-RvIdVF4_-py3.11/lib/python3.11/site-packages (from llama-index-core==0.10.47->llama-index) (6.0.1)\n", + "Requirement already satisfied: SQLAlchemy>=1.4.49 in /Users/jonathanliu/Library/Caches/pypoetry/virtualenvs/llama-index-RvIdVF4_-py3.11/lib/python3.11/site-packages (from SQLAlchemy[asyncio]>=1.4.49->llama-index-core==0.10.47->llama-index) (2.0.31)\n", + "Requirement already satisfied: aiohttp<4.0.0,>=3.8.6 in /Users/jonathanliu/Library/Caches/pypoetry/virtualenvs/llama-index-RvIdVF4_-py3.11/lib/python3.11/site-packages (from llama-index-core==0.10.47->llama-index) (3.9.5)\n", + "Requirement already satisfied: dataclasses-json in /Users/jonathanliu/Library/Caches/pypoetry/virtualenvs/llama-index-RvIdVF4_-py3.11/lib/python3.11/site-packages (from llama-index-core==0.10.47->llama-index) (0.6.7)\n", + "Requirement already satisfied: deprecated>=1.2.9.3 in /Users/jonathanliu/Library/Caches/pypoetry/virtualenvs/llama-index-RvIdVF4_-py3.11/lib/python3.11/site-packages (from llama-index-core==0.10.47->llama-index) (1.2.14)\n", + "Requirement already satisfied: dirtyjson<2.0.0,>=1.0.8 in /Users/jonathanliu/Library/Caches/pypoetry/virtualenvs/llama-index-RvIdVF4_-py3.11/lib/python3.11/site-packages (from llama-index-core==0.10.47->llama-index) (1.0.8)\n", + "Requirement already satisfied: fsspec>=2023.5.0 in /Users/jonathanliu/Library/Caches/pypoetry/virtualenvs/llama-index-RvIdVF4_-py3.11/lib/python3.11/site-packages (from llama-index-core==0.10.47->llama-index) (2024.6.0)\n", + "Requirement already satisfied: httpx in /Users/jonathanliu/Library/Caches/pypoetry/virtualenvs/llama-index-RvIdVF4_-py3.11/lib/python3.11/site-packages (from llama-index-core==0.10.47->llama-index) (0.27.0)\n", + "Requirement already satisfied: llamaindex-py-client<0.2.0,>=0.1.18 in /Users/jonathanliu/Library/Caches/pypoetry/virtualenvs/llama-index-RvIdVF4_-py3.11/lib/python3.11/site-packages (from llama-index-core==0.10.47->llama-index) (0.1.19)\n", + "Requirement already satisfied: nest-asyncio<2.0.0,>=1.5.8 in /Users/jonathanliu/Library/Caches/pypoetry/virtualenvs/llama-index-RvIdVF4_-py3.11/lib/python3.11/site-packages (from llama-index-core==0.10.47->llama-index) (1.6.0)\n", + "Requirement already satisfied: networkx>=3.0 in /Users/jonathanliu/Library/Caches/pypoetry/virtualenvs/llama-index-RvIdVF4_-py3.11/lib/python3.11/site-packages (from llama-index-core==0.10.47->llama-index) (3.3)\n", + "Requirement already satisfied: nltk<4.0.0,>=3.8.1 in /Users/jonathanliu/Library/Caches/pypoetry/virtualenvs/llama-index-RvIdVF4_-py3.11/lib/python3.11/site-packages (from llama-index-core==0.10.47->llama-index) (3.8.1)\n", + "Requirement already satisfied: numpy<2.0.0 in /Users/jonathanliu/Library/Caches/pypoetry/virtualenvs/llama-index-RvIdVF4_-py3.11/lib/python3.11/site-packages (from llama-index-core==0.10.47->llama-index) (1.26.4)\n", + "Requirement already satisfied: openai>=1.1.0 in /Users/jonathanliu/Library/Caches/pypoetry/virtualenvs/llama-index-RvIdVF4_-py3.11/lib/python3.11/site-packages (from llama-index-core==0.10.47->llama-index) (1.35.3)\n", + "Requirement already satisfied: pandas in /Users/jonathanliu/Library/Caches/pypoetry/virtualenvs/llama-index-RvIdVF4_-py3.11/lib/python3.11/site-packages (from llama-index-core==0.10.47->llama-index) (2.2.2)\n", + "Requirement already satisfied: pillow>=9.0.0 in /Users/jonathanliu/Library/Caches/pypoetry/virtualenvs/llama-index-RvIdVF4_-py3.11/lib/python3.11/site-packages (from llama-index-core==0.10.47->llama-index) (10.3.0)\n", + "Requirement already satisfied: requests>=2.31.0 in /Users/jonathanliu/Library/Caches/pypoetry/virtualenvs/llama-index-RvIdVF4_-py3.11/lib/python3.11/site-packages (from llama-index-core==0.10.47->llama-index) (2.32.3)\n", + "Requirement already satisfied: tenacity!=8.4.0,<9.0.0,>=8.2.0 in /Users/jonathanliu/Library/Caches/pypoetry/virtualenvs/llama-index-RvIdVF4_-py3.11/lib/python3.11/site-packages (from llama-index-core==0.10.47->llama-index) (8.4.1)\n", + "Requirement already satisfied: tiktoken>=0.3.3 in /Users/jonathanliu/Library/Caches/pypoetry/virtualenvs/llama-index-RvIdVF4_-py3.11/lib/python3.11/site-packages (from llama-index-core==0.10.47->llama-index) (0.7.0)\n", + "Requirement already satisfied: tqdm<5.0.0,>=4.66.1 in /Users/jonathanliu/Library/Caches/pypoetry/virtualenvs/llama-index-RvIdVF4_-py3.11/lib/python3.11/site-packages (from llama-index-core==0.10.47->llama-index) (4.66.4)\n", + "Requirement already satisfied: typing-extensions>=4.5.0 in /Users/jonathanliu/Library/Caches/pypoetry/virtualenvs/llama-index-RvIdVF4_-py3.11/lib/python3.11/site-packages (from llama-index-core==0.10.47->llama-index) (4.12.2)\n", + "Requirement already satisfied: typing-inspect>=0.8.0 in /Users/jonathanliu/Library/Caches/pypoetry/virtualenvs/llama-index-RvIdVF4_-py3.11/lib/python3.11/site-packages (from llama-index-core==0.10.47->llama-index) (0.9.0)\n", + "Requirement already satisfied: wrapt in /Users/jonathanliu/Library/Caches/pypoetry/virtualenvs/llama-index-RvIdVF4_-py3.11/lib/python3.11/site-packages (from llama-index-core==0.10.47->llama-index) (1.16.0)\n", + "Requirement already satisfied: gkeepapi<0.16.0,>=0.15.1 in /Users/jonathanliu/Library/Caches/pypoetry/virtualenvs/llama-index-RvIdVF4_-py3.11/lib/python3.11/site-packages (from llama-index-readers-google) (0.15.1)\n", + "Requirement already satisfied: google-api-python-client<3.0.0,>=2.115.0 in /Users/jonathanliu/Library/Caches/pypoetry/virtualenvs/llama-index-RvIdVF4_-py3.11/lib/python3.11/site-packages (from llama-index-readers-google) (2.134.0)\n", + "Requirement already satisfied: google-auth-httplib2<0.3.0,>=0.2.0 in /Users/jonathanliu/Library/Caches/pypoetry/virtualenvs/llama-index-RvIdVF4_-py3.11/lib/python3.11/site-packages (from llama-index-readers-google) (0.2.0)\n", + "Requirement already satisfied: google-auth-oauthlib<2.0.0,>=1.2.0 in /Users/jonathanliu/Library/Caches/pypoetry/virtualenvs/llama-index-RvIdVF4_-py3.11/lib/python3.11/site-packages (from llama-index-readers-google) (1.2.0)\n", + "Requirement already satisfied: pydrive<2.0.0,>=1.3.1 in /Users/jonathanliu/Library/Caches/pypoetry/virtualenvs/llama-index-RvIdVF4_-py3.11/lib/python3.11/site-packages (from llama-index-readers-google) (1.3.1)\n", + "Requirement already satisfied: gpsoauth>=1.0.3 in /Users/jonathanliu/Library/Caches/pypoetry/virtualenvs/llama-index-RvIdVF4_-py3.11/lib/python3.11/site-packages (from gkeepapi<0.16.0,>=0.15.1->llama-index-readers-google) (1.1.1)\n", + "Requirement already satisfied: future>=0.16.0 in /Users/jonathanliu/Library/Caches/pypoetry/virtualenvs/llama-index-RvIdVF4_-py3.11/lib/python3.11/site-packages (from gkeepapi<0.16.0,>=0.15.1->llama-index-readers-google) (1.0.0)\n", + "Requirement already satisfied: httplib2<1.dev0,>=0.19.0 in /Users/jonathanliu/Library/Caches/pypoetry/virtualenvs/llama-index-RvIdVF4_-py3.11/lib/python3.11/site-packages (from google-api-python-client<3.0.0,>=2.115.0->llama-index-readers-google) (0.22.0)\n", + "Requirement already satisfied: google-auth!=2.24.0,!=2.25.0,<3.0.0.dev0,>=1.32.0 in /Users/jonathanliu/Library/Caches/pypoetry/virtualenvs/llama-index-RvIdVF4_-py3.11/lib/python3.11/site-packages (from google-api-python-client<3.0.0,>=2.115.0->llama-index-readers-google) (2.30.0)\n", + "Requirement already satisfied: google-api-core!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.0,<3.0.0.dev0,>=1.31.5 in /Users/jonathanliu/Library/Caches/pypoetry/virtualenvs/llama-index-RvIdVF4_-py3.11/lib/python3.11/site-packages (from google-api-python-client<3.0.0,>=2.115.0->llama-index-readers-google) (2.19.0)\n", + "Requirement already satisfied: uritemplate<5,>=3.0.1 in /Users/jonathanliu/Library/Caches/pypoetry/virtualenvs/llama-index-RvIdVF4_-py3.11/lib/python3.11/site-packages (from google-api-python-client<3.0.0,>=2.115.0->llama-index-readers-google) (4.1.1)\n", + "Requirement already satisfied: requests-oauthlib>=0.7.0 in /Users/jonathanliu/Library/Caches/pypoetry/virtualenvs/llama-index-RvIdVF4_-py3.11/lib/python3.11/site-packages (from google-auth-oauthlib<2.0.0,>=1.2.0->llama-index-readers-google) (2.0.0)\n", + "Requirement already satisfied: beautifulsoup4<5.0.0,>=4.12.3 in /Users/jonathanliu/Library/Caches/pypoetry/virtualenvs/llama-index-RvIdVF4_-py3.11/lib/python3.11/site-packages (from llama-index-readers-file<0.2.0,>=0.1.4->llama-index) (4.12.3)\n", + "Requirement already satisfied: pypdf<5.0.0,>=4.0.1 in /Users/jonathanliu/Library/Caches/pypoetry/virtualenvs/llama-index-RvIdVF4_-py3.11/lib/python3.11/site-packages (from llama-index-readers-file<0.2.0,>=0.1.4->llama-index) (4.2.0)\n", + "Requirement already satisfied: striprtf<0.0.27,>=0.0.26 in /Users/jonathanliu/Library/Caches/pypoetry/virtualenvs/llama-index-RvIdVF4_-py3.11/lib/python3.11/site-packages (from llama-index-readers-file<0.2.0,>=0.1.4->llama-index) (0.0.26)\n", + "Requirement already satisfied: llama-parse<0.5.0,>=0.4.0 in /Users/jonathanliu/Library/Caches/pypoetry/virtualenvs/llama-index-RvIdVF4_-py3.11/lib/python3.11/site-packages (from llama-index-readers-llama-parse<0.2.0,>=0.1.2->llama-index) (0.4.4)\n", + "Requirement already satisfied: oauth2client>=4.0.0 in /Users/jonathanliu/Library/Caches/pypoetry/virtualenvs/llama-index-RvIdVF4_-py3.11/lib/python3.11/site-packages (from pydrive<2.0.0,>=1.3.1->llama-index-readers-google) (4.1.3)\n", + "Requirement already satisfied: aiosignal>=1.1.2 in /Users/jonathanliu/Library/Caches/pypoetry/virtualenvs/llama-index-RvIdVF4_-py3.11/lib/python3.11/site-packages (from aiohttp<4.0.0,>=3.8.6->llama-index-core==0.10.47->llama-index) (1.3.1)\n", + "Requirement already satisfied: attrs>=17.3.0 in /Users/jonathanliu/Library/Caches/pypoetry/virtualenvs/llama-index-RvIdVF4_-py3.11/lib/python3.11/site-packages (from aiohttp<4.0.0,>=3.8.6->llama-index-core==0.10.47->llama-index) (23.2.0)\n", + "Requirement already satisfied: frozenlist>=1.1.1 in /Users/jonathanliu/Library/Caches/pypoetry/virtualenvs/llama-index-RvIdVF4_-py3.11/lib/python3.11/site-packages (from aiohttp<4.0.0,>=3.8.6->llama-index-core==0.10.47->llama-index) (1.4.1)\n", + "Requirement already satisfied: multidict<7.0,>=4.5 in /Users/jonathanliu/Library/Caches/pypoetry/virtualenvs/llama-index-RvIdVF4_-py3.11/lib/python3.11/site-packages (from aiohttp<4.0.0,>=3.8.6->llama-index-core==0.10.47->llama-index) (6.0.5)\n", + "Requirement already satisfied: yarl<2.0,>=1.0 in /Users/jonathanliu/Library/Caches/pypoetry/virtualenvs/llama-index-RvIdVF4_-py3.11/lib/python3.11/site-packages (from aiohttp<4.0.0,>=3.8.6->llama-index-core==0.10.47->llama-index) (1.9.4)\n", + "Requirement already satisfied: soupsieve>1.2 in /Users/jonathanliu/Library/Caches/pypoetry/virtualenvs/llama-index-RvIdVF4_-py3.11/lib/python3.11/site-packages (from beautifulsoup4<5.0.0,>=4.12.3->llama-index-readers-file<0.2.0,>=0.1.4->llama-index) (2.5)\n", + "Requirement already satisfied: googleapis-common-protos<2.0.dev0,>=1.56.2 in /Users/jonathanliu/Library/Caches/pypoetry/virtualenvs/llama-index-RvIdVF4_-py3.11/lib/python3.11/site-packages (from google-api-core!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.0,<3.0.0.dev0,>=1.31.5->google-api-python-client<3.0.0,>=2.115.0->llama-index-readers-google) (1.63.1)\n", + "Requirement already satisfied: protobuf!=3.20.0,!=3.20.1,!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5,<5.0.0.dev0,>=3.19.5 in /Users/jonathanliu/Library/Caches/pypoetry/virtualenvs/llama-index-RvIdVF4_-py3.11/lib/python3.11/site-packages (from google-api-core!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.0,<3.0.0.dev0,>=1.31.5->google-api-python-client<3.0.0,>=2.115.0->llama-index-readers-google) (4.25.3)\n", + "Requirement already satisfied: proto-plus<2.0.0dev,>=1.22.3 in /Users/jonathanliu/Library/Caches/pypoetry/virtualenvs/llama-index-RvIdVF4_-py3.11/lib/python3.11/site-packages (from google-api-core!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.0,<3.0.0.dev0,>=1.31.5->google-api-python-client<3.0.0,>=2.115.0->llama-index-readers-google) (1.24.0)\n", + "Requirement already satisfied: cachetools<6.0,>=2.0.0 in /Users/jonathanliu/Library/Caches/pypoetry/virtualenvs/llama-index-RvIdVF4_-py3.11/lib/python3.11/site-packages (from google-auth!=2.24.0,!=2.25.0,<3.0.0.dev0,>=1.32.0->google-api-python-client<3.0.0,>=2.115.0->llama-index-readers-google) (5.3.3)\n", + "Requirement already satisfied: pyasn1-modules>=0.2.1 in /Users/jonathanliu/Library/Caches/pypoetry/virtualenvs/llama-index-RvIdVF4_-py3.11/lib/python3.11/site-packages (from google-auth!=2.24.0,!=2.25.0,<3.0.0.dev0,>=1.32.0->google-api-python-client<3.0.0,>=2.115.0->llama-index-readers-google) (0.4.0)\n", + "Requirement already satisfied: rsa<5,>=3.1.4 in /Users/jonathanliu/Library/Caches/pypoetry/virtualenvs/llama-index-RvIdVF4_-py3.11/lib/python3.11/site-packages (from google-auth!=2.24.0,!=2.25.0,<3.0.0.dev0,>=1.32.0->google-api-python-client<3.0.0,>=2.115.0->llama-index-readers-google) (4.9)\n", + "Requirement already satisfied: pycryptodomex>=3.0 in /Users/jonathanliu/Library/Caches/pypoetry/virtualenvs/llama-index-RvIdVF4_-py3.11/lib/python3.11/site-packages (from gpsoauth>=1.0.3->gkeepapi<0.16.0,>=0.15.1->llama-index-readers-google) (3.20.0)\n", + "Requirement already satisfied: urllib3<2.0 in /Users/jonathanliu/Library/Caches/pypoetry/virtualenvs/llama-index-RvIdVF4_-py3.11/lib/python3.11/site-packages (from gpsoauth>=1.0.3->gkeepapi<0.16.0,>=0.15.1->llama-index-readers-google) (1.26.19)\n", + "Requirement already satisfied: pyparsing!=3.0.0,!=3.0.1,!=3.0.2,!=3.0.3,<4,>=2.4.2 in /Users/jonathanliu/Library/Caches/pypoetry/virtualenvs/llama-index-RvIdVF4_-py3.11/lib/python3.11/site-packages (from httplib2<1.dev0,>=0.19.0->google-api-python-client<3.0.0,>=2.115.0->llama-index-readers-google) (3.1.2)\n", + "Requirement already satisfied: pydantic>=1.10 in /Users/jonathanliu/Library/Caches/pypoetry/virtualenvs/llama-index-RvIdVF4_-py3.11/lib/python3.11/site-packages (from llamaindex-py-client<0.2.0,>=0.1.18->llama-index-core==0.10.47->llama-index) (2.7.4)\n", + "Requirement already satisfied: anyio in /Users/jonathanliu/Library/Caches/pypoetry/virtualenvs/llama-index-RvIdVF4_-py3.11/lib/python3.11/site-packages (from httpx->llama-index-core==0.10.47->llama-index) (4.4.0)\n", + "Requirement already satisfied: certifi in /Users/jonathanliu/Library/Caches/pypoetry/virtualenvs/llama-index-RvIdVF4_-py3.11/lib/python3.11/site-packages (from httpx->llama-index-core==0.10.47->llama-index) (2024.6.2)\n", + "Requirement already satisfied: httpcore==1.* in /Users/jonathanliu/Library/Caches/pypoetry/virtualenvs/llama-index-RvIdVF4_-py3.11/lib/python3.11/site-packages (from httpx->llama-index-core==0.10.47->llama-index) (1.0.5)\n", + "Requirement already satisfied: idna in /Users/jonathanliu/Library/Caches/pypoetry/virtualenvs/llama-index-RvIdVF4_-py3.11/lib/python3.11/site-packages (from httpx->llama-index-core==0.10.47->llama-index) (3.7)\n", + "Requirement already satisfied: sniffio in /Users/jonathanliu/Library/Caches/pypoetry/virtualenvs/llama-index-RvIdVF4_-py3.11/lib/python3.11/site-packages (from httpx->llama-index-core==0.10.47->llama-index) (1.3.1)\n", + "Requirement already satisfied: h11<0.15,>=0.13 in /Users/jonathanliu/Library/Caches/pypoetry/virtualenvs/llama-index-RvIdVF4_-py3.11/lib/python3.11/site-packages (from httpcore==1.*->httpx->llama-index-core==0.10.47->llama-index) (0.14.0)\n", + "Requirement already satisfied: click in /Users/jonathanliu/Library/Caches/pypoetry/virtualenvs/llama-index-RvIdVF4_-py3.11/lib/python3.11/site-packages (from nltk<4.0.0,>=3.8.1->llama-index-core==0.10.47->llama-index) (8.1.7)\n", + "Requirement already satisfied: joblib in /Users/jonathanliu/Library/Caches/pypoetry/virtualenvs/llama-index-RvIdVF4_-py3.11/lib/python3.11/site-packages (from nltk<4.0.0,>=3.8.1->llama-index-core==0.10.47->llama-index) (1.4.2)\n", + "Requirement already satisfied: regex>=2021.8.3 in /Users/jonathanliu/Library/Caches/pypoetry/virtualenvs/llama-index-RvIdVF4_-py3.11/lib/python3.11/site-packages (from nltk<4.0.0,>=3.8.1->llama-index-core==0.10.47->llama-index) (2024.5.15)\n", + "Requirement already satisfied: pyasn1>=0.1.7 in /Users/jonathanliu/Library/Caches/pypoetry/virtualenvs/llama-index-RvIdVF4_-py3.11/lib/python3.11/site-packages (from oauth2client>=4.0.0->pydrive<2.0.0,>=1.3.1->llama-index-readers-google) (0.6.0)\n", + "Requirement already satisfied: six>=1.6.1 in /Users/jonathanliu/Library/Caches/pypoetry/virtualenvs/llama-index-RvIdVF4_-py3.11/lib/python3.11/site-packages (from oauth2client>=4.0.0->pydrive<2.0.0,>=1.3.1->llama-index-readers-google) (1.16.0)\n", + "Requirement already satisfied: distro<2,>=1.7.0 in /Users/jonathanliu/Library/Caches/pypoetry/virtualenvs/llama-index-RvIdVF4_-py3.11/lib/python3.11/site-packages (from openai>=1.1.0->llama-index-core==0.10.47->llama-index) (1.9.0)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /Users/jonathanliu/Library/Caches/pypoetry/virtualenvs/llama-index-RvIdVF4_-py3.11/lib/python3.11/site-packages (from requests>=2.31.0->llama-index-core==0.10.47->llama-index) (3.3.2)\n", + "Requirement already satisfied: oauthlib>=3.0.0 in /Users/jonathanliu/Library/Caches/pypoetry/virtualenvs/llama-index-RvIdVF4_-py3.11/lib/python3.11/site-packages (from requests-oauthlib>=0.7.0->google-auth-oauthlib<2.0.0,>=1.2.0->llama-index-readers-google) (3.2.2)\n", + "Requirement already satisfied: greenlet!=0.4.17 in /Users/jonathanliu/Library/Caches/pypoetry/virtualenvs/llama-index-RvIdVF4_-py3.11/lib/python3.11/site-packages (from SQLAlchemy>=1.4.49->SQLAlchemy[asyncio]>=1.4.49->llama-index-core==0.10.47->llama-index) (3.0.3)\n", + "Requirement already satisfied: mypy-extensions>=0.3.0 in /Users/jonathanliu/Library/Caches/pypoetry/virtualenvs/llama-index-RvIdVF4_-py3.11/lib/python3.11/site-packages (from typing-inspect>=0.8.0->llama-index-core==0.10.47->llama-index) (1.0.0)\n", + "Requirement already satisfied: marshmallow<4.0.0,>=3.18.0 in /Users/jonathanliu/Library/Caches/pypoetry/virtualenvs/llama-index-RvIdVF4_-py3.11/lib/python3.11/site-packages (from dataclasses-json->llama-index-core==0.10.47->llama-index) (3.21.3)\n", + "Requirement already satisfied: python-dateutil>=2.8.2 in /Users/jonathanliu/Library/Caches/pypoetry/virtualenvs/llama-index-RvIdVF4_-py3.11/lib/python3.11/site-packages (from pandas->llama-index-core==0.10.47->llama-index) (2.9.0.post0)\n", + "Requirement already satisfied: pytz>=2020.1 in /Users/jonathanliu/Library/Caches/pypoetry/virtualenvs/llama-index-RvIdVF4_-py3.11/lib/python3.11/site-packages (from pandas->llama-index-core==0.10.47->llama-index) (2024.1)\n", + "Requirement already satisfied: tzdata>=2022.7 in /Users/jonathanliu/Library/Caches/pypoetry/virtualenvs/llama-index-RvIdVF4_-py3.11/lib/python3.11/site-packages (from pandas->llama-index-core==0.10.47->llama-index) (2024.1)\n", + "Requirement already satisfied: packaging>=17.0 in /Users/jonathanliu/Library/Caches/pypoetry/virtualenvs/llama-index-RvIdVF4_-py3.11/lib/python3.11/site-packages (from marshmallow<4.0.0,>=3.18.0->dataclasses-json->llama-index-core==0.10.47->llama-index) (24.1)\n", + "Requirement already satisfied: annotated-types>=0.4.0 in /Users/jonathanliu/Library/Caches/pypoetry/virtualenvs/llama-index-RvIdVF4_-py3.11/lib/python3.11/site-packages (from pydantic>=1.10->llamaindex-py-client<0.2.0,>=0.1.18->llama-index-core==0.10.47->llama-index) (0.7.0)\n", + "Requirement already satisfied: pydantic-core==2.18.4 in /Users/jonathanliu/Library/Caches/pypoetry/virtualenvs/llama-index-RvIdVF4_-py3.11/lib/python3.11/site-packages (from pydantic>=1.10->llamaindex-py-client<0.2.0,>=0.1.18->llama-index-core==0.10.47->llama-index) (2.18.4)\n", + "\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m24.0\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.1\u001b[0m\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n", + "Note: you may need to restart the kernel to use updated packages.\n" + ] + } + ], + "source": [ + "%pip install llama-index llama-index-readers-google" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This loader takes in IDs of Google Chat spaces or messages and parses the chat history into `Document`s. The space/message ID can be found in the URL, as shown below:\n", + "\n", + "- mail.google.com/chat/u/0/#chat/space/****\n", + "\n", + "Before using this loader, you need to create a Google Cloud Platform (GCP) project with a Google Workspace account. Then, you need to authorize the app with user credentials. Follow the prerequisites and steps 1 and 2 of [this guide](https://developers.google.com/workspace/chat/authenticate-authorize-chat-user). After downloading the client secret JSON file, rename it as **`credentials.json`** and save it into your project folder.\n", + "\n", + "This example parses a chat between two users. They first discuss math homework, then they plan a trip to San Francisco in a thread. At the end, they discuss finishing an essay. See the full thread [here](https://pastebin.com/FrYscMAa)." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Basic Usage" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The example below loads the entire chat history into a `SummaryIndex`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from llama_index.core import SummaryIndex\n", + "from llama_index.readers.google import GoogleChatReader\n", + "\n", + "space_ids = [\n", + " \"AAAAtTPwdzg\"\n", + "] # The Google account you authenticated with must have access to this space\n", + "reader = GoogleChatReader()\n", + "docs = reader.load_data(space_names=space_ids)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "index = SummaryIndex.from_documents(docs)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "query_engine = index.as_query_engine()\n", + "response = query_engine.query(\"What was the overall conversation about?\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/markdown": [ + "The overall conversation was about discussing and planning a trip to San Francisco, including visiting various landmarks and using public transportation to get around the city. Additionally, there was a brief mention of finishing homework and essays before the trip." + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "from IPython.display import Markdown, display\n", + "\n", + "display(Markdown(f\"{response}\"))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Filtering and Ordering" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Ordering\n", + "You can order the chat history by ascending or descending order." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/markdown": [ + "1. Visiting San Francisco\n", + "2. Planning a trip itinerary in San Francisco\n", + "3. Taking public transit to San Francisco\n", + "4. Discussing transportation options in San Francisco\n", + "5. Working on a math problem together" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "docs = reader.load_data(space_names=space_ids, order_asc=False)\n", + "\n", + "index = SummaryIndex.from_documents(docs)\n", + "query_engine = index.as_query_engine()\n", + "response = query_engine.query(\n", + " \"List the things that the users discussed in the order they were discussed in. Make the list short.\"\n", + ")\n", + "display(Markdown(f\"{response}\"))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Even though the messages were retrieved in reverse order, the list is still in the correct order because messages have a timestamp in their metadata." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Message Limiting\n", + "Messages can be limited to a certain number using the `num_messages` parameter. However, the number of messages that are loaded may not be exactly this number. If `order_asc` is True, then takes the first `num_messages` messages within the given time frame. If `order_desc` is True, then takes the last `num_messages` messages within the time frame." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/markdown": [ + "The conversation revolved around a student seeking help with their math homework, specifically understanding and applying the chain rule in calculus to find the derivative of a function involving cosine. The student was stuck on problem 4b and needed assistance with taking the derivative of cos(2x). The other participant explained the application of the chain rule step by step, leading to the correct derivative of -2sin(2x). The student expressed gratitude for the explanation and indicated that they could now proceed with solving the remaining problems." + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "docs = reader.load_data(\n", + " space_names=space_ids, num_messages=10\n", + ") # in ascending order, only contains messages about math HW\n", + "\n", + "index = SummaryIndex.from_documents(docs)\n", + "query_engine = index.as_query_engine()\n", + "response = query_engine.query(\"What was discussed in this conversation?\")\n", + "display(Markdown(f\"{response}\"))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Notice that the summary is only about the first 10 messages, which only involves help on the math homework. Below is an example of retrieving the last 16 messages, which only involves the essay. The \"cost of a trip\" refers to a reply in the SF trip thread that was made during the discussion of the essay." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/markdown": [ + "The conversation revolved around the completion of an essay, specifically focusing on the contrast between old money and new money rather than the American Dream and The Great Gatsby. There were mentions of procrastination, starting work on the essay, and concerns about the cost of a trip." + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "docs = reader.load_data(\n", + " space_names=space_ids, num_messages=16, order_asc=False\n", + ") # in descending order, only contains messages about essay\n", + "\n", + "index = SummaryIndex.from_documents(docs)\n", + "query_engine = index.as_query_engine()\n", + "response = query_engine.query(\"What was discussed in this conversation?\")\n", + "display(Markdown(f\"{response}\"))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Time Frame\n", + "\n", + "A `before` and `after` time frame can also be specified. These parameters take in `datetime` objects." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/markdown": [ + "The conversation revolved around a student seeking help with understanding the chain rule in calculus, specifically how to take the derivative of cos(2x). The student was stuck on problem 4b and requested assistance, to which the other student explained the application of the chain rule step by step. The explanation clarified how to differentiate cos(2x) using the chain rule, resulting in the derivative being -2sin(2x). The student who sought help expressed gratitude and indicated that they could now proceed with solving the remaining problems." + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "import datetime\n", + "\n", + "date1 = datetime.datetime.fromisoformat(\n", + " \"2024-06-25 14:27:00-07:00\"\n", + ") # when they start talking about trip\n", + "docs = reader.load_data(space_names=space_ids, before=date1)\n", + "\n", + "index = SummaryIndex.from_documents(docs)\n", + "query_engine = index.as_query_engine()\n", + "response = query_engine.query(\n", + " \"What was discussed in this conversation?\"\n", + ") # should only be about math HW\n", + "display(Markdown(f\"{response}\"))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/markdown": [ + "The conversation revolved around finishing an essay on the contrast between old money and new money, concerns about the cost of a trip, and reassurances that transportation expenses would be affordable." + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "date2 = datetime.datetime.fromisoformat(\n", + " \"2024-06-25 14:51:00-07:00\"\n", + ") # when they start talking about essay\n", + "docs = reader.load_data(space_names=space_ids, after=date2)\n", + "\n", + "index = SummaryIndex.from_documents(docs)\n", + "query_engine = index.as_query_engine()\n", + "response = query_engine.query(\n", + " \"What was discussed in this conversation?\"\n", + ") # should only be about essay + cost of trip (in thread)\n", + "display(Markdown(f\"{response}\"))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/markdown": [ + "The conversation revolved around planning a trip to San Francisco for the weekend. They discussed visiting various landmarks such as the Golden Gate Bridge, Fisherman's Wharf, and the Ferry Building, with the possibility of going to Alcatraz or Twin Peaks if time allowed. They also talked about transportation options like taking Caltrain or BART, renting a scooter or BayWheels bike, and using public transit to move around the city. Additionally, they mentioned exploring Chinatown and taking bus routes back to the city center for flexibility." + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "docs = reader.load_data(space_names=space_ids, after=date1, before=date2)\n", + "\n", + "index = SummaryIndex.from_documents(docs)\n", + "query_engine = index.as_query_engine()\n", + "response = query_engine.query(\n", + " \"What was discussed in this conversation?\"\n", + ") # should only be about trip\n", + "display(Markdown(f\"{response}\"))" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "llama-index-RvIdVF4_-py3.11", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/llama-index-integrations/readers/llama-index-readers-google/README.md b/llama-index-integrations/readers/llama-index-readers-google/README.md index 7ec90499d19e7..701f2979039ab 100644 --- a/llama-index-integrations/readers/llama-index-readers-google/README.md +++ b/llama-index-integrations/readers/llama-index-readers-google/README.md @@ -3,6 +3,7 @@ Effortlessly incorporate Google-based data loaders into your Python workflow using LlamaIndex. Unlock the potential of various readers to enhance your data loading capabilities, including: - Google Calendar +- Google Chat - Google Docs - Google Drive - Gmail @@ -78,3 +79,17 @@ documents = loader.load_data( index = VectorStoreIndex.from_documents(documents) index.query("Which Turkish restaurant has the best reviews?") ``` + +### Google Chat Reader + +```py +from llama_index.readers.google import GoogleChatReader +from llama_index.core import VectorStoreIndex + +space_names = [""] +chatReader = GoogleChatReader() +docs = chatReader.load_data(space_names=space_names) +index = VectorStoreIndex.from_documents(docs) +query_eng = index.as_query_engine() +print(query_eng.query("What was this conversation about?")) +``` diff --git a/llama-index-integrations/readers/llama-index-readers-google/llama_index/readers/google/__init__.py b/llama-index-integrations/readers/llama-index-readers-google/llama_index/readers/google/__init__.py index 9593188736e0f..02501814b9a25 100644 --- a/llama-index-integrations/readers/llama-index-readers-google/llama_index/readers/google/__init__.py +++ b/llama-index-integrations/readers/llama-index-readers-google/llama_index/readers/google/__init__.py @@ -1,4 +1,5 @@ from llama_index.readers.google.calendar.base import GoogleCalendarReader +from llama_index.readers.google.chat.base import GoogleChatReader from llama_index.readers.google.docs.base import GoogleDocsReader from llama_index.readers.google.drive.base import GoogleDriveReader from llama_index.readers.google.gmail.base import GmailReader @@ -14,4 +15,5 @@ "GmailReader", "GoogleKeepReader", "GoogleMapsTextSearchReader", + "GoogleChatReader", ] diff --git a/llama-index-integrations/readers/llama-index-readers-google/llama_index/readers/google/chat/BUILD b/llama-index-integrations/readers/llama-index-readers-google/llama_index/readers/google/chat/BUILD new file mode 100644 index 0000000000000..db46e8d6c978c --- /dev/null +++ b/llama-index-integrations/readers/llama-index-readers-google/llama_index/readers/google/chat/BUILD @@ -0,0 +1 @@ +python_sources() diff --git a/llama-index-integrations/readers/llama-index-readers-google/llama_index/readers/google/chat/README.md b/llama-index-integrations/readers/llama-index-readers-google/llama_index/readers/google/chat/README.md new file mode 100644 index 0000000000000..bfe0848bea424 --- /dev/null +++ b/llama-index-integrations/readers/llama-index-readers-google/llama_index/readers/google/chat/README.md @@ -0,0 +1,42 @@ +# Google Chat Loader + +`pip install llama-index-readers-google` + +This loader takes in IDs of Google Chat spaces or messages and parses the chat history into `Document`s. The space/message ID can be found in the URL, as shown below: + +- mail.google.com/chat/u/0/#chat/space/**\** + +Before using this loader, you need to create a Google Cloud Platform (GCP) project with a Google Workspace account. Then, you need to authorize the app with user credentials. Follow the prerequisites and steps 1 and 2 of [this guide](https://developers.google.com/workspace/chat/authenticate-authorize-chat-user). After downloading the client secret JSON file, rename it as **`credentials.json`** and save it into your project folder. + +## Usage + +To use this loader, pass in an array of Google Chat IDs. + +```py +from llama_index.readers.google import GoogleChatReader + +space_names = [""] +chatReader = GoogleChatReader() +docs = chatReader.load_data(space_names=space_names) +``` + +There are also additional parameters that allow you to specify which chat messages you want to read: + +- `num_messages`: The number of messages to load (may not be exact). If `order_asc` is True, then loads `num_messages` from the beginning of the chat. If `order_asc` is False, then loads `num_messages` from the end of the chat. +- `after`: Only loads messages after this timestamp (a datetime object) +- `before`: Only loads messages before this timestamp (a datetime object) +- `order_asc`: If True, then orders messages in ascending order. Otherwise orders messages in descending order. + +## Examples + +```py +from llama_index.readers.google import GoogleChatReader +from llama_index.core import VectorStoreIndex + +space_names = [""] +chatReader = GoogleChatReader() +docs = chatReader.load_data(space_names=space_names) +index = VectorStoreIndex.from_documents(docs) +query_eng = index.as_query_engine() +print(query_eng.query("What was this conversation about?")) +``` diff --git a/llama-index-integrations/readers/llama-index-readers-google/llama_index/readers/google/chat/__init__.py b/llama-index-integrations/readers/llama-index-readers-google/llama_index/readers/google/chat/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/llama-index-integrations/readers/llama-index-readers-google/llama_index/readers/google/chat/base.py b/llama-index-integrations/readers/llama-index-readers-google/llama_index/readers/google/chat/base.py new file mode 100644 index 0000000000000..6b821ec47c4de --- /dev/null +++ b/llama-index-integrations/readers/llama-index-readers-google/llama_index/readers/google/chat/base.py @@ -0,0 +1,271 @@ +"""Google Chat Reader.""" + +import logging +from datetime import datetime +from typing import Any, List, Dict + +from llama_index.core.readers.base import BasePydanticReader +from llama_index.core.schema import Document + +logger = logging.getLogger(__name__) + +SCOPES = [ + "https://www.googleapis.com/auth/chat.messages.readonly", +] + + +class GoogleChatReader(BasePydanticReader): + """Google Chat Reader. + + Reads messages from Google Chat + """ + + is_remote: bool = True + + @classmethod + def class_name(cls) -> str: + """Gets name identifier of class.""" + return "GoogleChatReader" + + def load_data( + self, + space_names: List[str], + num_messages: int = -1, + after: datetime = None, + before: datetime = None, + order_asc: bool = True, + ) -> List[Document]: + """Loads documents from Google Chat. + + Args: + space_name (List[str]): List of Space ID names found at top of URL (without the "space/"). + num_messages (int, optional): Number of messages to load (may exceed this number). If -1, then loads all messages. Defaults to -1. + after (datetime, optional): Only search for messages after this datetime (UTC). Defaults to None. + before (datetime, optional): Only search for messages before this datetime (UTC). Defaults to None. + order_asc (bool, optional): If messages should be ordered by ascending time order. Defaults to True. + + Returns: + List[Document]: List of document objects + """ + from googleapiclient.discovery import build + + # get credentials and create chat service + credentials = self._get_credentials() + service = build("chat", "v1", credentials=credentials) + + logger.info("Credentials successfully obtained.") + + res = [] + for space_name in space_names: + all_msgs = self._get_msgs( + service, space_name, num_messages, after, before, order_asc + ) # gets raw API output in list of dict + msgs_sorted = self._sort_msgs( + space_name, all_msgs + ) # puts messages into list of Document objects + res.extend(msgs_sorted) + logger.info(f"Successfully retrieved messages from {space_name}") + + return res + + def _sort_msgs(self, space_name: str, all_msgs: List[Dict[str, Any]]) -> Document: + """Sorts messages from space and puts them into Document. + + Args: + space_name (str): Space ID + all_msgs (List[Dict[str, Any]]): All messages + order_asc (bool): If ordered by ascending order + + Returns: + Document: Document with messages + """ + res = [] + id_to_text = self._id_to_text( + all_msgs + ) # maps message ID to text (useful for retrieving info about quote replies) + thread_msg_cnt = self._get_thread_msg_cnt( + all_msgs + ) # gets message count in each thread + for msg in all_msgs: + if any( + i not in msg for i in ("name", "text", "thread", "sender", "createTime") + ): + # invalid message + continue + + if "name" not in msg["thread"] or "name" not in msg["sender"]: + # invalid message + continue + + metadata = { + "space_id": space_name, + "sender_id": msg["sender"]["name"], + "timestamp": msg["createTime"], + } + + if ( + "quotedMessageMetadata" in msg + and msg["quotedMessageMetadata"]["name"] in id_to_text + ): + # metadata for a quote reply + metadata["quoted_msg"] = id_to_text[ + msg["quotedMessageMetadata"]["name"] + ] + + # adds metadata for threads + # all threads with a message count of 1 gets counted as the "main thread" + thread_id = msg["thread"]["name"] + if thread_msg_cnt[thread_id] > 1: + metadata["thread_id"] = thread_id + else: + metadata["thread_id"] = "Main Thread" + + doc = Document(id_=msg["name"], text=msg["text"], metadata=metadata) + res.append(doc) + + return res + + def _id_to_text(self, all_msgs: List[Dict[str, Any]]) -> Dict[str, str]: + """Maps message ID to text, used for quote replies. + + Args: + all_msgs (List[Dict[str, Any]]): All messages + + Returns: + Dict[str, str]: Map message ID -> message text + """ + res = {} + + for msg in all_msgs: + if "text" not in msg or "name" not in msg: + continue + + res[msg["name"]] = msg["text"] + + return res + + def _get_thread_msg_cnt(self, all_msgs: List[Dict[str, Any]]) -> Dict[str, int]: + """Gets message count for each thread ID. + + Args: + all_msgs (List[Dict[str, Any]]): All messages + + Returns: + Dict[str, int]: Maps thread ID -> count of messages that were in that thread + """ + # maps thread ID -> count + threads_dict = {} + for msg in all_msgs: + thread_name = msg["thread"]["name"] + if thread_name not in threads_dict: + # add thread name to dict + threads_dict[thread_name] = 1 + else: + threads_dict[thread_name] += 1 + + return threads_dict + + def _get_msgs( + self, + service: Any, + space_name: str, + num_messages: int = -1, + after: datetime = None, + before: datetime = None, + order_asc: bool = True, + ) -> List[Dict[str, Any]]: + """Puts raw API output of chat messages from one space into a list. + + Args: + service (Any): Google Chat API service object + space_name (str): Space ID name found at top of URL (without the "space/"). + num_messages (int, optional): Number of messages to load (may exceed this number). If -1, then loads all messages. Defaults to -1. + after (datetime, optional): Only search for messages after this datetime (UTC). Defaults to None. + before (datetime, optional): Only search for messages before this datetime (UTC). Defaults to None. + order_asc (bool, optional): If messages should be ordered by ascending time order. Defaults to True. + + Returns: + List[Dict[str, Any]]: List of message objects + """ + all_msgs = [] + + # API parameters + parent = f"spaces/{space_name}" + page_token = "" + filter_str = "" + if after is not None: + offset_str = "" + if after.utcoffset() is None: + offset_str = "+00:00" + filter_str += f'createTime > "{after.isoformat("T") + offset_str}" AND ' + if before is not None: + offset_str = "" + if before.utcoffset() is None: + offset_str = "+00:00" + filter_str += f'createTime < "{before.isoformat("T") + offset_str}" AND ' + filter_str = filter_str[:-4] + order_by = f"createTime {'ASC' if order_asc else 'DESC'}" + + # Get all messages from space + while num_messages == -1 or len(all_msgs) < num_messages: + req_msg = num_messages - len(all_msgs) + + result = ( + service.spaces() + .messages() + .list( + parent=parent, + pageSize=req_msg if num_messages != -1 else 1000, + pageToken=page_token, + filter=filter_str, + orderBy=order_by, + showDeleted=False, + ) + .execute() + ) + + if result and "messages" in result: + all_msgs.extend(result["messages"]) + + # if no more messages to load + if not result or "nextPageToken" not in result: + break + + page_token = result["nextPageToken"] + + return all_msgs + + def _get_credentials(self) -> Any: + """Get valid user credentials from storage. + + The file token.json stores the user's access and refresh tokens, and is + created automatically when the authorization flow completes for the first + time. + + Returns: + Credentials, the obtained credential. + """ + import os + + from google_auth_oauthlib.flow import InstalledAppFlow + from google.auth.transport.requests import Request + + from google.oauth2.credentials import Credentials + + creds = None + if os.path.exists("token.json"): + creds = Credentials.from_authorized_user_file("token.json", SCOPES) + # If there are no (valid) credentials available, let the user log in. + if not creds or not creds.valid: + if creds and creds.expired and creds.refresh_token: + creds.refresh(Request()) + else: + flow = InstalledAppFlow.from_client_secrets_file( + "credentials.json", SCOPES + ) + creds = flow.run_local_server(port=0) + # Save the credentials for the next run + with open("token.json", "w") as token: + token.write(creds.to_json()) + + return creds diff --git a/llama-index-integrations/readers/llama-index-readers-google/pyproject.toml b/llama-index-integrations/readers/llama-index-readers-google/pyproject.toml index 8667f540131da..167432e5f6a99 100644 --- a/llama-index-integrations/readers/llama-index-readers-google/pyproject.toml +++ b/llama-index-integrations/readers/llama-index-readers-google/pyproject.toml @@ -14,6 +14,7 @@ import_path = "llama_index.readers.google" [tool.llamahub.class_authors] GmailReader = "bbornsztein" GoogleCalendarReader = "ong" +GoogleChatReader = "jonathanhliu21" GoogleDocsReader = "jerryjliu" GoogleDriveReader = "ravi03071991" GoogleKeepReader = "pycui" @@ -46,7 +47,7 @@ maintainers = [ ] name = "llama-index-readers-google" readme = "README.md" -version = "0.2.8" +version = "0.2.9" [tool.poetry.dependencies] python = ">=3.10,<4.0"