diff --git a/examples/quivr-whisper/.env_example b/examples/quivr-whisper/.env_example
new file mode 100644
index 000000000000..63cb0d4eb89f
--- /dev/null
+++ b/examples/quivr-whisper/.env_example
@@ -0,0 +1,5 @@
+QUIVR_API_KEY=XXXX
+QUIVR_CHAT_ID=1XXXX
+QUIVR_BRAIN_ID=XXXX
+QUIVR_URL=XXXX
+OPENAI_API_KEY=XXXX
diff --git a/examples/quivr-whisper/.gitignore b/examples/quivr-whisper/.gitignore
new file mode 100644
index 000000000000..4c49bd78f1d0
--- /dev/null
+++ b/examples/quivr-whisper/.gitignore
@@ -0,0 +1 @@
+.env
diff --git a/examples/quivr-whisper/.python-version b/examples/quivr-whisper/.python-version
new file mode 100644
index 000000000000..2419ad5b0a32
--- /dev/null
+++ b/examples/quivr-whisper/.python-version
@@ -0,0 +1 @@
+3.11.9
diff --git a/examples/quivr-whisper/README.md b/examples/quivr-whisper/README.md
new file mode 100644
index 000000000000..8143f7787d41
--- /dev/null
+++ b/examples/quivr-whisper/README.md
@@ -0,0 +1,3 @@
+# quivr-whisper
+
+Ask Quivr questions with your voice: Whisper transcribes the question, Quivr answers it, and OpenAI TTS reads the answer back. See `Readme.md` for setup and usage.
diff --git a/examples/quivr-whisper/Readme.md b/examples/quivr-whisper/Readme.md
new file mode 100644
index 000000000000..f0a6dbfc81ae
--- /dev/null
+++ b/examples/quivr-whisper/Readme.md
@@ -0,0 +1,65 @@
+# Quivr-Whisper
+
+Quivr-Whisper is a web application that lets users ask questions via audio input. It uses OpenAI's Whisper model for speech transcription and OpenAI's text-to-speech capabilities to synthesize the answer. The transcribed question is sent to the Quivr API, which produces the response.
+
+
+
+https://github.com/StanGirard/quivr-whisper/assets/19614572/9cc270c9-07e4-4ce1-bcff-380f195c9313
+
+
+
+## Features
+
+- Audio input for asking questions
+- Speech transcription using OpenAI's Whisper model
+- Integration with the Quivr API for intelligent responses
+- Speech synthesis of the response for audio playback
+
+## Getting Started
+
+These instructions will get you a copy of the project up and running on your local machine for development and testing purposes.
+
+### Prerequisites
+
+What you need to run the software:
+
+- Python 3.11+
+- pip for Python 3
+- Flask
+- OpenAI Python package
+- Requests package
+
+### Installing
+
+A step-by-step guide to getting a development environment running:
+
+1. Clone the repository to your local machine.
+```bash
+git clone https://github.com/stangirard/quivr-whisper.git
+cd quivr-whisper
+```
+
+2. Install the required packages.
+```bash
+pip install flask openai requests python-dotenv
+```
+
+3. Create a `.env` file in the root directory of the project and add your API keys and other configuration variables.
+```env
+OPENAI_API_KEY='your_openai_api_key'
+QUIVR_API_KEY='your_quivr_api_key'
+QUIVR_CHAT_ID='your_quivr_chat_id'
+QUIVR_BRAIN_ID='your_quivr_brain_id'
+QUIVR_URL='https://api.quivr.app' # Optional, only if different from the default
+```
+
+4. Run the Flask application.
+```bash
+flask run
+```
+
+Your app should now be running on `http://localhost:5000`.
+
+## Usage
+
+To use Quivr-Whisper, navigate to `http://localhost:5000` in your web browser, click "Ask a question to Quivr", and record your question. Once your question has been transcribed and answered, the synthesized response is played back to you.
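+
+### Testing the endpoint directly (optional)
+
+If you want to sanity-check the `/transcribe` route without the browser UI, you can post a recording with `curl`. The route expects a multipart form field named `audio_data` and returns JSON with the synthesized answer as base64-encoded audio; `question.webm` below is just a placeholder for any recording Whisper accepts.
+```bash
+curl -s -X POST http://localhost:5000/transcribe \
+  -F "audio_data=@question.webm" \
+  -o response.json
+# Decode the base64 payload into a playable audio file
+python -c "import base64, json; open('answer.mp3', 'wb').write(base64.b64decode(json.load(open('response.json'))['audio_base64']))"
+```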
diff --git a/examples/quivr-whisper/app.py b/examples/quivr-whisper/app.py
new file mode 100644
index 000000000000..1ae27eac2399
--- /dev/null
+++ b/examples/quivr-whisper/app.py
@@ -0,0 +1,78 @@
+from flask import Flask, render_template, request, jsonify
+import openai
+import base64
+import os
+import requests
+from dotenv import load_dotenv
+from tempfile import NamedTemporaryFile
+
+app = Flask(__name__)
+load_dotenv()
+openai.api_key = os.getenv("OPENAI_API_KEY")
+
+quivr_token = os.getenv("QUIVR_API_KEY", "")
+quivr_chat_id = os.getenv("QUIVR_CHAT_ID", "")
+quivr_brain_id = os.getenv("QUIVR_BRAIN_ID", "")
+quivr_url = (
+    os.getenv("QUIVR_URL", "https://api.quivr.app")
+    + f"/chat/{quivr_chat_id}/question?brain_id={quivr_brain_id}"
+)
+
+headers = {
+    "Content-Type": "application/json",
+    "Authorization": f"Bearer {quivr_token}",
+}
+
+
+@app.route("/")
+def index():
+    return render_template("index.html")
+
+
+@app.route("/transcribe", methods=["POST"])
+def transcribe_audio():
+    audio_file = request.files["audio_data"]
+    transcript = transcribe_audio_file(audio_file)
+    quivr_response = ask_quivr_question(transcript)
+    audio_base64 = synthesize_speech(quivr_response)
+    return jsonify({"audio_base64": audio_base64})
+
+
+def transcribe_audio_file(audio_file):
+    with NamedTemporaryFile(suffix=".webm", delete=False) as temp_audio_file:
+        audio_file.save(temp_audio_file)
+        temp_audio_file_path = temp_audio_file.name
+
+    try:
+        with open(temp_audio_file_path, "rb") as f:
+            transcript_response = openai.audio.transcriptions.create(
+                model="whisper-1", file=f
+            )
+            transcript = transcript_response.text
+    finally:
+        os.unlink(temp_audio_file_path)
+
+    return transcript
+
+
+def ask_quivr_question(transcript):
+    response = requests.post(quivr_url, headers=headers, json={"question": transcript})
+    if response.status_code == 200:
+        quivr_response = response.json().get("assistant")
+        return quivr_response
+    else:
+        print(f"Error from Quivr API: {response.status_code}, {response.text}")
+        return "Sorry, I couldn't understand that."
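+
+
+# Turn Quivr's text answer into speech with OpenAI's TTS endpoint. tts-1
+# returns binary audio (MP3 by default); it is base64-encoded here so the
+# browser can play it straight from a data: URI (see static/app.js).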
+def synthesize_speech(text):
+    speech_response = openai.audio.speech.create(
+        model="tts-1", voice="nova", input=text
+    )
+    audio_content = speech_response.content
+    audio_base64 = base64.b64encode(audio_content).decode("utf-8")
+    return audio_base64
+
+
+if __name__ == "__main__":
+    app.run(debug=True)
diff --git a/examples/quivr-whisper/pyproject.toml b/examples/quivr-whisper/pyproject.toml
new file mode 100644
index 000000000000..457e6c90e392
--- /dev/null
+++ b/examples/quivr-whisper/pyproject.toml
@@ -0,0 +1,28 @@
+[project]
+name = "quivr-whisper"
+version = "0.1.0"
+description = "Voice Q&A for Quivr: Whisper transcription in, synthesized speech out"
+authors = [
+    { name = "Stan Girard", email = "stan@quivr.app" }
+]
+dependencies = [
+    "flask>=3.1.0",
+    "openai>=1.54.5",
+    "quivr-core>=0.0.24",
+]
+readme = "README.md"
+requires-python = ">= 3.11"
+
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+
+[tool.rye]
+managed = true
+dev-dependencies = []
+
+[tool.hatch.metadata]
+allow-direct-references = true
+
+[tool.hatch.build.targets.wheel]
+packages = ["src/quivr_whisper"]
diff --git a/examples/quivr-whisper/requirements-dev.lock b/examples/quivr-whisper/requirements-dev.lock
new file mode 100644
index 000000000000..8e93ec1b5ff3
--- /dev/null
+++ b/examples/quivr-whisper/requirements-dev.lock
@@ -0,0 +1,713 @@
+# generated by rye
+# use `rye lock` or `rye sync` to update this lockfile
+#
+# last locked with the following flags:
+#   pre: false
+#   features: []
+#   all-features: false
+#   with-sources: false
+#   generate-hashes: false
+#   universal: false
+
+-e file:.
+aiofiles==24.1.0
+    # via quivr-core
+aiohappyeyeballs==2.4.3
+    # via aiohttp
+aiohttp==3.11.6
+    # via langchain
+    # via langchain-community
+    # via llama-index-core
+    # via llama-index-legacy
+aiosignal==1.3.1
+    # via aiohttp
+annotated-types==0.7.0
+    # via pydantic
+anthropic==0.39.0
+    # via langchain-anthropic
+antlr4-python3-runtime==4.9.3
+    # via omegaconf
+anyio==4.6.2.post1
+    # via anthropic
+    # via httpx
+    # via openai
+    # via starlette
+attrs==24.2.0
+    # via aiohttp
+backoff==2.2.1
+    # via megaparse
+    # via unstructured
+beautifulsoup4==4.12.3
+    # via llama-index-readers-file
+    # via unstructured
+blinker==1.9.0
+    # via flask
+cachetools==5.5.0
+    # via google-auth
+certifi==2024.8.30
+    # via httpcore
+    # via httpx
+    # via requests
+cffi==1.17.1
+    # via cryptography
+chardet==5.2.0
+    # via unstructured
+charset-normalizer==3.4.0
+    # via pdfminer-six
+    # via requests
+click==8.1.7
+    # via flask
+    # via llama-parse
+    # via nltk
+    # via python-oxmsg
+    # via uvicorn
+cohere==5.11.4
+    # via langchain-cohere
+coloredlogs==15.0.1
+    # via onnxruntime
+contourpy==1.3.1
+    # via matplotlib
+cryptography==43.0.3
+    # via pdfminer-six
+    # via unstructured-client
+cycler==0.12.1
+    # via matplotlib
+dataclasses-json==0.6.7
+    # via langchain-community
+    # via llama-index-core
+    # via llama-index-legacy
+    # via unstructured
+defusedxml==0.7.1
+    # via langchain-anthropic
+deprecated==1.2.15
+    # via llama-index-core
+    # via llama-index-legacy
+    # via pikepdf
+dirtyjson==1.0.8
+    # via llama-index-core
+    # via llama-index-legacy
+distro==1.9.0
+    # via anthropic
+    # via openai
+effdet==0.4.1
+    # via unstructured
+emoji==2.14.0
+    # via unstructured
+et-xmlfile==2.0.0
+    # via openpyxl
+eval-type-backport==0.2.0
+    # via unstructured-client
+faiss-cpu==1.9.0.post1
+    # via quivr-core
+fastapi==0.115.5
+    # via megaparse
+fastavro==1.9.7
+    # via cohere
+filelock==3.16.1
+    # via huggingface-hub
+    # via torch
+    # via transformers
+    # via triton
+filetype==1.2.0 + # via llama-index-core + # via unstructured +flask==3.1.0 + # via quivr-whisper +flatbuffers==24.3.25 + # via onnxruntime +fonttools==4.55.0 + # via matplotlib +frozenlist==1.5.0 + # via aiohttp + # via aiosignal +fsspec==2024.10.0 + # via huggingface-hub + # via llama-index-core + # via llama-index-legacy + # via torch +google-api-core==2.23.0 + # via google-cloud-vision +google-auth==2.36.0 + # via google-api-core + # via google-cloud-vision +google-cloud-vision==3.8.1 + # via unstructured +googleapis-common-protos==1.66.0 + # via google-api-core + # via grpcio-status +greenlet==3.1.1 + # via playwright + # via sqlalchemy +grpcio==1.68.0 + # via google-api-core + # via grpcio-status +grpcio-status==1.68.0 + # via google-api-core +h11==0.14.0 + # via httpcore + # via uvicorn +httpcore==1.0.7 + # via httpx +httpx==0.27.2 + # via anthropic + # via cohere + # via langgraph-sdk + # via langsmith + # via llama-cloud + # via llama-index-core + # via llama-index-legacy + # via openai + # via quivr-core + # via unstructured-client +httpx-sse==0.4.0 + # via cohere + # via langgraph-sdk +huggingface-hub==0.26.2 + # via timm + # via tokenizers + # via transformers + # via unstructured-inference +humanfriendly==10.0 + # via coloredlogs +idna==3.10 + # via anyio + # via httpx + # via requests + # via yarl +iopath==0.1.10 + # via layoutparser +itsdangerous==2.2.0 + # via flask +jinja2==3.1.4 + # via flask + # via torch +jiter==0.7.1 + # via anthropic + # via openai +joblib==1.4.2 + # via nltk +jsonpatch==1.33 + # via langchain-core +jsonpath-python==1.0.6 + # via unstructured-client +jsonpointer==3.0.0 + # via jsonpatch +kiwisolver==1.4.7 + # via matplotlib +langchain==0.2.17 + # via langchain-community + # via megaparse + # via quivr-core +langchain-anthropic==0.1.23 + # via megaparse + # via quivr-core +langchain-cohere==0.2.4 + # via quivr-core +langchain-community==0.2.19 + # via langchain-experimental + # via megaparse + # via quivr-core +langchain-core==0.2.43 + # via langchain + # via langchain-anthropic + # via langchain-cohere + # via langchain-community + # via langchain-experimental + # via langchain-openai + # via langchain-text-splitters + # via langgraph + # via langgraph-checkpoint + # via megaparse + # via quivr-core +langchain-experimental==0.0.65 + # via langchain-cohere +langchain-openai==0.1.25 + # via megaparse + # via quivr-core +langchain-text-splitters==0.2.4 + # via langchain +langdetect==1.0.9 + # via unstructured +langgraph==0.2.52 + # via quivr-core +langgraph-checkpoint==2.0.5 + # via langgraph +langgraph-sdk==0.1.36 + # via langgraph +langsmith==0.1.143 + # via langchain + # via langchain-community + # via langchain-core +layoutparser==0.3.4 + # via unstructured-inference +llama-cloud==0.1.5 + # via llama-index-indices-managed-llama-cloud +llama-index==0.12.0 + # via megaparse +llama-index-agent-openai==0.4.0 + # via llama-index + # via llama-index-program-openai +llama-index-cli==0.4.0 + # via llama-index +llama-index-core==0.12.0 + # via llama-index + # via llama-index-agent-openai + # via llama-index-cli + # via llama-index-embeddings-openai + # via llama-index-indices-managed-llama-cloud + # via llama-index-llms-openai + # via llama-index-multi-modal-llms-openai + # via llama-index-program-openai + # via llama-index-question-gen-openai + # via llama-index-readers-file + # via llama-index-readers-llama-parse + # via llama-parse +llama-index-embeddings-openai==0.3.0 + # via llama-index + # via llama-index-cli 
+llama-index-indices-managed-llama-cloud==0.6.2 + # via llama-index +llama-index-legacy==0.9.48.post4 + # via llama-index +llama-index-llms-openai==0.3.0 + # via llama-index + # via llama-index-agent-openai + # via llama-index-cli + # via llama-index-multi-modal-llms-openai + # via llama-index-program-openai + # via llama-index-question-gen-openai +llama-index-multi-modal-llms-openai==0.3.0 + # via llama-index +llama-index-program-openai==0.3.0 + # via llama-index + # via llama-index-question-gen-openai +llama-index-question-gen-openai==0.3.0 + # via llama-index +llama-index-readers-file==0.4.0 + # via llama-index +llama-index-readers-llama-parse==0.4.0 + # via llama-index +llama-parse==0.5.14 + # via llama-index-readers-llama-parse + # via megaparse +lxml==5.3.0 + # via pikepdf + # via python-docx + # via python-pptx + # via unstructured +markdown==3.7 + # via unstructured +markdown-it-py==3.0.0 + # via rich +markupsafe==3.0.2 + # via jinja2 + # via quivr-core + # via werkzeug +marshmallow==3.23.1 + # via dataclasses-json +matplotlib==3.9.2 + # via pycocotools + # via unstructured-inference +mdurl==0.1.2 + # via markdown-it-py +megaparse==0.0.43 + # via quivr-core +mpmath==1.3.0 + # via sympy +msgpack==1.1.0 + # via langgraph-checkpoint +multidict==6.1.0 + # via aiohttp + # via yarl +mypy-extensions==1.0.0 + # via typing-inspect +nest-asyncio==1.6.0 + # via llama-index-core + # via llama-index-legacy + # via unstructured-client +networkx==3.4.2 + # via llama-index-core + # via llama-index-legacy + # via torch + # via unstructured +nltk==3.9.1 + # via llama-index + # via llama-index-core + # via llama-index-legacy + # via unstructured +numpy==1.26.4 + # via contourpy + # via faiss-cpu + # via langchain + # via langchain-community + # via layoutparser + # via llama-index-core + # via llama-index-legacy + # via matplotlib + # via megaparse + # via onnx + # via onnxruntime + # via opencv-python + # via pandas + # via pycocotools + # via scipy + # via torchvision + # via transformers + # via unstructured +nvidia-cublas-cu12==12.4.5.8 + # via nvidia-cudnn-cu12 + # via nvidia-cusolver-cu12 + # via torch +nvidia-cuda-cupti-cu12==12.4.127 + # via torch +nvidia-cuda-nvrtc-cu12==12.4.127 + # via torch +nvidia-cuda-runtime-cu12==12.4.127 + # via torch +nvidia-cudnn-cu12==9.1.0.70 + # via torch +nvidia-cufft-cu12==11.2.1.3 + # via torch +nvidia-curand-cu12==10.3.5.147 + # via torch +nvidia-cusolver-cu12==11.6.1.9 + # via torch +nvidia-cusparse-cu12==12.3.1.170 + # via nvidia-cusolver-cu12 + # via torch +nvidia-nccl-cu12==2.21.5 + # via torch +nvidia-nvjitlink-cu12==12.4.127 + # via nvidia-cusolver-cu12 + # via nvidia-cusparse-cu12 + # via torch +nvidia-nvtx-cu12==12.4.127 + # via torch +olefile==0.47 + # via python-oxmsg +omegaconf==2.3.0 + # via effdet +onnx==1.17.0 + # via unstructured + # via unstructured-inference +onnxruntime==1.20.0 + # via unstructured-inference +openai==1.54.5 + # via langchain-openai + # via llama-index-agent-openai + # via llama-index-embeddings-openai + # via llama-index-legacy + # via llama-index-llms-openai + # via quivr-whisper +opencv-python==4.10.0.84 + # via layoutparser + # via unstructured-inference +openpyxl==3.1.5 + # via unstructured +orjson==3.10.11 + # via langgraph-sdk + # via langsmith +packaging==24.2 + # via faiss-cpu + # via huggingface-hub + # via langchain-core + # via marshmallow + # via matplotlib + # via onnxruntime + # via pikepdf + # via pytesseract + # via transformers + # via unstructured-pytesseract +pandas==2.2.3 + # via langchain-cohere + # via 
layoutparser + # via llama-index-legacy + # via llama-index-readers-file + # via unstructured +parameterized==0.9.0 + # via cohere +pdf2image==1.17.0 + # via layoutparser + # via unstructured +pdfminer-six==20231228 + # via pdfplumber + # via unstructured +pdfplumber==0.11.4 + # via layoutparser + # via megaparse +pikepdf==9.4.2 + # via unstructured +pillow==11.0.0 + # via layoutparser + # via llama-index-core + # via matplotlib + # via pdf2image + # via pdfplumber + # via pikepdf + # via pillow-heif + # via pytesseract + # via python-pptx + # via torchvision + # via unstructured-pytesseract +pillow-heif==0.20.0 + # via unstructured +playwright==1.48.0 + # via megaparse +portalocker==3.0.0 + # via iopath +propcache==0.2.0 + # via aiohttp + # via yarl +proto-plus==1.25.0 + # via google-api-core + # via google-cloud-vision +protobuf==5.28.3 + # via google-api-core + # via google-cloud-vision + # via googleapis-common-protos + # via grpcio-status + # via onnx + # via onnxruntime + # via proto-plus + # via transformers +psutil==6.1.0 + # via megaparse + # via unstructured +pyasn1==0.6.1 + # via pyasn1-modules + # via rsa +pyasn1-modules==0.4.1 + # via google-auth +pycocotools==2.0.8 + # via effdet +pycparser==2.22 + # via cffi +pycryptodome==3.21.0 + # via megaparse +pydantic==2.9.2 + # via anthropic + # via cohere + # via fastapi + # via langchain + # via langchain-core + # via langsmith + # via llama-cloud + # via llama-index-core + # via openai + # via pydantic-settings + # via quivr-core + # via unstructured-client +pydantic-core==2.23.4 + # via cohere + # via pydantic +pydantic-settings==2.6.1 + # via megaparse +pyee==12.0.0 + # via playwright +pygments==2.18.0 + # via rich +pypandoc==1.14 + # via unstructured +pyparsing==3.2.0 + # via matplotlib +pypdf==5.1.0 + # via llama-index-readers-file + # via megaparse + # via unstructured + # via unstructured-client +pypdfium2==4.30.0 + # via pdfplumber +pytesseract==0.3.13 + # via unstructured +python-dateutil==2.8.2 + # via matplotlib + # via pandas + # via unstructured-client +python-docx==1.1.2 + # via unstructured +python-dotenv==1.0.1 + # via megaparse + # via pydantic-settings +python-iso639==2024.10.22 + # via unstructured +python-magic==0.4.27 + # via megaparse + # via unstructured +python-multipart==0.0.17 + # via unstructured-inference +python-oxmsg==0.0.1 + # via unstructured +python-pptx==0.6.23 + # via unstructured +pytz==2024.2 + # via pandas +pyyaml==6.0.2 + # via huggingface-hub + # via langchain + # via langchain-community + # via langchain-core + # via layoutparser + # via llama-index-core + # via omegaconf + # via timm + # via transformers +quivr-core==0.0.24 + # via quivr-whisper +rapidfuzz==3.10.1 + # via quivr-core + # via unstructured + # via unstructured-inference +ratelimit==2.2.1 + # via megaparse +regex==2024.11.6 + # via nltk + # via tiktoken + # via transformers +requests==2.32.3 + # via cohere + # via google-api-core + # via huggingface-hub + # via langchain + # via langchain-community + # via langsmith + # via llama-index-core + # via llama-index-legacy + # via megaparse + # via requests-toolbelt + # via tiktoken + # via transformers + # via unstructured +requests-toolbelt==1.0.0 + # via langsmith + # via unstructured-client +rich==13.9.4 + # via quivr-core +rsa==4.9 + # via google-auth +safetensors==0.4.5 + # via timm + # via transformers +scipy==1.14.1 + # via layoutparser +sentencepiece==0.2.0 + # via transformers +six==1.16.0 + # via langdetect + # via python-dateutil +sniffio==1.3.1 + # via anthropic + # via 
anyio + # via httpx + # via openai +soupsieve==2.6 + # via beautifulsoup4 +sqlalchemy==2.0.36 + # via langchain + # via langchain-community + # via llama-index-core + # via llama-index-legacy +starlette==0.41.3 + # via fastapi +striprtf==0.0.26 + # via llama-index-readers-file +sympy==1.13.1 + # via onnxruntime + # via torch +tabulate==0.9.0 + # via langchain-cohere + # via unstructured +tenacity==8.5.0 + # via langchain + # via langchain-community + # via langchain-core + # via llama-index-core + # via llama-index-legacy +tiktoken==0.8.0 + # via langchain-openai + # via llama-index-core + # via llama-index-legacy + # via quivr-core +timm==1.0.11 + # via effdet + # via unstructured-inference +tokenizers==0.20.3 + # via cohere + # via transformers +torch==2.5.1 + # via effdet + # via timm + # via torchvision + # via unstructured-inference +torchvision==0.20.1 + # via effdet + # via timm +tqdm==4.67.0 + # via huggingface-hub + # via iopath + # via llama-index-core + # via nltk + # via openai + # via transformers + # via unstructured +transformers==4.46.3 + # via quivr-core + # via unstructured-inference +triton==3.1.0 + # via torch +types-pyyaml==6.0.12.20240917 + # via quivr-core +types-requests==2.32.0.20241016 + # via cohere +typing-extensions==4.12.2 + # via anthropic + # via cohere + # via fastapi + # via huggingface-hub + # via iopath + # via langchain-core + # via llama-index-core + # via llama-index-legacy + # via openai + # via pydantic + # via pydantic-core + # via pyee + # via python-docx + # via python-oxmsg + # via sqlalchemy + # via torch + # via typing-inspect + # via unstructured +typing-inspect==0.9.0 + # via dataclasses-json + # via llama-index-core + # via llama-index-legacy + # via unstructured-client +tzdata==2024.2 + # via pandas +unstructured==0.15.0 + # via megaparse +unstructured-client==0.27.0 + # via unstructured +unstructured-inference==0.7.36 + # via unstructured +unstructured-pytesseract==0.3.13 + # via unstructured +urllib3==2.2.3 + # via requests + # via types-requests +uvicorn==0.32.0 + # via megaparse +uvloop==0.21.0 + # via megaparse +werkzeug==3.1.3 + # via flask +wrapt==1.16.0 + # via deprecated + # via llama-index-core + # via unstructured +xlrd==2.0.1 + # via unstructured +xlsxwriter==3.2.0 + # via python-pptx +yarl==1.17.2 + # via aiohttp diff --git a/examples/quivr-whisper/requirements.lock b/examples/quivr-whisper/requirements.lock new file mode 100644 index 000000000000..8e93ec1b5ff3 --- /dev/null +++ b/examples/quivr-whisper/requirements.lock @@ -0,0 +1,713 @@ +# generated by rye +# use `rye lock` or `rye sync` to update this lockfile +# +# last locked with the following flags: +# pre: false +# features: [] +# all-features: false +# with-sources: false +# generate-hashes: false +# universal: false + +-e file:. 
+aiofiles==24.1.0 + # via quivr-core +aiohappyeyeballs==2.4.3 + # via aiohttp +aiohttp==3.11.6 + # via langchain + # via langchain-community + # via llama-index-core + # via llama-index-legacy +aiosignal==1.3.1 + # via aiohttp +annotated-types==0.7.0 + # via pydantic +anthropic==0.39.0 + # via langchain-anthropic +antlr4-python3-runtime==4.9.3 + # via omegaconf +anyio==4.6.2.post1 + # via anthropic + # via httpx + # via openai + # via starlette +attrs==24.2.0 + # via aiohttp +backoff==2.2.1 + # via megaparse + # via unstructured +beautifulsoup4==4.12.3 + # via llama-index-readers-file + # via unstructured +blinker==1.9.0 + # via flask +cachetools==5.5.0 + # via google-auth +certifi==2024.8.30 + # via httpcore + # via httpx + # via requests +cffi==1.17.1 + # via cryptography +chardet==5.2.0 + # via unstructured +charset-normalizer==3.4.0 + # via pdfminer-six + # via requests +click==8.1.7 + # via flask + # via llama-parse + # via nltk + # via python-oxmsg + # via uvicorn +cohere==5.11.4 + # via langchain-cohere +coloredlogs==15.0.1 + # via onnxruntime +contourpy==1.3.1 + # via matplotlib +cryptography==43.0.3 + # via pdfminer-six + # via unstructured-client +cycler==0.12.1 + # via matplotlib +dataclasses-json==0.6.7 + # via langchain-community + # via llama-index-core + # via llama-index-legacy + # via unstructured +defusedxml==0.7.1 + # via langchain-anthropic +deprecated==1.2.15 + # via llama-index-core + # via llama-index-legacy + # via pikepdf +dirtyjson==1.0.8 + # via llama-index-core + # via llama-index-legacy +distro==1.9.0 + # via anthropic + # via openai +effdet==0.4.1 + # via unstructured +emoji==2.14.0 + # via unstructured +et-xmlfile==2.0.0 + # via openpyxl +eval-type-backport==0.2.0 + # via unstructured-client +faiss-cpu==1.9.0.post1 + # via quivr-core +fastapi==0.115.5 + # via megaparse +fastavro==1.9.7 + # via cohere +filelock==3.16.1 + # via huggingface-hub + # via torch + # via transformers + # via triton +filetype==1.2.0 + # via llama-index-core + # via unstructured +flask==3.1.0 + # via quivr-whisper +flatbuffers==24.3.25 + # via onnxruntime +fonttools==4.55.0 + # via matplotlib +frozenlist==1.5.0 + # via aiohttp + # via aiosignal +fsspec==2024.10.0 + # via huggingface-hub + # via llama-index-core + # via llama-index-legacy + # via torch +google-api-core==2.23.0 + # via google-cloud-vision +google-auth==2.36.0 + # via google-api-core + # via google-cloud-vision +google-cloud-vision==3.8.1 + # via unstructured +googleapis-common-protos==1.66.0 + # via google-api-core + # via grpcio-status +greenlet==3.1.1 + # via playwright + # via sqlalchemy +grpcio==1.68.0 + # via google-api-core + # via grpcio-status +grpcio-status==1.68.0 + # via google-api-core +h11==0.14.0 + # via httpcore + # via uvicorn +httpcore==1.0.7 + # via httpx +httpx==0.27.2 + # via anthropic + # via cohere + # via langgraph-sdk + # via langsmith + # via llama-cloud + # via llama-index-core + # via llama-index-legacy + # via openai + # via quivr-core + # via unstructured-client +httpx-sse==0.4.0 + # via cohere + # via langgraph-sdk +huggingface-hub==0.26.2 + # via timm + # via tokenizers + # via transformers + # via unstructured-inference +humanfriendly==10.0 + # via coloredlogs +idna==3.10 + # via anyio + # via httpx + # via requests + # via yarl +iopath==0.1.10 + # via layoutparser +itsdangerous==2.2.0 + # via flask +jinja2==3.1.4 + # via flask + # via torch +jiter==0.7.1 + # via anthropic + # via openai +joblib==1.4.2 + # via nltk +jsonpatch==1.33 + # via langchain-core +jsonpath-python==1.0.6 + # via 
unstructured-client +jsonpointer==3.0.0 + # via jsonpatch +kiwisolver==1.4.7 + # via matplotlib +langchain==0.2.17 + # via langchain-community + # via megaparse + # via quivr-core +langchain-anthropic==0.1.23 + # via megaparse + # via quivr-core +langchain-cohere==0.2.4 + # via quivr-core +langchain-community==0.2.19 + # via langchain-experimental + # via megaparse + # via quivr-core +langchain-core==0.2.43 + # via langchain + # via langchain-anthropic + # via langchain-cohere + # via langchain-community + # via langchain-experimental + # via langchain-openai + # via langchain-text-splitters + # via langgraph + # via langgraph-checkpoint + # via megaparse + # via quivr-core +langchain-experimental==0.0.65 + # via langchain-cohere +langchain-openai==0.1.25 + # via megaparse + # via quivr-core +langchain-text-splitters==0.2.4 + # via langchain +langdetect==1.0.9 + # via unstructured +langgraph==0.2.52 + # via quivr-core +langgraph-checkpoint==2.0.5 + # via langgraph +langgraph-sdk==0.1.36 + # via langgraph +langsmith==0.1.143 + # via langchain + # via langchain-community + # via langchain-core +layoutparser==0.3.4 + # via unstructured-inference +llama-cloud==0.1.5 + # via llama-index-indices-managed-llama-cloud +llama-index==0.12.0 + # via megaparse +llama-index-agent-openai==0.4.0 + # via llama-index + # via llama-index-program-openai +llama-index-cli==0.4.0 + # via llama-index +llama-index-core==0.12.0 + # via llama-index + # via llama-index-agent-openai + # via llama-index-cli + # via llama-index-embeddings-openai + # via llama-index-indices-managed-llama-cloud + # via llama-index-llms-openai + # via llama-index-multi-modal-llms-openai + # via llama-index-program-openai + # via llama-index-question-gen-openai + # via llama-index-readers-file + # via llama-index-readers-llama-parse + # via llama-parse +llama-index-embeddings-openai==0.3.0 + # via llama-index + # via llama-index-cli +llama-index-indices-managed-llama-cloud==0.6.2 + # via llama-index +llama-index-legacy==0.9.48.post4 + # via llama-index +llama-index-llms-openai==0.3.0 + # via llama-index + # via llama-index-agent-openai + # via llama-index-cli + # via llama-index-multi-modal-llms-openai + # via llama-index-program-openai + # via llama-index-question-gen-openai +llama-index-multi-modal-llms-openai==0.3.0 + # via llama-index +llama-index-program-openai==0.3.0 + # via llama-index + # via llama-index-question-gen-openai +llama-index-question-gen-openai==0.3.0 + # via llama-index +llama-index-readers-file==0.4.0 + # via llama-index +llama-index-readers-llama-parse==0.4.0 + # via llama-index +llama-parse==0.5.14 + # via llama-index-readers-llama-parse + # via megaparse +lxml==5.3.0 + # via pikepdf + # via python-docx + # via python-pptx + # via unstructured +markdown==3.7 + # via unstructured +markdown-it-py==3.0.0 + # via rich +markupsafe==3.0.2 + # via jinja2 + # via quivr-core + # via werkzeug +marshmallow==3.23.1 + # via dataclasses-json +matplotlib==3.9.2 + # via pycocotools + # via unstructured-inference +mdurl==0.1.2 + # via markdown-it-py +megaparse==0.0.43 + # via quivr-core +mpmath==1.3.0 + # via sympy +msgpack==1.1.0 + # via langgraph-checkpoint +multidict==6.1.0 + # via aiohttp + # via yarl +mypy-extensions==1.0.0 + # via typing-inspect +nest-asyncio==1.6.0 + # via llama-index-core + # via llama-index-legacy + # via unstructured-client +networkx==3.4.2 + # via llama-index-core + # via llama-index-legacy + # via torch + # via unstructured +nltk==3.9.1 + # via llama-index + # via llama-index-core + # via 
llama-index-legacy + # via unstructured +numpy==1.26.4 + # via contourpy + # via faiss-cpu + # via langchain + # via langchain-community + # via layoutparser + # via llama-index-core + # via llama-index-legacy + # via matplotlib + # via megaparse + # via onnx + # via onnxruntime + # via opencv-python + # via pandas + # via pycocotools + # via scipy + # via torchvision + # via transformers + # via unstructured +nvidia-cublas-cu12==12.4.5.8 + # via nvidia-cudnn-cu12 + # via nvidia-cusolver-cu12 + # via torch +nvidia-cuda-cupti-cu12==12.4.127 + # via torch +nvidia-cuda-nvrtc-cu12==12.4.127 + # via torch +nvidia-cuda-runtime-cu12==12.4.127 + # via torch +nvidia-cudnn-cu12==9.1.0.70 + # via torch +nvidia-cufft-cu12==11.2.1.3 + # via torch +nvidia-curand-cu12==10.3.5.147 + # via torch +nvidia-cusolver-cu12==11.6.1.9 + # via torch +nvidia-cusparse-cu12==12.3.1.170 + # via nvidia-cusolver-cu12 + # via torch +nvidia-nccl-cu12==2.21.5 + # via torch +nvidia-nvjitlink-cu12==12.4.127 + # via nvidia-cusolver-cu12 + # via nvidia-cusparse-cu12 + # via torch +nvidia-nvtx-cu12==12.4.127 + # via torch +olefile==0.47 + # via python-oxmsg +omegaconf==2.3.0 + # via effdet +onnx==1.17.0 + # via unstructured + # via unstructured-inference +onnxruntime==1.20.0 + # via unstructured-inference +openai==1.54.5 + # via langchain-openai + # via llama-index-agent-openai + # via llama-index-embeddings-openai + # via llama-index-legacy + # via llama-index-llms-openai + # via quivr-whisper +opencv-python==4.10.0.84 + # via layoutparser + # via unstructured-inference +openpyxl==3.1.5 + # via unstructured +orjson==3.10.11 + # via langgraph-sdk + # via langsmith +packaging==24.2 + # via faiss-cpu + # via huggingface-hub + # via langchain-core + # via marshmallow + # via matplotlib + # via onnxruntime + # via pikepdf + # via pytesseract + # via transformers + # via unstructured-pytesseract +pandas==2.2.3 + # via langchain-cohere + # via layoutparser + # via llama-index-legacy + # via llama-index-readers-file + # via unstructured +parameterized==0.9.0 + # via cohere +pdf2image==1.17.0 + # via layoutparser + # via unstructured +pdfminer-six==20231228 + # via pdfplumber + # via unstructured +pdfplumber==0.11.4 + # via layoutparser + # via megaparse +pikepdf==9.4.2 + # via unstructured +pillow==11.0.0 + # via layoutparser + # via llama-index-core + # via matplotlib + # via pdf2image + # via pdfplumber + # via pikepdf + # via pillow-heif + # via pytesseract + # via python-pptx + # via torchvision + # via unstructured-pytesseract +pillow-heif==0.20.0 + # via unstructured +playwright==1.48.0 + # via megaparse +portalocker==3.0.0 + # via iopath +propcache==0.2.0 + # via aiohttp + # via yarl +proto-plus==1.25.0 + # via google-api-core + # via google-cloud-vision +protobuf==5.28.3 + # via google-api-core + # via google-cloud-vision + # via googleapis-common-protos + # via grpcio-status + # via onnx + # via onnxruntime + # via proto-plus + # via transformers +psutil==6.1.0 + # via megaparse + # via unstructured +pyasn1==0.6.1 + # via pyasn1-modules + # via rsa +pyasn1-modules==0.4.1 + # via google-auth +pycocotools==2.0.8 + # via effdet +pycparser==2.22 + # via cffi +pycryptodome==3.21.0 + # via megaparse +pydantic==2.9.2 + # via anthropic + # via cohere + # via fastapi + # via langchain + # via langchain-core + # via langsmith + # via llama-cloud + # via llama-index-core + # via openai + # via pydantic-settings + # via quivr-core + # via unstructured-client +pydantic-core==2.23.4 + # via cohere + # via pydantic +pydantic-settings==2.6.1 
+ # via megaparse +pyee==12.0.0 + # via playwright +pygments==2.18.0 + # via rich +pypandoc==1.14 + # via unstructured +pyparsing==3.2.0 + # via matplotlib +pypdf==5.1.0 + # via llama-index-readers-file + # via megaparse + # via unstructured + # via unstructured-client +pypdfium2==4.30.0 + # via pdfplumber +pytesseract==0.3.13 + # via unstructured +python-dateutil==2.8.2 + # via matplotlib + # via pandas + # via unstructured-client +python-docx==1.1.2 + # via unstructured +python-dotenv==1.0.1 + # via megaparse + # via pydantic-settings +python-iso639==2024.10.22 + # via unstructured +python-magic==0.4.27 + # via megaparse + # via unstructured +python-multipart==0.0.17 + # via unstructured-inference +python-oxmsg==0.0.1 + # via unstructured +python-pptx==0.6.23 + # via unstructured +pytz==2024.2 + # via pandas +pyyaml==6.0.2 + # via huggingface-hub + # via langchain + # via langchain-community + # via langchain-core + # via layoutparser + # via llama-index-core + # via omegaconf + # via timm + # via transformers +quivr-core==0.0.24 + # via quivr-whisper +rapidfuzz==3.10.1 + # via quivr-core + # via unstructured + # via unstructured-inference +ratelimit==2.2.1 + # via megaparse +regex==2024.11.6 + # via nltk + # via tiktoken + # via transformers +requests==2.32.3 + # via cohere + # via google-api-core + # via huggingface-hub + # via langchain + # via langchain-community + # via langsmith + # via llama-index-core + # via llama-index-legacy + # via megaparse + # via requests-toolbelt + # via tiktoken + # via transformers + # via unstructured +requests-toolbelt==1.0.0 + # via langsmith + # via unstructured-client +rich==13.9.4 + # via quivr-core +rsa==4.9 + # via google-auth +safetensors==0.4.5 + # via timm + # via transformers +scipy==1.14.1 + # via layoutparser +sentencepiece==0.2.0 + # via transformers +six==1.16.0 + # via langdetect + # via python-dateutil +sniffio==1.3.1 + # via anthropic + # via anyio + # via httpx + # via openai +soupsieve==2.6 + # via beautifulsoup4 +sqlalchemy==2.0.36 + # via langchain + # via langchain-community + # via llama-index-core + # via llama-index-legacy +starlette==0.41.3 + # via fastapi +striprtf==0.0.26 + # via llama-index-readers-file +sympy==1.13.1 + # via onnxruntime + # via torch +tabulate==0.9.0 + # via langchain-cohere + # via unstructured +tenacity==8.5.0 + # via langchain + # via langchain-community + # via langchain-core + # via llama-index-core + # via llama-index-legacy +tiktoken==0.8.0 + # via langchain-openai + # via llama-index-core + # via llama-index-legacy + # via quivr-core +timm==1.0.11 + # via effdet + # via unstructured-inference +tokenizers==0.20.3 + # via cohere + # via transformers +torch==2.5.1 + # via effdet + # via timm + # via torchvision + # via unstructured-inference +torchvision==0.20.1 + # via effdet + # via timm +tqdm==4.67.0 + # via huggingface-hub + # via iopath + # via llama-index-core + # via nltk + # via openai + # via transformers + # via unstructured +transformers==4.46.3 + # via quivr-core + # via unstructured-inference +triton==3.1.0 + # via torch +types-pyyaml==6.0.12.20240917 + # via quivr-core +types-requests==2.32.0.20241016 + # via cohere +typing-extensions==4.12.2 + # via anthropic + # via cohere + # via fastapi + # via huggingface-hub + # via iopath + # via langchain-core + # via llama-index-core + # via llama-index-legacy + # via openai + # via pydantic + # via pydantic-core + # via pyee + # via python-docx + # via python-oxmsg + # via sqlalchemy + # via torch + # via typing-inspect + # via unstructured 
+typing-inspect==0.9.0
+    # via dataclasses-json
+    # via llama-index-core
+    # via llama-index-legacy
+    # via unstructured-client
+tzdata==2024.2
+    # via pandas
+unstructured==0.15.0
+    # via megaparse
+unstructured-client==0.27.0
+    # via unstructured
+unstructured-inference==0.7.36
+    # via unstructured
+unstructured-pytesseract==0.3.13
+    # via unstructured
+urllib3==2.2.3
+    # via requests
+    # via types-requests
+uvicorn==0.32.0
+    # via megaparse
+uvloop==0.21.0
+    # via megaparse
+werkzeug==3.1.3
+    # via flask
+wrapt==1.16.0
+    # via deprecated
+    # via llama-index-core
+    # via unstructured
+xlrd==2.0.1
+    # via unstructured
+xlsxwriter==3.2.0
+    # via python-pptx
+yarl==1.17.2
+    # via aiohttp
diff --git a/examples/quivr-whisper/static/app.js b/examples/quivr-whisper/static/app.js
new file mode 100644
index 000000000000..0d788544dc23
--- /dev/null
+++ b/examples/quivr-whisper/static/app.js
@@ -0,0 +1,209 @@
+const recordBtn = document.getElementById('record-btn');
+const audioVisualizer = document.getElementById('audio-visualizer');
+const audioPlayback = document.getElementById('audio-playback');
+const canvasCtx = audioVisualizer.getContext('2d');
+
+let isRecording = false;
+let mediaRecorder;
+let audioChunks = [];
+let audioContext;
+let analyser;
+let dataArray;
+let bufferLength;
+let lastAudioLevel = 0;
+let silenceTimer;
+
+recordBtn.addEventListener('click', toggleRecording);
+
+function toggleRecording() {
+    if (!isRecording) {
+        recordBtn.classList.add('hidden');
+        audioVisualizer.classList.remove('hidden');
+        startRecording();
+    } else {
+        audioVisualizer.classList.add('hidden');
+        stopRecording();
+    }
+}
+
+function drawWaveform() {
+    if (!analyser) return;
+
+    requestAnimationFrame(drawWaveform);
+
+    analyser.getByteTimeDomainData(dataArray);
+
+    canvasCtx.fillStyle = 'rgb(255, 255, 255)';
+    canvasCtx.fillRect(0, 0, audioVisualizer.width, audioVisualizer.height);
+
+    canvasCtx.lineWidth = 2;
+    canvasCtx.strokeStyle = 'rgb(0, 0, 0)';
+
+    canvasCtx.beginPath();
+
+    let sliceWidth = audioVisualizer.width * 1.0 / bufferLength;
+    let x = 0;
+
+    let sum = 0;
+
+    for (let i = 0; i < bufferLength; i++) {
+        let v = dataArray[i] / 128.0;
+        let y = v * audioVisualizer.height / 2;
+
+        sum += v;
+
+        if (i === 0) {
+            canvasCtx.moveTo(x, y);
+        } else {
+            canvasCtx.lineTo(x, y);
+        }
+
+        x += sliceWidth;
+    }
+
+    canvasCtx.lineTo(audioVisualizer.width, audioVisualizer.height / 2);
+    canvasCtx.stroke();
+
+    let currentAudioLevel = sum / bufferLength;
+
+    if (isRecording && Math.abs(currentAudioLevel - lastAudioLevel) < 0.01) {
+        if (!silenceTimer) {
+            silenceTimer = setTimeout(stopRecording, 1000);
+        }
+    } else {
+        clearTimeout(silenceTimer);
+        silenceTimer = null;
+    }
+
+    lastAudioLevel = currentAudioLevel;
+}
+
+async function startRecording() {
+    audioChunks = [];
+    const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
+    mediaRecorder = new MediaRecorder(stream);
+    mediaRecorder.ondataavailable = event => {
+        audioChunks.push(event.data);
+    };
+    mediaRecorder.start();
+    isRecording = true;
+
+    audioContext = new (window.AudioContext || window.webkitAudioContext)();
+    analyser = audioContext.createAnalyser();
+    const source = audioContext.createMediaStreamSource(stream);
+
+    source.connect(analyser);
+    analyser.fftSize = 2048;
+    bufferLength = analyser.frequencyBinCount;
+    dataArray = new Uint8Array(bufferLength);
+
+    drawWaveform();
+}
+
+function stopRecording() {
+    mediaRecorder.stop();
+    mediaRecorder.onstop = async () => {
+        // The mediaRecorder has stopped; now we can process the chunks
+        const audioBlob = new Blob(audioChunks, { type: 'audio/webm' });
+        const formData = new FormData();
+        formData.append('audio_data', audioBlob);
+
+        // Now we're sending the audio to the server and waiting for a response
+        try {
+            const response = await fetch('/transcribe', {
+                method: 'POST',
+                body: formData
+            });
+            const data = await response.json();
+
+            // Once we have the response, we can source the playback element and play it
+            audioPlayback.src = 'data:audio/mpeg;base64,' + data.audio_base64;
+            audioPlayback.classList.remove('hidden');
+            audioVisualizer.classList.add('hidden'); // hide the visualizer while playing back the response
+            setupAIResponseVisualization();
+            audioPlayback.onloadedmetadata = () => {
+                // When metadata is loaded, start playback
+                audioPlayback.play();
+                visualizeAIResponse();
+            };
+
+            // We only reset the UI after the audio has finished playing
+            audioPlayback.onended = () => {
+                resetUI();
+            };
+        } catch (error) {
+            console.error('Error during fetch/transcription:', error);
+            resetUI();
+        } finally {
+            if (analyser) {
+                analyser.disconnect();
+                analyser = null;
+            }
+            isRecording = false;
+        }
+    };
+}
+
+function resetUI() {
+    document.getElementById('record-btn').classList.remove('hidden');
+    document.getElementById('audio-visualizer').classList.add('hidden');
+    document.getElementById('audio-playback').classList.add('hidden');
+    // Reset any other UI elements as necessary
+}
+
+function setupAIResponseVisualization() {
+    try {
+        // Create a new audio context for playback if it doesn't exist
+        if (!audioContext) {
+            audioContext = new (window.AudioContext || window.webkitAudioContext)();
+        }
+        // Resume the audio context in case it's in a suspended state
+        audioContext.resume().then(() => {
+            analyser = audioContext.createAnalyser();
+            const source = audioContext.createMediaElementSource(audioPlayback);
+            source.connect(analyser);
+            analyser.connect(audioContext.destination);
+            analyser.fftSize = 2048;
+            bufferLength = analyser.frequencyBinCount;
+            dataArray = new Uint8Array(bufferLength);
+        });
+    } catch (error) {
+        console.error('Error setting up AI response visualization:', error);
+    }
+}
+
+function visualizeAIResponse() {
+    const draw = () => {
+        requestAnimationFrame(draw);
+
+        analyser.getByteTimeDomainData(dataArray);
+
+        canvasCtx.fillStyle = 'rgb(255, 255, 255)';
+        canvasCtx.fillRect(0, 0, audioVisualizer.width, audioVisualizer.height);
+
+        canvasCtx.lineWidth = 2;
+        canvasCtx.strokeStyle = 'rgb(0, 0, 0)';
+
+        canvasCtx.beginPath();
+
+        let sliceWidth = audioVisualizer.width * 1.0 / bufferLength;
+        let x = 0;
+
+        for (let i = 0; i < bufferLength; i++) {
+            let v = dataArray[i] / 128.0;
+            let y = v * audioVisualizer.height / 2;
+
+            if (i === 0) {
+                canvasCtx.moveTo(x, y);
+            } else {
+                canvasCtx.lineTo(x, y);
+            }
+
+            x += sliceWidth;
+        }
+
+        canvasCtx.lineTo(audioVisualizer.width, audioVisualizer.height / 2);
+        canvasCtx.stroke();
+    };
+
+    draw();
+}
\ No newline at end of file
diff --git a/examples/quivr-whisper/static/styles.css b/examples/quivr-whisper/static/styles.css
new file mode 100644
index 000000000000..b7a0d74d47f0
--- /dev/null
+++ b/examples/quivr-whisper/static/styles.css
@@ -0,0 +1,36 @@
+.loader {
+    border: 4px solid #f3f3f3;
+    border-radius: 50%;
+    border-top: 4px solid #3498db;
+    width: 50px;
+    height: 50px;
+    -webkit-animation: spin 2s linear infinite;
+    animation: spin 2s linear infinite;
+    position: absolute;
+    /* Center the loader in the viewport */
+    top: 50%;
+    left: 50%;
+    transform: translate(-50%, -50%);
+    display: none;
+    /* Hide it by default */
+}
+
+@-webkit-keyframes spin {
+    0% {
+        -webkit-transform: rotate(0deg);
+    }
+
+    100% {
+        -webkit-transform: rotate(360deg);
+    }
+}
+
+@keyframes spin {
+    0% {
+        transform: rotate(0deg);
+    }
+
+    100% {
+        transform: rotate(360deg);
+    }
+}
\ No newline at end of file
diff --git a/examples/quivr-whisper/templates/index.html b/examples/quivr-whisper/templates/index.html
new file mode 100644
index 000000000000..6f508034c5e8
--- /dev/null
+++ b/examples/quivr-whisper/templates/index.html
@@ -0,0 +1,26 @@
+<!DOCTYPE html>
+<html lang="en">
+
+<head>
+    <meta charset="UTF-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <title>Audio Interaction WebApp</title>
+    <link rel="stylesheet" href="{{ url_for('static', filename='styles.css') }}">
+    <style>
+        /* Used by app.js to toggle element visibility */
+        .hidden {
+            display: none;
+        }
+    </style>
+</head>
+
+<body>
+    <h1>Quivr.app</h1>
+    <button id="record-btn">Ask a question to Quivr</button>
+    <canvas id="audio-visualizer" class="hidden"></canvas>
+    <audio id="audio-playback" class="hidden"></audio>
+    <div class="loader"></div>
+    <script src="{{ url_for('static', filename='app.js') }}"></script>
+</body>
+
+</html>
\ No newline at end of file