diff --git a/.env.example b/.env.example
index a1f7bf84d..949030fe9 100644
--- a/.env.example
+++ b/.env.example
@@ -12,9 +12,3 @@ export POSTGRES_HOST=your_host
 export POSTGRES_PORT=your_port
 export POSTGRES_DBNAME=your_db
 export POSTGRES_PROJECT_NAME=your_project_name
-
-# Environment variables for KG provider (currently only Neo4j)
-# export NEO4J_USER=YOUR_NEO4J_USER
-# export NEO4J_PASSWORD=YOUR_NEO4J_PASSWORD
-# export NEO4J_URL=YOUR_NEO4J_URL
-# export NEO4J_DATABASE=YOUR_NEO4J_DATABASE
diff --git a/.github/workflows/integration-test-workflow-debian.yml b/.github/workflows/integration-test-workflow-debian.yml
index 2c8a664ee..4b37be5cc 100644
--- a/.github/workflows/integration-test-workflow-debian.yml
+++ b/.github/workflows/integration-test-workflow-debian.yml
@@ -1,4 +1,4 @@
-name: Debian R2R Docker Build and Integration Test (Debian GNU/Linux 12 (bookworm) amd64)
+name: R2R CLI Integration Test (Debian GNU/Linux 12 (bookworm) amd64)
 
 on:
   push:
@@ -8,124 +8,141 @@ on:
 jobs:
   build-and-test:
-    runs-on: arm3
+    runs-on: ubuntu-latest
     permissions:
-      packages: write
       contents: read
       id-token: write
       actions: write
     env:
       OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
       TELEMETRY_ENABLED: false
+      POSTGRES_USER: ${{ secrets.POSTGRES_USER }}
+      POSTGRES_PASSWORD: ${{ secrets.POSTGRES_PASSWORD }}
+      POSTGRES_DBNAME: ${{ secrets.POSTGRES_DBNAME }}
+      POSTGRES_HOST: ${{ secrets.POSTGRES_HOST }}
+      POSTGRES_PORT: ${{ secrets.POSTGRES_PORT }}
+      POSTGRES_PROJECT_NAME: ${{ secrets.POSTGRES_PROJECT_NAME }}
 
     steps:
       - uses: actions/checkout@v4
 
-      - name: Clean up disk space
-        uses: jlumbroso/free-disk-space@main
+      - name: Set up Python
+        uses: actions/setup-python@v4
         with:
-          tool-cache: true
-          android: true
-          dotnet: true
-          haskell: true
-          large-packages: true
-          swap-storage: true
-
-      - name: Docker Auth
-        uses: docker/login-action@v3
-        with:
-          username: ${{ secrets.RAGTORICHES_DOCKER_UNAME }}
-          password: ${{ secrets.RAGTORICHES_DOCKER_TOKEN }}
-
-      - name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@v3
-
-      - name: Set up QEMU
-        uses: docker/setup-qemu-action@v3
+          python-version: '3.x'
 
-      - name: Set image name
-        id: image
+      - name: Install Poetry
         run: |
-          echo "IMAGE_NAME=ragtoriches/dev" >> $GITHUB_OUTPUT
+          curl -sSL https://install.python-poetry.org | python3 -
 
-      - name: Build and Push Docker Image
-        uses: docker/build-push-action@v5
-        with:
-          context: ./py
-          file: ./py/Dockerfile
-          push: true
-          tags: ragtoriches/dev:latest
-          platforms: linux/amd64
-          no-cache: true
-          pull: true
-
-      - name: Run cloud LLM integration tests in Docker
+      - name: Install dependencies
         working-directory: ./py
         run: |
-          python3 -m venv venv
-          source venv/bin/activate
-          pip install -e .
-
-          echo "R2R Version"
-          r2r version
-
-          echo "R2R Serve --docker"
-          r2r serve --docker --exclude-neo4j=true --exclude-ollama=true --image=ragtoriches/dev:latest
+          poetry install -E core -E ingestion-bundle
 
+      - name: Start R2R server
+        working-directory: ./py
+        run: |
+          poetry run r2r serve &
           echo "Waiting for services to start..."
           sleep 30
 
+      - name: Run integration tests
+        working-directory: ./py
+        run: |
+          echo "R2R Version"
+          poetry run r2r version
+
+      - name: Walkthrough
+        working-directory: ./py
+        run: |
           echo "Ingest Data"
-          r2r ingest-sample-files
+          poetry run r2r ingest-sample-files
 
           echo "Get Documents Overview"
-          r2r documents-overview
+          poetry run r2r documents-overview
 
           echo "Get Document Chunks"
-          r2r document-chunks --document-id=77f67c65-6406-5076-8176-3844f3ef3688
+          poetry run r2r document-chunks --document-id=9fbe403b-c11c-5aae-8ade-ef22980c3ad1
 
           echo "Delete Documents"
-          r2r delete --filter="document_id:eq:f25fd516-5cac-5c09-b120-0fc841270c7e"
+          poetry run r2r delete --filter=document_id:eq:9fbe403b-c11c-5aae-8ade-ef22980c3ad1
+
+          echo "Update Document"
+          poetry run r2r update-files core/examples/data/aristotle_v2.txt --document-ids=9fbe403b-c11c-5aae-8ade-ef22980c3ad1
 
           echo "Vector Search"
-          r2r search --query="What was Uber'\''s profit in 2020?"
+          poetry run r2r search --query="What was Uber's profit in 2020?"
 
           echo "Hybrid Search"
-          r2r search --query="What is a fierce nerd?" --use-hybrid-search
+          poetry run r2r search --query="What was Uber's profit in 2020?" --use-hybrid-search
 
           echo "Basic RAG"
-          r2r rag --query="What was Uber'\''s profit in 2020?"
+          poetry run r2r rag --query="What was Uber's profit in 2020?"
 
           echo "RAG with Hybrid Search"
-          r2r rag --query="Who is John Snow?" --use-hybrid-search
+          poetry run r2r rag --query="Who is John Snow?" --use-hybrid-search
 
           echo "Streaming RAG"
-          r2r rag --query="What was Lyft'\''s profit in 2020?" --stream
+          poetry run r2r rag --query="who was aristotle" --use-hybrid-search --stream
+
+          echo "User Registration"
+          curl -X POST http://localhost:7272/v2/register \
+            -H "Content-Type: application/json" \
+            -d '{
+              "email": "test@example.com",
+              "password": "password123"
+            }'
+
+          echo "User Login"
+          curl -X POST http://localhost:7272/v2/login \
+            -H "Content-Type: application/x-www-form-urlencoded" \
+            -d "username=test@example.com&password=password123"
 
           echo "Users Overview"
-          r2r users-overview
+          poetry run r2r users-overview
+
+          echo "Logging"
+          poetry run r2r logs
 
           echo "Analytics"
-          r2r analytics --filters '{"search_latencies": "search_latency"}' --analysis-types '{"search_latencies": ["basic_statistics", "search_latency"]}'
+          poetry run r2r analytics --filters '{"search_latencies": "search_latency"}' --analysis-types '{"search_latencies": ["basic_statistics", "search_latency"]}'
 
-          echo "Logging"
-          r2r logs
+      - name: GraphRAG
+        working-directory: ./py
+        run: |
+          echo "Create Knowledge Graph"
+          poetry run r2r create-graph --document-ids=9fbe403b-c11c-5aae-8ade-ef22980c3ad1
 
-          echo "Docker Down"
-          r2r docker-down
+          echo "Inspect Knowledge Graph"
+          poetry run r2r inspect-knowledge-graph
 
-  cleanup:
-    needs: build-and-test
-    runs-on: arm3
-    if: always()
-    steps:
-      - name: Clean up Virtual Environment
+          echo "Graph Enrichment"
+          poetry run r2r enrich-graph
+
+          echo "Local Search"
+          poetry run r2r search --query="Who is Aristotle?" --use-kg-search --kg-search-type=local
+
+          echo "Global Search"
+          poetry run r2r search --query="What were Aristotle's key contributions to philosophy?" --use-kg-search --kg-search-type=global --max-llm-queries-for-global-search=100
+
+          echo "RAG"
+          poetry run r2r rag --query="What are the key contributions of Aristotle to modern society?" --use-kg-search --kg-search-type=global --max-llm-queries-for-global-search=100
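+
+          # For reference, the same kind of KG search issued directly against the /v2/search
+          # endpoint documented in docs/api-reference/openapi.json below; a minimal sketch,
+          # assuming the server started in "Start R2R server" is still running.
+          curl -X POST http://localhost:7272/v2/search \
+            -H "Content-Type: application/json" \
+            -d '{"query": "Who is Aristotle?", "kg_search_settings": {"use_kg_search": true, "kg_search_type": "local"}}'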
+
+
+
+
+
+      - name: Advanced RAG
+        working-directory: ./py
+        run: |
+          echo "HyDE"
+          poetry run r2r rag --query="who was aristotle" --use-hybrid-search --stream --search-strategy=hyde
+
+          echo "Rag-Fusion"
+          poetry run r2r rag --query="Explain the theory of relativity" --use-hybrid-search --stream --search-strategy=rag_fusion
+
+      - name: Stop R2R server
         run: |
-          if [ -d "venv" ]; then
-            deactivate || true
-            rm -rf venv
-          fi
-          docker stop $(docker ps -a -q) || true
-          docker system prune -af --volumes
-          docker network prune --force
-          docker volume rm $(docker volume ls -qf dangling=true) || true
+          pkill -f "r2r serve"
diff --git a/docs/api-reference/openapi.json b/docs/api-reference/openapi.json
index a44a15e9d..99adc7967 100644
--- a/docs/api-reference/openapi.json
+++ b/docs/api-reference/openapi.json
@@ -1 +1 @@
- -H \"Authorization: Bearer YOUR_API_KEY\" \\\n -F \"file=@pg_essay_1_v2.txt;type=text/plain\" \\\n -F 'document_ids=[\"b4ac4dd6-5f27-596e-a55b-7cf242ca30aa\"]'\n"}]}},"/v2/health":{"get":{"summary":"Health Check","operationId":"health_check_v2_health_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}}}}},"/v2/server_stats":{"get":{"summary":"Server Stats","operationId":"server_stats_v2_server_stats_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_ServerStats_"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/update_prompt":{"post":{"summary":"Update Prompt App","operationId":"update_prompt_app_v2_update_prompt_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_update_prompt_app_v2_update_prompt_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_UpdatePromptResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/add_prompt":{"post":{"summary":"Add Prompt App","operationId":"add_prompt_app_v2_add_prompt_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_add_prompt_app_v2_add_prompt_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_UpdatePromptResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/get_prompt/{prompt_name}":{"get":{"summary":"Get Prompt App","operationId":"get_prompt_app_v2_get_prompt__prompt_name__get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"prompt_name","in":"path","required":true,"schema":{"type":"string","description":"Prompt name","title":"Prompt Name"},"description":"Prompt name"},{"name":"inputs","in":"query","required":false,"schema":{"anyOf":[{"type":"string","contentMediaType":"application/json","contentSchema":{"type":"object"}},{"type":"null"}],"description":"JSON-encoded prompt inputs","title":"Inputs"},"description":"JSON-encoded prompt inputs"},{"name":"prompt_override","in":"query","required":false,"schema":{"anyOf":[{"type":"string"},{"type":"null"}],"description":"Prompt override","title":"Prompt Override"},"description":"Prompt
override"}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_UpdatePromptResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/get_all_prompts":{"get":{"summary":"Get All Prompts App","operationId":"get_all_prompts_app_v2_get_all_prompts_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_AllPromptsResponse_"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/delete_prompt/{prompt_name}":{"delete":{"summary":"Delete Prompt App","operationId":"delete_prompt_app_v2_delete_prompt__prompt_name__delete","security":[{"HTTPBearer":[]}],"parameters":[{"name":"prompt_name","in":"path","required":true,"schema":{"type":"string","description":"Prompt name","title":"Prompt Name"},"description":"Prompt name"}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/analytics":{"get":{"summary":"Get Analytics App","operationId":"get_analytics_app_v2_analytics_get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"filter_criteria","in":"query","required":false,"schema":{"anyOf":[{"type":"string","contentMediaType":"application/json","contentSchema":{"type":"object"}},{"type":"null"}],"default":{},"title":"Filter Criteria"}},{"name":"analysis_types","in":"query","required":false,"schema":{"anyOf":[{"type":"string","contentMediaType":"application/json","contentSchema":{"type":"object"}},{"type":"null"}],"default":{},"title":"Analysis Types"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_AnalyticsResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/logs":{"get":{"summary":"Logs App","operationId":"logs_app_v2_logs_get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"run_type_filter","in":"query","required":false,"schema":{"anyOf":[{"type":"string"},{"type":"null"}],"default":"","title":"Run Type Filter"}},{"name":"offset","in":"query","required":false,"schema":{"type":"integer","minimum":0,"default":0,"title":"Offset"}},{"name":"limit","in":"query","required":false,"schema":{"type":"integer","maximum":1000,"minimum":1,"default":100,"title":"Limit"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_List_LogResponse__"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/app_settings":{"get":{"summary":"App Settings","operationId":"app_settings_v2_app_settings_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_AppSettingsResponse_"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/score_completion":{"post":{"summary":"Score 
Completion","operationId":"score_completion_v2_score_completion_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_score_completion_v2_score_completion_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_ScoreCompletionResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/users_overview":{"get":{"summary":"Users Overview App","operationId":"users_overview_app_v2_users_overview_get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"user_ids","in":"query","required":false,"schema":{"anyOf":[{"type":"array","items":{"type":"string"}},{"type":"null"}],"default":[],"title":"User Ids"}},{"name":"offset","in":"query","required":false,"schema":{"type":"integer","minimum":0,"default":0,"title":"Offset"}},{"name":"limit","in":"query","required":false,"schema":{"type":"integer","maximum":1000,"minimum":1,"default":100,"title":"Limit"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_List_UserOverviewResponse__"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/delete":{"delete":{"summary":"Delete App","operationId":"delete_app_v2_delete_delete","security":[{"HTTPBearer":[]}],"parameters":[{"name":"filters","in":"query","required":true,"schema":{"type":"string","description":"JSON-encoded filters","title":"Filters"},"description":"JSON-encoded filters"}],"responses":{"204":{"description":"Successful Response"},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/download_file/{document_id}":{"get":{"summary":"Download File App","description":"Download a file by its document ID as a stream.","operationId":"download_file_app_v2_download_file__document_id__get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"document_id","in":"path","required":true,"schema":{"type":"string","description":"Document ID","title":"Document Id"},"description":"Document ID"}],"responses":{"200":{"description":"Successful Response"},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/documents_overview":{"get":{"summary":"Documents Overview App","operationId":"documents_overview_app_v2_documents_overview_get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"document_ids","in":"query","required":false,"schema":{"type":"array","items":{"type":"string"},"default":[],"title":"Document Ids"}},{"name":"offset","in":"query","required":false,"schema":{"type":"integer","minimum":0,"default":0,"title":"Offset"}},{"name":"limit","in":"query","required":false,"schema":{"type":"integer","maximum":1000,"minimum":1,"default":100,"title":"Limit"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_List_DocumentOverviewResponse__"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/document_chunks/{document_id}":{"get":{"summary":"Document Chunks 
App","operationId":"document_chunks_app_v2_document_chunks__document_id__get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"document_id","in":"path","required":true,"schema":{"type":"string","title":"Document Id"}},{"name":"offset","in":"query","required":false,"schema":{"anyOf":[{"type":"integer","minimum":0},{"type":"null"}],"default":0,"title":"Offset"}},{"name":"limit","in":"query","required":false,"schema":{"anyOf":[{"type":"integer","minimum":0},{"type":"null"}],"default":100,"title":"Limit"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_List_DocumentChunkResponse__"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/inspect_knowledge_graph":{"get":{"summary":"Inspect Knowledge Graph","operationId":"inspect_knowledge_graph_v2_inspect_knowledge_graph_get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"offset","in":"query","required":false,"schema":{"type":"integer","default":0,"title":"Offset"}},{"name":"limit","in":"query","required":false,"schema":{"type":"integer","default":100,"title":"Limit"}},{"name":"print_descriptions","in":"query","required":false,"schema":{"type":"boolean","default":false,"title":"Print Descriptions"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_str_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/collections_overview":{"get":{"summary":"Collections Overview App","operationId":"collections_overview_app_v2_collections_overview_get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"collection_ids","in":"query","required":false,"schema":{"anyOf":[{"type":"array","items":{"type":"string"}},{"type":"null"}],"title":"Collection Ids"}},{"name":"offset","in":"query","required":false,"schema":{"anyOf":[{"type":"integer","minimum":0},{"type":"null"}],"default":0,"title":"Offset"}},{"name":"limit","in":"query","required":false,"schema":{"anyOf":[{"type":"integer","maximum":1000,"minimum":1},{"type":"null"}],"default":100,"title":"Limit"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_List_GroupOverviewResponse__"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/create_collection":{"post":{"summary":"Create Collection App","operationId":"create_collection_app_v2_create_collection_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_create_collection_app_v2_create_collection_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_GroupResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/get_collection/{collection_id}":{"get":{"summary":"Get Collection App","operationId":"get_collection_app_v2_get_collection__collection_id__get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"collection_id","in":"path","required":true,"schema":{"type":"string","description":"Collection 
ID","title":"Collection Id"},"description":"Collection ID"}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_GroupResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/update_collection":{"put":{"summary":"Update Collection App","operationId":"update_collection_app_v2_update_collection_put","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_update_collection_app_v2_update_collection_put"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_GroupResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/delete_collection/{collection_id}":{"delete":{"summary":"Delete Collection App","operationId":"delete_collection_app_v2_delete_collection__collection_id__delete","security":[{"HTTPBearer":[]}],"parameters":[{"name":"collection_id","in":"path","required":true,"schema":{"type":"string","description":"Collection ID","title":"Collection Id"},"description":"Collection ID"}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/list_collections":{"get":{"summary":"List Collections App","operationId":"list_collections_app_v2_list_collections_get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"offset","in":"query","required":false,"schema":{"type":"integer","minimum":0,"default":0,"title":"Offset"}},{"name":"limit","in":"query","required":false,"schema":{"type":"integer","maximum":1000,"minimum":1,"default":100,"title":"Limit"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_List_GroupResponse__"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/add_user_to_collection":{"post":{"summary":"Add User To Collection App","operationId":"add_user_to_collection_app_v2_add_user_to_collection_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_add_user_to_collection_app_v2_add_user_to_collection_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_AddUserResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/remove_user_from_collection":{"post":{"summary":"Remove User From Collection App","operationId":"remove_user_from_collection_app_v2_remove_user_from_collection_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_remove_user_from_collection_app_v2_remove_user_from_collection_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation 
Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/get_users_in_collection/{collection_id}":{"get":{"summary":"Get Users In Collection App","operationId":"get_users_in_collection_app_v2_get_users_in_collection__collection_id__get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"collection_id","in":"path","required":true,"schema":{"type":"string","description":"Collection ID","title":"Collection Id"},"description":"Collection ID"},{"name":"offset","in":"query","required":false,"schema":{"type":"integer","minimum":0,"description":"Pagination offset","default":0,"title":"Offset"},"description":"Pagination offset"},{"name":"limit","in":"query","required":false,"schema":{"type":"integer","maximum":1000,"minimum":1,"description":"Pagination limit","default":100,"title":"Limit"},"description":"Pagination limit"}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/user_collections/{user_id}":{"get":{"summary":"Get Collections For User App","operationId":"get_collections_for_user_app_v2_user_collections__user_id__get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"user_id","in":"path","required":true,"schema":{"type":"string","description":"User ID","title":"User Id"},"description":"User ID"},{"name":"offset","in":"query","required":false,"schema":{"type":"integer","minimum":0,"description":"Pagination offset","default":0,"title":"Offset"},"description":"Pagination offset"},{"name":"limit","in":"query","required":false,"schema":{"type":"integer","maximum":1000,"minimum":1,"description":"Pagination limit","default":100,"title":"Limit"},"description":"Pagination limit"}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/assign_document_to_collection":{"post":{"summary":"Assign Document To Collection App","operationId":"assign_document_to_collection_app_v2_assign_document_to_collection_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_assign_document_to_collection_app_v2_assign_document_to_collection_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/remove_document_from_collection":{"post":{"summary":"Remove Document From Collection App","operationId":"remove_document_from_collection_app_v2_remove_document_from_collection_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_remove_document_from_collection_app_v2_remove_document_from_collection_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/document_collections/{document_id}":{"get":{"summary":"Document Collections 
App","operationId":"document_collections_app_v2_document_collections__document_id__get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"document_id","in":"path","required":true,"schema":{"type":"string","description":"Document ID","title":"Document Id"},"description":"Document ID"},{"name":"offset","in":"query","required":false,"schema":{"type":"integer","minimum":0,"default":0,"title":"Offset"}},{"name":"limit","in":"query","required":false,"schema":{"type":"integer","maximum":1000,"minimum":1,"default":100,"title":"Limit"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_List_GroupResponse__"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/collection/{collection_id}/documents":{"get":{"summary":"Documents In Collection App","operationId":"documents_in_collection_app_v2_collection__collection_id__documents_get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"collection_id","in":"path","required":true,"schema":{"type":"string","description":"Collection ID","title":"Collection Id"},"description":"Collection ID"},{"name":"offset","in":"query","required":false,"schema":{"type":"integer","minimum":0,"default":0,"title":"Offset"}},{"name":"limit","in":"query","required":false,"schema":{"type":"integer","maximum":1000,"minimum":1,"default":100,"title":"Limit"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_List_DocumentOverviewResponse__"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/search":{"post":{"summary":"Search App","description":"Perform a search query on the vector database and knowledge graph.\n\nThis endpoint allows for complex filtering of search results using PostgreSQL-based queries.\nFilters can be applied to various fields such as document_id, and internal metadata values.\n\n\nAllowed operators include `eq`, `neq`, `gt`, `gte`, `lt`, `lte`, `like`, `ilike`, `in`, and `nin`.","operationId":"search_app_v2_search_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_search_app_v2_search_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_SearchResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}],"x-codeSamples":[{"lang":"Python","source":"from r2r import R2RClient\n\nclient = R2RClient(\"http://localhost:7272\")\n# when using auth, do client.login(...)\n\nresult = client.search(\n query=\"Who is Aristotle?\",\n vector_search_settings={\n \"use_vector_search\": True,\n \"filters\": {\"document_id\": {\"eq\": \"3e157b3a-8469-51db-90d9-52e7d896b49b\"}},\n \"search_limit\": 20,\n \"use_hybrid_search\": True\n },\n kg_search_settings={\n \"use_kg_search\": True, # graph needs to be constructed first\n \"kg_search_type\": \"local\",\n \"kg_search_level\": \"0\",\n \"generation_config\": {\n \"model\": \"gpt-4o-mini\",\n \"temperature\": 0.7,\n },\n \"local_search_limits\": {\n \"__Entity__\": 20,\n \"__Relationship__\": 20,\n \"__Community__\": 20,\n },\n \"max_community_description_length\": 65536,\n 
\"max_llm_queries_for_global_search\": 250\n }\n)\n"},{"lang":"Shell","source":"curl -X POST \"https://api.example.com/search\" \\\n -H \"Content-Type: application/json\" \\\n -H \"Authorization: Bearer YOUR_API_KEY\" \\\n -d '{\n \"query\": \"Who is Aristotle?\",\n \"vector_search_settings\": {\n \"use_vector_search\": true,\n \"filters\": {\"document_id\": {\"eq\": \"3e157b3a-8469-51db-90d9-52e7d896b49b\"}},\n \"search_limit\": 20,\n \"use_hybrid_search\": true\n },\n \"kg_search_settings\": {\n \"use_kg_search\": true, # graph needs to be constructed first\n \"kg_search_type\": \"local\",\n \"kg_search_level\": \"0\",\n \"generation_config\": {\n \"model\": \"gpt-4o-mini\",\n \"temperature\": 0.7\n },\n \"local_search_limits\": {\n \"__Entity__\": 20,\n \"__Relationship__\": 20,\n \"__Community__\": 20,\n },\n \"max_community_description_length\": 65536,\n \"max_llm_queries_for_global_search\": 250\n }\n }'\n"}]}},"/v2/rag":{"post":{"summary":"Rag App","description":"Execute a RAG (Retrieval-Augmented Generation) query.\n\nThis endpoint combines search results with language model generation.\nIt supports the same filtering capabilities as the search endpoint,\nallowing for precise control over the retrieved context.\n\nThe generation process can be customized using the rag_generation_config parameter.","operationId":"rag_app_v2_rag_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_rag_app_v2_rag_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_RAGResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}],"x-codeSamples":[{"lang":"Python","source":"from r2r import R2RClient\n\nclient = R2RClient(\"http://localhost:7272\")\n# when using auth, do client.login(...)\n\nresult = client.rag(\n query=\"Who is Aristotle?\",\n vector_search_settings={\n \"use_vector_search\": True,\n \"filters\": {\"document_id\": {\"eq\": \"3e157b3a-8469-51db-90d9-52e7d896b49b\"}},\n \"search_limit\": 20,\n \"use_hybrid_search\": True\n },\n kg_search_settings={\n \"use_kg_search\": True,\n \"kg_search_type\": \"local\",\n \"kg_search_level\": \"0\",\n \"generation_config\": {\n \"model\": \"gpt-4o-mini\",\n \"temperature\": 0.7,\n },\n \"local_search_limits\": {\n \"__Entity__\": 20,\n \"__Relationship__\": 20,\n \"__Community__\": 20,\n },\n \"max_community_description_length\": 65536,\n \"max_llm_queries_for_global_search\": 250\n },\n rag_generation_config={\n \"stream\": False,\n \"temperature\": 0.7,\n \"max_tokens\": 150\n }\n)\n"},{"lang":"Shell","source":"curl -X POST \"https://api.example.com/rag\" \\\n -H \"Content-Type: application/json\" \\\n -H \"Authorization: Bearer YOUR_API_KEY\" \\\n -d '{\n \"query\": \"Who is Aristotle?\",\n \"vector_search_settings\": {\n \"use_vector_search\": true,\n \"filters\": {\"document_id\": {\"eq\": \"3e157b3a-8469-51db-90d9-52e7d896b49b\"}},\n \"search_limit\": 20,\n \"use_hybrid_search\": True\n },\n \"kg_search_settings\": {\n \"use_kg_search\": true, # graph needs to be constructed first\n \"kg_search_type\": \"local\",\n \"kg_search_level\": \"0\",\n \"generation_config\": {\n \"model\": \"gpt-4o-mini\",\n \"temperature\": 0.7\n },\n \"local_search_limits\": {\n \"__Entity__\": 20,\n \"__Relationship__\": 20,\n \"__Community__\": 20,\n },\n \"max_community_description_length\": 
65536,\n \"max_llm_queries_for_global_search\": 250\n },\n \"rag_generation_config\": {\n \"stream\": false,\n \"temperature\": 0.7,\n \"max_tokens\": 150\n }\n }'\n"}]}},"/v2/agent":{"post":{"summary":"Agent App","description":"Implement an agent-based interaction for complex query processing.\n\nThis endpoint supports multi-turn conversations and can handle complex queries\nby breaking them down into sub-tasks. It uses the same filtering capabilities\nas the search and RAG endpoints for retrieving relevant information.\n\nThe agent's behavior can be customized using the rag_generation_config and\ntask_prompt_override parameters.","operationId":"agent_app_v2_agent_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_agent_app_v2_agent_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_RAGAgentResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}],"x-codeSamples":[{"lang":"Python","source":"from r2r import R2RClient\n\nclient = R2RClient(\"http://localhost:7272\")\n# when using auth, do client.login(...)\n\nresult = client.agent(\n messages=[\n {\"role\": \"user\", \"content\": \"Who is the greatest philospher of all time?\"},\n {\"role\": \"assistant\", \"content\": \"Aristotle is widely considered the greatest philospher of all time.\"},\n {\"role\": \"user\", \"content\": \"Can you tell me more about him?\"}\n ],\n vector_search_settings={\n \"use_vector_search\": True,\n \"filters\": {\"document_id\": {\"eq\": \"5e157b3a-8469-51db-90d9-52e7d896b49b\"}},\n \"search_limit\": 20,\n \"use_hybrid_search\": True\n },\n rag_generation_config={\n \"stream\": False,\n \"temperature\": 0.7,\n \"max_tokens\": 200\n },\n include_title_if_available=True\n)\n"},{"lang":"Shell","source":"curl -X POST \"https://api.example.com/agent\" \\\n -H \"Content-Type: application/json\" \\\n -H \"Authorization: Bearer YOUR_API_KEY\" \\\n -d '{\n \"messages\": [\n {\"role\": \"user\", \"content\": \"Who is the greatest philospher of all time?\"},\n {\"role\": \"assistant\", \"content\": \"Aristotle is widely considered the greatest philospher of all time.\"},\n {\"role\": \"user\", \"content\": \"Can you tell me more about him?\"}\n ],\n \"vector_search_settings\": {\n \"use_vector_search\": true,\n \"filters\": {\"document_id\": {\"eq\": \"5e157b3a-8469-51db-90d9-52e7d896b49b\"}},\n \"search_limit\": 20,\n \"use_hybrid_search\": true\n },\n \"kg_search_settings\": {\n \"use_kg_search\": false # to enable this, please read the graphrag cookbook\n },\n \"rag_generation_config\": {\n \"stream\": false,\n \"temperature\": 0.7,\n \"max_tokens\": 200\n },\n \"include_title_if_available\": true\n }'\n"}]}},"/v2/register":{"post":{"summary":"Register App","description":"Register a new user with the given email and password.","operationId":"register_app_v2_register_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_register_app_v2_register_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_UserResponse_"}}}},"422":{"description":"Validation 
Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/verify_email":{"post":{"summary":"Verify Email App","description":"Verify a user's email address.\n\nThis endpoint is used to confirm a user's email address using the verification code\nsent to their email after registration.","operationId":"verify_email_app_v2_verify_email_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_verify_email_app_v2_verify_email_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_GenericMessageResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/login":{"post":{"summary":"Login App","description":"Authenticate a user and provide access tokens.\n\nThis endpoint authenticates a user using their email (username) and password,\nand returns access and refresh tokens upon successful authentication.","operationId":"login_app_v2_login_post","requestBody":{"content":{"application/x-www-form-urlencoded":{"schema":{"$ref":"#/components/schemas/Body_login_app_v2_login_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_TokenResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/logout":{"post":{"summary":"Logout App","description":"Log out the current user.\n\nThis endpoint invalidates the user's current access token, effectively logging them out.","operationId":"logout_app_v2_logout_post","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_GenericMessageResponse_"}}}}},"security":[{"OAuth2PasswordBearer":[]},{"HTTPBearer":[]}]}},"/v2/user":{"get":{"summary":"Get User App","description":"Get the current user's profile information.\n\nThis endpoint returns the profile information of the currently authenticated user.","operationId":"get_user_app_v2_user_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_UserResponse_"}}}}},"security":[{"HTTPBearer":[]}]},"put":{"summary":"Put User App","description":"Update the current user's profile information.\n\nThis endpoint allows the authenticated user to update their profile information.","operationId":"put_user_app_v2_user_put","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_put_user_app_v2_user_put"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_UserResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/refresh_access_token":{"post":{"summary":"Refresh Access Token App","description":"Refresh the access token using a refresh token.\n\nThis endpoint allows users to obtain a new access token using their refresh token.","operationId":"refresh_access_token_app_v2_refresh_access_token_post","requestBody":{"content":{"application/json":{"schema":{"type":"string","title":"Refresh Token","description":"Refresh 
token"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_TokenResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/change_password":{"post":{"summary":"Change Password App","description":"Change the authenticated user's password.\n\nThis endpoint allows users to change their password by providing their current password\nand a new password.","operationId":"change_password_app_v2_change_password_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_change_password_app_v2_change_password_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_GenericMessageResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/request_password_reset":{"post":{"summary":"Request Password Reset App","description":"Request a password reset for a user.\n\nThis endpoint initiates the password reset process by sending a reset link\nto the specified email address.","operationId":"request_password_reset_app_v2_request_password_reset_post","requestBody":{"content":{"application/json":{"schema":{"type":"string","format":"email","title":"Email","description":"User's email address"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_GenericMessageResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/reset_password":{"post":{"summary":"Reset Password App","operationId":"reset_password_app_v2_reset_password_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_reset_password_app_v2_reset_password_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_GenericMessageResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/user/{user_id}":{"delete":{"summary":"Delete User App","description":"Delete a user account.\n\nThis endpoint allows users to delete their own account or, for superusers,\nto delete any user account.","operationId":"delete_user_app_v2_user__user_id__delete","security":[{"HTTPBearer":[]}],"parameters":[{"name":"user_id","in":"path","required":true,"schema":{"type":"string","description":"ID of the user to delete","title":"User Id"},"description":"ID of the user to delete"}],"requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_delete_user_app_v2_user__user_id__delete"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_GenericMessageResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/create_graph":{"post":{"summary":"Create Graph","description":"Creating a graph on your documents. 
This endpoint takes input a list of document ids and KGCreationSettings. If document IDs are not provided, the graph will be created on all documents in the system.\n\nThis step extracts the relevant entities and relationships from the documents and creates a graph based on the extracted information. You can view the graph through the neo4j browser.\n\nIn order to do GraphRAG, you will need to run the enrich_graph endpoint.","operationId":"create_graph_v2_create_graph_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_create_graph_v2_create_graph_post"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_KGCreationResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/enrich_graph":{"post":{"summary":"Enrich Graph","description":"This endpoint enriches the graph with additional information. It creates communities of nodes based on their similarity and adds embeddings to the graph. This step is necessary for GraphRAG to work.","operationId":"enrich_graph_v2_enrich_graph_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_enrich_graph_v2_enrich_graph_post"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_KGEnrichmentResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/openapi_spec":{"get":{"summary":"Openapi Spec","operationId":"openapi_spec_v2_openapi_spec_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}}}}}},"components":{"schemas":{"AddUserResponse":{"properties":{"result":{"type":"boolean","title":"Result"}},"type":"object","required":["result"],"title":"AddUserResponse"},"AllPromptsResponse":{"properties":{"prompts":{"additionalProperties":{"$ref":"#/components/schemas/PromptResponse"},"type":"object","title":"Prompts"}},"type":"object","required":["prompts"],"title":"AllPromptsResponse"},"AnalyticsResponse":{"properties":{"analytics_data":{"anyOf":[{"type":"object"},{"type":"null"}],"title":"Analytics Data"},"filtered_logs":{"type":"object","title":"Filtered Logs"}},"type":"object","required":["filtered_logs"],"title":"AnalyticsResponse"},"AppSettingsResponse":{"properties":{"config":{"type":"object","title":"Config"},"prompts":{"type":"object","title":"Prompts"}},"type":"object","required":["config","prompts"],"title":"AppSettingsResponse"},"Body_add_prompt_app_v2_add_prompt_post":{"properties":{"name":{"type":"string","title":"Name","description":"Prompt name"},"template":{"type":"string","title":"Template","description":"Prompt template"},"input_types":{"additionalProperties":{"type":"string"},"type":"object","title":"Input Types","description":"Input types","default":{}}},"type":"object","required":["name","template"],"title":"Body_add_prompt_app_v2_add_prompt_post"},"Body_add_user_to_collection_app_v2_add_user_to_collection_post":{"properties":{"user_id":{"type":"string","title":"User Id","description":"User ID"},"collection_id":{"type":"string","title":"Collection Id","description":"Collection 
ID"}},"type":"object","required":["user_id","collection_id"],"title":"Body_add_user_to_collection_app_v2_add_user_to_collection_post"},"Body_agent_app_v2_agent_post":{"properties":{"messages":{"items":{"$ref":"#/components/schemas/Message-Input"},"type":"array","title":"Messages","description":"List of message objects"},"vector_search_settings":{"$ref":"#/components/schemas/VectorSearchSettings","description":"Vector search settings"},"kg_search_settings":{"$ref":"#/components/schemas/KGSearchSettings","description":"Knowledge graph search settings"},"rag_generation_config":{"$ref":"#/components/schemas/GenerationConfig","description":"RAG generation configuration"},"task_prompt_override":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Task Prompt Override","description":"Task prompt override"},"include_title_if_available":{"type":"boolean","title":"Include Title If Available","description":"Includes document title in chunk response, if available.","default":true}},"type":"object","required":["messages"],"title":"Body_agent_app_v2_agent_post"},"Body_assign_document_to_collection_app_v2_assign_document_to_collection_post":{"properties":{"document_id":{"type":"string","title":"Document Id","description":"Document ID"},"collection_id":{"type":"string","title":"Collection Id","description":"Collection ID"}},"type":"object","required":["document_id","collection_id"],"title":"Body_assign_document_to_collection_app_v2_assign_document_to_collection_post"},"Body_change_password_app_v2_change_password_post":{"properties":{"current_password":{"type":"string","title":"Current Password","description":"Current password"},"new_password":{"type":"string","title":"New Password","description":"New password"}},"type":"object","required":["current_password","new_password"],"title":"Body_change_password_app_v2_change_password_post"},"Body_create_collection_app_v2_create_collection_post":{"properties":{"name":{"type":"string","title":"Name","description":"Collection name"},"description":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Description","description":"Collection description","default":""}},"type":"object","required":["name"],"title":"Body_create_collection_app_v2_create_collection_post"},"Body_create_graph_v2_create_graph_post":{"properties":{"document_ids":{"anyOf":[{"items":{"type":"string"},"type":"array"},{"type":"null"}],"title":"Document Ids","description":"List of document IDs to create the graph on."},"kg_creation_settings":{"anyOf":[{"type":"string","contentMediaType":"application/json","contentSchema":{"$ref":"#/components/schemas/KGCreationSettings"}},{"type":"null"}],"title":"Kg Creation Settings","description":"Settings for the graph creation process."}},"type":"object","title":"Body_create_graph_v2_create_graph_post"},"Body_delete_user_app_v2_user__user_id__delete":{"properties":{"password":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Password","description":"User's current password"},"delete_vector_data":{"anyOf":[{"type":"boolean"},{"type":"null"}],"title":"Delete Vector Data","description":"Whether to delete the user's vector data","default":false}},"type":"object","title":"Body_delete_user_app_v2_user__user_id__delete"},"Body_enrich_graph_v2_enrich_graph_post":{"properties":{"skip_clustering":{"type":"boolean","title":"Skip Clustering","description":"Whether to skip leiden clustering on the graph or not.","default":false},"force_enrichment":{"type":"boolean","title":"Force Enrichment","description":"Force Enrichment step even if graph creation is still in 
progress for some documents.","default":false},"kg_enrichment_settings":{"anyOf":[{"type":"string","contentMediaType":"application/json","contentSchema":{"$ref":"#/components/schemas/KGEnrichmentSettings"}},{"type":"null"}],"title":"Kg Enrichment Settings","description":"Settings for the graph enrichment process."}},"type":"object","title":"Body_enrich_graph_v2_enrich_graph_post"},"Body_ingest_files_app_v2_ingest_files_post":{"properties":{"files":{"items":{"type":"string","format":"binary"},"type":"array","title":"Files","description":"The list of paths of input files to ingest into the system."},"document_ids":{"anyOf":[{"type":"string","contentMediaType":"application/json","contentSchema":{"items":{"type":"string","format":"uuid"},"type":"array"}},{"type":"null"}],"title":"Document Ids","description":"An optional list of document ids for each file. If not provided, the system will generate a unique document id via the `generate_user_document_id` method."},"metadatas":{"anyOf":[{"type":"string","contentMediaType":"application/json","contentSchema":{"items":{"type":"object"},"type":"array"}},{"type":"null"}],"title":"Metadatas","description":"An optional list of JSON metadata to affix to each file"},"chunking_config":{"anyOf":[{"type":"string","contentMediaType":"application/json","contentSchema":{"$ref":"#/components/schemas/ChunkingConfig"}},{"type":"null"}],"title":"Chunking Config","description":"An optional dictionary to override the default chunking configuration for the ingestion process. If not provided, the system will use the default server-side chunking configuration."}},"type":"object","required":["files"],"title":"Body_ingest_files_app_v2_ingest_files_post"},"Body_login_app_v2_login_post":{"properties":{"grant_type":{"anyOf":[{"type":"string","pattern":"password"},{"type":"null"}],"title":"Grant Type"},"username":{"type":"string","title":"Username"},"password":{"type":"string","title":"Password"},"scope":{"type":"string","title":"Scope","default":""},"client_id":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Client Id"},"client_secret":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Client Secret"}},"type":"object","required":["username","password"],"title":"Body_login_app_v2_login_post"},"Body_put_user_app_v2_user_put":{"properties":{"email":{"anyOf":[{"type":"string","format":"email"},{"type":"null"}],"title":"Email","description":"Updated email address"},"name":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Name","description":"Updated user name"},"bio":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Bio","description":"Updated user bio"},"profile_picture":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Profile Picture","description":"Updated profile picture URL"}},"type":"object","title":"Body_put_user_app_v2_user_put"},"Body_rag_app_v2_rag_post":{"properties":{"query":{"type":"string","title":"Query","description":"RAG query"},"vector_search_settings":{"$ref":"#/components/schemas/VectorSearchSettings","description":"Vector search settings"},"kg_search_settings":{"$ref":"#/components/schemas/KGSearchSettings","description":"Knowledge graph search settings"},"rag_generation_config":{"$ref":"#/components/schemas/GenerationConfig","description":"RAG generation configuration"},"task_prompt_override":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Task Prompt Override","description":"Task prompt override"},"include_title_if_available":{"type":"boolean","title":"Include Title If Available","description":"Includes document title in chunk 
response, if available.","default":false}},"type":"object","required":["query"],"title":"Body_rag_app_v2_rag_post"},"Body_register_app_v2_register_post":{"properties":{"email":{"type":"string","format":"email","title":"Email","description":"User's email address"},"password":{"type":"string","title":"Password","description":"User's password"}},"type":"object","required":["email","password"],"title":"Body_register_app_v2_register_post"},"Body_remove_document_from_collection_app_v2_remove_document_from_collection_post":{"properties":{"document_id":{"type":"string","title":"Document Id","description":"Document ID"},"collection_id":{"type":"string","title":"Collection Id","description":"Collection ID"}},"type":"object","required":["document_id","collection_id"],"title":"Body_remove_document_from_collection_app_v2_remove_document_from_collection_post"},"Body_remove_user_from_collection_app_v2_remove_user_from_collection_post":{"properties":{"user_id":{"type":"string","title":"User Id","description":"User ID"},"collection_id":{"type":"string","title":"Collection Id","description":"Collection ID"}},"type":"object","required":["user_id","collection_id"],"title":"Body_remove_user_from_collection_app_v2_remove_user_from_collection_post"},"Body_reset_password_app_v2_reset_password_post":{"properties":{"reset_token":{"type":"string","title":"Reset Token","description":"Password reset token"},"new_password":{"type":"string","title":"New Password","description":"New password"}},"type":"object","required":["reset_token","new_password"],"title":"Body_reset_password_app_v2_reset_password_post"},"Body_retry_ingest_files_v2_retry_ingest_files_post":{"properties":{"document_ids":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Document Ids","description":"An optional list of document ids for each file. 
If not provided, the system will generate a unique document id via the `generate_user_document_id` method."}},"type":"object","required":["document_ids"],"title":"Body_retry_ingest_files_v2_retry_ingest_files_post"},"Body_score_completion_v2_score_completion_post":{"properties":{"message_id":{"type":"string","title":"Message Id","description":"Message ID"},"score":{"type":"number","title":"Score","description":"Completion score"}},"type":"object","required":["message_id","score"],"title":"Body_score_completion_v2_score_completion_post"},"Body_search_app_v2_search_post":{"properties":{"query":{"type":"string","title":"Query","description":"Search query"},"vector_search_settings":{"$ref":"#/components/schemas/VectorSearchSettings","description":"Vector search settings"},"kg_search_settings":{"$ref":"#/components/schemas/KGSearchSettings","description":"Knowledge graph search settings"}},"type":"object","required":["query"],"title":"Body_search_app_v2_search_post"},"Body_update_collection_app_v2_update_collection_put":{"properties":{"collection_id":{"type":"string","title":"Collection Id","description":"Collection ID"},"name":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Name","description":"Updated collection name"},"description":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Description","description":"Updated collection description"}},"type":"object","required":["collection_id"],"title":"Body_update_collection_app_v2_update_collection_put"},"Body_update_files_app_v2_update_files_post":{"properties":{"files":{"items":{"type":"string","format":"binary"},"type":"array","title":"Files","description":"The list of paths of input files to update in the system."},"document_ids":{"anyOf":[{"type":"string","contentMediaType":"application/json","contentSchema":{"items":{"type":"string","format":"uuid"},"type":"array"}},{"type":"null"}],"title":"Document Ids","description":"An optional list of document ids for each file. If not provided, the system will generate a unique document id via the `generate_user_document_id` method."},"metadatas":{"anyOf":[{"type":"string","contentMediaType":"application/json","contentSchema":{"items":{"type":"object"},"type":"array"}},{"type":"null"}],"title":"Metadatas","description":"An optional list of JSON metadata to affix to each file"},"chunking_config":{"anyOf":[{"type":"string","contentMediaType":"application/json","contentSchema":{"$ref":"#/components/schemas/ChunkingConfig"}},{"type":"null"}],"title":"Chunking Config","description":"An optional dictionary to override the default chunking configuration for the ingestion process. 
If not provided, the system will use the default server-side chunking configuration."}},"type":"object","required":["files"],"title":"Body_update_files_app_v2_update_files_post"},"Body_update_prompt_app_v2_update_prompt_post":{"properties":{"name":{"type":"string","title":"Name","description":"Prompt name"},"template":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Template","description":"Prompt template"},"input_types":{"anyOf":[{"additionalProperties":{"type":"string"},"type":"object"},{"type":"null"}],"title":"Input Types","description":"Input types","default":{}}},"type":"object","required":["name"],"title":"Body_update_prompt_app_v2_update_prompt_post"},"Body_verify_email_app_v2_verify_email_post":{"properties":{"email":{"type":"string","format":"email","title":"Email","description":"User's email address"},"verification_code":{"type":"string","title":"Verification Code","description":"Email verification code"}},"type":"object","required":["email","verification_code"],"title":"Body_verify_email_app_v2_verify_email_post"},"ChunkingConfig":{"properties":{"provider":{"type":"string"}},"type":"object","title":"ChunkingConfig","example":{"chunking_strategy":"by_title","combine_under_n_chars":128,"max_characters":1024,"new_after_n_chars":512,"overlap":20,"provider":"unstructured_local","strategy":"auto"}},"DocumentChunkResponse":{"properties":{"fragment_id":{"type":"string","format":"uuid","title":"Fragment Id"},"extraction_id":{"type":"string","format":"uuid","title":"Extraction Id"},"document_id":{"type":"string","format":"uuid","title":"Document Id"},"user_id":{"type":"string","format":"uuid","title":"User Id"},"collection_ids":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Collection Ids"},"text":{"type":"string","title":"Text"},"metadata":{"type":"object","title":"Metadata"}},"type":"object","required":["fragment_id","extraction_id","document_id","user_id","collection_ids","text","metadata"],"title":"DocumentChunkResponse"},"DocumentOverviewResponse":{"properties":{"id":{"type":"string","format":"uuid","title":"Id"},"title":{"type":"string","title":"Title"},"user_id":{"type":"string","format":"uuid","title":"User Id"},"type":{"type":"string","title":"Type"},"created_at":{"type":"string","format":"date-time","title":"Created At"},"updated_at":{"type":"string","format":"date-time","title":"Updated At"},"ingestion_status":{"type":"string","title":"Ingestion Status"},"restructuring_status":{"type":"string","title":"Restructuring Status"},"version":{"type":"string","title":"Version"},"collection_ids":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Collection Ids"},"metadata":{"type":"object","title":"Metadata"}},"type":"object","required":["id","title","user_id","type","created_at","updated_at","ingestion_status","restructuring_status","version","collection_ids","metadata"],"title":"DocumentOverviewResponse"},"GenerationConfig":{"properties":{"model":{"type":"string","title":"Model"},"temperature":{"type":"number","title":"Temperature"},"top_p":{"type":"number","title":"Top P"},"max_tokens_to_sample":{"type":"integer","title":"Max Tokens To Sample"},"stream":{"type":"boolean","title":"Stream"},"functions":{"anyOf":[{"items":{"type":"object"},"type":"array"},{"type":"null"}],"title":"Functions"},"tools":{"anyOf":[{"items":{"type":"object"},"type":"array"},{"type":"null"}],"title":"Tools"},"add_generation_kwargs":{"anyOf":[{"type":"object"},{"type":"null"}],"title":"Add Generation 
Kwargs"},"api_base":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Api Base"}},"type":"object","title":"GenerationConfig","max_tokens_to_sample":1024,"model":"openai/gpt-4o","stream":false,"temperature":0.1,"top_p":1.0},"GenericMessageResponse":{"properties":{"message":{"type":"string","title":"Message"}},"type":"object","required":["message"],"title":"GenericMessageResponse"},"GroupOverviewResponse":{"properties":{"collection_id":{"type":"string","format":"uuid","title":"Collection Id"},"name":{"type":"string","title":"Name"},"description":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Description"},"created_at":{"type":"string","format":"date-time","title":"Created At"},"updated_at":{"type":"string","format":"date-time","title":"Updated At"},"user_count":{"type":"integer","title":"User Count"},"document_count":{"type":"integer","title":"Document Count"}},"type":"object","required":["collection_id","name","description","created_at","updated_at","user_count","document_count"],"title":"GroupOverviewResponse"},"GroupResponse":{"properties":{"collection_id":{"type":"string","format":"uuid","title":"Collection Id"},"name":{"type":"string","title":"Name"},"description":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Description"},"created_at":{"type":"string","format":"date-time","title":"Created At"},"updated_at":{"type":"string","format":"date-time","title":"Updated At"}},"type":"object","required":["collection_id","name","description","created_at","updated_at"],"title":"GroupResponse"},"HTTPValidationError":{"properties":{"detail":{"items":{"$ref":"#/components/schemas/ValidationError"},"type":"array","title":"Detail"}},"type":"object","title":"HTTPValidationError"},"HybridSearchSettings":{"properties":{"full_text_weight":{"type":"number","title":"Full Text Weight","description":"Weight to apply to full text search","default":1.0},"semantic_weight":{"type":"number","title":"Semantic Weight","description":"Weight to apply to semantic search","default":5.0},"full_text_limit":{"type":"integer","title":"Full Text Limit","description":"Maximum number of results to return from full text search","default":200},"rrf_k":{"type":"integer","title":"Rrf K","description":"K-value for RRF (Rank Reciprocal Fusion)","default":50}},"type":"object","title":"HybridSearchSettings"},"IndexMeasure":{"type":"string","enum":["cosine_distance","l2_distance","max_inner_product"],"title":"IndexMeasure","description":"An enum representing the types of distance measures available for indexing.\n\nAttributes:\n cosine_distance (str): The cosine distance measure for indexing.\n l2_distance (str): The Euclidean (L2) distance measure for indexing.\n max_inner_product (str): The maximum inner product measure for indexing."},"IngestionResponse":{"properties":{"message":{"type":"string","title":"Message","description":"A message describing the result of the ingestion request."},"task_id":{"type":"string","format":"uuid","title":"Task Id","description":"The task ID of the ingestion request."},"document_id":{"type":"string","format":"uuid","title":"Document Id","description":"The ID of the document that was ingested."}},"type":"object","required":["message","task_id","document_id"],"title":"IngestionResponse","example":{"document_id":"9fbe403b-c11c-5aae-8ade-ef22980c3ad1","message":"Ingestion task queued successfully.","task_id":"c68dc72e-fc23-5452-8f49-d7bd46088a96"}},"KGCreationResponse":{"properties":{"message":{"type":"string","title":"Message","description":"A message describing the result of the 
restructure request."},"task_id":{"type":"string","format":"uuid","title":"Task Id","description":"The task ID of the restructure request."}},"type":"object","required":["message","task_id"],"title":"KGCreationResponse","example":{"message":"Graph creation queued successfully.","task_id":"c68dc72e-fc23-5452-8f49-d7bd46088a96"}},"KGCreationSettings":{"properties":{"entity_types":{"items":{"type":"string"},"type":"array","title":"Entity Types","description":"The types of entities to extract.","default":[]},"relation_types":{"items":{"type":"string"},"type":"array","title":"Relation Types","description":"The types of relations to extract.","default":[]},"fragment_merge_count":{"type":"integer","title":"Fragment Merge Count","description":"The number of fragments to merge into a single KG extraction.","default":4},"max_knowledge_triples":{"type":"integer","title":"Max Knowledge Triples","description":"The maximum number of knowledge triples to extract from each chunk.","default":100},"generation_config":{"$ref":"#/components/schemas/GenerationConfig","description":"Configuration for text generation during graph enrichment."}},"type":"object","title":"KGCreationSettings","description":"Settings for knowledge graph creation."},"KGEnrichmentResponse":{"properties":{"message":{"type":"string","title":"Message","description":"A message describing the result of the restructure request."},"task_id":{"type":"string","format":"uuid","title":"Task Id","description":"The task ID of the restructure request."}},"type":"object","required":["message","task_id"],"title":"KGEnrichmentResponse","example":{"message":"Graph enrichment queued successfuly.","task_id":"c68dc72e-fc23-5452-8f49-d7bd46088a96"}},"KGEnrichmentSettings":{"properties":{"max_description_input_length":{"type":"integer","title":"Max Description Input Length","description":"The maximum length of the description for a node in the graph.","default":65536},"max_summary_input_length":{"type":"integer","title":"Max Summary Input Length","description":"The maximum length of the summary for a community.","default":65536},"generation_config":{"$ref":"#/components/schemas/GenerationConfig","description":"Configuration for text generation during graph enrichment."},"leiden_params":{"type":"object","title":"Leiden Params","description":"Parameters for the Leiden algorithm."}},"type":"object","title":"KGEnrichmentSettings","description":"Settings for knowledge graph enrichment."},"KGGlobalSearchResult":{"properties":{"query":{"type":"string","title":"Query"},"search_result":{"items":{"type":"string"},"type":"array","title":"Search Result"}},"type":"object","required":["query","search_result"],"title":"KGGlobalSearchResult","description":"Result of a global knowledge graph search operation.","query":"What were Aristotles key contributions to philosophy?","search_result":["### Aristotle's Key Contributions to Philosophy\n\nAristotle's extensive body of work laid the foundation for numerous fields within philosophy and beyond, significantly shaping the trajectory of Western thought. His systematic approach to data collection and analysis has had a lasting impact on modern scientific methods. Below, we explore some of his most influential contributions.\n\n#### Foundational Works and Systematic Approach\n\nAristotle's writings cover a broad spectrum of topics, including logic, biology, ethics, and political science. 
His key works such as 'Physics,' 'On the Soul,' and 'Nicomachean Ethics' delve into fundamental concepts like substance, memory, and the nature of the city [Data: Reports (1, 2, 3, 4, 5, +more)]. These texts not only provided a comprehensive framework for understanding various aspects of the natural and human world but also established methodologies that continue to influence contemporary scientific inquiry.\n\n#### Ethical and Political Philosophy\n\nIn 'Nicomachean Ethics,' Aristotle explores the concept of a virtuous character, emphasizing the importance of moral virtues and the development of good habits. His work 'Politics' further examines the structure and function of the city (polis), addressing issues related to property, trade, and governance. Aristotle's classification of political constitutions and his definition of the city as the natural political community have had a profound and enduring impact on political thought [Data: Reports (11, 12); Triples (21, 22, 23, 24, 25)].\n\n#### Theories on Memory and Perception\n\nAristotle's theories on memory and perception are articulated in his works 'On the Soul' and 'De Anima iii 3.' He defines memory as the retention of experiences shaped by sensation and discusses the faculty of imagination (phantasia). These theories have significantly influenced subsequent philosophical and psychological studies on cognition and perception [Data: Reports (13, 14); Triples (26, 27, 28, 29, 30)].\n\n#### Epistemology and Scientific Method\n\nAristotle's epistemology, known as immanent realism, is based on the study of things that exist or happen in the world. This approach emphasizes empirical observation and has been instrumental in shaping the development of scientific methods. His insistence on grounding knowledge in observable phenomena laid the groundwork for future empirical research [Data: Reports (3)].\n\n#### Engagement with Predecessors and Contemporaries\n\nAristotle was also known for his critical engagement with the ideas of his predecessors and contemporaries. For instance, he refuted Democritus's claim about the Milky Way and criticized Empedocles's materialist theory of 'survival of the fittest.' These critiques highlight Aristotle's active participation in the broader philosophical discourse of his time and his contributions to refining and advancing philosophical thought [Data: Reports (15, 16); Triples (31, 32, 33, 34, 35)].\n\n### Conclusion\n\nAristotle's contributions to philosophy are vast and multifaceted, encompassing ethics, politics, epistemology, and more. His works continue to be studied and revered for their depth, rigor, and enduring relevance. Through his systematic approach and critical engagement with existing ideas, Aristotle has left an indelible mark on the landscape of Western philosophy."]},"KGLocalSearchResult":{"properties":{"query":{"type":"string","title":"Query"},"entities":{"type":"object","title":"Entities"},"relationships":{"type":"object","title":"Relationships"},"communities":{"type":"object","title":"Communities"}},"type":"object","required":["query","entities","relationships","communities"],"title":"KGLocalSearchResult","description":"Result of a local knowledge graph search operation.","communities":{"0":{"summary":{"findings":[{"explanation":"Aristotle is credited with the earliest study of formal logic, and his conception of it was the dominant form of Western logic until the 19th-century advances in mathematical logic. 
His works compiled into a set of six books ...","summary":"Aristotle's Foundational Role in Logic"}],"rating":9.5,"rating_explanation":"The impact severity rating is high due to Aristotle's foundational influence on multiple disciplines and his enduring legacy in Western philosophy and science.","summary":"The community revolves around Aristotle, an ancient Greek philosopher and polymath, who made significant contributions to various fields including logic, biology, political science, and economics. His works, such as 'Politics' and 'Nicomachean Ethics', have influenced numerous disciplines and thinkers from antiquity through the Middle Ages and beyond. The relationships between his various works and the fields he contributed to highlight his profound impact on Western thought.","title":"Aristotle and His Contributions"}}},"entities":{"0":{"description":"Aristotle was an ancient Greek philosopher and polymath, recognized as the father of various fields including logic, biology, and political science. He authored significant works such as the *Nicomachean Ethics* and *Politics*, where he explored concepts of virtue, governance, and the nature of reality, while also critiquing Platos ideas. His teachings and observations laid the groundwork for numerous disciplines, influencing thinkers ...","name":"Aristotle"}},"query":"Who is Aristotle?","relationships":{}},"KGSearchResult":{"properties":{"local_result":{"anyOf":[{"$ref":"#/components/schemas/KGLocalSearchResult"},{"type":"null"}]},"global_result":{"anyOf":[{"$ref":"#/components/schemas/KGGlobalSearchResult"},{"type":"null"}]}},"type":"object","title":"KGSearchResult","description":"Result of a knowledge graph search operation.","global_result":{"query":"What were Aristotles key contributions to philosophy?","search_result":["### Aristotle's Key Contributions to Philosophy\n\nAristotle's extensive body of work laid the foundation for numerous fields within philosophy and beyond, significantly shaping the trajectory of Western thought. His systematic approach to data collection and analysis has had a lasting impact on modern scientific methods. Below, we explore some of his most influential contributions.\n\n#### Foundational Works and Systematic Approach\n\nAristotle's writings cover a broad spectrum of topics, including logic, biology, ethics, and political science. His key works such as 'Physics,' 'On the Soul,' and 'Nicomachean Ethics' delve into fundamental concepts like substance, memory, and the nature of the city [Data: Reports (1, 2, 3, 4, 5, +more)]. These texts not only provided a comprehensive framework for understanding various aspects of the natural and human world but also established methodologies that continue to influence contemporary scientific inquiry.\n\n#### Ethical and Political Philosophy\n\nIn 'Nicomachean Ethics,' Aristotle explores the concept of a virtuous character, emphasizing the importance of moral virtues and the development of good habits. His work 'Politics' further examines the structure and function of the city (polis), addressing issues related to property, trade, and governance. Aristotle's classification of political constitutions and his definition of the city as the natural political community have had a profound and enduring impact on political thought [Data: Reports (11, 12); Triples (21, 22, 23, 24, 25)].\n\n#### Theories on Memory and Perception\n\nAristotle's theories on memory and perception are articulated in his works 'On the Soul' and 'De Anima iii 3.' 
He defines memory as the retention of experiences shaped by sensation and discusses the faculty of imagination (phantasia). These theories have significantly influenced subsequent philosophical and psychological studies on cognition and perception [Data: Reports (13, 14); Triples (26, 27, 28, 29, 30)].\n\n#### Epistemology and Scientific Method\n\nAristotle's epistemology, known as immanent realism, is based on the study of things that exist or happen in the world. This approach emphasizes empirical observation and has been instrumental in shaping the development of scientific methods. His insistence on grounding knowledge in observable phenomena laid the groundwork for future empirical research [Data: Reports (3)].\n\n#### Engagement with Predecessors and Contemporaries\n\nAristotle was also known for his critical engagement with the ideas of his predecessors and contemporaries. For instance, he refuted Democritus's claim about the Milky Way and criticized Empedocles's materialist theory of 'survival of the fittest.' These critiques highlight Aristotle's active participation in the broader philosophical discourse of his time and his contributions to refining and advancing philosophical thought [Data: Reports (15, 16); Triples (31, 32, 33, 34, 35)].\n\n### Conclusion\n\nAristotle's contributions to philosophy are vast and multifaceted, encompassing ethics, politics, epistemology, and more. His works continue to be studied and revered for their depth, rigor, and enduring relevance. Through his systematic approach and critical engagement with existing ideas, Aristotle has left an indelible mark on the landscape of Western philosophy."]},"local_result":{"communities":{"0":{"summary":{"findings":[{"explanation":"Aristotle is credited with the earliest study of formal logic, and his conception of it was the dominant form of Western logic until the 19th-century advances in mathematical logic. His works compiled into a set of six books ...","summary":"Aristotle's Foundational Role in Logic"}],"rating":9.5,"rating_explanation":"The impact severity rating is high due to Aristotle's foundational influence on multiple disciplines and his enduring legacy in Western philosophy and science.","summary":"The community revolves around Aristotle, an ancient Greek philosopher and polymath, who made significant contributions to various fields including logic, biology, political science, and economics. His works, such as 'Politics' and 'Nicomachean Ethics', have influenced numerous disciplines and thinkers from antiquity through the Middle Ages and beyond. The relationships between his various works and the fields he contributed to highlight his profound impact on Western thought.","title":"Aristotle and His Contributions"}}},"entities":{"0":{"description":"Aristotle was an ancient Greek philosopher and polymath, recognized as the father of various fields including logic, biology, and political science. He authored significant works such as the *Nicomachean Ethics* and *Politics*, where he explored concepts of virtue, governance, and the nature of reality, while also critiquing Platos ideas. 
His teachings and observations laid the groundwork for numerous disciplines, influencing thinkers ...","name":"Aristotle"}},"query":"Who is Aristotle?","relationships":{}}},"KGSearchSettings":{"properties":{"use_kg_search":{"type":"boolean","title":"Use Kg Search","default":false},"kg_search_type":{"type":"string","title":"Kg Search Type","default":"global"},"kg_search_level":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Kg Search Level"},"generation_config":{"anyOf":[{"$ref":"#/components/schemas/GenerationConfig"},{"type":"null"}]},"max_community_description_length":{"type":"integer","title":"Max Community Description Length","default":65536},"max_llm_queries_for_global_search":{"type":"integer","title":"Max Llm Queries For Global Search","default":250},"local_search_limits":{"additionalProperties":{"type":"integer"},"type":"object","title":"Local Search Limits","default":{"__Entity__":20,"__Relationship__":20,"__Community__":20}}},"type":"object","title":"KGSearchSettings","generation_config":{"max_tokens_to_sample":1024,"model":"openai/gpt-4o","stream":false,"temperature":0.1,"top_p":1.0},"kg_search_level":"0","kg_search_type":"global","local_search_limits":{"__Community__":20,"__Entity__":20,"__Relationship__":20},"max_community_description_length":65536,"max_llm_queries_for_global_search":250,"use_kg_search":true},"LogEntry":{"properties":{"key":{"type":"string","title":"Key"},"value":{"title":"Value"},"timestamp":{"type":"string","format":"date-time","title":"Timestamp"}},"type":"object","required":["key","value","timestamp"],"title":"LogEntry"},"LogResponse":{"properties":{"run_id":{"type":"string","format":"uuid","title":"Run Id"},"run_type":{"type":"string","title":"Run Type"},"entries":{"items":{"$ref":"#/components/schemas/LogEntry"},"type":"array","title":"Entries"},"timestamp":{"anyOf":[{"type":"string","format":"date-time"},{"type":"null"}],"title":"Timestamp"},"user_id":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"User Id"}},"type":"object","required":["run_id","run_type","entries","timestamp","user_id"],"title":"LogResponse"},"Message-Input":{"properties":{"role":{"anyOf":[{"$ref":"#/components/schemas/MessageType"},{"type":"string"}],"title":"Role"},"content":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Content"},"name":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Name"},"function_call":{"anyOf":[{"type":"object"},{"type":"null"}],"title":"Function Call"},"tool_calls":{"anyOf":[{"items":{"type":"object"},"type":"array"},{"type":"null"}],"title":"Tool Calls"}},"type":"object","required":["role"],"title":"Message"},"Message-Output":{"properties":{"role":{"anyOf":[{"$ref":"#/components/schemas/MessageType"},{"type":"string"}],"title":"Role"},"content":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Content"},"name":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Name"},"function_call":{"anyOf":[{"type":"object"},{"type":"null"}],"title":"Function Call"},"tool_calls":{"anyOf":[{"items":{"type":"object"},"type":"array"},{"type":"null"}],"title":"Tool Calls"}},"type":"object","required":["role"],"title":"Message","content":"This is a test message.","role":"user"},"MessageType":{"type":"string","enum":["system","user","assistant","function","tool"],"title":"MessageType"},"PromptResponse":{"properties":{"name":{"type":"string","title":"Name"},"template":{"type":"string","title":"Template"},"created_at":{"type":"string","format":"date-time","title":"Created 
At"},"updated_at":{"type":"string","format":"date-time","title":"Updated At"},"input_types":{"additionalProperties":{"type":"string"},"type":"object","title":"Input Types"}},"type":"object","required":["name","template","created_at","updated_at","input_types"],"title":"PromptResponse"},"RAGAgentResponse":{"properties":{"messages":{"items":{"$ref":"#/components/schemas/Message-Output"},"type":"array","title":"Messages","description":"List of messages in the RAG agent response"}},"type":"object","required":["messages"],"title":"RAGAgentResponse","example":[{"content":"## You are a helpful assistant that can search for information.\n\nWhen asked a question, perform a search to find relevant information and provide a response.\n\nThe response should contain line-item attributions to relevent search results, and be as informative if possible.\nIf no relevant results are found, then state that no results were found.\nIf no obvious question is present, then do not carry out a search, and instead ask for clarification.","role":"system"},{"content":"You are a helpful assistant.","role":"system"},{"content":"Who is the greatest philospher of all time?","role":"user"},{"content":"Aristotle is widely considered the greatest philospher of all time.","role":"assistant"},{"content":"Can you tell me more about him?","role":"user"},{"function_call":{"arguments":"{\"query\":\"Aristotle biography\"}","name":"search"},"role":"assistant"},{"content":"1. Aristotle[A] (Greek: Ἀριστοτέλης Aristotélēs, pronounced [aristotélɛːs]; 384–322 BC) was an Ancient Greek philosopher and polymath. His writings cover a broad range of subjects spanning the natural sciences, philosophy, linguistics, economics, politics, psychology, and the arts. As the founder of the Peripatetic school of philosophy in the Lyceum in Athens, he began the wider Aristotelian tradition that followed, which set the groundwork for the development of modern science.\n2. Aristotle[A] (Greek: Ἀριστοτέλης Aristotélēs, pronounced [aristotélɛːs]; 384–322 BC) was an Ancient Greek philosopher and polymath. His writings cover a broad range of subjects spanning the natural sciences, philosophy, linguistics, economics, politics, psychology, and the arts. As the founder of the Peripatetic school of philosophy in the Lyceum in Athens, he began the wider Aristotelian tradition that followed, which set the groundwork for the development of modern science.\n3. Aristotle was born in 384 BC[C] in Stagira, Chalcidice,[2] about 55 km (34 miles) east of modern-day Thessaloniki.[3][4] His father, Nicomachus, was the personal physician to King Amyntas of Macedon. While he was young, Aristotle learned about biology and medical information, which was taught by his father.[5] Both of Aristotle's parents died when he was about thirteen, and Proxenus of Atarneus became his guardian.[6] Although little information about Aristotle's childhood has survived, he probably spent\n4. Aristotle was born in 384 BC[C] in Stagira, Chalcidice,[2] about 55 km (34 miles) east of modern-day Thessaloniki.[3][4] His father, Nicomachus, was the personal physician to King Amyntas of Macedon. While he was young, Aristotle learned about biology and medical information, which was taught by his father.[5] Both of Aristotle's parents died when he was about thirteen, and Proxenus of Atarneus became his guardian.[6] Although little information about Aristotle's childhood has survived, he probably spent\n5. Life\nIn general, the details of Aristotle's life are not well-established. 
The biographies written in ancient times are often speculative and historians only agree on a few salient points.[B]\n","name":"search","role":"function"},{"content":"Aristotle (384–322 BC) was an Ancient Greek philosopher and polymath whose contributions have had a profound impact on various fields of knowledge. Here are some key points about his life and work:\n\n1. **Early Life**: Aristotle was born in 384 BC in Stagira, Chalcidice, which is near modern-day Thessaloniki, Greece. His father, Nicomachus, was the personal physician to King Amyntas of Macedon, which exposed Aristotle to medical and biological knowledge from a young age [C].\n\n2. **Education and Career**: After the death of his parents, Aristotle was sent to Athens to study at Plato's Academy, where he remained for about 20 years. After Plato's death, Aristotle left Athens and eventually became the tutor of Alexander the Great [C].\n\n3. **Philosophical Contributions**: Aristotle founded the Lyceum in Athens, where he established the Peripatetic school of philosophy. His works cover a wide range of subjects, including metaphysics, ethics, politics, logic, biology, and aesthetics. His writings laid the groundwork for many modern scientific and philosophical inquiries [A].\n\n4. **Legacy**: Aristotle's influence extends beyond philosophy to the natural sciences, linguistics, economics, and psychology. His method of systematic observation and analysis has been foundational to the development of modern science [A].\n\nAristotle's comprehensive approach to knowledge and his systematic methodology have earned him a lasting legacy as one of the greatest philosophers of all time.\n\nSources:\n- [A] Aristotle's broad range of writings and influence on modern science.\n- [C] Details about Aristotle's early life and education.","role":"assistant"}]},"RAGResponse":{"properties":{"completion":{"title":"Completion","description":"The generated completion from the RAG process"},"search_results":{"$ref":"#/components/schemas/SearchResponse","description":"The search results used for the RAG process"}},"type":"object","required":["completion","search_results"],"title":"RAGResponse","example":{"completion":{"choices":[{"finish_reason":"stop","index":0,"message":{"content":"Paris is the capital of France.","role":"assistant"}}],"id":"chatcmpl-example123"},"search_results":{"kg_search_results":[{"global_result":{"query":"What were Aristotles key contributions to philosophy?","search_result":["### Aristotle's Key Contributions to Philosophy\n\nAristotle's extensive body of work laid the foundation for numerous fields within philosophy and beyond, significantly shaping the trajectory of Western thought. His systematic approach to data collection and analysis has had a lasting impact on modern scientific methods. Below, we explore some of his most influential contributions.\n\n#### Foundational Works and Systematic Approach\n\nAristotle's writings cover a broad spectrum of topics, including logic, biology, ethics, and political science. His key works such as 'Physics,' 'On the Soul,' and 'Nicomachean Ethics' delve into fundamental concepts like substance, memory, and the nature of the city [Data: Reports (1, 2, 3, 4, 5, +more)]. 
These texts not only provided a comprehensive framework for understanding various aspects of the natural and human world but also established methodologies that continue to influence contemporary scientific inquiry.\n\n#### Ethical and Political Philosophy\n\nIn 'Nicomachean Ethics,' Aristotle explores the concept of a virtuous character, emphasizing the importance of moral virtues and the development of good habits. His work 'Politics' further examines the structure and function of the city (polis), addressing issues related to property, trade, and governance. Aristotle's classification of political constitutions and his definition of the city as the natural political community have had a profound and enduring impact on political thought [Data: Reports (11, 12); Triples (21, 22, 23, 24, 25)].\n\n#### Theories on Memory and Perception\n\nAristotle's theories on memory and perception are articulated in his works 'On the Soul' and 'De Anima iii 3.' He defines memory as the retention of experiences shaped by sensation and discusses the faculty of imagination (phantasia). These theories have significantly influenced subsequent philosophical and psychological studies on cognition and perception [Data: Reports (13, 14); Triples (26, 27, 28, 29, 30)].\n\n#### Epistemology and Scientific Method\n\nAristotle's epistemology, known as immanent realism, is based on the study of things that exist or happen in the world. This approach emphasizes empirical observation and has been instrumental in shaping the development of scientific methods. His insistence on grounding knowledge in observable phenomena laid the groundwork for future empirical research [Data: Reports (3)].\n\n#### Engagement with Predecessors and Contemporaries\n\nAristotle was also known for his critical engagement with the ideas of his predecessors and contemporaries. For instance, he refuted Democritus's claim about the Milky Way and criticized Empedocles's materialist theory of 'survival of the fittest.' These critiques highlight Aristotle's active participation in the broader philosophical discourse of his time and his contributions to refining and advancing philosophical thought [Data: Reports (15, 16); Triples (31, 32, 33, 34, 35)].\n\n### Conclusion\n\nAristotle's contributions to philosophy are vast and multifaceted, encompassing ethics, politics, epistemology, and more. His works continue to be studied and revered for their depth, rigor, and enduring relevance. Through his systematic approach and critical engagement with existing ideas, Aristotle has left an indelible mark on the landscape of Western philosophy."]},"local_result":{"communities":{"0":{"summary":{"findings":[{"explanation":"Aristotle is credited with the earliest study of formal logic, and his conception of it was the dominant form of Western logic until the 19th-century advances in mathematical logic. His works compiled into a set of six books ...","summary":"Aristotle's Foundational Role in Logic"}],"rating":9.5,"rating_explanation":"The impact severity rating is high due to Aristotle's foundational influence on multiple disciplines and his enduring legacy in Western philosophy and science.","summary":"The community revolves around Aristotle, an ancient Greek philosopher and polymath, who made significant contributions to various fields including logic, biology, political science, and economics. His works, such as 'Politics' and 'Nicomachean Ethics', have influenced numerous disciplines and thinkers from antiquity through the Middle Ages and beyond. 
The relationships between his various works and the fields he contributed to highlight his profound impact on Western thought.","title":"Aristotle and His Contributions"}}},"entities":{"0":{"description":"Aristotle was an ancient Greek philosopher and polymath, recognized as the father of various fields including logic, biology, and political science. He authored significant works such as the *Nicomachean Ethics* and *Politics*, where he explored concepts of virtue, governance, and the nature of reality, while also critiquing Platos ideas. His teachings and observations laid the groundwork for numerous disciplines, influencing thinkers ...","name":"Aristotle"}},"query":"Who is Aristotle?","relationships":{}}}],"vector_search_results":[{"collection_ids":[],"document_id":"3e157b3a-8469-51db-90d9-52e7d896b49b","extraction_id":"3f3d47f3-8baf-58eb-8bc2-0171fb1c6e09","fragment_id":"c68dc72e-fc23-5452-8f49-d7bd46088a96","metadata":{"associated_query":"What is the capital of France?","title":"example_document.pdf"},"score":0.23943702876567796,"text":"Example text from the document","user_id":"2acb499e-8428-543b-bd85-0d9098718220"}]}}},"ResultsWrapper_AddUserResponse_":{"properties":{"results":{"$ref":"#/components/schemas/AddUserResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[AddUserResponse]"},"ResultsWrapper_AllPromptsResponse_":{"properties":{"results":{"$ref":"#/components/schemas/AllPromptsResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[AllPromptsResponse]"},"ResultsWrapper_AnalyticsResponse_":{"properties":{"results":{"$ref":"#/components/schemas/AnalyticsResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[AnalyticsResponse]"},"ResultsWrapper_AppSettingsResponse_":{"properties":{"results":{"$ref":"#/components/schemas/AppSettingsResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[AppSettingsResponse]"},"ResultsWrapper_GenericMessageResponse_":{"properties":{"results":{"$ref":"#/components/schemas/GenericMessageResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[GenericMessageResponse]"},"ResultsWrapper_GroupResponse_":{"properties":{"results":{"$ref":"#/components/schemas/GroupResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[GroupResponse]"},"ResultsWrapper_KGCreationResponse_":{"properties":{"results":{"$ref":"#/components/schemas/KGCreationResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[KGCreationResponse]"},"ResultsWrapper_KGEnrichmentResponse_":{"properties":{"results":{"$ref":"#/components/schemas/KGEnrichmentResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[KGEnrichmentResponse]"},"ResultsWrapper_List_DocumentChunkResponse__":{"properties":{"results":{"items":{"$ref":"#/components/schemas/DocumentChunkResponse"},"type":"array","title":"Results"}},"type":"object","required":["results"],"title":"ResultsWrapper[List[DocumentChunkResponse]]"},"ResultsWrapper_List_DocumentOverviewResponse__":{"properties":{"results":{"items":{"$ref":"#/components/schemas/DocumentOverviewResponse"},"type":"array","title":"Results"}},"type":"object","required":["results"],"title":"ResultsWrapper[List[DocumentOverviewResponse]]"},"ResultsWrapper_List_GroupOverviewResponse__":{"properties":{"results":{"items":{"$ref":"#/components/schemas/GroupOverviewResponse"},"type":"array","title":"Results"}},"type":"object","required":["results"],"title":"ResultsWrapper[List[GroupOverviewResponse]]"},"ResultsWrapper_List_Group
Response__":{"properties":{"results":{"items":{"$ref":"#/components/schemas/GroupResponse"},"type":"array","title":"Results"}},"type":"object","required":["results"],"title":"ResultsWrapper[List[GroupResponse]]"},"ResultsWrapper_List_LogResponse__":{"properties":{"results":{"items":{"$ref":"#/components/schemas/LogResponse"},"type":"array","title":"Results"}},"type":"object","required":["results"],"title":"ResultsWrapper[List[LogResponse]]"},"ResultsWrapper_List_UserOverviewResponse__":{"properties":{"results":{"items":{"$ref":"#/components/schemas/UserOverviewResponse"},"type":"array","title":"Results"}},"type":"object","required":["results"],"title":"ResultsWrapper[List[UserOverviewResponse]]"},"ResultsWrapper_RAGAgentResponse_":{"properties":{"results":{"$ref":"#/components/schemas/RAGAgentResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[RAGAgentResponse]"},"ResultsWrapper_RAGResponse_":{"properties":{"results":{"$ref":"#/components/schemas/RAGResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[RAGResponse]"},"ResultsWrapper_ScoreCompletionResponse_":{"properties":{"results":{"$ref":"#/components/schemas/ScoreCompletionResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[ScoreCompletionResponse]"},"ResultsWrapper_SearchResponse_":{"properties":{"results":{"$ref":"#/components/schemas/SearchResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[SearchResponse]"},"ResultsWrapper_ServerStats_":{"properties":{"results":{"$ref":"#/components/schemas/ServerStats"}},"type":"object","required":["results"],"title":"ResultsWrapper[ServerStats]"},"ResultsWrapper_TokenResponse_":{"properties":{"results":{"$ref":"#/components/schemas/TokenResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[TokenResponse]"},"ResultsWrapper_UpdatePromptResponse_":{"properties":{"results":{"$ref":"#/components/schemas/UpdatePromptResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[UpdatePromptResponse]"},"ResultsWrapper_UpdateResponse_":{"properties":{"results":{"$ref":"#/components/schemas/UpdateResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[UpdateResponse]"},"ResultsWrapper_UserResponse_":{"properties":{"results":{"$ref":"#/components/schemas/UserResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[UserResponse]"},"ResultsWrapper_list_IngestionResponse__":{"properties":{"results":{"items":{"$ref":"#/components/schemas/IngestionResponse"},"type":"array","title":"Results"}},"type":"object","required":["results"],"title":"ResultsWrapper[list[IngestionResponse]]"},"ResultsWrapper_str_":{"properties":{"results":{"type":"string","title":"Results"}},"type":"object","required":["results"],"title":"ResultsWrapper[str]"},"ScoreCompletionResponse":{"properties":{"message":{"type":"string","title":"Message"}},"type":"object","required":["message"],"title":"ScoreCompletionResponse"},"SearchResponse":{"properties":{"vector_search_results":{"items":{"$ref":"#/components/schemas/VectorSearchResult"},"type":"array","title":"Vector Search Results","description":"List of vector search results"},"kg_search_results":{"anyOf":[{"items":{"$ref":"#/components/schemas/KGSearchResult"},"type":"array"},{"type":"null"}],"title":"Kg Search Results","description":"Knowledge graph search results, if applicable"}},"type":"object","required":["vector_search_results"],"title":"SearchResponse","example":{"kg_search_results":[{"global_result":{"query":"What were 
Aristotles key contributions to philosophy?","search_result":["### Aristotle's Key Contributions to Philosophy\n\nAristotle's extensive body of work laid the foundation for numerous fields within philosophy and beyond, significantly shaping the trajectory of Western thought. His systematic approach to data collection and analysis has had a lasting impact on modern scientific methods. Below, we explore some of his most influential contributions.\n\n#### Foundational Works and Systematic Approach\n\nAristotle's writings cover a broad spectrum of topics, including logic, biology, ethics, and political science. His key works such as 'Physics,' 'On the Soul,' and 'Nicomachean Ethics' delve into fundamental concepts like substance, memory, and the nature of the city [Data: Reports (1, 2, 3, 4, 5, +more)]. These texts not only provided a comprehensive framework for understanding various aspects of the natural and human world but also established methodologies that continue to influence contemporary scientific inquiry.\n\n#### Ethical and Political Philosophy\n\nIn 'Nicomachean Ethics,' Aristotle explores the concept of a virtuous character, emphasizing the importance of moral virtues and the development of good habits. His work 'Politics' further examines the structure and function of the city (polis), addressing issues related to property, trade, and governance. Aristotle's classification of political constitutions and his definition of the city as the natural political community have had a profound and enduring impact on political thought [Data: Reports (11, 12); Triples (21, 22, 23, 24, 25)].\n\n#### Theories on Memory and Perception\n\nAristotle's theories on memory and perception are articulated in his works 'On the Soul' and 'De Anima iii 3.' He defines memory as the retention of experiences shaped by sensation and discusses the faculty of imagination (phantasia). These theories have significantly influenced subsequent philosophical and psychological studies on cognition and perception [Data: Reports (13, 14); Triples (26, 27, 28, 29, 30)].\n\n#### Epistemology and Scientific Method\n\nAristotle's epistemology, known as immanent realism, is based on the study of things that exist or happen in the world. This approach emphasizes empirical observation and has been instrumental in shaping the development of scientific methods. His insistence on grounding knowledge in observable phenomena laid the groundwork for future empirical research [Data: Reports (3)].\n\n#### Engagement with Predecessors and Contemporaries\n\nAristotle was also known for his critical engagement with the ideas of his predecessors and contemporaries. For instance, he refuted Democritus's claim about the Milky Way and criticized Empedocles's materialist theory of 'survival of the fittest.' These critiques highlight Aristotle's active participation in the broader philosophical discourse of his time and his contributions to refining and advancing philosophical thought [Data: Reports (15, 16); Triples (31, 32, 33, 34, 35)].\n\n### Conclusion\n\nAristotle's contributions to philosophy are vast and multifaceted, encompassing ethics, politics, epistemology, and more. His works continue to be studied and revered for their depth, rigor, and enduring relevance. 
Through his systematic approach and critical engagement with existing ideas, Aristotle has left an indelible mark on the landscape of Western philosophy."]},"local_result":{"communities":{"0":{"summary":{"findings":[{"explanation":"Aristotle is credited with the earliest study of formal logic, and his conception of it was the dominant form of Western logic until the 19th-century advances in mathematical logic. His works compiled into a set of six books ...","summary":"Aristotle's Foundational Role in Logic"}],"rating":9.5,"rating_explanation":"The impact severity rating is high due to Aristotle's foundational influence on multiple disciplines and his enduring legacy in Western philosophy and science.","summary":"The community revolves around Aristotle, an ancient Greek philosopher and polymath, who made significant contributions to various fields including logic, biology, political science, and economics. His works, such as 'Politics' and 'Nicomachean Ethics', have influenced numerous disciplines and thinkers from antiquity through the Middle Ages and beyond. The relationships between his various works and the fields he contributed to highlight his profound impact on Western thought.","title":"Aristotle and His Contributions"}}},"entities":{"0":{"description":"Aristotle was an ancient Greek philosopher and polymath, recognized as the father of various fields including logic, biology, and political science. He authored significant works such as the *Nicomachean Ethics* and *Politics*, where he explored concepts of virtue, governance, and the nature of reality, while also critiquing Platos ideas. His teachings and observations laid the groundwork for numerous disciplines, influencing thinkers ...","name":"Aristotle"}},"query":"Who is Aristotle?","relationships":{}}}],"vector_search_results":[{"collection_ids":[],"document_id":"3e157b3a-8469-51db-90d9-52e7d896b49b","extraction_id":"3f3d47f3-8baf-58eb-8bc2-0171fb1c6e09","fragment_id":"c68dc72e-fc23-5452-8f49-d7bd46088a96","metadata":{"associated_query":"What is the capital of France?","title":"example_document.pdf"},"score":0.23943702876567796,"text":"Example text from the document","user_id":"2acb499e-8428-543b-bd85-0d9098718220"}]}},"ServerStats":{"properties":{"start_time":{"type":"string","format":"date-time","title":"Start Time"},"uptime_seconds":{"type":"number","title":"Uptime Seconds"},"cpu_usage":{"type":"number","title":"Cpu Usage"},"memory_usage":{"type":"number","title":"Memory Usage"}},"type":"object","required":["start_time","uptime_seconds","cpu_usage","memory_usage"],"title":"ServerStats"},"Token":{"properties":{"token":{"type":"string","title":"Token"},"token_type":{"type":"string","title":"Token Type"}},"type":"object","required":["token","token_type"],"title":"Token"},"TokenResponse":{"properties":{"access_token":{"$ref":"#/components/schemas/Token"},"refresh_token":{"$ref":"#/components/schemas/Token"}},"type":"object","required":["access_token","refresh_token"],"title":"TokenResponse"},"UpdatePromptResponse":{"properties":{"message":{"type":"string","title":"Message"}},"type":"object","required":["message"],"title":"UpdatePromptResponse"},"UpdateResponse":{"properties":{"message":{"type":"string","title":"Message","description":"A message describing the result of the ingestion request."},"task_id":{"type":"string","format":"uuid","title":"Task Id","description":"The task ID of the ingestion request."},"document_ids":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Document Ids","description":"The ID of 
the document that was ingested."}},"type":"object","required":["message","task_id","document_ids"],"title":"UpdateResponse","example":{"document_ids":["9fbe403b-c11c-5aae-8ade-ef22980c3ad1"],"message":"Update task queued successfully.","task_id":"c68dc72e-fc23-5452-8f49-d7bd46088a96"}},"UserOverviewResponse":{"properties":{"user_id":{"type":"string","format":"uuid","title":"User Id"},"num_files":{"type":"integer","title":"Num Files"},"total_size_in_bytes":{"type":"integer","title":"Total Size In Bytes"},"document_ids":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Document Ids"}},"type":"object","required":["user_id","num_files","total_size_in_bytes","document_ids"],"title":"UserOverviewResponse"},"UserResponse":{"properties":{"id":{"type":"string","format":"uuid","title":"Id"},"email":{"type":"string","title":"Email"},"is_active":{"type":"boolean","title":"Is Active","default":true},"is_superuser":{"type":"boolean","title":"Is Superuser","default":false},"created_at":{"type":"string","format":"date-time","title":"Created At","default":"2024-09-18T18:14:34.700273"},"updated_at":{"type":"string","format":"date-time","title":"Updated At","default":"2024-09-18T18:14:34.700279"},"is_verified":{"type":"boolean","title":"Is Verified","default":false},"collection_ids":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Collection Ids","default":[]},"hashed_password":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Hashed Password"},"verification_code_expiry":{"anyOf":[{"type":"string","format":"date-time"},{"type":"null"}],"title":"Verification Code Expiry"},"name":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Name"},"bio":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Bio"},"profile_picture":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Profile Picture"}},"type":"object","required":["id","email"],"title":"UserResponse"},"ValidationError":{"properties":{"loc":{"items":{"anyOf":[{"type":"string"},{"type":"integer"}]},"type":"array","title":"Location"},"msg":{"type":"string","title":"Message"},"type":{"type":"string","title":"Error Type"}},"type":"object","required":["loc","msg","type"],"title":"ValidationError"},"VectorSearchResult":{"properties":{"fragment_id":{"type":"string","format":"uuid","title":"Fragment Id"},"extraction_id":{"type":"string","format":"uuid","title":"Extraction Id"},"document_id":{"type":"string","format":"uuid","title":"Document Id"},"user_id":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"User Id"},"collection_ids":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Collection Ids"},"score":{"type":"number","title":"Score"},"text":{"type":"string","title":"Text"},"metadata":{"type":"object","title":"Metadata"}},"type":"object","required":["fragment_id","extraction_id","document_id","user_id","collection_ids","score","text","metadata"],"title":"VectorSearchResult","description":"Result of a search operation.","collection_ids":[],"document_id":"3e157b3a-8469-51db-90d9-52e7d896b49b","extraction_id":"3f3d47f3-8baf-58eb-8bc2-0171fb1c6e09","fragment_id":"c68dc72e-fc23-5452-8f49-d7bd46088a96","metadata":{"associated_query":"What is the capital of France?","title":"example_document.pdf"},"score":0.23943702876567796,"text":"Example text from the document","user_id":"2acb499e-8428-543b-bd85-0d9098718220"},"VectorSearchSettings":{"properties":{"use_vector_search":{"type":"boolean","title":"Use Vector Search","description":"Whether to use vector 
search","default":true},"use_hybrid_search":{"type":"boolean","title":"Use Hybrid Search","description":"Whether to perform a hybrid search (combining vector and keyword search)","default":false},"filters":{"type":"object","title":"Filters","description":"Filters to apply to the vector search"},"search_limit":{"type":"integer","maximum":1000.0,"minimum":1.0,"title":"Search Limit","description":"Maximum number of results to return","default":10},"selected_collection_ids":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Selected Collection Ids","description":"Collection IDs to search for"},"index_measure":{"$ref":"#/components/schemas/IndexMeasure","description":"The distance measure to use for indexing","default":"cosine_distance"},"include_values":{"type":"boolean","title":"Include Values","description":"Whether to include search score values in the search results","default":true},"include_metadatas":{"type":"boolean","title":"Include Metadatas","description":"Whether to include element metadata in the search results","default":true},"probes":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Probes","description":"Number of ivfflat index lists to query. Higher increases accuracy but decreases speed.","default":10},"ef_search":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Ef Search","description":"Size of the dynamic candidate list for HNSW index search. Higher increases accuracy but decreases speed.","default":40},"hybrid_search_settings":{"anyOf":[{"$ref":"#/components/schemas/HybridSearchSettings"},{"type":"null"}],"description":"Settings for hybrid search","default":{"full_text_weight":1.0,"semantic_weight":5.0,"full_text_limit":200,"rrf_k":50}},"search_strategy":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Search Strategy","description":"Search strategy to use (e.g., 'default', 'query_fusion', 'hyde')","default":"vanilla"}},"type":"object","title":"VectorSearchSettings","ef_search":40,"filters":{"category":"technology"},"hybrid_search_settings":{"full_text_limit":200,"full_text_weight":1.0,"rrf_k":50,"semantic_weight":5.0},"include_metadata":true,"index_measure":"cosine_distance","probes":10,"search_limit":20,"selected_collection_ids":["2acb499e-8428-543b-bd85-0d9098718220","3e157b3a-8469-51db-90d9-52e7d896b49b"],"use_hybrid_search":true,"use_vector_search":true}},"securitySchemes":{"HTTPBearer":{"type":"http","scheme":"bearer"},"OAuth2PasswordBearer":{"type":"oauth2","flows":{"password":{"scopes":{},"tokenUrl":"token"}}}}}} +{"openapi":"3.1.0","info":{"title":"R2R Application API","version":"1.0.0"},"paths":{"/v2/ingest_files":{"post":{"summary":"Ingest Files App","description":"Ingest files into the system.\n\nThis endpoint supports multipart/form-data requests, enabling you to ingest files and their associated metadatas into R2R.\n\nA valid user authentication token is required to access this endpoint, as regular users can only ingest files for their own access. 
More expansive collection permissioning is under development.","operationId":"ingest_files_app_v2_ingest_files_post","security":[{"HTTPBearer":[]}],"parameters":[{"name":"response_model","in":"query","required":false,"schema":{"title":"Response Model"}}],"requestBody":{"required":true,"content":{"multipart/form-data":{"schema":{"$ref":"#/components/schemas/Body_ingest_files_app_v2_ingest_files_post"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"x-codeSamples":[{"lang":"Python","source":"from r2r import R2RClient\n\nclient = R2RClient(\"http://localhost:7272\")\n# when using auth, do client.login(...)\n\nresult = client.ingest_files(\n file_paths=[\"pg_essay_1.html\", \"got.txt\"],\n metadatas=[{\"metadata_1\":\"some random metadata\"}, {\"metadata_2\": \"some other random metadata\"}],\n document_ids=None,\n versions=None\n)\n"},{"lang":"Shell","source":"curl -X POST \"https://api.example.com/ingest_files\" \\\n -H \"Content-Type: multipart/form-data\" \\\n -H \"Authorization: Bearer YOUR_API_KEY\" \\\n -F \"file=@pg_essay_1.html;type=text/html\" \\\n -F \"file=@got.txt;type=text/plain\" \\\n -F 'metadatas=[{},{}]' \\\n -F 'document_ids=null' \\\n -F 'versions=null'\n"}]}},"/v2/update_files":{"post":{"summary":"Update Files App","description":"Update existing files in the system.\n\nThis endpoint supports multipart/form-data requests, enabling you to update files and their associated metadatas into R2R.\n\nA valid user authentication token is required to access this endpoint, as regular users can only update their own files. More expansive collection permissioning is under development.","operationId":"update_files_app_v2_update_files_post","security":[{"HTTPBearer":[]}],"parameters":[{"name":"response_model","in":"query","required":false,"schema":{"title":"Response Model"}}],"requestBody":{"required":true,"content":{"multipart/form-data":{"schema":{"$ref":"#/components/schemas/Body_update_files_app_v2_update_files_post"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"x-codeSamples":[{"lang":"Python","source":"from r2r import R2RClient\n\nclient = R2RClient(\"http://localhost:7272\")\n# when using auth, do client.login(...)\n\nresult = client.update_files(\n file_paths=[\"pg_essay_1_v2.txt\"],\n document_ids=[\"b4ac4dd6-5f27-596e-a55b-7cf242ca30aa\"]\n)\n"},{"lang":"Shell","source":"curl -X POST \"https://api.example.com/update_files\" \\\n -H \"Content-Type: multipart/form-data\" \\\n -H \"Authorization: Bearer YOUR_API_KEY\" \\\n -F \"file=@pg_essay_1_v2.txt;type=text/plain\" \\\n -F 'document_ids=[\"b4ac4dd6-5f27-596e-a55b-7cf242ca30aa\"]'\n"}]}},"/v2/health":{"get":{"summary":"Health Check","operationId":"health_check_v2_health_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}}}}},"/v2/server_stats":{"get":{"summary":"Server Stats","operationId":"server_stats_v2_server_stats_get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"response_model","in":"query","required":false,"schema":{"title":"Response Model"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation 
Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/update_prompt":{"post":{"summary":"Update Prompt App","operationId":"update_prompt_app_v2_update_prompt_post","security":[{"HTTPBearer":[]}],"parameters":[{"name":"response_model","in":"query","required":false,"schema":{"title":"Response Model"}}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_update_prompt_app_v2_update_prompt_post"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/add_prompt":{"post":{"summary":"Add Prompt App","operationId":"add_prompt_app_v2_add_prompt_post","security":[{"HTTPBearer":[]}],"parameters":[{"name":"response_model","in":"query","required":false,"schema":{"title":"Response Model"}}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_add_prompt_app_v2_add_prompt_post"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/get_prompt/{prompt_name}":{"get":{"summary":"Get Prompt App","operationId":"get_prompt_app_v2_get_prompt__prompt_name__get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"prompt_name","in":"path","required":true,"schema":{"type":"string","description":"Prompt name","title":"Prompt Name"},"description":"Prompt name"},{"name":"inputs","in":"query","required":false,"schema":{"anyOf":[{"type":"string","contentMediaType":"application/json","contentSchema":{"type":"object"}},{"type":"null"}],"description":"JSON-encoded prompt inputs","title":"Inputs"},"description":"JSON-encoded prompt inputs"},{"name":"prompt_override","in":"query","required":false,"schema":{"anyOf":[{"type":"string"},{"type":"null"}],"description":"Prompt override","title":"Prompt Override"},"description":"Prompt override"},{"name":"response_model","in":"query","required":false,"schema":{"title":"Response Model"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/get_all_prompts":{"get":{"summary":"Get All Prompts App","operationId":"get_all_prompts_app_v2_get_all_prompts_get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"response_model","in":"query","required":false,"schema":{"title":"Response Model"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/delete_prompt/{prompt_name}":{"delete":{"summary":"Delete Prompt App","operationId":"delete_prompt_app_v2_delete_prompt__prompt_name__delete","security":[{"HTTPBearer":[]}],"parameters":[{"name":"prompt_name","in":"path","required":true,"schema":{"type":"string","description":"Prompt name","title":"Prompt Name"},"description":"Prompt name"},{"name":"response_model","in":"query","required":false,"schema":{"title":"Response Model"}}],"responses":{"200":{"description":"Successful 
Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/analytics":{"get":{"summary":"Get Analytics App","operationId":"get_analytics_app_v2_analytics_get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"filter_criteria","in":"query","required":false,"schema":{"anyOf":[{"type":"string","contentMediaType":"application/json","contentSchema":{"type":"object"}},{"type":"null"}],"default":{},"title":"Filter Criteria"}},{"name":"analysis_types","in":"query","required":false,"schema":{"anyOf":[{"type":"string","contentMediaType":"application/json","contentSchema":{"type":"object"}},{"type":"null"}],"default":{},"title":"Analysis Types"}},{"name":"response_model","in":"query","required":false,"schema":{"title":"Response Model"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/logs":{"get":{"summary":"Logs App","operationId":"logs_app_v2_logs_get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"run_type_filter","in":"query","required":false,"schema":{"anyOf":[{"type":"string"},{"type":"null"}],"default":"","title":"Run Type Filter"}},{"name":"offset","in":"query","required":false,"schema":{"type":"integer","minimum":0,"default":0,"title":"Offset"}},{"name":"limit","in":"query","required":false,"schema":{"type":"integer","maximum":1000,"minimum":1,"default":100,"title":"Limit"}},{"name":"response_model","in":"query","required":false,"schema":{"title":"Response Model"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/app_settings":{"get":{"summary":"App Settings","operationId":"app_settings_v2_app_settings_get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"response_model","in":"query","required":false,"schema":{"title":"Response Model"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/score_completion":{"post":{"summary":"Score Completion","operationId":"score_completion_v2_score_completion_post","security":[{"HTTPBearer":[]}],"parameters":[{"name":"response_model","in":"query","required":false,"schema":{"title":"Response Model"}}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_score_completion_v2_score_completion_post"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/users_overview":{"get":{"summary":"Users Overview App","operationId":"users_overview_app_v2_users_overview_get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"user_ids","in":"query","required":false,"schema":{"anyOf":[{"type":"array","items":{"type":"string"}},{"type":"null"}],"default":[],"title":"User 
Ids"}},{"name":"offset","in":"query","required":false,"schema":{"type":"integer","minimum":0,"default":0,"title":"Offset"}},{"name":"limit","in":"query","required":false,"schema":{"type":"integer","maximum":1000,"minimum":1,"default":100,"title":"Limit"}},{"name":"response_model","in":"query","required":false,"schema":{"title":"Response Model"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/delete":{"delete":{"summary":"Delete App","operationId":"delete_app_v2_delete_delete","security":[{"HTTPBearer":[]}],"parameters":[{"name":"filters","in":"query","required":true,"schema":{"type":"string","description":"JSON-encoded filters","title":"Filters"},"description":"JSON-encoded filters"},{"name":"response_model","in":"query","required":false,"schema":{"title":"Response Model"}}],"responses":{"204":{"description":"Successful Response"},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/download_file/{document_id}":{"get":{"summary":"Download File App","description":"Download a file by its document ID as a stream.","operationId":"download_file_app_v2_download_file__document_id__get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"document_id","in":"path","required":true,"schema":{"type":"string","description":"Document ID","title":"Document Id"},"description":"Document ID"}],"responses":{"200":{"description":"Successful Response"},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/documents_overview":{"get":{"summary":"Documents Overview App","operationId":"documents_overview_app_v2_documents_overview_get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"document_ids","in":"query","required":false,"schema":{"type":"array","items":{"type":"string"},"default":[],"title":"Document Ids"}},{"name":"offset","in":"query","required":false,"schema":{"type":"integer","minimum":0,"default":0,"title":"Offset"}},{"name":"limit","in":"query","required":false,"schema":{"type":"integer","maximum":1000,"minimum":1,"default":100,"title":"Limit"}},{"name":"response_model","in":"query","required":false,"schema":{"title":"Response Model"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/document_chunks/{document_id}":{"get":{"summary":"Document Chunks App","operationId":"document_chunks_app_v2_document_chunks__document_id__get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"document_id","in":"path","required":true,"schema":{"type":"string","title":"Document Id"}},{"name":"offset","in":"query","required":false,"schema":{"anyOf":[{"type":"integer","minimum":0},{"type":"null"}],"default":0,"title":"Offset"}},{"name":"limit","in":"query","required":false,"schema":{"anyOf":[{"type":"integer","minimum":0},{"type":"null"}],"default":100,"title":"Limit"}},{"name":"response_model","in":"query","required":false,"schema":{"title":"Response Model"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation 
Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/inspect_knowledge_graph":{"get":{"summary":"Inspect Knowledge Graph","operationId":"inspect_knowledge_graph_v2_inspect_knowledge_graph_get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"offset","in":"query","required":false,"schema":{"type":"integer","default":0,"title":"Offset"}},{"name":"limit","in":"query","required":false,"schema":{"type":"integer","default":100,"title":"Limit"}},{"name":"print_descriptions","in":"query","required":false,"schema":{"type":"boolean","default":false,"title":"Print Descriptions"}},{"name":"response_model","in":"query","required":false,"schema":{"title":"Response Model"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/collections_overview":{"get":{"summary":"Collections Overview App","operationId":"collections_overview_app_v2_collections_overview_get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"collection_ids","in":"query","required":false,"schema":{"anyOf":[{"type":"array","items":{"type":"string"}},{"type":"null"}],"title":"Collection Ids"}},{"name":"offset","in":"query","required":false,"schema":{"anyOf":[{"type":"integer","minimum":0},{"type":"null"}],"default":0,"title":"Offset"}},{"name":"limit","in":"query","required":false,"schema":{"anyOf":[{"type":"integer","maximum":1000,"minimum":1},{"type":"null"}],"default":100,"title":"Limit"}},{"name":"response_model","in":"query","required":false,"schema":{"title":"Response Model"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/create_collection":{"post":{"summary":"Create Collection App","operationId":"create_collection_app_v2_create_collection_post","security":[{"HTTPBearer":[]}],"parameters":[{"name":"response_model","in":"query","required":false,"schema":{"title":"Response Model"}}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_create_collection_app_v2_create_collection_post"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/get_collection/{collection_id}":{"get":{"summary":"Get Collection App","operationId":"get_collection_app_v2_get_collection__collection_id__get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"collection_id","in":"path","required":true,"schema":{"type":"string","description":"Collection ID","title":"Collection Id"},"description":"Collection ID"},{"name":"response_model","in":"query","required":false,"schema":{"title":"Response Model"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/update_collection":{"put":{"summary":"Update Collection 
App","operationId":"update_collection_app_v2_update_collection_put","security":[{"HTTPBearer":[]}],"parameters":[{"name":"response_model","in":"query","required":false,"schema":{"title":"Response Model"}}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_update_collection_app_v2_update_collection_put"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/delete_collection/{collection_id}":{"delete":{"summary":"Delete Collection App","operationId":"delete_collection_app_v2_delete_collection__collection_id__delete","security":[{"HTTPBearer":[]}],"parameters":[{"name":"collection_id","in":"path","required":true,"schema":{"type":"string","description":"Collection ID","title":"Collection Id"},"description":"Collection ID"}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/list_collections":{"get":{"summary":"List Collections App","operationId":"list_collections_app_v2_list_collections_get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"offset","in":"query","required":false,"schema":{"type":"integer","minimum":0,"default":0,"title":"Offset"}},{"name":"limit","in":"query","required":false,"schema":{"type":"integer","maximum":1000,"minimum":1,"default":100,"title":"Limit"}},{"name":"response_model","in":"query","required":false,"schema":{"title":"Response Model"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/add_user_to_collection":{"post":{"summary":"Add User To Collection App","operationId":"add_user_to_collection_app_v2_add_user_to_collection_post","security":[{"HTTPBearer":[]}],"parameters":[{"name":"response_model","in":"query","required":false,"schema":{"title":"Response Model"}}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_add_user_to_collection_app_v2_add_user_to_collection_post"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/remove_user_from_collection":{"post":{"summary":"Remove User From Collection App","operationId":"remove_user_from_collection_app_v2_remove_user_from_collection_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_remove_user_from_collection_app_v2_remove_user_from_collection_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/get_users_in_collection/{collection_id}":{"get":{"summary":"Get Users In Collection 
App","operationId":"get_users_in_collection_app_v2_get_users_in_collection__collection_id__get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"collection_id","in":"path","required":true,"schema":{"type":"string","description":"Collection ID","title":"Collection Id"},"description":"Collection ID"},{"name":"offset","in":"query","required":false,"schema":{"type":"integer","minimum":0,"description":"Pagination offset","default":0,"title":"Offset"},"description":"Pagination offset"},{"name":"limit","in":"query","required":false,"schema":{"type":"integer","maximum":1000,"minimum":1,"description":"Pagination limit","default":100,"title":"Limit"},"description":"Pagination limit"},{"name":"response_model","in":"query","required":false,"schema":{"title":"Response Model"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/user_collections/{user_id}":{"get":{"summary":"Get Collections For User App","operationId":"get_collections_for_user_app_v2_user_collections__user_id__get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"user_id","in":"path","required":true,"schema":{"type":"string","description":"User ID","title":"User Id"},"description":"User ID"},{"name":"offset","in":"query","required":false,"schema":{"type":"integer","minimum":0,"description":"Pagination offset","default":0,"title":"Offset"},"description":"Pagination offset"},{"name":"limit","in":"query","required":false,"schema":{"type":"integer","maximum":1000,"minimum":1,"description":"Pagination limit","default":100,"title":"Limit"},"description":"Pagination limit"},{"name":"response_model","in":"query","required":false,"schema":{"title":"Response Model"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/assign_document_to_collection":{"post":{"summary":"Assign Document To Collection App","operationId":"assign_document_to_collection_app_v2_assign_document_to_collection_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_assign_document_to_collection_app_v2_assign_document_to_collection_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/remove_document_from_collection":{"post":{"summary":"Remove Document From Collection App","operationId":"remove_document_from_collection_app_v2_remove_document_from_collection_post","security":[{"HTTPBearer":[]}],"parameters":[{"name":"response_model","in":"query","required":false,"schema":{"title":"Response Model"}}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_remove_document_from_collection_app_v2_remove_document_from_collection_post"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/document_collections/{document_id}":{"get":{"summary":"Document Collections 
App","operationId":"document_collections_app_v2_document_collections__document_id__get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"document_id","in":"path","required":true,"schema":{"type":"string","description":"Document ID","title":"Document Id"},"description":"Document ID"},{"name":"offset","in":"query","required":false,"schema":{"type":"integer","minimum":0,"default":0,"title":"Offset"}},{"name":"limit","in":"query","required":false,"schema":{"type":"integer","maximum":1000,"minimum":1,"default":100,"title":"Limit"}},{"name":"response_model","in":"query","required":false,"schema":{"title":"Response Model"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/collection/{collection_id}/documents":{"get":{"summary":"Documents In Collection App","operationId":"documents_in_collection_app_v2_collection__collection_id__documents_get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"collection_id","in":"path","required":true,"schema":{"type":"string","description":"Collection ID","title":"Collection Id"},"description":"Collection ID"},{"name":"offset","in":"query","required":false,"schema":{"type":"integer","minimum":0,"default":0,"title":"Offset"}},{"name":"limit","in":"query","required":false,"schema":{"type":"integer","maximum":1000,"minimum":1,"default":100,"title":"Limit"}},{"name":"response_model","in":"query","required":false,"schema":{"title":"Response Model"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/search":{"post":{"summary":"Search App","description":"Perform a search query on the vector database and knowledge graph.\n\nThis endpoint allows for complex filtering of search results using PostgreSQL-based queries.\nFilters can be applied to various fields such as document_id, and internal metadata values.\n\n\nAllowed operators include `eq`, `neq`, `gt`, `gte`, `lt`, `lte`, `like`, `ilike`, `in`, and `nin`.","operationId":"search_app_v2_search_post","security":[{"HTTPBearer":[]}],"parameters":[{"name":"response_model","in":"query","required":false,"schema":{"title":"Response Model"}}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_search_app_v2_search_post"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"x-codeSamples":[{"lang":"Python","source":"from r2r import R2RClient\n\nclient = R2RClient(\"http://localhost:7272\")\n# when using auth, do client.login(...)\n\nresult = client.search(\n query=\"Who is Aristotle?\",\n vector_search_settings={\n \"use_vector_search\": True,\n \"filters\": {\"document_id\": {\"eq\": \"3e157b3a-8469-51db-90d9-52e7d896b49b\"}},\n \"search_limit\": 20,\n \"use_hybrid_search\": True\n },\n kg_search_settings={\n \"use_kg_search\": True, # graph needs to be constructed first\n \"kg_search_type\": \"local\",\n \"kg_search_level\": \"0\",\n \"generation_config\": {\n \"model\": \"gpt-4o-mini\",\n \"temperature\": 0.7,\n },\n \"local_search_limits\": {\n \"__Entity__\": 20,\n \"__Relationship__\": 
20,\n \"__Community__\": 20,\n },\n \"max_community_description_length\": 65536,\n \"max_llm_queries_for_global_search\": 250\n }\n)\n"},{"lang":"Shell","source":"curl -X POST \"https://api.example.com/search\" \\\n -H \"Content-Type: application/json\" \\\n -H \"Authorization: Bearer YOUR_API_KEY\" \\\n -d '{\n \"query\": \"Who is Aristotle?\",\n \"vector_search_settings\": {\n \"use_vector_search\": true,\n \"filters\": {\"document_id\": {\"eq\": \"3e157b3a-8469-51db-90d9-52e7d896b49b\"}},\n \"search_limit\": 20,\n \"use_hybrid_search\": true\n },\n \"kg_search_settings\": {\n \"use_kg_search\": true, # graph needs to be constructed first\n \"kg_search_type\": \"local\",\n \"kg_search_level\": \"0\",\n \"generation_config\": {\n \"model\": \"gpt-4o-mini\",\n \"temperature\": 0.7\n },\n \"local_search_limits\": {\n \"__Entity__\": 20,\n \"__Relationship__\": 20,\n \"__Community__\": 20,\n },\n \"max_community_description_length\": 65536,\n \"max_llm_queries_for_global_search\": 250\n }\n }'\n"}]}},"/v2/rag":{"post":{"summary":"Rag App","description":"Execute a RAG (Retrieval-Augmented Generation) query.\n\nThis endpoint combines search results with language model generation.\nIt supports the same filtering capabilities as the search endpoint,\nallowing for precise control over the retrieved context.\n\nThe generation process can be customized using the rag_generation_config parameter.","operationId":"rag_app_v2_rag_post","security":[{"HTTPBearer":[]}],"parameters":[{"name":"response_model","in":"query","required":false,"schema":{"title":"Response Model"}}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_rag_app_v2_rag_post"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"x-codeSamples":[{"lang":"Python","source":"from r2r import R2RClient\n\nclient = R2RClient(\"http://localhost:7272\")\n# when using auth, do client.login(...)\n\nresult = client.rag(\n query=\"Who is Aristotle?\",\n vector_search_settings={\n \"use_vector_search\": True,\n \"filters\": {\"document_id\": {\"eq\": \"3e157b3a-8469-51db-90d9-52e7d896b49b\"}},\n \"search_limit\": 20,\n \"use_hybrid_search\": True\n },\n kg_search_settings={\n \"use_kg_search\": True,\n \"kg_search_type\": \"local\",\n \"kg_search_level\": \"0\",\n \"generation_config\": {\n \"model\": \"gpt-4o-mini\",\n \"temperature\": 0.7,\n },\n \"local_search_limits\": {\n \"__Entity__\": 20,\n \"__Relationship__\": 20,\n \"__Community__\": 20,\n },\n \"max_community_description_length\": 65536,\n \"max_llm_queries_for_global_search\": 250\n },\n rag_generation_config={\n \"stream\": False,\n \"temperature\": 0.7,\n \"max_tokens\": 150\n }\n)\n"},{"lang":"Shell","source":"curl -X POST \"https://api.example.com/rag\" \\\n -H \"Content-Type: application/json\" \\\n -H \"Authorization: Bearer YOUR_API_KEY\" \\\n -d '{\n \"query\": \"Who is Aristotle?\",\n \"vector_search_settings\": {\n \"use_vector_search\": true,\n \"filters\": {\"document_id\": {\"eq\": \"3e157b3a-8469-51db-90d9-52e7d896b49b\"}},\n \"search_limit\": 20,\n \"use_hybrid_search\": True\n },\n \"kg_search_settings\": {\n \"use_kg_search\": true, # graph needs to be constructed first\n \"kg_search_type\": \"local\",\n \"kg_search_level\": \"0\",\n \"generation_config\": {\n \"model\": \"gpt-4o-mini\",\n \"temperature\": 0.7\n },\n 
\"local_search_limits\": {\n \"__Entity__\": 20,\n \"__Relationship__\": 20,\n \"__Community__\": 20,\n },\n \"max_community_description_length\": 65536,\n \"max_llm_queries_for_global_search\": 250\n },\n \"rag_generation_config\": {\n \"stream\": false,\n \"temperature\": 0.7,\n \"max_tokens\": 150\n }\n }'\n"}]}},"/v2/agent":{"post":{"summary":"Agent App","description":"Implement an agent-based interaction for complex query processing.\n\nThis endpoint supports multi-turn conversations and can handle complex queries\nby breaking them down into sub-tasks. It uses the same filtering capabilities\nas the search and RAG endpoints for retrieving relevant information.\n\nThe agent's behavior can be customized using the rag_generation_config and\ntask_prompt_override parameters.","operationId":"agent_app_v2_agent_post","security":[{"HTTPBearer":[]}],"parameters":[{"name":"response_model","in":"query","required":false,"schema":{"title":"Response Model"}}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_agent_app_v2_agent_post"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"x-codeSamples":[{"lang":"Python","source":"from r2r import R2RClient\n\nclient = R2RClient(\"http://localhost:7272\")\n# when using auth, do client.login(...)\n\nresult = client.agent(\n messages=[\n {\"role\": \"user\", \"content\": \"Who is the greatest philospher of all time?\"},\n {\"role\": \"assistant\", \"content\": \"Aristotle is widely considered the greatest philospher of all time.\"},\n {\"role\": \"user\", \"content\": \"Can you tell me more about him?\"}\n ],\n vector_search_settings={\n \"use_vector_search\": True,\n \"filters\": {\"document_id\": {\"eq\": \"5e157b3a-8469-51db-90d9-52e7d896b49b\"}},\n \"search_limit\": 20,\n \"use_hybrid_search\": True\n },\n rag_generation_config={\n \"stream\": False,\n \"temperature\": 0.7,\n \"max_tokens\": 200\n },\n include_title_if_available=True\n)\n"},{"lang":"Shell","source":"curl -X POST \"https://api.example.com/agent\" \\\n -H \"Content-Type: application/json\" \\\n -H \"Authorization: Bearer YOUR_API_KEY\" \\\n -d '{\n \"messages\": [\n {\"role\": \"user\", \"content\": \"Who is the greatest philospher of all time?\"},\n {\"role\": \"assistant\", \"content\": \"Aristotle is widely considered the greatest philospher of all time.\"},\n {\"role\": \"user\", \"content\": \"Can you tell me more about him?\"}\n ],\n \"vector_search_settings\": {\n \"use_vector_search\": true,\n \"filters\": {\"document_id\": {\"eq\": \"5e157b3a-8469-51db-90d9-52e7d896b49b\"}},\n \"search_limit\": 20,\n \"use_hybrid_search\": true\n },\n \"kg_search_settings\": {\n \"use_kg_search\": false # to enable this, please read the graphrag cookbook\n },\n \"rag_generation_config\": {\n \"stream\": false,\n \"temperature\": 0.7,\n \"max_tokens\": 200\n },\n \"include_title_if_available\": true\n }'\n"}]}},"/v2/register":{"post":{"summary":"Register App","description":"Register a new user with the given email and password.","operationId":"register_app_v2_register_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_register_app_v2_register_post"}}},"required":true},"responses":{"200":{"description":"Successful 
Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_UserResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/verify_email":{"post":{"summary":"Verify Email App","description":"Verify a user's email address.\n\nThis endpoint is used to confirm a user's email address using the verification code\nsent to their email after registration.","operationId":"verify_email_app_v2_verify_email_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_verify_email_app_v2_verify_email_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_GenericMessageResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/login":{"post":{"summary":"Login App","description":"Authenticate a user and provide access tokens.\n\nThis endpoint authenticates a user using their email (username) and password,\nand returns access and refresh tokens upon successful authentication.","operationId":"login_app_v2_login_post","requestBody":{"content":{"application/x-www-form-urlencoded":{"schema":{"$ref":"#/components/schemas/Body_login_app_v2_login_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_TokenResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/logout":{"post":{"summary":"Logout App","description":"Log out the current user.\n\nThis endpoint invalidates the user's current access token, effectively logging them out.","operationId":"logout_app_v2_logout_post","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_GenericMessageResponse_"}}}}},"security":[{"OAuth2PasswordBearer":[]},{"HTTPBearer":[]}]}},"/v2/user":{"get":{"summary":"Get User App","description":"Get the current user's profile information.\n\nThis endpoint returns the profile information of the currently authenticated user.","operationId":"get_user_app_v2_user_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_UserResponse_"}}}}},"security":[{"HTTPBearer":[]}]},"put":{"summary":"Put User App","description":"Update the current user's profile information.\n\nThis endpoint allows the authenticated user to update their profile information.","operationId":"put_user_app_v2_user_put","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_put_user_app_v2_user_put"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_UserResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/refresh_access_token":{"post":{"summary":"Refresh Access Token App","description":"Refresh the access token using a refresh token.\n\nThis endpoint allows users to obtain a new access token using their refresh 
token.","operationId":"refresh_access_token_app_v2_refresh_access_token_post","requestBody":{"content":{"application/json":{"schema":{"type":"string","title":"Refresh Token","description":"Refresh token"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_TokenResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/change_password":{"post":{"summary":"Change Password App","description":"Change the authenticated user's password.\n\nThis endpoint allows users to change their password by providing their current password\nand a new password.","operationId":"change_password_app_v2_change_password_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_change_password_app_v2_change_password_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_GenericMessageResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/request_password_reset":{"post":{"summary":"Request Password Reset App","description":"Request a password reset for a user.\n\nThis endpoint initiates the password reset process by sending a reset link\nto the specified email address.","operationId":"request_password_reset_app_v2_request_password_reset_post","requestBody":{"content":{"application/json":{"schema":{"type":"string","format":"email","title":"Email","description":"User's email address"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_GenericMessageResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/reset_password":{"post":{"summary":"Reset Password App","operationId":"reset_password_app_v2_reset_password_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_reset_password_app_v2_reset_password_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_GenericMessageResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/user/{user_id}":{"delete":{"summary":"Delete User App","description":"Delete a user account.\n\nThis endpoint allows users to delete their own account or, for superusers,\nto delete any user account.","operationId":"delete_user_app_v2_user__user_id__delete","security":[{"HTTPBearer":[]}],"parameters":[{"name":"user_id","in":"path","required":true,"schema":{"type":"string","description":"ID of the user to delete","title":"User Id"},"description":"ID of the user to delete"}],"requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_delete_user_app_v2_user__user_id__delete"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_GenericMessageResponse_"}}}},"422":{"description":"Validation 
Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/create_graph":{"post":{"summary":"Create Graph","description":"Creating a graph on your documents. This endpoint takes input a list of document ids and KGCreationSettings. If document IDs are not provided, the graph will be created on all documents in the system.\n\nThis step extracts the relevant entities and relationships from the documents and creates a graph based on the extracted information.\n\nIn order to do GraphRAG, you will need to run the enrich_graph endpoint.","operationId":"create_graph_v2_create_graph_post","security":[{"HTTPBearer":[]}],"parameters":[{"name":"response_model","in":"query","required":false,"schema":{"title":"Response Model"}}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_create_graph_v2_create_graph_post"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/enrich_graph":{"post":{"summary":"Enrich Graph","description":"This endpoint enriches the graph with additional information. It creates communities of nodes based on their similarity and adds embeddings to the graph. This step is necessary for GraphRAG to work.","operationId":"enrich_graph_v2_enrich_graph_post","security":[{"HTTPBearer":[]}],"parameters":[{"name":"response_model","in":"query","required":false,"schema":{"title":"Response Model"}}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_enrich_graph_v2_enrich_graph_post"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/openapi_spec":{"get":{"summary":"Openapi Spec","operationId":"openapi_spec_v2_openapi_spec_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}}}}}},"components":{"schemas":{"Body_add_prompt_app_v2_add_prompt_post":{"properties":{"name":{"type":"string","title":"Name","description":"Prompt name"},"template":{"type":"string","title":"Template","description":"Prompt template"},"input_types":{"additionalProperties":{"type":"string"},"type":"object","title":"Input Types","description":"Input types","default":{}}},"type":"object","required":["name","template"],"title":"Body_add_prompt_app_v2_add_prompt_post"},"Body_add_user_to_collection_app_v2_add_user_to_collection_post":{"properties":{"user_id":{"type":"string","title":"User Id","description":"User ID"},"collection_id":{"type":"string","title":"Collection Id","description":"Collection ID"}},"type":"object","required":["user_id","collection_id"],"title":"Body_add_user_to_collection_app_v2_add_user_to_collection_post"},"Body_agent_app_v2_agent_post":{"properties":{"messages":{"items":{"$ref":"#/components/schemas/Message"},"type":"array","title":"Messages","description":"List of message objects"},"vector_search_settings":{"$ref":"#/components/schemas/VectorSearchSettings","description":"Vector search settings"},"kg_search_settings":{"$ref":"#/components/schemas/KGSearchSettings","description":"Knowledge graph search settings"},"rag_generation_config":{"$ref":"#/components/schemas/GenerationConfig","description":"RAG 
generation configuration"},"task_prompt_override":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Task Prompt Override","description":"Task prompt override"},"include_title_if_available":{"type":"boolean","title":"Include Title If Available","description":"Includes document title in chunk response, if available.","default":true}},"type":"object","required":["messages"],"title":"Body_agent_app_v2_agent_post"},"Body_assign_document_to_collection_app_v2_assign_document_to_collection_post":{"properties":{"document_id":{"type":"string","title":"Document Id","description":"Document ID"},"collection_id":{"type":"string","title":"Collection Id","description":"Collection ID"}},"type":"object","required":["document_id","collection_id"],"title":"Body_assign_document_to_collection_app_v2_assign_document_to_collection_post"},"Body_change_password_app_v2_change_password_post":{"properties":{"current_password":{"type":"string","title":"Current Password","description":"Current password"},"new_password":{"type":"string","title":"New Password","description":"New password"}},"type":"object","required":["current_password","new_password"],"title":"Body_change_password_app_v2_change_password_post"},"Body_create_collection_app_v2_create_collection_post":{"properties":{"name":{"type":"string","title":"Name","description":"Collection name"},"description":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Description","description":"Collection description","default":""}},"type":"object","required":["name"],"title":"Body_create_collection_app_v2_create_collection_post"},"Body_create_graph_v2_create_graph_post":{"properties":{"collection_id":{"type":"string","title":"Collection Id","description":"Collection ID to create graph for."},"kg_creation_settings":{"anyOf":[{"type":"object"},{"$ref":"#/components/schemas/KGCreationSettings"},{"type":"null"}],"title":"Kg Creation Settings","description":"Settings for the graph creation process.","default":"{}"}},"type":"object","required":["collection_id"],"title":"Body_create_graph_v2_create_graph_post"},"Body_delete_user_app_v2_user__user_id__delete":{"properties":{"password":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Password","description":"User's current password"},"delete_vector_data":{"anyOf":[{"type":"boolean"},{"type":"null"}],"title":"Delete Vector Data","description":"Whether to delete the user's vector data","default":false}},"type":"object","title":"Body_delete_user_app_v2_user__user_id__delete"},"Body_enrich_graph_v2_enrich_graph_post":{"properties":{"collection_id":{"type":"string","title":"Collection Id","description":"Collection name to enrich graph for."},"kg_enrichment_settings":{"anyOf":[{"type":"object"},{"$ref":"#/components/schemas/KGEnrichmentSettings"},{"type":"null"}],"title":"Kg Enrichment Settings","description":"Settings for the graph enrichment process.","default":"{}"}},"type":"object","required":["collection_id"],"title":"Body_enrich_graph_v2_enrich_graph_post"},"Body_ingest_files_app_v2_ingest_files_post":{"properties":{"files":{"items":{"type":"string","format":"binary"},"type":"array","title":"Files","description":"The list of paths of input files to ingest into the system."},"document_ids":{"anyOf":[{"type":"string","contentMediaType":"application/json","contentSchema":{"items":{"type":"string","format":"uuid"},"type":"array"}},{"type":"null"}],"title":"Document Ids","description":"An optional list of document ids for each file. 
If not provided, the system will generate a unique document id via the `generate_user_document_id` method."},"metadatas":{"anyOf":[{"type":"string","contentMediaType":"application/json","contentSchema":{"items":{"type":"object"},"type":"array"}},{"type":"null"}],"title":"Metadatas","description":"An optional list of JSON metadata to affix to each file"},"ingestion_config":{"anyOf":[{"type":"string","contentMediaType":"application/json","contentSchema":{"type":"object"}},{"type":"null"}],"title":"Ingestion Config","description":"An optional dictionary to override the default chunking configuration for the ingestion process. If not provided, the system will use the default server-side chunking configuration."}},"type":"object","required":["files"],"title":"Body_ingest_files_app_v2_ingest_files_post"},"Body_login_app_v2_login_post":{"properties":{"grant_type":{"anyOf":[{"type":"string","pattern":"password"},{"type":"null"}],"title":"Grant Type"},"username":{"type":"string","title":"Username"},"password":{"type":"string","title":"Password"},"scope":{"type":"string","title":"Scope","default":""},"client_id":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Client Id"},"client_secret":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Client Secret"}},"type":"object","required":["username","password"],"title":"Body_login_app_v2_login_post"},"Body_put_user_app_v2_user_put":{"properties":{"email":{"anyOf":[{"type":"string","format":"email"},{"type":"null"}],"title":"Email","description":"Updated email address"},"name":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Name","description":"Updated user name"},"bio":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Bio","description":"Updated user bio"},"profile_picture":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Profile Picture","description":"Updated profile picture URL"}},"type":"object","title":"Body_put_user_app_v2_user_put"},"Body_rag_app_v2_rag_post":{"properties":{"query":{"type":"string","title":"Query","description":"RAG query"},"vector_search_settings":{"$ref":"#/components/schemas/VectorSearchSettings","description":"Vector search settings"},"kg_search_settings":{"$ref":"#/components/schemas/KGSearchSettings","description":"Knowledge graph search settings"},"rag_generation_config":{"$ref":"#/components/schemas/GenerationConfig","description":"RAG generation configuration"},"task_prompt_override":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Task Prompt Override","description":"Task prompt override"},"include_title_if_available":{"type":"boolean","title":"Include Title If Available","description":"Includes document title in chunk response, if available.","default":false}},"type":"object","required":["query"],"title":"Body_rag_app_v2_rag_post"},"Body_register_app_v2_register_post":{"properties":{"email":{"type":"string","format":"email","title":"Email","description":"User's email address"},"password":{"type":"string","title":"Password","description":"User's password"}},"type":"object","required":["email","password"],"title":"Body_register_app_v2_register_post"},"Body_remove_document_from_collection_app_v2_remove_document_from_collection_post":{"properties":{"document_id":{"type":"string","title":"Document Id","description":"Document ID"},"collection_id":{"type":"string","title":"Collection Id","description":"Collection 
ID"}},"type":"object","required":["document_id","collection_id"],"title":"Body_remove_document_from_collection_app_v2_remove_document_from_collection_post"},"Body_remove_user_from_collection_app_v2_remove_user_from_collection_post":{"properties":{"user_id":{"type":"string","title":"User Id","description":"User ID"},"collection_id":{"type":"string","title":"Collection Id","description":"Collection ID"}},"type":"object","required":["user_id","collection_id"],"title":"Body_remove_user_from_collection_app_v2_remove_user_from_collection_post"},"Body_reset_password_app_v2_reset_password_post":{"properties":{"reset_token":{"type":"string","title":"Reset Token","description":"Password reset token"},"new_password":{"type":"string","title":"New Password","description":"New password"}},"type":"object","required":["reset_token","new_password"],"title":"Body_reset_password_app_v2_reset_password_post"},"Body_score_completion_v2_score_completion_post":{"properties":{"message_id":{"type":"string","title":"Message Id","description":"Message ID"},"score":{"type":"number","title":"Score","description":"Completion score"}},"type":"object","required":["message_id","score"],"title":"Body_score_completion_v2_score_completion_post"},"Body_search_app_v2_search_post":{"properties":{"query":{"type":"string","title":"Query","description":"Search query"},"vector_search_settings":{"$ref":"#/components/schemas/VectorSearchSettings","description":"Vector search settings"},"kg_search_settings":{"$ref":"#/components/schemas/KGSearchSettings","description":"Knowledge graph search settings"}},"type":"object","required":["query"],"title":"Body_search_app_v2_search_post"},"Body_update_collection_app_v2_update_collection_put":{"properties":{"collection_id":{"type":"string","title":"Collection Id","description":"Collection ID"},"name":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Name","description":"Updated collection name"},"description":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Description","description":"Updated collection description"}},"type":"object","required":["collection_id"],"title":"Body_update_collection_app_v2_update_collection_put"},"Body_update_files_app_v2_update_files_post":{"properties":{"files":{"items":{"type":"string","format":"binary"},"type":"array","title":"Files","description":"The list of paths of input files to update in the system."},"document_ids":{"anyOf":[{"type":"string","contentMediaType":"application/json","contentSchema":{"items":{"type":"string","format":"uuid"},"type":"array"}},{"type":"null"}],"title":"Document Ids","description":"An optional list of document ids for each file. If not provided, the system will generate a unique document id via the `generate_user_document_id` method."},"metadatas":{"anyOf":[{"type":"string","contentMediaType":"application/json","contentSchema":{"items":{"type":"object"},"type":"array"}},{"type":"null"}],"title":"Metadatas","description":"An optional list of JSON metadata to affix to each file"},"ingestion_config":{"anyOf":[{"type":"string","contentMediaType":"application/json","contentSchema":{"type":"object"}},{"type":"null"}],"title":"Ingestion Config","description":"An optional dictionary to override the default chunking configuration for the ingestion process. 
If not provided, the system will use the default server-side chunking configuration."}},"type":"object","required":["files"],"title":"Body_update_files_app_v2_update_files_post"},"Body_update_prompt_app_v2_update_prompt_post":{"properties":{"name":{"type":"string","title":"Name","description":"Prompt name"},"template":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Template","description":"Prompt template"},"input_types":{"anyOf":[{"additionalProperties":{"type":"string"},"type":"object"},{"type":"null"}],"title":"Input Types","description":"Input types","default":{}}},"type":"object","required":["name"],"title":"Body_update_prompt_app_v2_update_prompt_post"},"Body_verify_email_app_v2_verify_email_post":{"properties":{"email":{"type":"string","format":"email","title":"Email","description":"User's email address"},"verification_code":{"type":"string","title":"Verification Code","description":"Email verification code"}},"type":"object","required":["email","verification_code"],"title":"Body_verify_email_app_v2_verify_email_post"},"GenerationConfig":{"properties":{"model":{"type":"string","title":"Model"},"temperature":{"type":"number","title":"Temperature"},"top_p":{"type":"number","title":"Top P"},"max_tokens_to_sample":{"type":"integer","title":"Max Tokens To Sample"},"stream":{"type":"boolean","title":"Stream"},"functions":{"anyOf":[{"items":{"type":"object"},"type":"array"},{"type":"null"}],"title":"Functions"},"tools":{"anyOf":[{"items":{"type":"object"},"type":"array"},{"type":"null"}],"title":"Tools"},"add_generation_kwargs":{"anyOf":[{"type":"object"},{"type":"null"}],"title":"Add Generation Kwargs"},"api_base":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Api Base"}},"type":"object","title":"GenerationConfig","max_tokens_to_sample":1024,"model":"openai/gpt-4o","stream":false,"temperature":0.1,"top_p":1.0},"GenericMessageResponse":{"properties":{"message":{"type":"string","title":"Message"}},"type":"object","required":["message"],"title":"GenericMessageResponse"},"HTTPValidationError":{"properties":{"detail":{"items":{"$ref":"#/components/schemas/ValidationError"},"type":"array","title":"Detail"}},"type":"object","title":"HTTPValidationError"},"HybridSearchSettings":{"properties":{"full_text_weight":{"type":"number","title":"Full Text Weight","description":"Weight to apply to full text search","default":1.0},"semantic_weight":{"type":"number","title":"Semantic Weight","description":"Weight to apply to semantic search","default":5.0},"full_text_limit":{"type":"integer","title":"Full Text Limit","description":"Maximum number of results to return from full text search","default":200},"rrf_k":{"type":"integer","title":"Rrf K","description":"K-value for RRF (Rank Reciprocal Fusion)","default":50}},"type":"object","title":"HybridSearchSettings"},"IndexMeasure":{"type":"string","enum":["cosine_distance","l2_distance","max_inner_product"],"title":"IndexMeasure","description":"An enum representing the types of distance measures available for indexing.\n\nAttributes:\n cosine_distance (str): The cosine distance measure for indexing.\n l2_distance (str): The Euclidean (L2) distance measure for indexing.\n max_inner_product (str): The maximum inner product measure for indexing."},"KGCreationSettings":{"properties":{"kg_triples_extraction_prompt":{"type":"string","title":"Kg Extraction Prompt","description":"The prompt to use for knowledge graph extraction.","default":"graphrag_triples_extraction_few_shot"},"force_kg_creation":{"type":"boolean","title":"Force Kg 
Creation","description":"Force run the KG creation step even if the graph is already created.","default":false},"entity_types":{"items":{"type":"string"},"type":"array","title":"Entity Types","description":"The types of entities to extract.","default":[]},"relation_types":{"items":{"type":"string"},"type":"array","title":"Relation Types","description":"The types of relations to extract.","default":[]},"extraction_merge_count":{"type":"integer","title":"Extraction Merge Count","description":"The number of extractions to merge into a single KG extraction.","default":4},"max_knowledge_triples":{"type":"integer","title":"Max Knowledge Triples","description":"The maximum number of knowledge triples to extract from each chunk.","default":100},"max_description_input_length":{"type":"integer","title":"Max Description Input Length","description":"The maximum length of the description for a node in the graph.","default":65536},"generation_config":{"$ref":"#/components/schemas/GenerationConfig","description":"Configuration for text generation during graph enrichment."}},"type":"object","title":"KGCreationSettings","description":"Settings for knowledge graph creation."},"KGEnrichmentSettings":{"properties":{"skip_clustering":{"type":"boolean","title":"Skip Clustering","description":"Whether to skip leiden clustering on the graph or not.","default":false},"force_enrichment":{"type":"boolean","title":"Force Enrichment","description":"Force run the enrichment step even if graph creation is still in progress for some documents.","default":false},"community_reports_prompt":{"type":"string","title":"Community Reports Prompt","description":"The prompt to use for knowledge graph enrichment.","default":"graphrag_community_reports_prompt"},"max_summary_input_length":{"type":"integer","title":"Max Summary Input Length","description":"The maximum length of the summary for a community.","default":65536},"generation_config":{"$ref":"#/components/schemas/GenerationConfig","description":"Configuration for text generation during graph enrichment."},"leiden_params":{"type":"object","title":"Leiden Params","description":"Parameters for the Leiden algorithm."}},"type":"object","title":"KGEnrichmentSettings","description":"Settings for knowledge graph enrichment."},"KGSearchSettings":{"properties":{"graphrag_map_system_prompt":{"type":"string","title":"Graphrag Map System Prompt","description":"The system prompt for the graphrag map prompt.","default":"graphrag_map_system_prompt"},"graphrag_reduce_system_prompt":{"type":"string","title":"Graphrag Reduce System Prompt","description":"The system prompt for the graphrag reduce prompt.","default":"graphrag_reduce_system_prompt"},"use_kg_search":{"type":"boolean","title":"Use Kg Search","description":"Whether to use KG search","default":false},"kg_search_type":{"type":"string","title":"Kg Search Type","description":"KG search type","default":"local"},"kg_search_level":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Kg Search Level","description":"KG search level"},"generation_config":{"$ref":"#/components/schemas/GenerationConfig","description":"Configuration for text generation during graph search."},"max_community_description_length":{"type":"integer","title":"Max Community Description Length","default":65536},"max_llm_queries_for_global_search":{"type":"integer","title":"Max Llm Queries For Global Search","default":250},"local_search_limits":{"additionalProperties":{"type":"integer"},"type":"object","title":"Local Search 
Limits","default":{"__Entity__":20,"__Relationship__":20,"__Community__":20}}},"type":"object","title":"KGSearchSettings","generation_config":{"max_tokens_to_sample":1024,"model":"openai/gpt-4o","stream":false,"temperature":0.1,"top_p":1.0},"kg_search_level":"0","kg_search_type":"global","local_search_limits":{"__Community__":20,"__Entity__":20,"__Relationship__":20},"max_community_description_length":65536,"max_llm_queries_for_global_search":250,"use_kg_search":true},"Message":{"properties":{"role":{"anyOf":[{"$ref":"#/components/schemas/MessageType"},{"type":"string"}],"title":"Role"},"content":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Content"},"name":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Name"},"function_call":{"anyOf":[{"type":"object"},{"type":"null"}],"title":"Function Call"},"tool_calls":{"anyOf":[{"items":{"type":"object"},"type":"array"},{"type":"null"}],"title":"Tool Calls"}},"type":"object","required":["role"],"title":"Message","content":"This is a test message.","role":"user"},"MessageType":{"type":"string","enum":["system","user","assistant","function","tool"],"title":"MessageType"},"ResultsWrapper_GenericMessageResponse_":{"properties":{"results":{"$ref":"#/components/schemas/GenericMessageResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[GenericMessageResponse]"},"ResultsWrapper_TokenResponse_":{"properties":{"results":{"$ref":"#/components/schemas/TokenResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[TokenResponse]"},"ResultsWrapper_UserResponse_":{"properties":{"results":{"$ref":"#/components/schemas/UserResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[UserResponse]"},"Token":{"properties":{"token":{"type":"string","title":"Token"},"token_type":{"type":"string","title":"Token Type"}},"type":"object","required":["token","token_type"],"title":"Token"},"TokenResponse":{"properties":{"access_token":{"$ref":"#/components/schemas/Token"},"refresh_token":{"$ref":"#/components/schemas/Token"}},"type":"object","required":["access_token","refresh_token"],"title":"TokenResponse"},"UserResponse":{"properties":{"id":{"type":"string","format":"uuid","title":"Id"},"email":{"type":"string","title":"Email"},"is_active":{"type":"boolean","title":"Is Active","default":true},"is_superuser":{"type":"boolean","title":"Is Superuser","default":false},"created_at":{"type":"string","format":"date-time","title":"Created At","default":"2024-09-30T22:13:38.156070"},"updated_at":{"type":"string","format":"date-time","title":"Updated At","default":"2024-09-30T22:13:38.156077"},"is_verified":{"type":"boolean","title":"Is Verified","default":false},"collection_ids":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Collection Ids","default":[]},"hashed_password":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Hashed Password"},"verification_code_expiry":{"anyOf":[{"type":"string","format":"date-time"},{"type":"null"}],"title":"Verification Code Expiry"},"name":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Name"},"bio":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Bio"},"profile_picture":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Profile Picture"}},"type":"object","required":["id","email"],"title":"UserResponse"},"ValidationError":{"properties":{"loc":{"items":{"anyOf":[{"type":"string"},{"type":"integer"}]},"type":"array","title":"Location"},"msg":{"type":"string","title":"Message"},"type":{"type":"string","title":"Error 
Type"}},"type":"object","required":["loc","msg","type"],"title":"ValidationError"},"VectorSearchSettings":{"properties":{"use_vector_search":{"type":"boolean","title":"Use Vector Search","description":"Whether to use vector search","default":true},"use_hybrid_search":{"type":"boolean","title":"Use Hybrid Search","description":"Whether to perform a hybrid search (combining vector and keyword search)","default":false},"filters":{"type":"object","title":"Filters","description":"Filters to apply to the vector search"},"search_limit":{"type":"integer","maximum":1000.0,"minimum":1.0,"title":"Search Limit","description":"Maximum number of results to return","default":10},"offset":{"type":"integer","minimum":0.0,"title":"Offset","description":"Offset to paginate search results","default":0},"selected_collection_ids":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Selected Collection Ids","description":"Collection IDs to search for"},"index_measure":{"$ref":"#/components/schemas/IndexMeasure","description":"The distance measure to use for indexing","default":"cosine_distance"},"include_values":{"type":"boolean","title":"Include Values","description":"Whether to include search score values in the search results","default":true},"include_metadatas":{"type":"boolean","title":"Include Metadatas","description":"Whether to include element metadata in the search results","default":true},"probes":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Probes","description":"Number of ivfflat index lists to query. Higher increases accuracy but decreases speed.","default":10},"ef_search":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Ef Search","description":"Size of the dynamic candidate list for HNSW index search. Higher increases accuracy but decreases speed.","default":40},"hybrid_search_settings":{"anyOf":[{"$ref":"#/components/schemas/HybridSearchSettings"},{"type":"null"}],"description":"Settings for hybrid search","default":{"full_text_weight":1.0,"semantic_weight":5.0,"full_text_limit":200,"rrf_k":50}},"search_strategy":{"type":"string","title":"Search Strategy","description":"Search strategy to use (e.g., 'default', 'query_fusion', 'hyde')","default":"vanilla"}},"type":"object","title":"VectorSearchSettings","ef_search":40,"filters":{"category":"technology"},"hybrid_search_settings":{"full_text_limit":200,"full_text_weight":1.0,"rrf_k":50,"semantic_weight":5.0},"include_metadata":true,"index_measure":"cosine_distance","limit":20,"offset":0,"probes":10,"selected_collection_ids":["2acb499e-8428-543b-bd85-0d9098718220","3e157b3a-8469-51db-90d9-52e7d896b49b"],"use_hybrid_search":true,"use_vector_search":true}},"securitySchemes":{"HTTPBearer":{"type":"http","scheme":"bearer"},"OAuth2PasswordBearer":{"type":"oauth2","flows":{"password":{"scopes":{},"tokenUrl":"token"}}}}}} diff --git a/docs/cookbooks/application.mdx b/docs/cookbooks/application.mdx index 13e4050c5..ed0de2d49 100644 --- a/docs/cookbooks/application.mdx +++ b/docs/cookbooks/application.mdx @@ -6,8 +6,6 @@ icon: 'display' R2R offers an [open-source React+Next.js application](https://github.com/SciPhi-AI/R2R-Application) designed to give developers an administrative portal for their R2R deployment, and users an application to communicate with out of the box. -In addition, R2R comes with an orchestration GUI powered by Hatchet, which you can learn about [here](/cookbooks/orchestration). - ## Setup ### Install PNPM @@ -36,7 +34,7 @@ After installation, you may need to add PNPM to your system's PATH. 
### Installing and Running the R2R Dashboard -If you're running R2R with the Docker, you already have the R2R dashboard running! Just navigate to [http://localhost:3000](http://localhost:3000). +If you're running R2R with Docker, you already have the R2R application running! Just navigate to [http://localhost:7273](http://localhost:7273). If you're running R2R outside of Docker, run the following commands to install the R2R Dashboard. @@ -74,15 +72,27 @@ By default, an R2R instance is hosted on port 7272. The login page will include ### Documents -The documents page provides an overview of uploaded documents and their metadata. You can upload new documents and update or delete existing ones. +The documents page provides an overview of uploaded documents and their metadata. You can upload new documents and update, download, or delete existing ones. Additionally, you can view information about each document, including its chunks and previews of PDFs. ![Documents Page](/images/oss_dashboard_documents.png) -### Playground +### Collections + +Collections allow users to create and share sets of documents. The collections page provides a place to manage your existing collections or create new ones. + +![Collections Page](/images/oss_collections_page.png) + +### Chat + +On the chat page, you can stream RAG responses with different models and configurable settings. You can interact with both the RAG Agent and RAG endpoints here. + +![Chat Interface](/images/chat.png) + +### Users -The playground allows streaming RAG responses with different models and configurable settings. +Manage your users and gain insight into their interactions. -![Playground Interface](/images/playground.png) +![Users Page](/images/users.png) ### Logs diff --git a/docs/cookbooks/graphrag.mdx b/docs/cookbooks/graphrag.mdx index a9d08dc6e..e85ec97e0 100644 --- a/docs/cookbooks/graphrag.mdx +++ b/docs/cookbooks/graphrag.mdx @@ -30,25 +30,25 @@ r2r serve ``` toml [kg] -provider = "neo4j" +provider = "postgres" batch_size = 256 [kg.kg_creation_settings] - kg_extraction_prompt = "graphrag_triplet_extraction_zero_shot" + kg_triples_extraction_prompt = "graphrag_triples_extraction_few_shot" entity_types = [] # if empty, all entities are extracted relation_types = [] # if empty, all relations are extracted max_knowledge_triples = 100 fragment_merge_count = 4 # number of fragments to merge into a single extraction - generation_config = { model = "gpt-4o-mini" } # and other params, model used for triplet extraction + generation_config = { model = "openai/gpt-4o-mini" } # and other params, model used for triplet extraction [kg.kg_enrichment_settings] max_description_input_length = 65536 # increase if you want more comprehensive descriptions max_summary_input_length = 65536 # increase if you want more comprehensive summaries - generation_config = { model = "gpt-4o-mini" } # and other params, model used for node description and graph clustering - leiden_params = { max_levels = 10 } # more params here: https://neo4j.com/docs/graph-data-science/current/algorithms/leiden/ + generation_config = { model = "openai/gpt-4o-mini" } # and other params, model used for node description and graph clustering + leiden_params = {} [kg.kg_search_settings] - generation_config = { model = "gpt-4o-mini" } + generation_config = { model = "openai/gpt-4o-mini" } ``` @@ -92,13 +92,13 @@ batch_size = 32 add_title_as_prefix = true [parsing] -excluded_parsers = [ "gif", "jpeg", "jpg", "png", "svg", "mp3", "mp4" ] +excluded_parsers = [ "mp4" ]
[kg] -provider = "neo4j" +provider = "postgres" [kg.kg_creation_settings] - kg_extraction_prompt = "graphrag_triplet_extraction_zero_shot" + kg_triples_extraction_prompt = "graphrag_triples_extraction_few_shot" entity_types = [] # if empty, all entities are extracted relation_types = [] # if empty, all relations are extracted max_knowledge_triples = 100 @@ -109,7 +109,7 @@ provider = "neo4j" max_description_input_length = 65536 # increase if you want more comprehensive descriptions max_summary_input_length = 65536 generation_config = { model = "ollama/llama3.1" } # and other params, model used for node description and graph clustering - leiden_params = { max_levels = 10 } # more params here: https://neo4j.com/docs/graph-data-science/current/algorithms/leiden/ + leiden_params = {} [kg.kg_search_settings] generation_config = { model = "ollama/llama3.1" } @@ -175,10 +175,7 @@ r2r create-graph --document-ids=9fbe403b-c11c-5aae-8ade-ef22980c3ad1 [{'message': 'Graph creation task queued successfully.', 'task_id': 'd9dae1bb-5862-4a16-abaf-5297024df390'}] ``` -This step will create a knowledge graph with nodes and relationships. You can visualize the graph in two ways: - - -1. Using the neo4j browser on `http://localhost:7474`. The username and password are `neo4j` and `ineedastrongerpassword`. To visualize the graph, run the following command in the neo4j browser: +This step will create a knowledge graph with nodes and relationships. Below is a visualization of the graph which we produced with Neo4j: ``` MATCH (a) diff --git a/docs/cookbooks/observability.mdx b/docs/cookbooks/observability.mdx index ff838c101..cbc3e1b5f 100644 --- a/docs/cookbooks/observability.mdx +++ b/docs/cookbooks/observability.mdx @@ -254,7 +254,7 @@ queries = [ # Perform random searches for _ in range(1000): query = random.choice(queries) - app.rag(query, GenerationConfig(model="gpt-4o-mini")) + app.rag(query, GenerationConfig(model="openai/gpt-4o-mini")) print("Preloading complete. You can now run analytics on this data.") ``` diff --git a/docs/cookbooks/user-auth.mdx b/docs/cookbooks/user-auth.mdx index b9b821bd5..0c47b72f4 100644 --- a/docs/cookbooks/user-auth.mdx +++ b/docs/cookbooks/user-auth.mdx @@ -91,7 +91,7 @@ search_result = client.search(query="Sample search query") # {'results': {'vector_search_results': [{ ... 'metadata': {'text': 'Aristotle[A] (Greek: Ἀριστοτέλης Aristotélēs, pronounced [aristotélɛːs]; 384–322 BC) was an Ancient Greek philosopher and polymath. His writings cover a broad range of subjects spanning the natural sciences, philosophy, linguistics, economics, politics, psychology, and the arts. As the founder of the Peripatetic school of philosophy in the Lyceum in Athens, he began the wider Aristotelian tradition that followed, which set the groundwork for the development of modern science.', 'title': 'aristotle.txt', 'user_id': 'bf417057-f104-4e75-8579-c74d26fcbed3', 'version': 'v0', 'chunk_order': 0, 'document_id': 'a2645197-d07f-558d-ba55-f7a60eb29621', 'extraction_id': 'b7bbd497-311a-5dc8-8a51-79e2208739e0', 'associatedQuery': 'Who was Aristotle'}}, {'id': '781ce9e6-9e73-5012-8445-35b7d84f161c', 'score': 0.670799394202279, 'metadata': {'text': "Aristotle was born in 384 BC[C] in Stagira, Chalcidice,[2] about 55 km (34 miles) east of modern-day Thessaloniki.[3][4] His father, Nicomachus, was the personal physician to King Amyntas of Macedon. 
While he was young, Aristotle learned about biology and medical information, which was taught by his father.[5] Both of Aristotle's parents died when he was about thirteen, and Proxenus of Atarneus became his guardian.[6] Although little information about Aristotle's childhood has survived, he probably spent", 'title': 'aristotle.txt', 'user_id': 'bf417057-f104-4e75-8579-c74d26fcbed3', 'version': 'v0', 'chunk_order': 8, 'document_id': 'a2645197-d07f-558d-ba55-f7a60eb29621', 'extraction_id': 'b7bbd497-311a-5dc8-8a51-79e2208739e0', 'associatedQuery': 'Who was Aristotle'}}, {'id': 'f32cda7c-2538-5248-b0b6-4d0d45cc4d60', 'score': 0.667974928858889, 'metadata': {'text': 'Aristotle was revered among medieval Muslim scholars as "The First Teacher", and among medieval Christians like Thomas Aquinas as simply "The Philosopher", while the poet Dante called him "the master of those who know". His works contain the earliest known formal study of logic, and were studied by medieval scholars such as Peter Abelard and Jean Buridan. Aristotle\'s influence on logic continued well into the 19th century. In addition, his ethics, although always influential, gained renewed interest with', 'title': 'aristotle.txt', 'user_id': 'bf417057-f104-4e75-8579-c74d26fcbed3', 'version': 'v0', 'chunk_order': 5, 'document_id': 'a2645197-d07f-558d-ba55-f7a60eb29621', 'extraction_id': 'b7bbd497-311a-5dc8-8a51-79e2208739e0', 'associatedQuery': 'Who was Aristotle'}}, {'id': 'e6592fd5-e02e-5847-b158-79bbdd8710a2', 'score': 0.6647597950983339, 'metadata': {'text': "Little is known about Aristotle's life. He was born in the city of Stagira in northern Greece during the Classical period. His father, Nicomachus, died when Aristotle was a child, and he was brought up by a guardian. At 17 or 18, he joined Plato's Academy in Athens and remained there until the age of 37 (c.\u2009347 BC). Shortly after Plato died, Aristotle left Athens and, at the request of Philip II of Macedon, tutored his son Alexander the Great beginning in 343 BC. He established a library in the Lyceum,", 'title': 'aristotle.txt', 'user_id': 'bf417057-f104-4e75-8579-c74d26fcbed3', 'version': 'v0', 'chunk_order': 1, 'document_id': 'a2645197-d07f-558d-ba55-f7a60eb29621', 'extraction_id': 'b7bbd497-311a-5dc8-8a51-79e2208739e0', 'associatedQuery': 'Who was Aristotle'}}, {'id': '8c72faca-6d98-5129-b9ee-70769272e361', 'score': 0.6476034942146001, 'metadata': {'text': 'Among countless other achievements, Aristotle was the founder of formal logic,[146] pioneered the study of zoology, and left every future scientist and philosopher in his debt through his contributions to the scientific method.[2][147][148] Taneli Kukkonen, observes that his achievement in founding two sciences is unmatched, and his reach in influencing "every branch of intellectual enterprise" including Western ethical and political theory, theology, rhetoric, and literary analysis is equally long. 
As a', 'title': 'aristotle.txt', 'user_id': 'bf417057-f104-4e75-8579-c74d26fcbed3', 'version': 'v0', 'chunk_order': 175, 'document_id': 'a2645197-d07f-558d-ba55-f7a60eb29621', 'extraction_id': 'b7bbd497-311a-5dc8-8a51-79e2208739e0', 'associatedQuery': 'Who was Aristotle'}}, {'id': '3ce904cc-5835-551a-a85c-f00be1a5e8dc', 'score': 0.626156434278918, 'metadata': {'text': 'Aristotle has been called the father of logic, biology, political science, zoology, embryology, natural law, scientific method, rhetoric, psychology, realism, criticism, individualism, teleology, and meteorology.[151]', 'title': 'aristotle.txt', 'user_id': 'bf417057-f104-4e75-8579-c74d26fcbed3', 'version': 'v0', 'chunk_order': 177, 'document_id': 'a2645197-d07f-558d-ba55-f7a60eb29621', 'extraction_id': 'b7bbd497-311a-5dc8-8a51-79e2208739e0', 'associatedQuery': 'Who was Aristotle'}}, {'id': '6a15b09b-4bf1-5c1f-af24-fe659c8a011d', 'score': 0.624521989361129, 'metadata': {'text': 'after friends and relatives, and to deal with the latter as with beasts or plants".[13] By 335 BC, Aristotle had returned to Athens, establishing his own school there known as the Lyceum. Aristotle conducted courses at the school for the next twelve years. While in Athens, his wife Pythias died and Aristotle became involved with Herpyllis of Stagira. They had a son whom Aristotle named after his father, Nicomachus. If the Suda – an uncritical compilation from the Middle Ages – is accurate, he may also have', 'title': 'aristotle.txt', 'user_id': 'bf417057-f104-4e75-8579-c74d26fcbed3', 'version': 'v0', 'chunk_order': 16, 'document_id': 'a2645197-d07f-558d-ba55-f7a60eb29621', 'extraction_id': 'b7bbd497-311a-5dc8-8a51-79e2208739e0', 'associatedQuery': 'Who was Aristotle'}}, {'id': '19a755d0-770f-5c6f-991e-ca191a40c8d6', 'score': 0.614493374720815, 'metadata': {'text': "passed to Plato's nephew Speusippus, although it is possible that he feared the anti-Macedonian sentiments in Athens at that time and left before Plato died.[10] Aristotle then accompanied Xenocrates to the court of his friend Hermias of Atarneus in Asia Minor. After the death of Hermias, Aristotle travelled with his pupil Theophrastus to the island of Lesbos, where together they researched the botany and zoology of the island and its sheltered lagoon. While in Lesbos, Aristotle married Pythias, either", 'title': 'aristotle.txt', 'user_id': 'bf417057-f104-4e75-8579-c74d26fcbed3', 'version': 'v0', 'chunk_order': 12, 'document_id': 'a2645197-d07f-558d-ba55-f7a60eb29621', 'extraction_id': 'b7bbd497-311a-5dc8-8a51-79e2208739e0', 'associatedQuery': 'Who was Aristotle'}}, {'id': '33b2dbd7-2f3a-5450-9618-976a996bde2a', 'score': 0.6117302824500019, 'metadata': {'text': 'Transmission\nFurther information: List of writers influenced by Aristotle\nMore than 2300 years after his death, Aristotle remains one of the most influential people who ever lived.[142][143][144] He contributed to almost every field of human knowledge then in existence, and he was the founder of many new fields. 
According to the philosopher Bryan Magee, "it is doubtful whether any human being has ever known as much as he did".[145]', 'title': 'aristotle.txt', 'user_id': 'bf417057-f104-4e75-8579-c74d26fcbed3', 'version': 'v0', 'chunk_order': 174, 'document_id': 'a2645197-d07f-558d-ba55-f7a60eb29621', 'extraction_id': 'b7bbd497-311a-5dc8-8a51-79e2208739e0', 'associatedQuery': 'Who was Aristotle'}}, {'id': '2d101d42-6317-5d8c-85c3-fb9b6d947c68', 'score': 0.610827455968717, 'metadata': {'text': "The immediate influence of Aristotle's work was felt as the Lyceum grew into the Peripatetic school. Aristotle's students included Aristoxenus, Dicaearchus, Demetrius of Phalerum, Eudemos of Rhodes, Harpalus, Hephaestion, Mnason of Phocis, Nicomachus, and Theophrastus. Aristotle's influence over Alexander the Great is seen in the latter's bringing with him on his expedition a host of zoologists, botanists, and researchers. He had also learned a great deal about Persian customs and traditions from his", 'title': 'aristotle.txt', 'user_id': 'bf417057-f104-4e75-8579-c74d26fcbed3', 'version': 'v0', 'chunk_order': 181, 'document_id': 'a2645197-d07f-558d-ba55-f7a60eb29621', 'extraction_id': 'b7bbd497-311a-5dc8-8a51-79e2208739e0', 'associatedQuery': 'Who was Aristotle'}}], 'kg_search_results': []}} rag_result = client.rag(query="Sample search query") -# {'results': {'completion': {'id': 'chatcmpl-9llkGYsrG1YZaWkqYvzXr1eQNl0gA', 'choices': [{'finish_reason': 'stop', 'index': 0, 'message': {'content': 'The search results for the query "Sample search query" include various topics and excerpts related to Aristotle\'s works and other subjects. Here are the relevant references:\n\n1. **Categories of Aristotle\'s Works**:\n - On Interpretation [1], [2]\n - Prior Analytics [1], [2]\n - Posterior Analytics [1], [2]\n - Topics [1], [2]\n - On Sophistical Refutations [1], [2]\n\n2. **Aristotle\'s Theory on Sense Perceptions and Memory**:\n - Aristotle\'s belief that people receive sense perceptions and perceive them as impressions, leading to the weaving together of new experiences. The search for these impressions involves searching the memory itself, where recollection occurs when one retrieved experience naturally follows another [3], [4].\n\n3. **Medieval Judaism**:\n - References to Medieval Judaism [5], [6].\n\n4. **Scientific Style**:\n - References to Scientific Style [7], [8].\n\n5. 
**Recovery of Texts by Apellicon**:\n - Apellicon\'s efforts to recover degraded texts by copying them into new manuscripts and using guesswork to fill in unreadable gaps [9], [10].\n\nThese references provide a broad overview of the topics related to the query, including Aristotle\'s works, his theories on memory, Medieval Judaism, scientific style, and the recovery of ancient texts.', 'role': 'assistant'}}], 'created': 1721171976, 'model': 'gpt-4o-2024-05-13', 'object': 'chat.completion', 'system_fingerprint': 'fp_5e997b69d8', 'usage': {'completion_tokens': 286, 'prompt_tokens': 513, 'total_tokens': 799}}, 'search_results': {'vector_search_results': [{'id': 'd70e2776-befa-5b67-9da7-b76aedb7c101', 'score': 0.270276627830369, 'metadata': {'text': 'Categories\nOn Interpretation\nPrior Analytics\nPosterior Analytics\nTopics\nOn Sophistical Refutations', 'title': 'aristotle.txt', 'user_id': '76eea168-9f98-4672-af3b-2c26ec92d7f8', 'version': 'v0', 'chunk_order': 26, 'document_id': '4bb1e5e0-3bb3-54e0-bc71-69e68bce30c7', 'extraction_id': '9401dfe6-10dd-5eb1-8b88-de1927a6c556', 'associatedQuery': 'Sample search query'}}, {'id': 'f54c9cda-0053-5ea2-a22b-aaba6437518c', 'score': 0.270276627830369, 'metadata': {'text': 'Categories\nOn Interpretation\nPrior Analytics\nPosterior Analytics\nTopics\nOn Sophistical Refutations', 'title': 'aristotle.txt', 'user_id': '2acb499e-8428-543b-bd85-0d9098718220', 'version': 'v0', 'chunk_order': 26, 'document_id': '93123a68-d668-51de-8291-92162730dc87', 'extraction_id': 'bc497a0c-4b17-5e86-97d4-aa06474e0e5b', 'associatedQuery': 'Sample search query'}}, {'id': 'd0675bcd-23d1-5982-8114-1a6459faec3f', 'score': 0.242980153623792, 'metadata': {'text': 'Because Aristotle believes people receive all kinds of sense perceptions and perceive them as impressions, people are continually weaving together new impressions of experiences. To search for these impressions, people search the memory itself.[105] Within the memory, if one experience is offered instead of a specific memory, that person will reject this experience until they find what they are looking for. Recollection occurs when one retrieved experience naturally follows another. If the chain of', 'title': 'aristotle.txt', 'user_id': '76eea168-9f98-4672-af3b-2c26ec92d7f8', 'version': 'v0', 'chunk_order': 119, 'document_id': '4bb1e5e0-3bb3-54e0-bc71-69e68bce30c7', 'extraction_id': '9401dfe6-10dd-5eb1-8b88-de1927a6c556', 'associatedQuery': 'Sample search query'}}, {'id': '69aed771-061f-5360-90f1-0ce395601b98', 'score': 0.242980153623792, 'metadata': {'text': 'Because Aristotle believes people receive all kinds of sense perceptions and perceive them as impressions, people are continually weaving together new impressions of experiences. To search for these impressions, people search the memory itself.[105] Within the memory, if one experience is offered instead of a specific memory, that person will reject this experience until they find what they are looking for. Recollection occurs when one retrieved experience naturally follows another. 
If the chain of', 'title': 'aristotle.txt', 'user_id': '2acb499e-8428-543b-bd85-0d9098718220', 'version': 'v0', 'chunk_order': 119, 'document_id': '93123a68-d668-51de-8291-92162730dc87', 'extraction_id': 'bc497a0c-4b17-5e86-97d4-aa06474e0e5b', 'associatedQuery': 'Sample search query'}}, {'id': 'dadd2d48-a2b7-5e55-9a8c-1030712c5ca0', 'score': 0.20218510005651702, 'metadata': {'text': 'Medieval Judaism', 'title': 'aristotle.txt', 'user_id': '76eea168-9f98-4672-af3b-2c26ec92d7f8', 'version': 'v0', 'chunk_order': 202, 'document_id': '4bb1e5e0-3bb3-54e0-bc71-69e68bce30c7', 'extraction_id': '9401dfe6-10dd-5eb1-8b88-de1927a6c556', 'associatedQuery': 'Sample search query'}}, {'id': 'da81f692-40d9-599b-a69b-25b6a5179b47', 'score': 0.20218510005651702, 'metadata': {'text': 'Medieval Judaism', 'title': 'aristotle.txt', 'user_id': '2acb499e-8428-543b-bd85-0d9098718220', 'version': 'v0', 'chunk_order': 202, 'document_id': '93123a68-d668-51de-8291-92162730dc87', 'extraction_id': 'bc497a0c-4b17-5e86-97d4-aa06474e0e5b', 'associatedQuery': 'Sample search query'}}, {'id': '0c4fea20-f7ee-520f-ae1f-155ecb398e1f', 'score': 0.19056136124594703, 'metadata': {'text': 'Scientific style', 'title': 'aristotle.txt', 'user_id': '2acb499e-8428-543b-bd85-0d9098718220', 'version': 'v0', 'chunk_order': 92, 'document_id': '93123a68-d668-51de-8291-92162730dc87', 'extraction_id': 'bc497a0c-4b17-5e86-97d4-aa06474e0e5b', 'associatedQuery': 'Sample search query'}}, {'id': 'c3c3145a-5d9d-5362-9629-f9159a027a9d', 'score': 0.19051768949311598, 'metadata': {'text': 'Scientific style', 'title': 'aristotle.txt', 'user_id': '76eea168-9f98-4672-af3b-2c26ec92d7f8', 'version': 'v0', 'chunk_order': 92, 'document_id': '4bb1e5e0-3bb3-54e0-bc71-69e68bce30c7', 'extraction_id': '9401dfe6-10dd-5eb1-8b88-de1927a6c556', 'associatedQuery': 'Sample search query'}}, {'id': '63e3a252-90bd-5494-9f9f-aee772f4db54', 'score': 0.18900877964391904, 'metadata': {'text': 'Apellicon sought to recover the texts, many of which were seriously degraded at this point due to the conditions in which they were stored. He had them copied out into new manuscripts, and used his best guesswork to fill in the gaps where the originals were unreadable.[216]:\u200a5–6', 'title': 'aristotle.txt', 'user_id': '76eea168-9f98-4672-af3b-2c26ec92d7f8', 'version': 'v0', 'chunk_order': 228, 'document_id': '4bb1e5e0-3bb3-54e0-bc71-69e68bce30c7', 'extraction_id': '9401dfe6-10dd-5eb1-8b88-de1927a6c556', 'associatedQuery': 'Sample search query'}}, {'id': '2c1183a8-e130-5432-a311-ee1f0f194562', 'score': 0.18894388145542895, 'metadata': {'text': 'Apellicon sought to recover the texts, many of which were seriously degraded at this point due to the conditions in which they were stored. He had them copied out into new manuscripts, and used his best guesswork to fill in the gaps where the originals were unreadable.[216]:\u200a5–6', 'title': 'aristotle.txt', 'user_id': '2acb499e-8428-543b-bd85-0d9098718220', 'version': 'v0', 'chunk_order': 228, 'document_id': '93123a68-d668-51de-8291-92162730dc87', 'extraction_id': 'bc497a0c-4b17-5e86-97d4-aa06474e0e5b', 'associatedQuery': 'Sample search query'}}], 'kg_search_results': None}}} +# {'results': {'completion': {'id': 'chatcmpl-9llkGYsrG1YZaWkqYvzXr1eQNl0gA', 'choices': [{'finish_reason': 'stop', 'index': 0, 'message': {'content': 'The search results for the query "Sample search query" include various topics and excerpts related to Aristotle\'s works and other subjects. Here are the relevant references:\n\n1. 
**Categories of Aristotle\'s Works**:\n - On Interpretation [1], [2]\n - Prior Analytics [1], [2]\n - Posterior Analytics [1], [2]\n - Topics [1], [2]\n - On Sophistical Refutations [1], [2]\n\n2. **Aristotle\'s Theory on Sense Perceptions and Memory**:\n - Aristotle\'s belief that people receive sense perceptions and perceive them as impressions, leading to the weaving together of new experiences. The search for these impressions involves searching the memory itself, where recollection occurs when one retrieved experience naturally follows another [3], [4].\n\n3. **Medieval Judaism**:\n - References to Medieval Judaism [5], [6].\n\n4. **Scientific Style**:\n - References to Scientific Style [7], [8].\n\n5. **Recovery of Texts by Apellicon**:\n - Apellicon\'s efforts to recover degraded texts by copying them into new manuscripts and using guesswork to fill in unreadable gaps [9], [10].\n\nThese references provide a broad overview of the topics related to the query, including Aristotle\'s works, his theories on memory, Medieval Judaism, scientific style, and the recovery of ancient texts.', 'role': 'assistant'}}], 'created': 1721171976, 'model': 'gpt-4o-2024-05-13', 'object': 'chat.completion', 'system_fingerprint': 'fp_5e997b69d8', 'usage': {'completion_tokens': 286, 'prompt_tokens': 513, 'total_tokens': 799}}, 'search_results': {'vector_search_results': [{'id': 'd70e2776-befa-5b67-9da7-b76aedb7c101', 'score': 0.270276627830369, 'metadata': {'text': 'Categories\nOn Interpretation\nPrior Analytics\nPosterior Analytics\nTopics\nOn Sophistical Refutations', 'title': 'aristotle.txt', 'user_id': '76eea168-9f98-4672-af3b-2c26ec92d7f8', 'version': 'v0', 'chunk_order': 26, 'document_id': '4bb1e5e0-3bb3-54e0-bc71-69e68bce30c7', 'extraction_id': '9401dfe6-10dd-5eb1-8b88-de1927a6c556', 'associatedQuery': 'Sample search query'}}, {'id': 'f54c9cda-0053-5ea2-a22b-aaba6437518c', 'score': 0.270276627830369, 'metadata': {'text': 'Categories\nOn Interpretation\nPrior Analytics\nPosterior Analytics\nTopics\nOn Sophistical Refutations', 'title': 'aristotle.txt', 'user_id': '2acb499e-8428-543b-bd85-0d9098718220', 'version': 'v0', 'chunk_order': 26, 'document_id': '9fbe403b-c11c-5aae-8ade-ef22980c3ad1', 'extraction_id': 'bc497a0c-4b17-5e86-97d4-aa06474e0e5b', 'associatedQuery': 'Sample search query'}}, {'id': 'd0675bcd-23d1-5982-8114-1a6459faec3f', 'score': 0.242980153623792, 'metadata': {'text': 'Because Aristotle believes people receive all kinds of sense perceptions and perceive them as impressions, people are continually weaving together new impressions of experiences. To search for these impressions, people search the memory itself.[105] Within the memory, if one experience is offered instead of a specific memory, that person will reject this experience until they find what they are looking for. Recollection occurs when one retrieved experience naturally follows another. If the chain of', 'title': 'aristotle.txt', 'user_id': '76eea168-9f98-4672-af3b-2c26ec92d7f8', 'version': 'v0', 'chunk_order': 119, 'document_id': '4bb1e5e0-3bb3-54e0-bc71-69e68bce30c7', 'extraction_id': '9401dfe6-10dd-5eb1-8b88-de1927a6c556', 'associatedQuery': 'Sample search query'}}, {'id': '69aed771-061f-5360-90f1-0ce395601b98', 'score': 0.242980153623792, 'metadata': {'text': 'Because Aristotle believes people receive all kinds of sense perceptions and perceive them as impressions, people are continually weaving together new impressions of experiences. 
To search for these impressions, people search the memory itself.[105] Within the memory, if one experience is offered instead of a specific memory, that person will reject this experience until they find what they are looking for. Recollection occurs when one retrieved experience naturally follows another. If the chain of', 'title': 'aristotle.txt', 'user_id': '2acb499e-8428-543b-bd85-0d9098718220', 'version': 'v0', 'chunk_order': 119, 'document_id': '9fbe403b-c11c-5aae-8ade-ef22980c3ad1', 'extraction_id': 'bc497a0c-4b17-5e86-97d4-aa06474e0e5b', 'associatedQuery': 'Sample search query'}}, {'id': 'dadd2d48-a2b7-5e55-9a8c-1030712c5ca0', 'score': 0.20218510005651702, 'metadata': {'text': 'Medieval Judaism', 'title': 'aristotle.txt', 'user_id': '76eea168-9f98-4672-af3b-2c26ec92d7f8', 'version': 'v0', 'chunk_order': 202, 'document_id': '4bb1e5e0-3bb3-54e0-bc71-69e68bce30c7', 'extraction_id': '9401dfe6-10dd-5eb1-8b88-de1927a6c556', 'associatedQuery': 'Sample search query'}}, {'id': 'da81f692-40d9-599b-a69b-25b6a5179b47', 'score': 0.20218510005651702, 'metadata': {'text': 'Medieval Judaism', 'title': 'aristotle.txt', 'user_id': '2acb499e-8428-543b-bd85-0d9098718220', 'version': 'v0', 'chunk_order': 202, 'document_id': '9fbe403b-c11c-5aae-8ade-ef22980c3ad1', 'extraction_id': 'bc497a0c-4b17-5e86-97d4-aa06474e0e5b', 'associatedQuery': 'Sample search query'}}, {'id': '0c4fea20-f7ee-520f-ae1f-155ecb398e1f', 'score': 0.19056136124594703, 'metadata': {'text': 'Scientific style', 'title': 'aristotle.txt', 'user_id': '2acb499e-8428-543b-bd85-0d9098718220', 'version': 'v0', 'chunk_order': 92, 'document_id': '9fbe403b-c11c-5aae-8ade-ef22980c3ad1', 'extraction_id': 'bc497a0c-4b17-5e86-97d4-aa06474e0e5b', 'associatedQuery': 'Sample search query'}}, {'id': 'c3c3145a-5d9d-5362-9629-f9159a027a9d', 'score': 0.19051768949311598, 'metadata': {'text': 'Scientific style', 'title': 'aristotle.txt', 'user_id': '76eea168-9f98-4672-af3b-2c26ec92d7f8', 'version': 'v0', 'chunk_order': 92, 'document_id': '4bb1e5e0-3bb3-54e0-bc71-69e68bce30c7', 'extraction_id': '9401dfe6-10dd-5eb1-8b88-de1927a6c556', 'associatedQuery': 'Sample search query'}}, {'id': '63e3a252-90bd-5494-9f9f-aee772f4db54', 'score': 0.18900877964391904, 'metadata': {'text': 'Apellicon sought to recover the texts, many of which were seriously degraded at this point due to the conditions in which they were stored. He had them copied out into new manuscripts, and used his best guesswork to fill in the gaps where the originals were unreadable.[216]:\u200a5–6', 'title': 'aristotle.txt', 'user_id': '76eea168-9f98-4672-af3b-2c26ec92d7f8', 'version': 'v0', 'chunk_order': 228, 'document_id': '4bb1e5e0-3bb3-54e0-bc71-69e68bce30c7', 'extraction_id': '9401dfe6-10dd-5eb1-8b88-de1927a6c556', 'associatedQuery': 'Sample search query'}}, {'id': '2c1183a8-e130-5432-a311-ee1f0f194562', 'score': 0.18894388145542895, 'metadata': {'text': 'Apellicon sought to recover the texts, many of which were seriously degraded at this point due to the conditions in which they were stored. 
He had them copied out into new manuscripts, and used his best guesswork to fill in the gaps where the originals were unreadable.[216]:\u200a5–6', 'title': 'aristotle.txt', 'user_id': '2acb499e-8428-543b-bd85-0d9098718220', 'version': 'v0', 'chunk_order': 228, 'document_id': '9fbe403b-c11c-5aae-8ade-ef22980c3ad1', 'extraction_id': 'bc497a0c-4b17-5e86-97d4-aa06474e0e5b', 'associatedQuery': 'Sample search query'}}], 'kg_search_results': None}}} ``` ## Advanced Authentication Features @@ -136,7 +136,8 @@ Users can delete their accounts: ```python # Delete account (requires password confirmation) -delete_result = client.delete_user("password123") +user_id = register_response["results"]["id"] # input unique id here +delete_result = client.delete_user(user_id, "password123") # {'results': {'message': 'User account deleted successfully'}} ``` @@ -204,7 +205,7 @@ login_result = client.login("admin@example.com", "change_me_immediately") # Now you can access superuser features, for example: users_overview = client.users_overview() -# {'results': [{'user_id': '2acb499e-8428-543b-bd85-0d9098718220', 'num_files': 2, 'total_size_in_bytes': 73672, 'document_ids': ['c4967f03-1780-5161-8e1d-57b55aa65076', '93123a68-d668-51de-8291-92162730dc87']}, {'user_id': 'ac730ec3-7d3d-451a-a166-e7ac7c57b198', 'num_files': 1, 'total_size_in_bytes': 73353, 'document_ids': ['d4861e78-cf02-5184-9b6a-d5bdbddd39b2']}, {'user_id': 'e0514342-e51a-43e5-8aaa-665468102dce', 'num_files': 1, 'total_size_in_bytes': 73353, 'document_ids': ['f4fbe534-b7d6-5fec-9d41-9093b2112732']}]} +# {'results': [{'user_id': '2acb499e-8428-543b-bd85-0d9098718220', 'num_files': 2, 'total_size_in_bytes': 73672, 'document_ids': ['c4967f03-1780-5161-8e1d-57b55aa65076', '9fbe403b-c11c-5aae-8ade-ef22980c3ad1']}, {'user_id': 'ac730ec3-7d3d-451a-a166-e7ac7c57b198', 'num_files': 1, 'total_size_in_bytes': 73353, 'document_ids': ['d4861e78-cf02-5184-9b6a-d5bdbddd39b2']}, {'user_id': 'e0514342-e51a-43e5-8aaa-665468102dce', 'num_files': 1, 'total_size_in_bytes': 73353, 'document_ids': ['f4fbe534-b7d6-5fec-9d41-9093b2112732']}]} # Access system-wide logs logs = client.logs() diff --git a/docs/cookbooks/walkthrough.mdx b/docs/cookbooks/walkthrough.mdx index b0dcabfda..d828fc682 100644 --- a/docs/cookbooks/walkthrough.mdx +++ b/docs/cookbooks/walkthrough.mdx @@ -58,7 +58,7 @@ client.ingest_files(file_paths=["test.txt"]) # Call RAG directly rag_response = client.rag( query="Who is john", - rag_generation_config={"model": "gpt-4o-mini", "temperature": 0.0}, + rag_generation_config={"model": "openai/gpt-4o-mini", "temperature": 0.0}, ) results = rag_response["results"] print(f"Search Results:\n{results['search_results']}") @@ -82,13 +82,11 @@ concurrent_request_limit = 16 temperature = 0.5 [chunking] -provider = "unstructured_local" -strategy = "auto" -chunking_strategy = "by_title" -new_after_n_chars = 512 -max_characters = 1_024 -combine_under_n_chars = 128 -overlap = 20 +provider = "r2r" +chunking_strategy = "recursive" +chunk_size = 512 +chunk_overlap = 256 +excluded_parsers = ["mp4"] ``` Then, use the `config-path` argument to specify your custom configuration when launching R2R: diff --git a/docs/documentation/configuration/ingestion/overview.mdx b/docs/documentation/configuration/ingestion/overview.mdx index 4b29d01db..b8d00ba28 100644 --- a/docs/documentation/configuration/ingestion/overview.mdx +++ b/docs/documentation/configuration/ingestion/overview.mdx @@ -4,9 +4,16 @@ description: 'Configure your R2R ingestion pipeline' --- ## Introduction + R2R's 
ingestion pipeline efficiently processes various document formats, transforming them into searchable content. It seamlessly integrates with vector databases and knowledge graphs for optimal retrieval and analysis. -By default, R2R leverages Unstructured's open-source [ingestion platform](https://docs.unstructured.io/open-source/introduction/overview) to handle supported file types. For formats not covered by Unstructured, such as `.mp3`, R2R implements custom ingestion logic to ensure comprehensive support. Supported file types are listed [here](/documentation/configuration/parsing_and_chunking). +R2R offers two main implementations for ingestion: + +- **Light**: Uses R2R's **built-in** ingestion logic, which supports a wide range of file types including TXT, JSON, HTML, PDF, DOCX, PPTX, XLSX, CSV, Markdown, images, audio, and video. This is the default for the 'light' installation. +- **Full**: Leverages **Unstructured's** open-source [ingestion platform](https://docs.unstructured.io/open-source/introduction/overview) to handle supported file types. This is the default for the 'full' installation and provides more advanced parsing capabilities. + + +Supported file types for each implementation are listed [here](/documentation/configuration/ingestion/parsing_and_chunking). ## Key Configuration Areas @@ -16,17 +23,11 @@ Many of the settings managed by the [`r2r.toml`](https://github.com/SciPhi-AI/R2 [database] provider = "postgres" -[chunking] -provider = "unstructured_local" -strategy = "auto" -chunking_strategy = "basic" -new_after_n_chars = 512 -max_characters = 1_024 -combine_under_n_chars = 128 -overlap = 20 - -[parsing] -provider = "unstructured_local" +[ingestion] +provider = "r2r" +chunking_strategy = "recursive" +chunk_size = 1_024 +chunk_overlap = 512 excluded_parsers = ["mp4"] [embedding] @@ -42,9 +43,7 @@ These settings directly impact how R2R performs ingestion: - The `[database]` section configures the Postgres database used for semantic search and document management. During retrieval, this database is queried to find the most relevant document chunks based on vector similarity. -- The `[chunking]` section defines how documents are split into smaller, manageable pieces. This affects the granularity of information storage and retrieval. - -- The `[parsing]` section determines how different file types are processed and converted into text. This impacts the initial extraction of content from various document formats. +- The `[ingestion]` section determines how different file types are processed and converted into text, including how text is split into smaller, manageable pieces. This affects the granularity of information storage and retrieval. - The `[embedding]` section defines the model and parameters for converting text into vector embeddings. In the retrieval process, these settings are used to embed the user's query, allowing it to be compared against the stored document embeddings.
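To see how the `[ingestion]` settings above are used in practice, the sketch below overrides them for a single request from the Python SDK. The `R2RClient` constructor and the default port 7272 are assumptions drawn from the surrounding docs rather than a verbatim SDK excerpt; the `ingestion_config` keys simply mirror the `[ingestion]` section shown above.

```python
# Minimal sketch: per-request override of the server-side [ingestion] defaults.
# `R2RClient` and the default port 7272 are assumptions; adjust to your deployment.
from r2r import R2RClient

client = R2RClient("http://localhost:7272")

response = client.ingest_files(
    file_paths=["document.pdf"],
    ingestion_config={
        "provider": "r2r",                # built-in "light" parser
        "chunking_strategy": "recursive",
        "chunk_size": 1_024,
        "chunk_overlap": 512,
    },
)
print(response["results"])
```

Because the override applies only to that request, the server-wide defaults in `r2r.toml` are left untouched.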
diff --git a/docs/documentation/configuration/ingestion/parsing_and_chunking.mdx b/docs/documentation/configuration/ingestion/parsing_and_chunking.mdx index 7c754a374..cb0e8edcf 100644 --- a/docs/documentation/configuration/ingestion/parsing_and_chunking.mdx +++ b/docs/documentation/configuration/ingestion/parsing_and_chunking.mdx @@ -13,8 +13,8 @@ provider = "unstructured_local" # | rag | unstructured_api excluded_parsers = ["mp4"] ``` Available providers: -- `r2r`: Liteweight parser provided by R2R -- `unstructured_local`: Default offering, makes use of open source Unstructured package. +- `r2r`: Default offering for `light` installations, a simple and lightweight parser included in R2R. +- `unstructured_local`: Default offering for `full` installations, makes use of open source Unstructured package. - `unstructured_api`: Cloud offering of Unstructured ### Supported File Types @@ -77,7 +77,15 @@ overlap = 20 Key chunking configuration options: -- `provider`: The chunking provider (defaults to "unstructured_local"). +- `provider`: The chunking provider (defaults to "r2r"). + +**For R2R:** +- `chunking_strategy`: The chunking method ("recursive"). +- `chunk_size`: The target size for each chunk. +- `chunk_overlap`: The number of characters to overlap between chunks. +- `excluded_parsers`: List of parsers to exclude (e.g., ["mp4"]). + +**For Unstructured:** - `strategy`: The overall chunking strategy ("auto", "fast", or "hi_res"). - `chunking_strategy`: The specific chunking method ("by_title" or "basic"). - `new_after_n_chars`: Soft maximum size for a chunk. @@ -90,18 +98,14 @@ Key chunking configuration options: ```python - # No additional setup required - # Ensure unstructured is installed: pip install unstructured + # Ensure unstructured is installed + # Refer to the full installation docs here - [https://r2r-docs.sciphi.ai/introduction/documentation/installation/full/docker] - - # Set 'provider = "unstructured_local"' for `[parsing]` and `[chunking]` in `my_r2r.toml` + # Set 'provider = "unstructured_local"' for `ingestion` in `my_r2r.toml`. r2r serve --config-path=my_r2r.toml ``` - Supported chunking strategies: - - by_title - - basic - This is the default provider, using the open-source Unstructured library for local processing. + This is the default `full` provider, using the open-source Unstructured library for local processing. @@ -111,12 +115,9 @@ Key chunking configuration options: # .. set other environment variables # Optional - Update default provider - # Set 'provider = "unstructured_api"' for `[parsing]` and `[chunking]` in `my_r2r.toml` + # Set 'provider = "unstructured_api"' for `ingestion` in `my_r2r.toml`. r2r serve --config-path=my_r2r.toml ``` - Supported chunking strategies: - - by_title - - basic Uses the Unstructured platform API for chunking, which may offer additional features or performance benefits. @@ -126,13 +127,11 @@ Key chunking configuration options: # No additional setup required # Optional - Update default provider - # Set 'provider = "r2r"' for `[parsing]` and `[chunking]` in `my_r2r.toml` + # Set 'provider = "r2r"' for `ingestion` in `my_r2r.toml`. r2r serve --config-path=my_r2r.toml ``` - Supported chunking strategies: - - recursive + This is the default `light` provider, using the open-source R2R library for local processing. - A lightweight chunking method provided by R2R, suitable for basic chunking needs. 
@@ -141,8 +140,8 @@ Key chunking configuration options: When using the Unstructured chunking provider, you can specify additional parameters in the configuration file: ```toml -[chunking] -provider = "unstructured_local" # or "unstructured_api" +[ingestion] +provider = "unstructured_local" strategy = "auto" # "auto", "fast", or "hi_res" chunking_strategy = "by_title" # "by_title" or "basic" @@ -190,7 +189,7 @@ Chunking is a crucial part of the document processing pipeline in R2R. It works ```python response = client.ingest_files( file_paths=["document.pdf"], - chunking_config={ + ingestion_config={ "provider": "unstructured_local", "chunking_strategy": "by_title", "max_characters": 1000 diff --git a/docs/documentation/configuration/introduction.mdx b/docs/documentation/configuration/introduction.mdx index ebee91e9b..0fcb4633a 100644 --- a/docs/documentation/configuration/introduction.mdx +++ b/docs/documentation/configuration/introduction.mdx @@ -25,7 +25,7 @@ The default settings for R2R are specified in the [`r2r.toml`](https://github.co ```toml r2r.toml [embedding] provider = "litellm" -base_model = "text-embedding-3-large" +base_model = "text-embedding-3-small" base_dimension = 1536 [completion] diff --git a/docs/documentation/configuration/knowledge-graph/enrichment.mdx b/docs/documentation/configuration/knowledge-graph/enrichment.mdx index 1f15f5d9a..bb6abeb5a 100644 --- a/docs/documentation/configuration/knowledge-graph/enrichment.mdx +++ b/docs/documentation/configuration/knowledge-graph/enrichment.mdx @@ -9,25 +9,25 @@ You can configure knowledge graph enrichment in the R2R configuration file. To d ```toml [kg] -provider = "neo4j" +provider = "postgres" batch_size = 256 [kg.kg_creation_settings] - kg_extraction_prompt = "graphrag_triplet_extraction_zero_shot" + kg_triples_extraction_prompt = "graphrag_triples_extraction_few_shot" entity_types = [] # if empty, all entities are extracted relation_types = [] # if empty, all relations are extracted fragment_merge_count = 4 # number of fragments to merge into a single extraction max_knowledge_triples = 100 # max number of triples to extract for each document chunk - generation_config = { model = "gpt-4o-mini" } # and other generation params + generation_config = { model = "openai/gpt-4o-mini" } # and other generation params [kg.kg_enrichment_settings] max_description_input_length = 65536 # increase if you want more comprehensive descriptions max_summary_input_length = 65536 - generation_config = { model = "gpt-4o-mini" } # and other generation params - leiden_params = { max_levels = 10 } # more params in graspologic/partition/leiden.py + generation_config = { model = "openai/gpt-4o-mini" } # and other generation params + leiden_params = {} # more params in graspologic/partition/leiden.py [kg.kg_search_settings] - generation_config = { model = "gpt-4o-mini" } + generation_config = { model = "openai/gpt-4o-mini" } ``` Next you can do GraphRAG with the knowledge graph. Find out more about GraphRAG in the [GraphRAG Guide](/cookbooks/graphrag). diff --git a/docs/documentation/configuration/knowledge-graph/overview.mdx b/docs/documentation/configuration/knowledge-graph/overview.mdx index 14f278abb..1b0f05eea 100644 --- a/docs/documentation/configuration/knowledge-graph/overview.mdx +++ b/docs/documentation/configuration/knowledge-graph/overview.mdx @@ -4,42 +4,40 @@ description: 'Configure your R2R knowledge graph provider.' 
--- ## Knowledge Graph Provider -R2R supports knowledge graph functionality to enhance document understanding and retrieval. By default, R2R uses [Neo4j](https://neo4j.com/) as the knowledge graph provider. We are actively working to integrate with [Memgraph](https://memgraph.com/docs). You can find out more about creating knowledge graphs in the [GraphRAG Cookbook](/cookbooks/graphrag). +R2R supports knowledge graph functionality to enhance document understanding and retrieval. By default, R2R creates the graph by clustering with `graspologic` and saving the output triples and relationships into Postgres. We are actively working to integrate with [Memgraph](https://memgraph.com/docs). You can find out more about creating knowledge graphs in the [GraphRAG Cookbook](/cookbooks/graphrag). -To configure the knowledge graph settings for your project: - -1. Edit the `kg` section in your `r2r.toml` file: +To configure the knowledge graph settings for your project, edit the `kg` section in your `r2r.toml` file: ```toml r2r.toml [kg] -provider = "neo4j" +provider = "postgres" batch_size = 256 -kg_extraction_prompt = "graphrag_triplet_extraction_zero_shot" +kg_triples_extraction_prompt = "graphrag_triples_extraction_few_shot" [kg.kg_creation_settings] entity_types = [] # if empty, all entities are extracted relation_types = [] # if empty, all relations are extracted - generation_config = { model = "gpt-4o-mini" } + generation_config = { model = "openai/gpt-4o-mini" } max_knowledge_triples = 100 # max number of triples to extract for each document chunk fragment_merge_count = 4 # number of fragments to merge into a single extraction [kg.kg_enrichment_settings] max_description_input_length = 65536 # increase if you want more comprehensive descriptions max_summary_input_length = 65536 - generation_config = { model = "gpt-4o-mini" } # and other generation params below - leiden_params = { max_levels = 10 } # more params in https://neo4j.com/docs/graph-data-science/current/algorithms/leiden/ + generation_config = { model = "openai/gpt-4o-mini" } # and other generation params below + leiden_params = {} [kg.kg_search_settings] - generation_config = { model = "gpt-4o-mini" } + generation_config = { model = "openai/gpt-4o-mini" } ``` Let's break down the knowledge graph configuration options: -- `provider`: Specifies the knowledge graph provider. Currently, "neo4j" is supported. +- `provider`: Specifies the knowledge graph provider. Currently, "postgres" is supported. - `batch_size`: Determines the number of entities or relationships to process in a single batch during import operations. -- `kg_extraction_prompt`: Specifies the prompt template to use for extracting knowledge graph information from text. +- `kg_triples_extraction_prompt`: Specifies the prompt template to use for extracting knowledge graph information from text. - `kg_creation_settings`: Configuration for the model used in knowledge graph creation. - `max_knowledge_triples`: The maximum number of knowledge triples to extract for each document chunk. - `fragment_merge_count`: The number of fragments to merge into a single extraction. @@ -49,27 +47,7 @@ Let's break down the knowledge graph configuration options: - `leiden_params`: Parameters for the Leiden algorithm. - `kg_search_settings`: Similar configuration for the model used in knowledge graph search operations. -### Neo4j Configuration - -When using Neo4j as the knowledge graph provider, you need to set up the following environment variables or provide them in the `r2r.toml` file. 
To set them as environment variables: -```bash -export NEO4J_USER=your_neo4j_username -export NEO4J_PASSWORD=your_neo4j_password -export NEO4J_URL=bolt://your_neo4j_host:7687 -export NEO4J_DATABASE=neo4j -``` - -And to set them directly in your config: - -```toml r2r.toml -[kg] -provider = "neo4j" -user = "your_neo4j_username" -password = "your_neo4j_password" -url = "bolt://your_neo4j_host:7687" -database = "neo4j" -``` Setting configuration values in the `r2r.toml` will override environment variables by default. @@ -78,8 +56,6 @@ Setting configuration values in the `r2r.toml` will override environment variabl ### Knowledge Graph Operations -The Neo4jKGProvider supports various operations: - 1. **Entity Management**: Add, update, and retrieve entities in the knowledge graph. 2. **Relationship Management**: Create and query relationships between entities. 3. **Batch Import**: Efficiently import large amounts of data using batched operations. @@ -88,7 +64,7 @@ The Neo4jKGProvider supports various operations: ### Customization -You can customize the knowledge graph extraction and search processes by modifying the `kg_extraction_prompt` and adjusting the model configurations in `kg_extraction_settings` and `kg_search_settings`. Moreover, you can customize the LLM models used in various parts of the knowledge graph creation process. All of these options can be selected at runtime, with the only exception being the specified database provider. For more details, refer to the knowledge graph settings in the [search API](/api-reference/endpoint/search). +You can customize the knowledge graph extraction and search processes by modifying the `kg_triples_extraction_prompt` and adjusting the model configurations in `kg_creation_settings` and `kg_search_settings`. Moreover, you can customize the LLM models used in various parts of the knowledge graph creation process. All of these options can be selected at runtime, with the only exception being the specified database provider. For more details, refer to the knowledge graph settings in the [search API](/api-reference/endpoint/search). By leveraging the knowledge graph capabilities, you can enhance R2R's understanding of document relationships and improve the quality of search and retrieval operations.
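To make the runtime `kg_search_settings` concrete, here is a minimal sketch of a knowledge-graph-backed search issued from the Python SDK. The client construction and the `kg_search_settings` keyword argument are assumptions inferred from the `KGSearchSettings` schema and the `/v2/search` endpoint, so adapt them to your SDK version.

```python
# Minimal sketch: a knowledge-graph-backed search. The settings keys follow the
# KGSearchSettings schema; the client constructor and argument names are assumptions.
from r2r import R2RClient

client = R2RClient("http://localhost:7272")

results = client.search(
    query="Who is Aristotle?",
    kg_search_settings={
        "use_kg_search": True,
        "kg_search_type": "local",  # or "global" for community-level answers
    },
)
print(results["results"]["kg_search_results"])
```

Roughly, a `"local"` search traverses extracted entities and relationships directly, while a `"global"` search draws on the community summaries produced during graph enrichment.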
diff --git a/docs/documentation/configuration/llm.mdx b/docs/documentation/configuration/llm.mdx index 5acee19d6..35e2e2e1c 100644 --- a/docs/documentation/configuration/llm.mdx +++ b/docs/documentation/configuration/llm.mdx @@ -1,5 +1,5 @@ --- -title: 'LLM' +title: 'LLMs' description: 'Learn how to configure LLMs in your R2R deployment' icon: 'language' --- @@ -44,9 +44,6 @@ Key generation configuration options: # Set '"model": "openai/gpt-4o-mini"' in `r2r.toml` # then call `r2r serve --config-path=r2r.toml` r2r serve - # Set '"model": "openai/gpt-4o-mini"' in `r2r.toml` - # then call `r2r serve --config-path=r2r.toml` - r2r serve ``` Supported models include: - openai/gpt-4o diff --git a/docs/documentation/configuration/postgres.mdx b/docs/documentation/configuration/postgres.mdx index 9a5c21d2d..86e2f6c43 100644 --- a/docs/documentation/configuration/postgres.mdx +++ b/docs/documentation/configuration/postgres.mdx @@ -29,7 +29,7 @@ To customize the database settings, you can modify the `database` section in you [database] provider = "postgres" # currently only `postgres` is supported -# optional parameters which can be set by environment instead +# optional parameters which are typically set in the environment instead: user = "your_postgres_user" password = "your_postgres_password" host = "your_postgres_host" @@ -86,16 +86,10 @@ The `collection.py` file includes methods for building complex SQL queries that ### GIN Indexing -R2R uses GIN (Generalized Inverted Index) indexing to optimize both full-text and JSON searches: +R2R uses GIN (Generalized Inverted Index) indexing to optimize full-text searches: ```python -Index( - f"idx_{name}_fts_trgm", - table.c.fts, - table.c.text, - postgresql_using="gin", - postgresql_ops={"text": "gin_trgm_ops"}, -) +Index(f"idx_{name}_fts", "fts", postgresql_using="gin"), ``` This indexing strategy allows for efficient full-text search and trigram similarity matching. diff --git a/docs/documentation/configuration/prompts.mdx b/docs/documentation/configuration/prompts.mdx index f590272b6..3165286c7 100644 --- a/docs/documentation/configuration/prompts.mdx +++ b/docs/documentation/configuration/prompts.mdx @@ -59,8 +59,8 @@ Certainly! I'll create an expanded table that explains all the prompts you've li | [`graphrag_community_reports.yaml`](https://github.com/SciPhi-AI/R2R/blob/main/py/core/providers/prompts/defaults/graphrag_community_reports.yaml) | Used in GraphRAG to generate reports about communities or clusters in the knowledge graph. | | [`graphrag_map_system_prompt.yaml`](https://github.com/SciPhi-AI/R2R/blob/main/py/core/providers/prompts/defaults/graphrag_map_system_prompt.yaml) | System prompt for the "map" phase in GraphRAG, used to process individual nodes or edges. | | [`graphrag_reduce_system_prompt.yaml`](https://github.com/SciPhi-AI/R2R/blob/main/py/core/providers/prompts/defaults/graphrag_reduce_system_prompt.yaml) | System prompt for the "reduce" phase in GraphRAG, used to combine or summarize information from multiple sources. | -| [`graphrag_triplet_extraction_few_shot.yaml`](https://github.com/SciPhi-AI/R2R/blob/main/py/core/providers/prompts/defaults/graphrag_triplet_extraction_few_shot.yaml) | Few-shot prompt for extracting subject-predicate-object triplets in GraphRAG, with examples. 
| -| [`graphrag_triplet_extraction_zero_shot.yaml`](https://github.com/SciPhi-AI/R2R/blob/main/py/core/providers/prompts/defaults/graphrag_triplet_extraction_zero_shot.yaml) | Zero-shot prompt for extracting subject-predicate-object triplets in GraphRAG, without examples. | +| [`graphrag_triples_extraction_few_shot.yaml`](https://github.com/SciPhi-AI/R2R/blob/main/py/core/providers/prompts/defaults/graphrag_triples_extraction_few_shot.yaml) | Few-shot prompt for extracting subject-predicate-object triplets in GraphRAG, with examples. | +| [`graphrag_triples_extraction_few_shot.yaml`](https://github.com/SciPhi-AI/R2R/blob/main/py/core/providers/prompts/defaults/graphrag_triples_extraction_few_shot.yaml) | Zero-shot prompt for extracting subject-predicate-object triplets in GraphRAG, without examples. | | [`hyde.yaml`](https://github.com/SciPhi-AI/R2R/blob/main/py/core/providers/prompts/defaults/hyde.yaml) | Related to Hypothetical Document Embeddings (HyDE) for improving retrieval performance. | | [`kg_search.yaml`](https://github.com/SciPhi-AI/R2R/blob/main/py/core/providers/prompts/defaults/kg_search.yaml) | Used for searching the knowledge graph, possibly to formulate queries or interpret results. | | [`kg_search_with_spec.yaml`](https://github.com/SciPhi-AI/R2R/blob/main/py/core/providers/prompts/defaults/kg_search_with_spec.yaml) | Similar to `kg_search.yaml`, but with a specific schema or specification for the search process. | diff --git a/docs/documentation/configuration/rag.mdx b/docs/documentation/configuration/rag.mdx index 4f08989b9..4593544e8 100644 --- a/docs/documentation/configuration/rag.mdx +++ b/docs/documentation/configuration/rag.mdx @@ -33,7 +33,7 @@ The `rag_generation_config` parameter allows you to customize the language model # Configure knowledge graph search vector_search_settings = { "use_vector_search": True, - "filters": {"metadata.document_type": "article"}, + "filters": {"document_type": "article"}, "search_limit": 20, "use_hybrid_search": True, "selected_collection_ids": ["c3291abf-8a4e-5d9d-80fd-232ef6fd8526"] diff --git a/docs/documentation/configuration/retrieval/overview.mdx b/docs/documentation/configuration/retrieval/overview.mdx index f9a60a362..d08706037 100644 --- a/docs/documentation/configuration/retrieval/overview.mdx +++ b/docs/documentation/configuration/retrieval/overview.mdx @@ -25,7 +25,8 @@ rerank_model = "None" concurrent_request_limit = 256 [kg] -provider = "None" +provider = "postgres" +batch_size = 256 [completion] provider = "litellm" diff --git a/docs/documentation/configuration/retrieval/vector-search.mdx b/docs/documentation/configuration/retrieval/vector-search.mdx index b923007b5..de8a417d4 100644 --- a/docs/documentation/configuration/retrieval/vector-search.mdx +++ b/docs/documentation/configuration/retrieval/vector-search.mdx @@ -10,7 +10,7 @@ Example using the Python SDK: ```python vector_search_settings = { "use_vector_search": True, - "filters": {"metadata.document_type": "article"}, + "filters": {"document_type": "article"}, "search_limit": 20, "use_hybrid_search": True, "selected_collection_ids": ["c3291abf-8a4e-5d9d-80fd-232ef6fd8526"] @@ -55,9 +55,9 @@ Example of advanced filtering: ```python filters = { "$and": [ - {"metadata.publication_date": {"$gte": "2023-01-01"}}, - {"metadata.author": {"$in": ["John Doe", "Jane Smith"]}}, - {"metadata.category": {"$ilike": "%technology%"}} + {"publication_date": {"$gte": "2023-01-01"}}, + {"author": {"$in": ["John Doe", "Jane Smith"]}}, + {"category": {"$ilike": "%technology%"}} 
] } vector_search_settings["filters"] = filters diff --git a/docs/documentation/deep-dive/main/builder.mdx b/docs/documentation/deep-dive/main/builder.mdx index 3de1b7244..c0befbe46 100644 --- a/docs/documentation/deep-dive/main/builder.mdx +++ b/docs/documentation/deep-dive/main/builder.mdx @@ -33,7 +33,7 @@ r2r = R2RBuilder().build() r2r = R2RBuilder(config=R2RConfig.from_toml("path/to/config.toml")).build() # Create an R2R instance with a predefined configuration -r2r = R2RBuilder(config_name="neo4j_kg").build() +r2r = R2RBuilder(config_name="full").build() ``` ## Factories diff --git a/docs/documentation/deep-dive/main/config.mdx b/docs/documentation/deep-dive/main/config.mdx index 96629b908..ea985aeb3 100644 --- a/docs/documentation/deep-dive/main/config.mdx +++ b/docs/documentation/deep-dive/main/config.mdx @@ -33,8 +33,8 @@ from r2r import R2RConfig, R2RBuilder config = R2RConfig.from_toml("path/to/your/r2r.toml") r2r = R2RBuilder(config).build() -# Or use a preset configuration -r2r = R2RBuilder(config_name="default").build() +# Or use the preset configuration +r2r = R2RBuilder().build() ``` ## Configuration Sections @@ -146,36 +146,13 @@ provider = "None" - `provider`: Evaluation provider. Set to "None" to disable evaluation functionality. -### Ingestion - -```toml -[ingestion] -excluded_parsers = [ "mp4" ] - -[[ingestion.override_parsers]] -document_type = "pdf" -parser = "PDFParser" - -[ingestion.text_splitter] -type = "recursive_character" -chunk_size = 512 -chunk_overlap = 20 -``` - -- `excluded_parsers`: List of file types to exclude from parsing. -- `override_parsers`: Custom parser settings for specific document types. -- `text_splitter`: Configuration for splitting ingested text. - - `type`: The algorithm used for splitting. - - `chunk_size`: Maximum size of each chunk in characters. - - `chunk_overlap`: Number of overlapping characters between chunks. - ### Knowledge Graph Refer to the [`KGProvider`](documentation/deep-dive/providers/knowledge-graph) to learn more about how R2R supports knowledge graphs. ```toml [kg] -provider = "neo4j" +provider = "postgres" batch_size = 1 [kg.kg_extraction_config] @@ -187,7 +164,7 @@ stream = false add_generation_kwargs = { } ``` -- `provider`: Specifies the backend used for storing and querying the knowledge graph. Options include "neo4j" and "None". +- `provider`: Specifies the backend used for storing and querying the knowledge graph. Options include "postgres" and "None". - `batch_size`: Determines how many text chunks are processed at once for knowledge extraction. - `kg_extraction_config`: Configures the language model used for extracting knowledge from text chunks. diff --git a/docs/documentation/deep-dive/providers/embeddings.mdx b/docs/documentation/deep-dive/providers/embeddings.mdx index 6725d4dc3..aef2d91d2 100644 --- a/docs/documentation/deep-dive/providers/embeddings.mdx +++ b/docs/documentation/deep-dive/providers/embeddings.mdx @@ -105,7 +105,7 @@ R2R supports a wide range of embedding providers through LiteLLM. 
Here's how to ``` Supported models include: - text-embedding-3-small - - text-embedding-3-large + - text-embedding-3-small - text-embedding-ada-002 diff --git a/docs/documentation/deep-dive/providers/knowledge-graph.mdx b/docs/documentation/deep-dive/providers/knowledge-graph.mdx index e516fb0af..8788afb07 100644 --- a/docs/documentation/deep-dive/providers/knowledge-graph.mdx +++ b/docs/documentation/deep-dive/providers/knowledge-graph.mdx @@ -9,15 +9,6 @@ R2R's `KGProvider` handles the creation, management, and querying of knowledge g For a practical, step-by-step guide on implementing knowledge graphs in R2R, including code examples and common use cases, see our [GraphRAG Cookbook](/cookbooks/graphrag). -## Providers - -The default `Neo4jKGProvider` offers a complete knowledge graph solution. - -Key features: -- Neo4j-based graph database -- Entity and relationship extraction from text -- Graph querying and traversal -- Integration with vector search ## Configuration @@ -27,36 +18,24 @@ These are located in the `r2r.toml` file, under the `[kg]` section. ```toml [kg] -provider = "neo4j" +provider = "postgres" batch_size = 256 -user = "your_neo4j_user" -password = "your_neo4j_password" -url = "your_neo4j_url" -database = "your_neo4j_database" [kg.kg_creation_settings] - kg_extraction_prompt = "graphrag_triplet_extraction_zero_shot" + kg_triples_extraction_prompt = "graphrag_triples_extraction_few_shot" entity_types = ["Person", "Organization", "Location"] # if empty, all entities are extracted relation_types = ["works at", "founded by", "invested in"] # if empty, all relations are extracted max_knowledge_triples = 100 fragment_merge_count = 4 # number of fragments to merge into a single extraction - generation_config = { model = "gpt-4o-mini" } # and other params, model used for triplet extraction + generation_config = { model = "openai/gpt-4o-mini" } # and other params, model used for triplet extraction [kg.kg_enrichment_settings] max_description_input_length = 65536 # increase if you want more comprehensive descriptions max_summary_input_length = 65536 # increase if you want more comprehensive summaries - generation_config = { model = "gpt-4o-mini" } # and other params, model used for node description and graph clustering - leiden_params = { max_levels = 10 } # more params here: https://neo4j.com/docs/graph-data-science/current/algorithms/leiden/ + generation_config = { model = "openai/gpt-4o-mini" } # and other params, model used for node description and graph clustering + leiden_params = {} ``` -Alternatively, instead of setting `user`, `password`, `url`, and `database` in your `r2r.toml`, you can set these values using environment variables: - -```bash -export NEO4J_USER=your_username -export NEO4J_PASSWORD=your_password -export NEO4J_URL=your_neo4j_url -export NEO4J_DATABASE=neo4j -``` Environment variables take precedence over the config settings in case of conflicts. The R2R Docker includes configuration options that facilitate integration with a combined Postgres+pgvector database setup. @@ -163,12 +142,6 @@ Common issues and solutions: 2. **Query Performance**: Optimize graph structure and use appropriate indexes. 3. **Memory Issues**: Adjust batch sizes and implement pagination for large graphs. -## Performance Considerations - -1. **Query Optimization**: Use efficient Cypher queries for Neo4j. -2. **Embedding Caching**: Cache embeddings for frequently accessed entities. -3. **Asynchronous Processing**: Use async methods for non-blocking operations. 
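With Postgres as the graph backend, no separate graph-database credentials are required; the provider is assumed to reuse the standard `POSTGRES_*` connection variables documented for the database provider. A minimal sketch with placeholder values:

```bash
# Postgres connection assumed to back the knowledge graph provider (placeholder values)
export POSTGRES_USER=your_postgres_user
export POSTGRES_PASSWORD=your_postgres_password
export POSTGRES_HOST=your_postgres_host
export POSTGRES_PORT=your_postgres_port
export POSTGRES_DBNAME=your_db
export POSTGRES_PROJECT_NAME=your_project_name
```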
- ## Conclusion R2R's Knowledge Graph system provides a powerful foundation for building applications that require structured data representation and complex querying capabilities. By understanding its components, following best practices, and leveraging its flexibility, you can create sophisticated information retrieval and analysis systems tailored to your specific needs. diff --git a/docs/documentation/deployment/aws.mdx b/docs/documentation/deployment/aws.mdx index 6dca6cc8f..e62fe0997 100644 --- a/docs/documentation/deployment/aws.mdx +++ b/docs/documentation/deployment/aws.mdx @@ -95,7 +95,7 @@ docker run hello-world export OPENAI_API_KEY=sk-... # Optional - pass in a custom configuration here -r2r serve --docker +r2r serve --docker --full ``` 2. Double check the health of the system: diff --git a/docs/documentation/deployment/azure.mdx b/docs/documentation/deployment/azure.mdx index 4f962eec9..591404002 100644 --- a/docs/documentation/deployment/azure.mdx +++ b/docs/documentation/deployment/azure.mdx @@ -115,7 +115,7 @@ docker run hello-world export OPENAI_API_KEY=sk-... # Optional - pass in a custom configuration here -r2r serve --docker +r2r serve --docker --full ``` diff --git a/docs/documentation/deployment/gcp.mdx b/docs/documentation/deployment/gcp.mdx index 9b0d935ca..e701c8b0b 100644 --- a/docs/documentation/deployment/gcp.mdx +++ b/docs/documentation/deployment/gcp.mdx @@ -91,7 +91,7 @@ docker run hello-world export OPENAI_API_KEY=sk-... # Optional - pass in a custom configuration here -r2r serve --docker +r2r serve --docker --full ``` 2. Double check the health of the system: diff --git a/docs/documentation/deployment/introduction.mdx b/docs/documentation/deployment/introduction.mdx index 05c2972a8..ea39a3c50 100644 --- a/docs/documentation/deployment/introduction.mdx +++ b/docs/documentation/deployment/introduction.mdx @@ -25,7 +25,7 @@ When running R2R, we recommend: For detailed, step-by-step instructions on deploying R2R in various environments, please refer to our specific deployment guides: -- [Local Deployment](/documentation/installation/docker) +- [Local Deployment](/documentation/installation/overview) - [Azure Deployment](/documentation/deployment/azure) - [SciPhi Cloud](/sciphi-cloud/introduction/) diff --git a/docs/documentation/deployment/troubleshooting/api_connections.mdx b/docs/documentation/deployment/troubleshooting/api_connections.mdx index 793e6f3f4..edaecd0dd 100644 --- a/docs/documentation/deployment/troubleshooting/api_connections.mdx +++ b/docs/documentation/deployment/troubleshooting/api_connections.mdx @@ -38,7 +38,7 @@ Look for error messages or exceptions that might indicate why the API is not res **Symptom:** API starts but immediately exits or fails to initialize. **Solutions:** -- Verify that all required services (Postgres, Neo4j, etc.) are up and healthy. +- Verify that all required services (Postgres, etc.) are up and healthy. - Check if the `depends_on` conditions in the Docker Compose file are correct. - Increase the retry count or add a delay in the API service startup script. 
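A couple of quick checks can help distinguish an API that never started from dependencies that are not yet healthy. This is a rough sketch assuming the default port (7272) and a Docker-based deployment; container names vary by setup.

```bash
# Is the API answering at all?
curl -s http://localhost:7272/v2/health

# Are the API and its dependencies up and healthy?
docker ps --format '{{.Names}}\t{{.Status}}'

# Look for startup errors in the API container (replace with your container name)
docker logs --tail 100 r2r-container
```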
diff --git a/docs/documentation/deployment/troubleshooting/api_responses.mdx b/docs/documentation/deployment/troubleshooting/api_responses.mdx index d59bd93ff..03cd0e1f2 100644 --- a/docs/documentation/deployment/troubleshooting/api_responses.mdx +++ b/docs/documentation/deployment/troubleshooting/api_responses.mdx @@ -125,7 +125,7 @@ curl -X GET "http://your-r2r-api-url/v2/health" -H "accept: application/json" **Issue:** API returns relationships between data that don't match expectations. **Solution:** -- Review your knowledge graph structure in Neo4j. +- Review your knowledge graph structure. - Check the logic in your data ingestion and relationship creation processes. - Verify that your query is correctly traversing the graph. diff --git a/docs/documentation/deployment/troubleshooting/connection_strings.mdx b/docs/documentation/deployment/troubleshooting/connection_strings.mdx index 5d91ab7e3..897842e9c 100644 --- a/docs/documentation/deployment/troubleshooting/connection_strings.mdx +++ b/docs/documentation/deployment/troubleshooting/connection_strings.mdx @@ -48,15 +48,6 @@ DATABASE_URL=postgres://user:password@correcthost:5432/dbname 1. Verify username and password are correct 2. Ensure the user has the required permissions on the database -Example fix for Neo4j: -``` -# Before -NEO4J_AUTH=neo4j/wrongpassword - -# After -NEO4J_AUTH=neo4j/correctpassword -``` - ### 2.3 Invalid Connection String Format **Symptom:** Errors mentioning "invalid connection string" or specific syntax errors @@ -150,7 +141,6 @@ For cloud deployments: 1. **Test the connection independently:** Use command-line tools to test the connection outside of R2R: - For PostgreSQL: `psql -h -U -d ` - - For Neo4j: `cypher-shell -a : -u -p ` 2. **Check service logs:** Examine logs of the service you're trying to connect to for any error messages or access attempts. diff --git a/docs/documentation/deployment/troubleshooting/database.mdx b/docs/documentation/deployment/troubleshooting/database.mdx index 2cdd46763..e8cee37ab 100644 --- a/docs/documentation/deployment/troubleshooting/database.mdx +++ b/docs/documentation/deployment/troubleshooting/database.mdx @@ -1,6 +1,6 @@ # Troubleshooting Guide: Database Connection Failures in R2R -Database connection issues can significantly impact the functionality of your R2R deployment. This guide will help you diagnose and resolve common database connection problems for both Postgres and Neo4j. +Database connection issues can significantly impact the functionality of your R2R deployment. This guide will help you diagnose and resolve common database connection problems for both Postgres. ## 1. General Troubleshooting Steps @@ -9,20 +9,17 @@ Before diving into database-specific issues, try these general troubleshooting s 1. **Check Database Service Status**: Ensure the database service is running. ```bash docker ps | grep postgres - docker ps | grep neo4j ``` 2. **Verify Network Connectivity**: Ensure the R2R service can reach the database. ```bash docker exec r2r-container ping postgres - docker exec r2r-container ping neo4j ``` 3. **Check Logs**: Examine R2R and database container logs for error messages. ```bash docker logs r2r-container docker logs postgres-container - docker logs neo4j-container ``` 4. **Verify Environment Variables**: Ensure all necessary environment variables are correctly set in your Docker Compose file or deployment configuration. 
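It can also help to confirm, from outside R2R, that Postgres is reachable with the configured credentials and that pgvector is installed. The sketch below uses the standard Postgres client tools; host, port, user, and database names are placeholders.

```bash
# Check that Postgres is accepting connections
pg_isready -h your_postgres_host -p 5432

# Connect with the same credentials R2R uses
psql -h your_postgres_host -p 5432 -U your_postgres_user -d your_db -c "SELECT 1;"

# Verify that the pgvector extension is installed in the target database
psql -h your_postgres_host -p 5432 -U your_postgres_user -d your_db -c "SELECT extname FROM pg_extension WHERE extname = 'vector';"
```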
@@ -67,45 +64,9 @@ Before diving into database-specific issues, try these general troubleshooting s - Recreate the Postgres user or database if they're missing. - Modify Postgres network settings to allow connections from R2R. -## 3. Neo4j Connection Issues +## 3. Advanced Troubleshooting -### 3.1 Common Neo4j Error Messages - -- "Unable to connect to Neo4j at `neo4j://neo4j:7687`" -- "Authentication failure" -- "Connection refused" - -### 3.2 Troubleshooting Steps for Neo4j - -1. **Check Neo4j Connection Settings**: - - Verify the `NEO4J_*` environment variables in your R2R configuration. - - Ensure the bolt URL, username, and password are correct. - -2. **Test Neo4j Connection**: - ```bash - docker exec neo4j-container cypher-shell -u neo4j -p your_password "RETURN 1;" - ``` - -3. **Check Neo4j Logs**: - ```bash - docker logs neo4j-container - ``` - -4. **Verify Neo4j Authentication**: - - Ensure the Neo4j password is correctly set and matches the R2R configuration. - -5. **Check Neo4j Network Settings**: - - Verify that Neo4j is configured to accept connections on the correct interfaces. - -### 3.3 Common Solutions for Neo4j Issues - -- Update the Neo4j connection settings in R2R configuration. -- Reset the Neo4j password if authentication is failing. -- Modify Neo4j configuration to allow connections from R2R container. - -## 4. Advanced Troubleshooting - -### 4.1 Database Container Health Checks +### 3.1 Database Container Health Checks Ensure your Docker Compose file includes proper health checks for database services: @@ -117,7 +78,7 @@ healthcheck: retries: 5 ``` -### 4.2 Network Debugging +### 3.2 Network Debugging If network issues persist: @@ -132,14 +93,13 @@ If network issues persist: docker exec postgres-container netstat -tuln ``` -### 4.3 Volume Permissions +### 3.3 Volume Permissions Check if volume permissions are causing issues: 1. Inspect volume permissions: ```bash docker exec postgres-container ls -l /var/lib/postgresql/data - docker exec neo4j-container ls -l /data ``` 2. Adjust permissions if necessary: @@ -147,7 +107,7 @@ Check if volume permissions are causing issues: docker exec postgres-container chown -R postgres:postgres /var/lib/postgresql/data ``` -## 5. Preventive Measures +## 4. Preventive Measures To avoid future database connection issues: @@ -156,7 +116,7 @@ To avoid future database connection issues: 3. Set up monitoring and alerting for database health and connectivity. 4. Regularly backup your database and test restoration procedures. -## 6. Seeking Further Help +## 5. Seeking Further Help If you're still experiencing issues: diff --git a/docs/documentation/deployment/troubleshooting/dependencies.mdx b/docs/documentation/deployment/troubleshooting/dependencies.mdx index a278274f7..9c8ad2ea7 100644 --- a/docs/documentation/deployment/troubleshooting/dependencies.mdx +++ b/docs/documentation/deployment/troubleshooting/dependencies.mdx @@ -120,22 +120,6 @@ Dependency conflicts can occur when different components of the R2R system requi - Ensure all services are using compatible versions - Update service versions if necessary -## 5. Conflicts with External Services - -### 5.1 Neo4j Conflicts - -1. Check Neo4j version compatibility with R2R -2. Ensure required Neo4j plugins are installed and compatible - -### 5.2 Postgres Conflicts - -1. Verify Postgres version (should be 13+) -2. Check if pgvector extension is properly installed - -### 5.3 Ollama Conflicts - -1. Ensure Ollama version is compatible with R2R -2. 
Check if required models are available in Ollama ## 6. Advanced Troubleshooting diff --git a/docs/documentation/deployment/troubleshooting/environment.mdx b/docs/documentation/deployment/troubleshooting/environment.mdx index 00cad233c..2b610a1b3 100644 --- a/docs/documentation/deployment/troubleshooting/environment.mdx +++ b/docs/documentation/deployment/troubleshooting/environment.mdx @@ -17,7 +17,6 @@ Here are some critical environment variables for R2R: - Database credentials (e.g., `POSTGRES_USER`, `POSTGRES_PASSWORD`) - API keys (e.g., `OPENAI_API_KEY`, `ANTHROPIC_API_KEY`) - Configuration settings (e.g., `CONFIG_NAME`, `CONFIG_PATH`) -- Service URLs (e.g., `NEO4J_URL`, `OLLAMA_API_BASE`) ## 3. Checking for Missing Variables @@ -114,7 +113,6 @@ Ensure these key R2R variables are set: - `CONFIG_NAME` or `CONFIG_PATH`: Specifies which configuration to use. - `POSTGRES_*`: Database connection details. -- `NEO4J_*`: Neo4j connection details. - `OPENAI_API_KEY`: If using OpenAI services. - `ANTHROPIC_API_KEY`: If using Anthropic models. - `OLLAMA_API_BASE`: For local LLM integration. diff --git a/docs/documentation/deployment/troubleshooting/firewall.mdx b/docs/documentation/deployment/troubleshooting/firewall.mdx index a3ca274a8..7aa3a887a 100644 --- a/docs/documentation/deployment/troubleshooting/firewall.mdx +++ b/docs/documentation/deployment/troubleshooting/firewall.mdx @@ -26,9 +26,6 @@ aws ec2 authorize-security-group-ingress --group-name R2R-SecurityGroup --protoc # Allow access to Hatchet Dashboard aws ec2 authorize-security-group-ingress --group-name R2R-SecurityGroup --protocol tcp --port 8002 --cidr 0.0.0.0/0 - -# If using Neo4j browser interface -aws ec2 authorize-security-group-ingress --group-name R2R-SecurityGroup --protocol tcp --port 7474 --cidr 0.0.0.0/0 ``` ### Azure Network Security Groups @@ -92,9 +89,6 @@ sudo ufw allow 8001/tcp # Allow Hatchet Dashboard sudo ufw allow 8002/tcp -# If using Neo4j browser interface -sudo ufw allow 7474/tcp - # Enable the firewall sudo ufw enable ``` @@ -114,9 +108,6 @@ sudo firewall-cmd --permanent --add-port=8001/tcp # Allow Hatchet Dashboard sudo firewall-cmd --permanent --add-port=8002/tcp -# If using Neo4j browser interface -sudo firewall-cmd --permanent --add-port=7474/tcp - # Reload firewall sudo firewall-cmd --reload ``` @@ -126,7 +117,7 @@ sudo firewall-cmd --reload 1. Open Windows Defender Firewall with Advanced Security 2. Click on "Inbound Rules" and then "New Rule" 3. Choose "Port" and click "Next" -4. Select "TCP" and enter the specific ports (22, 7272, 8001, 8002, 7474 if using Neo4j browser) +4. Select "TCP" and enter the specific ports (22, 7272, 8001, 8002) 5. Choose "Allow the connection" and click "Next" 6. Apply the rule to all profiles (Domain, Private, Public) 7. Give the rule a name (e.g., "R2R Ports") and click "Finish" diff --git a/docs/documentation/deployment/troubleshooting/graph_db.mdx b/docs/documentation/deployment/troubleshooting/graph_db.mdx deleted file mode 100644 index 050b3cbd9..000000000 --- a/docs/documentation/deployment/troubleshooting/graph_db.mdx +++ /dev/null @@ -1,177 +0,0 @@ -# Troubleshooting Guide: Neo4j Graph Database Query Failures in R2R - -When working with R2R (RAG to Riches) and encountering Neo4j graph database query failures, it's important to systematically identify and resolve the issues. This guide will help you troubleshoot common problems. - -## 1. Verify Neo4j Connection - -First, ensure that R2R can connect to Neo4j: - -1. 
Check Neo4j connection settings in your R2R configuration: - ```bash - cat /path/to/your/r2r_config.toml - ``` - Look for the `[graph_store]` section and verify the Neo4j URL, username, and password. - -2. Try connecting to Neo4j using the official Neo4j Browser or cypher-shell: - ```bash - cypher-shell -a bolt://localhost:7687 -u neo4j -p your_password - ``` - If this fails, it's likely a connection issue rather than a query problem. - -## 2. Check Neo4j Logs - -Neo4j logs can provide valuable information about query failures: - -1. Locate Neo4j logs: - ```bash - docker exec -it r2r-neo4j-1 bash - cat /logs/neo4j.log | grep ERROR - ``` - -2. Look for specific error messages related to your queries. - -## 3. Validate Cypher Queries - -Ensure your Cypher queries are correctly formatted: - -1. Extract the failing query from R2R logs: - ```bash - docker logs r2r-r2r-1 | grep "Cypher query" - ``` - -2. Test the query directly in Neo4j Browser or cypher-shell. - -3. Common Cypher syntax issues: - - Missing or mismatched parentheses - - Incorrect property names - - Case sensitivity (Neo4j is case-sensitive) - - Incorrect relationship types - -## 4. Check for Data Integrity - -Ensure the data in Neo4j matches what your queries expect: - -1. Verify node labels: - ```cypher - MATCH (n) RETURN DISTINCT labels(n) - ``` - -2. Check relationship types: - ```cypher - MATCH ()-[r]->() RETURN DISTINCT type(r) - ``` - -3. Inspect property keys: - ```cypher - MATCH (n) - UNWIND keys(n) AS key - RETURN DISTINCT key - ``` - -## 5. Performance Issues - -If queries are failing due to timeouts: - -1. Check Neo4j's query timeout setting: - ```bash - docker exec -it r2r-neo4j-1 bash - cat /var/lib/neo4j/conf/neo4j.conf | grep dbms.transaction.timeout - ``` - -2. Analyze query performance: - - Use `PROFILE` before your Cypher query in Neo4j Browser to see execution plans. - - Look for full graph scans or high-cost operations. - -3. Ensure proper indexes are in place: - ```cypher - SHOW INDEXES - ``` - -## 6. Memory and Resource Constraints - -Graph queries can be memory-intensive: - -1. Check Neo4j's memory settings: - ```bash - docker exec -it r2r-neo4j-1 bash - cat /var/lib/neo4j/conf/neo4j.conf | grep dbms.memory - ``` - -2. Monitor Neo4j's memory usage: - ```bash - docker stats r2r-neo4j-1 - ``` - -3. Adjust memory settings if necessary in your Docker Compose file. - -## 7. Version Compatibility - -Ensure R2R is compatible with your Neo4j version: - -1. Check Neo4j version: - ```bash - docker exec -it r2r-neo4j-1 neo4j --version - ``` - -2. Verify R2R's supported Neo4j versions in the documentation. - -## 8. Plugin Issues - -If using Neo4j plugins like APOC or GDS: - -1. Verify plugin installation: - ```cypher - CALL dbms.procedures() YIELD name - WHERE name STARTS WITH "apoc" OR name STARTS WITH "gds" - RETURN name - ``` - -2. Check plugin compatibility with your Neo4j version. - -## 9. Authentication and Authorization - -Ensure the Neo4j user has proper permissions: - -1. Check user roles: - ```cypher - SHOW USERS - ``` - -2. Verify database access rights: - ```cypher - SHOW USER neo4j PRIVILEGES - ``` - -## 10. Network Issues - -For dockerized setups: - -1. Ensure Neo4j is on the same Docker network as R2R: - ```bash - docker network inspect r2r-network - ``` - -2. Check if Neo4j is accessible from the R2R container: - ```bash - docker exec -it r2r-r2r-1 ping neo4j - ``` - -## Debugging Steps - -1. Enable verbose logging in R2R for Neo4j operations. -2. 
Use Neo4j's query logging feature to capture all executed queries. -3. Implement query parameter logging in R2R to ensure correct parameter passing. - -## Getting Help - -If issues persist: - -1. Gather relevant logs (R2R, Neo4j, Docker). -2. Document the exact steps to reproduce the issue. -3. Check R2R and Neo4j documentation for known issues. -4. Consult the R2R community on Discord or GitHub for support. -5. For Neo4j-specific issues, consider posting on Neo4j's community forums. - -Remember to sanitize any sensitive information before sharing logs or queries publicly. - -By following this guide, you should be able to identify and resolve most Neo4j query failures in your R2R setup. If problems persist, don't hesitate to seek help from the R2R or Neo4j communities. diff --git a/docs/documentation/deployment/troubleshooting/high_usage.mdx b/docs/documentation/deployment/troubleshooting/high_usage.mdx index 001115f9d..3159b73e1 100644 --- a/docs/documentation/deployment/troubleshooting/high_usage.mdx +++ b/docs/documentation/deployment/troubleshooting/high_usage.mdx @@ -38,7 +38,6 @@ This will show CPU, memory, and I/O usage for each container. #### Solutions: 1. **Optimize queries:** - Review and optimize database queries, especially those involving large datasets. - - Use indexing in Postgres and Neo4j where appropriate. 2. **Adjust background task frequency:** - Review Hatchet workflows and adjust the frequency of recurring tasks. @@ -101,7 +100,7 @@ This will show CPU, memory, and I/O usage for each container. - Implement log rotation to manage file sizes. 3. **Database optimizations:** - - Ensure proper indexing in Postgres and Neo4j. + - Ensure proper indexing in Postgres. - Optimize query patterns to reduce full table scans. 4. **File ingestion improvements:** diff --git a/docs/documentation/deployment/troubleshooting/index.mdx b/docs/documentation/deployment/troubleshooting/index.mdx index ef3a523ab..91916c293 100644 --- a/docs/documentation/deployment/troubleshooting/index.mdx +++ b/docs/documentation/deployment/troubleshooting/index.mdx @@ -18,7 +18,7 @@ Have you encountered issues with deploying your R2R system? Have no fear our tro ### Local System Installation - Issue: [Dependency conflicts](/documentation/deployment/troubleshooting/dependencies) -- Issue: [Service configuration](/documentation/deployment/troubleshooting/services) problems (Postgres, Neo4j, Hatchet) +- Issue: [Service configuration](/documentation/deployment/troubleshooting/services) problems (Postgres, Hatchet) - Issue: [Unstructured.io](/documentation/deployment/troubleshooting/unstructured) setup difficulties ## Deployment Problems @@ -59,10 +59,6 @@ Have you encountered issues with deploying your R2R system? 
Have no fear our tro - Issue: [Vector storage problems](/documentation/deployment/troubleshooting/vector_store_issues) - Issue: [Connection string errors](/documentation/deployment/troubleshooting/connection_strings) -### Neo4j -- Issue: [Graph database query failures](/documentation/deployment/troubleshooting/graph_db) -- Issue: [Neo4j version compatibility](/documentation/deployment/troubleshooting/neo4j) - ### Hatchet - Issue: [Workflow orchestration failures](/documentation/deployment/troubleshooting/workflows) - Issue: [RabbitMQ connectivity issues](/documentation/deployment/troubleshooting/rabbit_mq) diff --git a/docs/documentation/deployment/troubleshooting/insufficient_resources.mdx b/docs/documentation/deployment/troubleshooting/insufficient_resources.mdx index a566f4b67..a36ce57bc 100644 --- a/docs/documentation/deployment/troubleshooting/insufficient_resources.mdx +++ b/docs/documentation/deployment/troubleshooting/insufficient_resources.mdx @@ -88,35 +88,25 @@ Look for error messages related to resource constraints. ## R2R-Specific Resource Considerations -### 1. Neo4j Memory Configuration - -Neo4j can be memory-intensive. Adjust its memory settings in the Docker Compose file: - -```yaml -environment: - - NEO4J_server_memory_pagecache_size=2G - - NEO4J_server_memory_heap_max__size=2G -``` - -### 2. Postgres with pgvector +### 1. Postgres with pgvector Vector operations can be CPU-intensive. Ensure your instance has sufficient CPU resources, or consider using a managed database service. -### 3. Ollama for Local LLM +### 2. Ollama for Local LLM Local LLM inference can be very resource-intensive. Ensure your instance has: - At least 8GB of RAM (16GB+ recommended) - Sufficient disk space for model storage - A capable CPU or GPU for inference -### 4. Hatchet Engine +### 3. Hatchet Engine The Hatchet workflow engine may require significant resources depending on your workload. Monitor its resource usage and adjust as necessary. ## Optimizing Resource Usage 1. **Use Resource Limits:** Set appropriate CPU and memory limits for each container. -2. **Optimize Configurations:** Fine-tune application configs (e.g., Neo4j memory settings, Postgres work_mem). +2. **Optimize Configurations:** Fine-tune application configs (e.g., Postgres work_mem). 3. **Scale Horizontally:** Consider splitting services across multiple smaller instances instead of one large instance. 4. **Use Managed Services:** For production, consider using managed services for databases and other resource-intensive components. 5. **Monitor and Alert:** Set up monitoring and alerting for resource usage to catch issues early. diff --git a/docs/documentation/deployment/troubleshooting/neo4j.mdx b/docs/documentation/deployment/troubleshooting/neo4j.mdx deleted file mode 100644 index c49936923..000000000 --- a/docs/documentation/deployment/troubleshooting/neo4j.mdx +++ /dev/null @@ -1,126 +0,0 @@ -# Troubleshooting Guide: Neo4j Version Compatibility with R2R - -Neo4j version compatibility is crucial for the proper functioning of R2R (RAG to Riches). This guide will help you diagnose and resolve issues related to Neo4j versions in your R2R deployment. - -## 1. Checking Neo4j Version - -First, verify the version of Neo4j you're running: - -1. If using Docker: - ```bash - docker exec -it neo4j --version - ``` - -2. If installed directly on the host: - ```bash - neo4j --version - ``` - -## 2. R2R Compatibility Matrix - -R2R is generally compatible with Neo4j versions 4.x and 5.x. 
Here's a quick compatibility matrix: - -- Neo4j 3.x: Not supported -- Neo4j 4.x: Supported (4.4+ recommended) -- Neo4j 5.x: Fully supported and recommended - -## 3. Common Issues and Solutions - -### 3.1 Outdated Neo4j Version - -**Symptom:** R2R fails to connect to Neo4j or throws deprecation warnings. - -**Solution:** -1. Update Neo4j to the latest supported version: - - For Docker: - ```yaml - neo4j: - image: neo4j:5.21.0 # Or the latest 5.x version - ``` - - For direct installation, follow Neo4j's official upgrade guide. - -2. After updating, restart your R2R application. - -### 3.2 Incompatible Cypher Queries - -**Symptom:** Some Cypher queries fail or produce unexpected results after a Neo4j version update. - -**Solution:** -1. Review R2R logs for specific Cypher query errors. -2. Consult Neo4j's changelog for syntax changes between versions. -3. Update the problematic queries in your R2R configuration or custom code. - -### 3.3 Plugin Compatibility Issues - -**Symptom:** Neo4j plugins (like APOC or GDS) fail to load or function correctly. - -**Solution:** -1. Ensure plugin versions match your Neo4j version: - ```yaml - environment: - - NEO4J_PLUGINS=["apoc", "graph-data-science"] - - NEO4J_apoc_version=5.21.0 # Should match Neo4j version - - NEO4J_gds_version=2.5.0 # Choose compatible GDS version - ``` -2. If using custom plugins, check their compatibility with your Neo4j version. - -### 3.4 Connection String Mismatches - -**Symptom:** R2R fails to establish a connection with Neo4j. - -**Solution:** -1. Verify the connection string in R2R configuration: - ```toml - [neo4j] - url = "bolt://neo4j:7687" # Use "neo4j+s://" for Neo4j 4.x with SSL - ``` -2. Ensure the protocol (bolt, neo4j, neo4j+s) is correct for your Neo4j version and setup. - -### 3.5 Authentication Changes - -**Symptom:** R2R fails to authenticate with Neo4j after an update. - -**Solution:** -1. Check if the authentication method has changed (e.g., from basic auth to JWT in newer versions). -2. Update the R2R configuration to match Neo4j's current authentication requirements: - ```toml - [neo4j] - user = "neo4j" - password = "your_password" - # Add any new authentication parameters here - ``` - -## 4. Debugging Steps - -If you're still experiencing issues: - -1. Enable verbose logging in both R2R and Neo4j. -2. Check Neo4j logs for any startup or runtime errors: - ```bash - docker logs - ``` -3. Verify network connectivity between R2R and Neo4j containers. -4. Try connecting to Neo4j using a standalone tool (like Neo4j Browser) to isolate R2R-specific issues. - -## 5. Rollback Procedure - -If all else fails, you may need to rollback to a previous working version: - -1. Document your current Neo4j version and configuration. -2. Stop the R2R and Neo4j services. -3. Revert to the last known working Neo4j image in your Docker Compose file. -4. Restart the services and verify functionality. - -## 6. Seeking Help - -If you're unable to resolve the issue: - -1. Gather the following information: - - R2R version - - Neo4j version (before and after any updates) - - Relevant sections of R2R and Neo4j logs - - Your Docker Compose file (with sensitive information redacted) -2. Check the [R2R GitHub Issues](https://github.com/SciPhi-AI/R2R/issues) for similar problems. -3. If your issue is unique, create a new GitHub issue with the gathered information. - -Remember to always backup your data before making significant version changes to your database system. 
diff --git a/docs/documentation/deployment/troubleshooting/port_conflicts.mdx b/docs/documentation/deployment/troubleshooting/port_conflicts.mdx index c070b51d9..834965483 100644 --- a/docs/documentation/deployment/troubleshooting/port_conflicts.mdx +++ b/docs/documentation/deployment/troubleshooting/port_conflicts.mdx @@ -135,16 +135,6 @@ If the R2R API server (default port 7272) is conflicting: PORT=7273 ``` -### Neo4j Port Conflicts - -Neo4j uses ports 7474 (HTTP) and 7687 (Bolt). If these are conflicting: - -1. Check for conflicts: - ```bash - sudo lsof -i :7474 - sudo lsof -i :7687 - ``` - ### Hatchet Engine Conflict If the Hatchet engine (default port 7077) is conflicting: diff --git a/docs/documentation/deployment/troubleshooting/r2r_logs.mdx b/docs/documentation/deployment/troubleshooting/r2r_logs.mdx index 9d682ed2c..9ccdebe40 100644 --- a/docs/documentation/deployment/troubleshooting/r2r_logs.mdx +++ b/docs/documentation/deployment/troubleshooting/r2r_logs.mdx @@ -227,7 +227,7 @@ queries = [ for _ in range(1000): query = random.choice(queries) - app.rag(query, GenerationConfig(model="gpt-4o-mini")) + app.rag(query, GenerationConfig(model="openai/gpt-4o-mini")) print("Preloading complete. You can now run analytics on this data.") ``` diff --git a/docs/documentation/deployment/troubleshooting/resources.mdx b/docs/documentation/deployment/troubleshooting/resources.mdx index d97a79f70..4d5d0a443 100644 --- a/docs/documentation/deployment/troubleshooting/resources.mdx +++ b/docs/documentation/deployment/troubleshooting/resources.mdx @@ -63,7 +63,6 @@ a. Increase Docker's memory limit: - On Docker Desktop, go to Settings > Resources > Advanced and increase memory allocation. b. Optimize memory usage in R2R configuration: - - Adjust `NEO4J_server_memory_heap_max__size` for Neo4j - Modify Postgres memory settings in `postgresql.conf` c. Add or increase swap space on your host system. @@ -94,7 +93,7 @@ b. Increase disk space allocation for Docker: - On Docker Desktop, go to Settings > Resources > Advanced and increase disk image size. c. Monitor and manage log file sizes: - - Implement log rotation for services like Neo4j and Postgres + - Implement log rotation for services like Postgres - Use Docker's logging options to limit log file sizes: ```yaml logging: diff --git a/docs/documentation/deployment/troubleshooting/services.mdx b/docs/documentation/deployment/troubleshooting/services.mdx index f8ca33fc5..829acb3fb 100644 --- a/docs/documentation/deployment/troubleshooting/services.mdx +++ b/docs/documentation/deployment/troubleshooting/services.mdx @@ -1,6 +1,6 @@ -# R2R Service Configuration Troubleshooting Guide: Postgres, Neo4j, Hatchet +# R2R Service Configuration Troubleshooting Guide: Postgres, Hatchet -This guide addresses common configuration problems for Postgres, Neo4j, and Hatchet services in R2R deployments. +This guide addresses common configuration problems for Postgres, and Hatchet services in R2R deployments. ## Postgres Configuration Issues @@ -49,47 +49,6 @@ c) Authentication problems: ``` - Tune Postgres configuration parameters like `shared_buffers`, `effective_cache_size`, etc. -## Neo4j Configuration Issues - -### 1. Authentication Failures - -**Symptom:** R2R cannot authenticate with Neo4j. - -**Solution:** -- Verify the `NEO4J_AUTH` environment variable: - ```yaml - environment: - - NEO4J_AUTH=neo4j/yourpassword - ``` -- Ensure R2R is using the correct credentials in its configuration. - -### 2. 
Memory Allocation Issues - -**Symptom:** Neo4j fails to start or crashes with out-of-memory errors. - -**Solution:** -- Adjust memory settings: - ```yaml - environment: - - NEO4J_server_memory_pagecache_size=1G - - NEO4J_server_memory_heap_initial__size=1G - - NEO4J_server_memory_heap_max__size=1G - ``` -- Ensure your host system has enough available memory. - -### 3. Plugin Issues - -**Symptom:** Required Neo4j plugins are not loading or functioning. - -**Solution:** -- Verify plugin configuration: - ```yaml - environment: - - NEO4J_PLUGINS=["apoc", "graph-data-science"] - - NEO4J_dbms_security_procedures_unrestricted=apoc.*,gds.* - ``` -- Check Neo4j logs for plugin-related errors. -- Ensure plugin compatibility with your Neo4j version. ## Hatchet Configuration Issues @@ -156,14 +115,12 @@ c) Authentication problems: 1. **Check Logs:** Always start by examining the logs of the problematic service: ``` docker-compose logs postgres - docker-compose logs neo4j docker-compose logs hatchet-engine ``` 2. **Verify Network Connectivity:** Ensure services can communicate: ``` docker-compose exec r2r ping postgres - docker-compose exec r2r ping neo4j docker-compose exec r2r ping hatchet-engine ``` @@ -185,4 +142,4 @@ c) Authentication problems: 6. **Environment Variables:** Double-check all environment variables in your `.env` file and `docker-compose.yml`. -By following this guide, you should be able to diagnose and resolve most configuration issues related to Postgres, Neo4j, and Hatchet in your R2R deployment. If problems persist, consider seeking help from the R2R community or support channels. +By following this guide, you should be able to diagnose and resolve most configuration issues related to Postgres, and Hatchet in your R2R deployment. If problems persist, consider seeking help from the R2R community or support channels. diff --git a/docs/documentation/deployment/troubleshooting/slow_queries.mdx b/docs/documentation/deployment/troubleshooting/slow_queries.mdx index 810c2400c..0a18860a9 100644 --- a/docs/documentation/deployment/troubleshooting/slow_queries.mdx +++ b/docs/documentation/deployment/troubleshooting/slow_queries.mdx @@ -36,21 +36,6 @@ Before diving into specific solutions, it's crucial to identify where the slowdo VACUUM ANALYZE; ``` -### 2.2 Neo4j - -1. **Use PROFILE to analyze query performance:** - ```cypher - PROFILE MATCH (n)-[r]->(m) RETURN n, r, m LIMIT 100; - ``` - -2. **Check for missing indexes:** - ```cypher - CALL db.indexes(); - ``` - -3. **Monitor the Neo4j query log:** - Check the neo4j.log file for slow queries. - ## 3. Optimize Vector Search 1. **Check vector index:** @@ -120,7 +105,6 @@ Before diving into specific solutions, it's crucial to identify where the slowdo 1. **Review and optimize complex queries:** - Break down complex queries into simpler ones - Use appropriate JOIN types in SQL queries - - Optimize Cypher queries for Neo4j 2. **Use query parameterization:** Avoid string concatenation in queries to leverage query plan caching. diff --git a/docs/documentation/deployment/troubleshooting/timeouts.mdx b/docs/documentation/deployment/troubleshooting/timeouts.mdx index 62ecd45e9..8f2d71628 100644 --- a/docs/documentation/deployment/troubleshooting/timeouts.mdx +++ b/docs/documentation/deployment/troubleshooting/timeouts.mdx @@ -73,18 +73,16 @@ If the R2R API is timing out: ### 4.2 Database Connection Timeouts -For Postgres or Neo4j connection issues: +For Postgres connection issues: 1. 
Verify database service is running: ```bash docker-compose ps postgres - docker-compose ps neo4j ``` 2. Check database logs: ```bash docker-compose logs postgres - docker-compose logs neo4j ``` 3. Ensure correct connection strings in R2R configuration. diff --git a/docs/documentation/deployment/troubleshooting/workflows.mdx b/docs/documentation/deployment/troubleshooting/workflows.mdx index 7f8627992..ad9724627 100644 --- a/docs/documentation/deployment/troubleshooting/workflows.mdx +++ b/docs/documentation/deployment/troubleshooting/workflows.mdx @@ -124,20 +124,11 @@ Inspect RabbitMQ queues to check for message backlogs or routing issues: 2. Verify the pgvector extension is properly installed and enabled. 3. Ensure the R2R configuration correctly specifies the vector store settings. -### 5.3 Knowledge Graph Generation Failures - -**Symptom:** Neo4j graph not updating or showing incorrect data. - -**Solution:** -1. Check Neo4j connection settings in the R2R configuration. -2. Verify Neo4j logs for any query execution errors. -3. Ensure the workflow steps for graph generation are correctly defined. - ## 6. Seeking Further Help If you're still experiencing issues after trying these solutions: -1. Gather all relevant logs (R2R, Hatchet, RabbitMQ, Postgres, Neo4j). +1. Gather all relevant logs (R2R, Hatchet, RabbitMQ, Postgres). 2. Document the steps to reproduce the issue. 3. Check the R2R GitHub repository for similar reported issues. 4. Consider opening a new issue on the R2R GitHub repository with your findings. diff --git a/docs/documentation/installation/docker.mdx b/docs/documentation/installation/full/docker.mdx similarity index 86% rename from docs/documentation/installation/docker.mdx rename to docs/documentation/installation/full/docker.mdx index a7c1ce71d..17be5e07a 100644 --- a/docs/documentation/installation/docker.mdx +++ b/docs/documentation/installation/full/docker.mdx @@ -1,5 +1,8 @@ +This installation guide is for R2R Core. For solo developers or teams prototyping, we highly recommend starting with R2R Light. + # R2R Docker Installation + This guide will walk you through installing and running R2R using Docker, which is the quickest and easiest way to get started. ## Prerequisites @@ -25,14 +28,14 @@ pip install r2r # Set cloud LLM settings export OPENAI_API_KEY=sk-... - r2r serve --docker --config-name=default + r2r serve --docker --full ``` [Refer here](/documentation/configuration/llm) for more information on how to configure various LLM providers. To start R2R with your local computer as the default LLM inference provider: ```bash - r2r serve --docker --config-name=local_llm + r2r serve --docker --full --config-name=full_local_llm ``` Then, in a separate terminal you will need to run Ollama to provide completions: ```bash @@ -53,12 +56,10 @@ pip install r2r -Hatchet (RabbitMQ based orchestration), Neo4j, and Postgres come bundled into the R2R Docker by default. If you prefer to use your own deployments of either Neo4j or Postgres, you may optionally include the flags `--exclude-neo4j` and/or `--exclude-postgres`. - -Excluding postgres requires that a valid configuration be selected, such as `core/configs/neo4j_kg_no_vector_postgres.toml`. +Hatchet (RabbitMQ based orchestration), and Postgres come bundled into the R2R Docker by default. -The above command will automatically pull the necessary Docker images and start all the required containers, including `R2R`, `Hatchet`, `Neo4j`, and `Postgres+pgvector`. 
+The above command will automatically pull the necessary Docker images and start all the required containers, including `R2R`, `Hatchet`, and `Postgres+pgvector`. The end result is a live server at http://localhost:7272 serving the [R2R API](/api-reference/introduction). diff --git a/docs/documentation/installation/local-system.mdx b/docs/documentation/installation/full/local-system.mdx similarity index 83% rename from docs/documentation/installation/local-system.mdx rename to docs/documentation/installation/full/local-system.mdx index cdebe2b18..57140337f 100644 --- a/docs/documentation/installation/local-system.mdx +++ b/docs/documentation/installation/full/local-system.mdx @@ -1,33 +1,33 @@ +This installation guide is for R2R Core. For solo developers or teams prototyping, we highly recommend starting with R2R Light. # R2R Local System Installation This guide will walk you through installing and running R2R on your local system without using Docker. This method allows for more customization and control over individual components. - - Local installation of R2R is challenging due to the numerous services it integrates. We strongly recommend using Docker to get started quickly. + + + Local installation of R2R Core is challenging due to the numerous services it integrates. We strongly recommend using Docker to get started quickly. If you choose to proceed with a local installation, be prepared to set up and configure the following services: 1. **Postgres with pgvector**: A relational database with vector storage capabilities. 2. **Unstructured.io**: A complex system for file ingestion. - 3. **Neo4j**: A graph database system. 4. **Hatchet**: A RabbitMQ-based orchestration system. Alternatively, you can use cloud versions of these services, but you'll be responsible for enrolling in them and providing the necessary environment variables. Each of these components has its own requirements, potential compatibility issues, and configuration complexities. Debugging issues in a local setup can be significantly more challenging than using a pre-configured Docker environment. - + Unless you have a specific need for a local installation and are comfortable with advanced system configuration, we highly recommend using the Docker setup method for a smoother experience. ## Prerequisites Before starting, ensure you have the following installed and/or available in the cloud: -- Python 3.8 or higher +- Python 3.10 or higher - pip (Python package manager) - Git -- Postgres +- Postgres + pgvector - Unstructured file ingestion -- Neo4j - Hatchet workflow orchestration ## Install the R2R CLI & Python SDK @@ -35,7 +35,7 @@ Before starting, ensure you have the following installed and/or available in the First, install the R2R CLI and Python SDK: ```bash -pip install r2r +pip install 'r2r[core ingestion-bundle hatchet]' ``` ## Environment Setup @@ -68,23 +68,12 @@ R2R requires connections to various services. Set up the following environment v export POSTGRES_HOST=$YOUR_POSTGRES_HOST export POSTGRES_PORT=$YOUR_POSTGRES_PORT export POSTGRES_DBNAME=$YOUR_POSTGRES_DBNAME - export POSTGRES_PROJECT_NAME=$YOUR_VECS_COLLECTION # see note below + export POSTGRES_PROJECT_NAME=$YOUR_PROJECT_NAME # see note below ``` - The `POSTGRES_PROJECT_NAME` environment variable defines the collection within your Postgres database where R2R related tables reside. If the specified collection does not exist then it will be created by R2R during initialization. 
+ The `POSTGRES_PROJECT_NAME` environment variable defines the tables within your Postgres database where the selected R2R project resides. If the specified tables do not exist then they will be created by R2R during initialization. - - Use your own local instance of Neo4j. - ```bash - # Set Neo4j settings - export NEO4J_USER=$YOUR_NEO4J_USER - export NEO4J_PASSWORD=$YOUR_NEO4J_PASSWORD - export NEO4J_URL=$YOUR_NEO4J_URL - export NEO4J_DATABASE=$YOUR_NEO4J_DATABASE - ``` - Refer [here for basic setup help](/other/neo4j-setup). - By default, R2R uses [unstructured.io](https://docs.unstructured.io/welcome) to handle file ingestion. Unstructured can be: @@ -102,6 +91,21 @@ R2R requires connections to various services. Set up the following environment v +## Running R2R + +After setting up your environment, you can start R2R using the following command: + +```bash +# requires services for unstructured, hatchet, postgres +r2r serve --config-name=core +``` + +For local LLM usage: + +```bash +r2r serve --config-name=core_local_llm +``` + ## Python Development Mode For those looking to develop R2R locally: @@ -112,26 +116,16 @@ For those looking to develop R2R locally: ```bash git clone git@github.com:SciPhi-AI/R2R.git cd R2R/py - poetry install -E core + poetry install -E "core ingestion-bundle hatchet" ``` 3. Setup environment: Follow the steps listed in the Environment Setup section above. Additionally, you may introduce a local .env file to make development easier, and you can customize your local `r2r.toml` to suit your specific needs. -## Running R2R - -After setting up your environment, you can start R2R using the following command: - -```bash -# requires full unstructured installation -r2r serve --config-name=default -``` - -For local LLM usage: - -```bash -r2r serve --config-name=local_llm -``` +4. Start your server: + ```bash + poetry run r2r serve --config-name=core + ``` ## Next Steps diff --git a/docs/documentation/installation/light/docker.mdx b/docs/documentation/installation/light/docker.mdx new file mode 100644 index 000000000..2e03edcc8 --- /dev/null +++ b/docs/documentation/installation/light/docker.mdx @@ -0,0 +1,81 @@ +# R2R Docker Installation + +This guide will walk you through installing and running R2R using Docker, which is the quickest and easiest way to get started. + +## Prerequisites + +- Docker installed on your system. If you haven't installed Docker yet, please refer to the [official Docker installation guide](https://docs.docker.com/engine/install/). + +## Install the R2R CLI & Python SDK + +First, install the R2R CLI and Python SDK: + +```bash +pip install r2r +``` + +We are actively developing a distinct CLI binary for R2R for easier installation. Please reach out if you have any specific needs or feature requests. + +## Start R2R with Docker + + + + To start R2R with OpenAI as the default LLM inference and embedding provider: + ```bash + # Set cloud LLM settings + export OPENAI_API_KEY=sk-... + + r2r serve --docker + ``` + [Refer here](/documentation/configuration/llm) for more information on how to configure various LLM providers. + + + To start R2R with your local computer as the default LLM inference provider: + ```bash + r2r serve --docker --config-name=light_local_llm + ``` + Then, in a separate terminal you will need to run Ollama to provide completions: + ```bash + ollama pull llama3.1 + ollama pull mxbai-embed-large + ollama serve + ``` + The code above assumes that Ollama has already been installed. 
If you have not yet done so, then refer to the official Ollama webpage [for installation instructions](https://ollama.com/download). For more information on local installation, [refer here](/documentation/local-rag). + + + R2R offers flexibility in selecting and configuring LLMs, allowing you to optimize your RAG pipeline for various use cases. Execute the command below to deploy R2R with your own custom configuration: + ```bash + r2r serve --config-path=/abs/path/to/my_r2r.toml + ``` + + Learn in detail how to [configure your deployment here](/documentation/configuration). + + + + +Postgres comes bundled into the R2R Docker by default. + + +The above command will automatically pull the necessary Docker images and start all the required containers, including `R2R` and `Postgres+pgvector`. + +The end result is a live server at http://localhost:7272 serving the [R2R API](/api-reference/introduction). + +In addition to launching a RESTful API, the R2R Docker also launches applications at `localhost:7273` and `localhost:7274`, which you can [read more about here](/cookbooks/application). + +### Stopping R2R + +Safely stop your system by running `r2r docker-down` to avoid potential shutdown complications. + +## Next Steps + +After successfully installing R2R: + +1. **Verify Installation**: Ensure all components are running correctly by accessing the R2R API at http://localhost:7272/v2/health. + +2. **Quick Start**: Follow our [R2R Quickstart Guide](/documentation/quickstart) to set up your first RAG application. + +3. **In-Depth Tutorial**: For a more comprehensive understanding, work through our [R2R Walkthrough](/cookbooks/walkthrough). + +4. **Customize Your Setup**: Configure R2R components with the [Configuration Guide](/documentation/configuration). + +If you encounter any issues during installation or setup, please use our [Discord community](https://discord.gg/p6KqD2kjtB) or [GitHub repository](https://github.com/SciPhi-AI/R2R) to seek assistance. diff --git a/docs/documentation/installation/light/local-system.mdx b/docs/documentation/installation/light/local-system.mdx new file mode 100644 index 000000000..12c45db5d --- /dev/null +++ b/docs/documentation/installation/light/local-system.mdx @@ -0,0 +1,106 @@ +# R2R Local System Installation + +This guide will walk you through installing and running R2R on your local system without using Docker. This method allows for more customization and control over individual components. + + +## Prerequisites + +Before starting, ensure you have the following installed and/or available in the cloud: +- Python 3.10 or higher +- pip (Python package manager) +- Git +- Postgres + pgvector + +## Install the R2R CLI and extra dependencies + +First, install the R2R CLI with the additional `light` dependencies: + +```bash +pip install 'r2r[core,ingestion-bundle]' +``` + +The `core` and `ingestion-bundle` dependencies, combined with a Postgres database, provide the necessary components to deploy a user-facing R2R application into production. + +If you need advanced features like orchestration or parsing with `Unstructured.io`, then refer to the full installation. + +## Environment Setup + +R2R requires connections to various services. Set up the following environment variables based on your needs: + + + + Note that cloud providers are optional, as R2R can be run entirely locally. + ```bash + # Set cloud LLM settings + export OPENAI_API_KEY=sk-... + # export ANTHROPIC_API_KEY=... + # ...
+ ``` + + + With R2R you can connect to your own instance of Postgres+pgvector or a remote cloud instance. + ```bash + # Set Postgres+pgvector settings + export POSTGRES_USER=$YOUR_POSTGRES_USER + export POSTGRES_PASSWORD=$YOUR_POSTGRES_PASSWORD + export POSTGRES_HOST=$YOUR_POSTGRES_HOST + export POSTGRES_PORT=$YOUR_POSTGRES_PORT + export POSTGRES_DBNAME=$YOUR_POSTGRES_DBNAME + export POSTGRES_PROJECT_NAME=$YOUR_PROJECT_NAME # see note below + ``` + + The `POSTGRES_PROJECT_NAME` environment variable defines the tables within your Postgres database where the selected R2R project resides. If the specified tables do not exist then they will be created by R2R during initialization. + + If you are unfamiliar with Postgres then Supabase's free cloud offering is a good place to start. + + + + +## Running R2R + +After setting up your environment, you can start R2R using the following command: + +```bash +r2r serve +``` + +For local LLM usage: + +```bash +r2r serve --config-name=light_local_llm +``` + +## Python Development Mode + +For those looking to develop R2R locally: + +1. Install Poetry: Follow instructions on the [official Poetry website](https://python-poetry.org/docs/#installation). + +2. Clone and install dependencies: + ```bash + git clone git@github.com:SciPhi-AI/R2R.git + cd R2R/py + poetry install -E "core ingestion-bundle" + ``` + +3. Setup environment: + Follow the steps listed in the Environment Setup section above. Additionally, you may introduce a local .env file to make development easier, and you can customize your local `r2r.toml` to suit your specific needs. + +4. Start your server: + ```bash + poetry run r2r serve + ``` + +## Next Steps + +After successfully installing R2R: + +1. **Verify Installation**: Ensure all components are running correctly by accessing the R2R API at http://localhost:7272/v2/health. + +2. **Quick Start**: Follow our [R2R Quickstart Guide](/documentation/quickstart) to set up your first RAG application. + +3. **In-Depth Tutorial**: For a more comprehensive understanding, work through our [R2R Walkthrough](/cookbooks/walkthrough). + +4. **Customize Your Setup**: Configure R2R components with the [Configuration Guide](/documentation/configuration). + +If you encounter any issues during installation or setup, please use our [Discord community](https://discord.gg/p6KqD2kjtB) or [GitHub repository](https://github.com/SciPhi-AI/R2R) to seek assistance. diff --git a/docs/documentation/installation/overview.mdx b/docs/documentation/installation/overview.mdx index 241fc7f4c..92545ecb1 100644 --- a/docs/documentation/installation/overview.mdx +++ b/docs/documentation/installation/overview.mdx @@ -6,20 +6,17 @@ Welcome to the R2R installation guide. R2R offers powerful features for your RAG - **State-of-the-Art Tech**: Advanced RAG techniques like [hybrid search](/cookbooks/hybrid-search), [GraphRAG](/cookbooks/graphrag), and [agentic RAG](/cookbooks/agent). - **Auth & Orchestration**: Production must-haves like [auth](/cookbooks/user-auth) and [ingestion orchestration](/cookbooks/orchestration). -## Choose Your Installation Method +## Choose Your System - -SciPhi Cloud includes a generous free tier and is the quickest way to get up and running with R2R. Check out the [documentation here](/sciphi-cloud/deploy) to skip the local installation! - - -If you're new to R2R or want to get started quickly, we recommend the Docker installation. Local installation is supported on a best-effort basis. 
- - Install and run R2R with Docker + + A lightweight version of R2R, **perfect for quick prototyping and simpler applications**. Some advanced features, like orchestration and advanced document parsing, may not be available. - - Install and run R2R directly on your local system **(not recommended)**. + + The full-featured R2R system, ideal **for advanced use cases and production deployments**. Includes all components and capabilities, such as **Hatchet** for orchestration and **Unstructured** for parsing. + +Choose the system that best aligns with your requirements and proceed with the installation guide. diff --git a/docs/documentation/js-sdk/auth.mdx b/docs/documentation/js-sdk/auth.mdx index ca66f1753..1dc8dfdea 100644 --- a/docs/documentation/js-sdk/auth.mdx +++ b/docs/documentation/js-sdk/auth.mdx @@ -258,7 +258,8 @@ const updateResult = client.updateUser(undefined, "John Doe", "R2R enthusiast"); Delete the user's account: ```javascript -const delete_result = client.delete_user("password123") +const user_id = register_response["results"]["id"] // input unique id here +const delete_result = client.delete_user(user_id, "password123") ``` diff --git a/docs/documentation/js-sdk/ingestion.mdx b/docs/documentation/js-sdk/ingestion.mdx index 0ccb359af..109ee87cd 100644 --- a/docs/documentation/js-sdk/ingestion.mdx +++ b/docs/documentation/js-sdk/ingestion.mdx @@ -51,7 +51,7 @@ const ingestResponse = await client.ingestFiles(files, { - + The chunking config override parameter enables developers to customize their R2R chunking strategy at runtime. @@ -219,7 +219,7 @@ const updateResponse = await client.updateFiles(files, { An optional array of metadata objects for the updated files. - + The chunking config override parameter enables developers to customize their R2R chunking strategy at runtime. @@ -261,7 +261,7 @@ const documentsOverview = await client.documentsOverview(); ```bash [ { - 'document_id': '93123a68-d668-51de-8291-92162730dc87', + 'document_id': '9fbe403b-c11c-5aae-8ade-ef22980c3ad1', 'version': 'v1', 'size_in_bytes': 73353, 'metadata': {}, @@ -289,7 +289,7 @@ const documentsOverview = await client.documentsOverview(); Fetch chunks for a particular document: ```javascript -const documentId = '93123a68-d668-51de-8291-92162730dc87'; +const documentId = '9fbe403b-c11c-5aae-8ade-ef22980c3ad1'; const chunks = await client.documentChunks(documentId); ``` @@ -301,7 +301,7 @@ const chunks = await client.documentChunks(documentId); [{ 'text': 'Aristotle[A] (Greek: Ἀριστοτέλης Aristotélēs, pronounced [aristotélɛːs]; 384–322 BC) was an Ancient Greek philosopher and polymath...', 'user_id': '2acb499e-8428-543b-bd85-0d9098718220', - 'document_id': 'db02076e-989a-59cd-98d5-e24e15a0bd27', + 'document_id': '9fbe403b-c11c-5aae-8ade-ef22980c3ad1', 'extraction_id': 'aeba6400-1bd0-5ee9-8925-04732d675434', 'fragment_id': 'f48bcdad-4155-52a4-8c9d-8ba06e996ba3' 'metadata': {'title': 'aristotle.txt', 'version': 'v0', 'chunk_order': 0}} @@ -337,5 +337,5 @@ const deleteResponse = await client.delete({ document_id: "91662726-7271-51a5-a0 - A list of logical filters to perform over input documents fields which identifies the unique set of documents to delete (e.g., `{"document_id": {"$eq": "db02076e-989a-59cd-98d5-e24e15a0bd27"}}`). Logical operations might include variables such as `"user_id"` or `"title"` and filters like `neq`, `gte`, etc.
+ A list of logical filters to perform over input documents fields which identifies the unique set of documents to delete (e.g., `{"document_id": {"$eq": "9fbe403b-c11c-5aae-8ade-ef22980c3ad1"}}`). Logical operations might include variables such as `"user_id"` or `"title"` and filters like `neq`, `gte`, etc. diff --git a/docs/documentation/js-sdk/retrieval.mdx b/docs/documentation/js-sdk/retrieval.mdx index f10ddb521..f784f9762 100644 --- a/docs/documentation/js-sdk/retrieval.mdx +++ b/docs/documentation/js-sdk/retrieval.mdx @@ -669,7 +669,7 @@ if (agentResponse instanceof ReadableStream) { The agent endpoint will stream back its response, including internal tool calls. ```javascript - search{"query":"Aristotle's influence on later thinkers in philosophy"}"{"id":"b234931e-0cfb-5644-8f23-560a3097f5fe","score":1.0,"metadata":{"text":"ormation: List of writers influenced by Aristotle More than 2300 years after his death, Aristotle remains one of the most influential people who ever lived.[142][143][144] He contributed to almost every field of human knowledge then in existence, and he was the founder of many new fields. According to the philosopher Bryan Magee, \"it is doubtful whether any human being has ever known as much as he did\".[145]","title":"aristotle.txt","user_id":"2acb499e-8428-543b-bd85-0d9098718220","document_id":"93123a68-d668-51de-8291-92162730dc87","extraction_id":"69431c4a-30cf-504f-8fab-7dcfc7580c63","associatedQuery":"Aristotle's influence on later thinkers in philosophy"}}","{"id":"1827ac2c-2a06-5bc2-ad29-aa14b4d99540","score":1.0,"metadata":{"text":"subject of contemporary philosophical discussion. Aristotle's views profoundly shaped medieval scholarship. The influence of his physical science extended from late antiquity and the Early Middle Ages into the Renaissance, and was not replaced systematically until the Enlightenment and theories such as classical mechanics were developed. He influenced Judeo-Islamic philosophies during the Middle Ages, as well as Christian theology, especially the Neoplatonism of the Early Church and the scholastic tradition","title":"aristotle.txt","user_id":"2acb499e-8428-543b-bd85-0d9098718220","document_id":"93123a68-d668-51de-8291-92162730dc87","extraction_id":"69431c4a-30cf-504f-8fab-7dcfc7580c63","associatedQuery":"Aristotle's influence on later thinkers in philosophy"}}","{"id":"94718936-ea92-5e29-a5ee-d4a6bc037384","score":1.0,"metadata":{"text":"the scholastic tradition of the Catholic Church. Aristotle was revered among medieval Muslim scholars as \"The First Teacher\", and among medieval Christians like Thomas Aquinas as simply \"The Philosopher\", while the poet Dante called him \"the master of those who know\". His works contain the earliest known formal study of logic, and were studied by medieval scholars such as Peter Abelard and Jean Buridan. Aristotle's influence on logic continued well into the 19th century. In addition, his ethics, although","title":"aristotle.txt","user_id":"2acb499e-8428-543b-bd85-0d9098718220","document_id":"93123a68-d668-51de-8291-92162730dc87","extraction_id":"69431c4a-30cf-504f-8fab-7dcfc7580c63","associatedQuery":"Aristotle's influence on later thinkers in philosophy"}}","{"id":"16483f14-f8a2-5c5c-8fcd-1bcbbd6603e4","score":1.0,"metadata":{"text":"hilosophy\nFurther information: Peripatetic school The immediate influence of Aristotle's work was felt as the Lyceum grew into the Peripatetic school. 
Aristotle's students included Aristoxenus, Dicaearchus, Demetrius of Phalerum, Eudemos of Rhodes, Harpalus, Hephaestion, Mnason of Phocis, Nicomachus, and Theophrastus. Aristotle's influence over Alexander the Great is seen in the latter's bringing with him on his expedition a host of zoologists, botanists, and researchers. He had also learned a great deal","title":"aristotle.txt","user_id":"2acb499e-8428-543b-bd85-0d9098718220","document_id":"93123a68-d668-51de-8291-92162730dc87","extraction_id":"69431c4a-30cf-504f-8fab-7dcfc7580c63","associatedQuery":"Aristotle's influence on later thinkers in philosophy"}}","{"id":"26eb20ee-a203-5ad5-beaa-511cc526aa6e","score":1.0,"metadata":{"text":"scholastic philosophers. Alkindus greatly admired Aristotle's philosophy,[168] and Averroes spoke of Aristotle as the \"exemplar\" for all future philosophers.[169] Medieval Muslim scholars regularly described Aristotle as the \"First Teacher\".[167] The title was later used by Western philosophers (as in the famous poem of Dante) who were influenced by the tradition of Islamic philosophy.[170]\n\nMedieval Europe\nFurther information: Aristotelianism and Syllogism § Medieval","title":"aristotle.txt","user_id":"2acb499e-8428-543b-bd85-0d9098718220","document_id":"93123a68-d668-51de-8291-92162730dc87","extraction_id":"69431c4a-30cf-504f-8fab-7dcfc7580c63","associatedQuery":"Aristotle's influence on later thinkers in philosophy"}}","{"id":"a08fd1b4-4e6f-5487-9af6-df2f6cfe1048","score":1.0,"metadata":{"text":"those by James of Venice and William of Moerbeke. After the Scholastic Thomas Aquinas wrote his Summa Theologica, working from Moerbeke's translations and calling Aristotle \"The Philosopher\",[172] the demand for Aristotle's writings grew, and the Greek manuscripts returned to the West, stimulating a revival of Aristotelianism in Europe that continued into the Renaissance.[173] These thinkers blended Aristotelian philosophy with Christianity, bringing the thought of Ancient Greece into the Middle Ages.","title":"aristotle.txt","user_id":"2acb499e-8428-543b-bd85-0d9098718220","document_id":"93123a68-d668-51de-8291-92162730dc87","extraction_id":"69431c4a-30cf-504f-8fab-7dcfc7580c63","associatedQuery":"Aristotle's influence on later thinkers in philosophy"}}"Aristotle's contributions to philosophy, particularly his development of formal logic, had a profound influence on later thinkers across various cultures and eras. Here are some key ways in which his work influenced subsequent intellectual traditions: + search{"query":"Aristotle's influence on later thinkers in philosophy"}"{"id":"b234931e-0cfb-5644-8f23-560a3097f5fe","score":1.0,"metadata":{"text":"ormation: List of writers influenced by Aristotle More than 2300 years after his death, Aristotle remains one of the most influential people who ever lived.[142][143][144] He contributed to almost every field of human knowledge then in existence, and he was the founder of many new fields. According to the philosopher Bryan Magee, \"it is doubtful whether any human being has ever known as much as he did\".[145]","title":"aristotle.txt","user_id":"2acb499e-8428-543b-bd85-0d9098718220","document_id":"9fbe403b-c11c-5aae-8ade-ef22980c3ad1","extraction_id":"69431c4a-30cf-504f-8fab-7dcfc7580c63","associatedQuery":"Aristotle's influence on later thinkers in philosophy"}}","{"id":"1827ac2c-2a06-5bc2-ad29-aa14b4d99540","score":1.0,"metadata":{"text":"subject of contemporary philosophical discussion. Aristotle's views profoundly shaped medieval scholarship. 
The influence of his physical science extended from late antiquity and the Early Middle Ages into the Renaissance, and was not replaced systematically until the Enlightenment and theories such as classical mechanics were developed. He influenced Judeo-Islamic philosophies during the Middle Ages, as well as Christian theology, especially the Neoplatonism of the Early Church and the scholastic tradition","title":"aristotle.txt","user_id":"2acb499e-8428-543b-bd85-0d9098718220","document_id":"9fbe403b-c11c-5aae-8ade-ef22980c3ad1","extraction_id":"69431c4a-30cf-504f-8fab-7dcfc7580c63","associatedQuery":"Aristotle's influence on later thinkers in philosophy"}}","{"id":"94718936-ea92-5e29-a5ee-d4a6bc037384","score":1.0,"metadata":{"text":"the scholastic tradition of the Catholic Church. Aristotle was revered among medieval Muslim scholars as \"The First Teacher\", and among medieval Christians like Thomas Aquinas as simply \"The Philosopher\", while the poet Dante called him \"the master of those who know\". His works contain the earliest known formal study of logic, and were studied by medieval scholars such as Peter Abelard and Jean Buridan. Aristotle's influence on logic continued well into the 19th century. In addition, his ethics, although","title":"aristotle.txt","user_id":"2acb499e-8428-543b-bd85-0d9098718220","document_id":"9fbe403b-c11c-5aae-8ade-ef22980c3ad1","extraction_id":"69431c4a-30cf-504f-8fab-7dcfc7580c63","associatedQuery":"Aristotle's influence on later thinkers in philosophy"}}","{"id":"16483f14-f8a2-5c5c-8fcd-1bcbbd6603e4","score":1.0,"metadata":{"text":"hilosophy\nFurther information: Peripatetic school The immediate influence of Aristotle's work was felt as the Lyceum grew into the Peripatetic school. Aristotle's students included Aristoxenus, Dicaearchus, Demetrius of Phalerum, Eudemos of Rhodes, Harpalus, Hephaestion, Mnason of Phocis, Nicomachus, and Theophrastus. Aristotle's influence over Alexander the Great is seen in the latter's bringing with him on his expedition a host of zoologists, botanists, and researchers. He had also learned a great deal","title":"aristotle.txt","user_id":"2acb499e-8428-543b-bd85-0d9098718220","document_id":"9fbe403b-c11c-5aae-8ade-ef22980c3ad1","extraction_id":"69431c4a-30cf-504f-8fab-7dcfc7580c63","associatedQuery":"Aristotle's influence on later thinkers in philosophy"}}","{"id":"26eb20ee-a203-5ad5-beaa-511cc526aa6e","score":1.0,"metadata":{"text":"scholastic philosophers. Alkindus greatly admired Aristotle's philosophy,[168] and Averroes spoke of Aristotle as the \"exemplar\" for all future philosophers.[169] Medieval Muslim scholars regularly described Aristotle as the \"First Teacher\".[167] The title was later used by Western philosophers (as in the famous poem of Dante) who were influenced by the tradition of Islamic philosophy.[170]\n\nMedieval Europe\nFurther information: Aristotelianism and Syllogism § Medieval","title":"aristotle.txt","user_id":"2acb499e-8428-543b-bd85-0d9098718220","document_id":"9fbe403b-c11c-5aae-8ade-ef22980c3ad1","extraction_id":"69431c4a-30cf-504f-8fab-7dcfc7580c63","associatedQuery":"Aristotle's influence on later thinkers in philosophy"}}","{"id":"a08fd1b4-4e6f-5487-9af6-df2f6cfe1048","score":1.0,"metadata":{"text":"those by James of Venice and William of Moerbeke. 
After the Scholastic Thomas Aquinas wrote his Summa Theologica, working from Moerbeke's translations and calling Aristotle \"The Philosopher\",[172] the demand for Aristotle's writings grew, and the Greek manuscripts returned to the West, stimulating a revival of Aristotelianism in Europe that continued into the Renaissance.[173] These thinkers blended Aristotelian philosophy with Christianity, bringing the thought of Ancient Greece into the Middle Ages.","title":"aristotle.txt","user_id":"2acb499e-8428-543b-bd85-0d9098718220","document_id":"9fbe403b-c11c-5aae-8ade-ef22980c3ad1","extraction_id":"69431c4a-30cf-504f-8fab-7dcfc7580c63","associatedQuery":"Aristotle's influence on later thinkers in philosophy"}}"Aristotle's contributions to philosophy, particularly his development of formal logic, had a profound influence on later thinkers across various cultures and eras. Here are some key ways in which his work influenced subsequent intellectual traditions: 1. Medieval Islamic Philosophy: Aristotle was highly revered among medieval Muslim scholars, who referred to him as "The First Teacher." His works were extensively translated into Arabic and studied by philosophers such as Averroes and Avicenna. These scholars not only preserved Aristotle's works but also expanded upon them, influencing both Islamic and Western thought. diff --git a/docs/documentation/local-rag.mdx b/docs/documentation/local-rag.mdx index 2a43d5ed7..2de6674da 100644 --- a/docs/documentation/local-rag.mdx +++ b/docs/documentation/local-rag.mdx @@ -8,15 +8,7 @@ icon: 'house' To run R2R with default local LLM settings, execute `r2r serve --docker --config-name=local_llm`. -R2R supports RAG with local LLMs through the Ollama library. You may follow the instructions on their [official website](https://ollama.com/) to install Ollama outside of the R2R Docker. To include Ollama inside the R2R Docker, pass the selection `--exclude-ollama=False` in the command shown previously. - - -For MacBooks with M1 or newer processors, we recommend setting `--exclude-ollama` flag to True and installing Ollama outside of Docker. - -This approach is recommended because Docker doesn't fully support hardware acceleration on Apple Silicon, which can limit performance. - - -Performance may be worse when running Ollama inside Docker. Please review your Docker Desktop settings when running on your local computer. +R2R supports RAG with local LLMs through the Ollama library. You may follow the instructions on their [official website](https://ollama.com/) to install Ollama outside of the R2R Docker. ## Preparing Local LLMs @@ -26,10 +18,6 @@ Next, make sure that you have all the necessary LLMs installed: ollama pull llama3.1 ollama pull mxbai-embed-large ollama serve - -# when running ollama inisde Docker, instead: -# docker exec -it r2r-ollama-1 ollama pull llama3.1 -# docker exec -it r2r-ollama-1 ollama pull mxbai-embed-large ``` These commands will need to be replaced with models specific to your configuration when deploying R2R with a customized configuration. @@ -69,7 +57,7 @@ add_title_as_prefix = true concurrent_request_limit = 32 [ingestion] -excluded_parsers = [ "gif", "jpeg", "jpg", "png", "svg", "mp3", "mp4" ] +excluded_parsers = [ "mp4" ] ``` This configuration uses `ollama` and the model `mxbai-embed-large` to run embeddings. We have excluded media file parsers as they are not yet supported locally. 
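As a quick sanity check of the local LLM configuration described above, the following sketch exercises a locally served R2R instance from the Python SDK once `ollama serve` and the R2R server are both running. It is illustrative only and not part of the diff: the endpoint URL, the sample file path, and the served model names are assumptions based on the configuration shown above.

```python
# Minimal sketch: query a locally running R2R server backed by Ollama.
# Assumes `r2r serve --docker --config-name=local_llm` is listening on
# localhost:7272 and that ./data/aristotle.txt exists (any text file will do).
from r2r import R2RClient

client = R2RClient("http://localhost:7272")

# Ingest a text file; chunks are embedded locally via mxbai-embed-large.
client.ingest_files(file_paths=["./data/aristotle.txt"])

# Vector search over the ingested chunks.
print(client.search(query="Who was Aristotle?"))

# RAG completion generated by the locally served llama3.1 model.
print(client.rag(query="Who was Aristotle?"))
```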
diff --git a/docs/documentation/python-sdk/auth.mdx b/docs/documentation/python-sdk/auth.mdx index 5b74a40cd..efef16228 100644 --- a/docs/documentation/python-sdk/auth.mdx +++ b/docs/documentation/python-sdk/auth.mdx @@ -257,7 +257,8 @@ update_result = client.update_user(name="John Doe", bio="R2R enthusiast") Delete the user's account: ```python -delete_result = client.delete_user("password123") +user_id = register_response["results"]["id"] # input unique id here +delete_result = client.delete_user(user_id, "password123") ``` diff --git a/docs/documentation/python-sdk/collections.mdx b/docs/documentation/python-sdk/collections.mdx index 6b3b2e8c5..d7b8d5b94 100644 --- a/docs/documentation/python-sdk/collections.mdx +++ b/docs/documentation/python-sdk/collections.mdx @@ -21,6 +21,7 @@ Create a new collection with a name and optional description: ```python create_collection_response = client.create_collection("Marketing Team", "Collection for marketing department") +collection_id = create_collection_response["results"]["collection_id"] # '123e4567-e89b-12d3-a456-426614174000' ``` @@ -46,7 +47,7 @@ create_collection_response = client.create_collection("Marketing Team", "Collect Retrieve details about a specific collection: ```python -collection_details = client.get_collection('123e4567-e89b-12d3-a456-426614174000') +collection_details = client.get_collection(collection_id) ``` @@ -72,9 +73,11 @@ collection_details = client.get_collection('123e4567-e89b-12d3-a456-426614174000 Update a collection's name or description: ```python -update_result = client.update_collection('123e4567-e89b-12d3-a456-426614174000', - name="Updated Marketing Team", - description="New description for marketing team") +update_result = client.update_collection( + collection_id, + name="Updated Marketing Team", + description="New description for marketing team" +) ``` @@ -131,7 +134,9 @@ collections_list = client.list_collections() Add a user to a collection: ```python -add_user_result = client.add_user_to_collection('456e789f-g01h-34i5-j678-901234567890', '123e4567-e89b-12d3-a456-426614174000') + +user_id = '456e789f-g01h-34i5-j678-901234567890' # This should be a valid user ID +add_user_result = client.add_user_to_collection(user_id, collection_id) ``` @@ -153,7 +158,7 @@ add_user_result = client.add_user_to_collection('456e789f-g01h-34i5-j678-9012345 Remove a user from a collection: ```python -remove_user_result = client.remove_user_from_collection('456e789f-g01h-34i5-j678-901234567890', '123e4567-e89b-12d3-a456-426614174000') +remove_user_result = client.remove_user_from_collection(user_id, collection_id) ``` @@ -175,7 +180,7 @@ remove_user_result = client.remove_user_from_collection('456e789f-g01h-34i5-j678 Get a list of all users in a specific collection: ```python -users_in_collection = client.get_users_in_collection('123e4567-e89b-12d3-a456-426614174000') +users_in_collection = client.get_users_in_collection(collection_id) ``` @@ -203,7 +208,7 @@ users_in_collection = client.get_users_in_collection('123e4567-e89b-12d3-a456-42 Get all collections that a user is a member of: ```python -user_collections = client.user_collections('456e789f-g01h-34i5-j678-901234567890') +user_collections = client.user_collections(user_id) ``` @@ -232,7 +237,8 @@ user_collections = client.user_collections('456e789f-g01h-34i5-j678-901234567890 Assign a document to a collection: ```python -assign_doc_result = client.assign_document_to_collection('789g012j-k34l-56m7-n890-123456789012', '123e4567-e89b-12d3-a456-426614174000') +document_id =
'789g012j-k34l-56m7-n890-123456789012' # must be a valid document id +assign_doc_result = client.assign_document_to_collection(document_id, collection_id) ``` @@ -254,7 +260,7 @@ assign_doc_result = client.assign_document_to_collection('789g012j-k34l-56m7-n89 Remove a document from a collection: ```python -remove_doc_result = client.remove_document_from_collection('789g012j-k34l-56m7-n890-123456789012', '123e4567-e89b-12d3-a456-426614174000') +remove_doc_result = client.remove_document_from_collection(document_id, collection_id) ``` @@ -276,7 +282,7 @@ remove_doc_result = client.remove_document_from_collection('789g012j-k34l-56m7-n Get a list of all documents in a specific collection: ```python -docs_in_collection = client.documents_in_collection('123e4567-e89b-12d3-a456-426614174000') +docs_in_collection = client.documents_in_collection(collection_id) ``` @@ -303,7 +309,7 @@ docs_in_collection = client.documents_in_collection('123e4567-e89b-12d3-a456-426 Get all collections that a document is assigned to: ```python -document_collections = client.document_collections('789g012j-k34l-56m7-n890-123456789012') +document_collections = client.document_collections(document_id) ``` @@ -363,7 +369,7 @@ collections_overview = client.collections_overview() Delete a collection: ```python -delete_result = client.delete_collection('123e4567-e89b-12d3-a456-426614174000') +delete_result = client.delete_collection(collection_id) ``` @@ -372,7 +378,7 @@ delete_result = client.delete_collection('123e4567-e89b-12d3-a456-426614174000') ```python { 'results': { - 'message': 'Group successfully deleted' + 'message': 'Collection successfully deleted' } } ``` @@ -389,10 +395,10 @@ Many collection-related methods support pagination and filtering: paginated_collection = client.list_collections(offset=10, limit=20) # Get users in a collection with pagination -paginated_users = client.get_users_in_collection('123e4567-e89b-12d3-a456-426614174000', offset=5, limit=10) +paginated_users = client.get_users_in_collection(collection_id, offset=5, limit=10) # Get documents in a collection with pagination -paginated_docs = client.documents_in_collection('123e4567-e89b-12d3-a456-426614174000', offset=0, limit=50) +paginated_docs = client.documents_in_collection(collection_id, offset=0, limit=50) # Get collections overview with specific collection IDs specific_collections_overview = client.collections_overview(collection_ids=['id1', 'id2', 'id3']) diff --git a/docs/documentation/python-sdk/ingestion.mdx b/docs/documentation/python-sdk/ingestion.mdx index c39c19c70..3e432cd49 100644 --- a/docs/documentation/python-sdk/ingestion.mdx +++ b/docs/documentation/python-sdk/ingestion.mdx @@ -18,7 +18,8 @@ ingest_response = client.ingest_files( file_paths=file_paths, metadatas=metadatas, # optionally override chunking settings at runtime - chunking_config={ + ingestion_config={ + "provider": "unstructured_local", "strategy": "auto", "chunking_strategy": "by_title", "new_after_n_chars": 256, # soft maximum @@ -27,7 +28,6 @@ ingest_response = client.ingest_files( "overlap": 100, } ) - ``` @@ -56,7 +56,7 @@ ingest_response = client.ingest_files( An optional list of version strings for the ingested files. If provided, the length should match the number of files being ingested. - + The chunking config override parameter enables developers to customize their R2R chunking strategy at runtime. 
@@ -179,7 +179,7 @@ Update existing documents: ```python file_paths = ["/path/to/r2r/examples/data/aristotle_v2.txt"] -document_ids = ["93123a68-d668-51de-8291-92162730dc87"] +document_ids = ["9fbe403b-c11c-5aae-8ade-ef22980c3ad1"] update_response = client.update_files( file_paths=file_paths, document_ids=document_ids, @@ -226,7 +226,7 @@ documents_overview = client.documents_overview() ```bash [ { - 'document_id': '93123a68-d668-51de-8291-92162730dc87', + 'document_id': '9fbe403b-c11c-5aae-8ade-ef22980c3ad1', 'version': 'v0', 'collection_ids': [], 'ingestion_status': 'success', @@ -253,7 +253,7 @@ documents_overview = client.documents_overview() Fetch chunks for a particular document: ```python -document_id = "db02076e-989a-59cd-98d5-e24e15a0bd27" +document_id = "9fbe403b-c11c-5aae-8ade-ef22980c3ad1" chunks = client.document_chunks(document_id) ``` @@ -265,7 +265,7 @@ chunks = client.document_chunks(document_id) [{ 'text': 'Aristotle[A] (Greek: Ἀριστοτέλης Aristotélēs, pronounced [aristotélɛːs]; 384–322 BC) was an Ancient Greek philosopher and polymath...', 'user_id': '2acb499e-8428-543b-bd85-0d9098718220', - 'document_id': 'db02076e-989a-59cd-98d5-e24e15a0bd27', + 'document_id': '9fbe403b-c11c-5aae-8ade-ef22980c3ad1', 'extraction_id': 'aeba6400-1bd0-5ee9-8925-04732d675434', 'fragment_id': 'f48bcdad-4155-52a4-8c9d-8ba06e996ba3' 'metadata': {'title': 'aristotle.txt', 'version': 'v0', 'chunk_order': 0, 'document_type': 'txt', 'unstructured_filetype': 'text/plain', 'unstructured_languages': ['eng'], 'unstructured_parent_id': '971399f6ba2ec9768d2b5b92ab9d17d6', 'partitioned_by_unstructured': True} @@ -289,7 +289,7 @@ Delete a document by its ID: delete_response = client.delete( { "document_id": - {"$eq": "db02076e-989a-59cd-98d5-e24e15a0bd27"} + {"$eq": "9fbe403b-c11c-5aae-8ade-ef22980c3ad1"} } ) ``` @@ -306,5 +306,5 @@ delete_response = client.delete( - A list of logical filters to perform over input documents fields which identifies the unique set of documents to delete (e.g., `{"document_id": {"$eq": "db02076e-989a-59cd-98d5-e24e15a0bd27"}}`). Logical operations might include variables such as `"user_id"` or `"title"` and filters like `neq`, `gte`, etc. + A list of logical filters to perform over input documents fields which identifies the unique set of documents to delete (e.g., `{"document_id": {"$eq": "9fbe403b-c11c-5aae-8ade-ef22980c3ad1"}}`). Logical operations might include variables such as `"user_id"` or `"title"` and filters like `neq`, `gte`, etc. diff --git a/docs/documentation/python-sdk/retrieval.mdx b/docs/documentation/python-sdk/retrieval.mdx index b38c5a316..f4503e8e5 100644 --- a/docs/documentation/python-sdk/retrieval.mdx +++ b/docs/documentation/python-sdk/retrieval.mdx @@ -207,7 +207,7 @@ kg_search_response = client.search( "kg_search_type": "local", "kg_search_level": "0", "generation_config": { - "model": "gpt-4o-mini", + "model": "openai/gpt-4o-mini", "temperature": 0.7, }, "local_search_limits": { @@ -794,7 +794,7 @@ rag_agent_response = client.agent( The agent endpoint will stream back its response, including internal tool calls. 
```bash - search{"query":"Aristotle's influence on later thinkers in philosophy"}"{\"id\":\"b234931e-0cfb-5644-8f23-560a3097f5fe\",\"score\":1.0,\"metadata\":{\"text\":\"ormation: List of writers influenced by Aristotle More than 2300 years after his death, Aristotle remains one of the most influential people who ever lived.[142][143][144] He contributed to almost every field of human knowledge then in existence, and he was the founder of many new fields. According to the philosopher Bryan Magee, \\\"it is doubtful whether any human being has ever known as much as he did\\\".[145]\",\"title\":\"aristotle.txt\",\"user_id\":\"2acb499e-8428-543b-bd85-0d9098718220\",\"document_id\":\"93123a68-d668-51de-8291-92162730dc87\",\"extraction_id\":\"69431c4a-30cf-504f-8fab-7dcfc7580c63\",\"associatedQuery\":\"Aristotle's influence on later thinkers in philosophy\"}}","{\"id\":\"1827ac2c-2a06-5bc2-ad29-aa14b4d99540\",\"score\":1.0,\"metadata\":{\"text\":\"subject of contemporary philosophical discussion. Aristotle's views profoundly shaped medieval scholarship. The influence of his physical science extended from late antiquity and the Early Middle Ages into the Renaissance, and was not replaced systematically until the Enlightenment and theories such as classical mechanics were developed. He influenced Judeo-Islamic philosophies during the Middle Ages, as well as Christian theology, especially the Neoplatonism of the Early Church and the scholastic tradition\",\"title\":\"aristotle.txt\",\"user_id\":\"2acb499e-8428-543b-bd85-0d9098718220\",\"document_id\":\"93123a68-d668-51de-8291-92162730dc87\",\"extraction_id\":\"69431c4a-30cf-504f-8fab-7dcfc7580c63\",\"associatedQuery\":\"Aristotle's influence on later thinkers in philosophy\"}}","{\"id\":\"94718936-ea92-5e29-a5ee-d4a6bc037384\",\"score\":1.0,\"metadata\":{\"text\":\"the scholastic tradition of the Catholic Church. Aristotle was revered among medieval Muslim scholars as \\\"The First Teacher\\\", and among medieval Christians like Thomas Aquinas as simply \\\"The Philosopher\\\", while the poet Dante called him \\\"the master of those who know\\\". His works contain the earliest known formal study of logic, and were studied by medieval scholars such as Peter Abelard and Jean Buridan. Aristotle's influence on logic continued well into the 19th century. In addition, his ethics, although\",\"title\":\"aristotle.txt\",\"user_id\":\"2acb499e-8428-543b-bd85-0d9098718220\",\"document_id\":\"93123a68-d668-51de-8291-92162730dc87\",\"extraction_id\":\"69431c4a-30cf-504f-8fab-7dcfc7580c63\",\"associatedQuery\":\"Aristotle's influence on later thinkers in philosophy\"}}","{\"id\":\"16483f14-f8a2-5c5c-8fcd-1bcbbd6603e4\",\"score\":1.0,\"metadata\":{\"text\":\"hilosophy\\nFurther information: Peripatetic school The immediate influence of Aristotle's work was felt as the Lyceum grew into the Peripatetic school. Aristotle's students included Aristoxenus, Dicaearchus, Demetrius of Phalerum, Eudemos of Rhodes, Harpalus, Hephaestion, Mnason of Phocis, Nicomachus, and Theophrastus. Aristotle's influence over Alexander the Great is seen in the latter's bringing with him on his expedition a host of zoologists, botanists, and researchers. 
He had also learned a great deal\",\"title\":\"aristotle.txt\",\"user_id\":\"2acb499e-8428-543b-bd85-0d9098718220\",\"document_id\":\"93123a68-d668-51de-8291-92162730dc87\",\"extraction_id\":\"69431c4a-30cf-504f-8fab-7dcfc7580c63\",\"associatedQuery\":\"Aristotle's influence on later thinkers in philosophy\"}}","{\"id\":\"26eb20ee-a203-5ad5-beaa-511cc526aa6e\",\"score\":1.0,\"metadata\":{\"text\":\"scholastic philosophers. Alkindus greatly admired Aristotle's philosophy,[168] and Averroes spoke of Aristotle as the \\\"exemplar\\\" for all future philosophers.[169] Medieval Muslim scholars regularly described Aristotle as the \\\"First Teacher\\\".[167] The title was later used by Western philosophers (as in the famous poem of Dante) who were influenced by the tradition of Islamic philosophy.[170]\\n\\nMedieval Europe\\nFurther information: Aristotelianism and Syllogism \u00a7 Medieval\",\"title\":\"aristotle.txt\",\"user_id\":\"2acb499e-8428-543b-bd85-0d9098718220\",\"document_id\":\"93123a68-d668-51de-8291-92162730dc87\",\"extraction_id\":\"69431c4a-30cf-504f-8fab-7dcfc7580c63\",\"associatedQuery\":\"Aristotle's influence on later thinkers in philosophy\"}}","{\"id\":\"a08fd1b4-4e6f-5487-9af6-df2f6cfe1048\",\"score\":1.0,\"metadata\":{\"text\":\"those by James of Venice and William of Moerbeke. After the Scholastic Thomas Aquinas wrote his Summa Theologica, working from Moerbeke's translations and calling Aristotle \\\"The Philosopher\\\",[172] the demand for Aristotle's writings grew, and the Greek manuscripts returned to the West, stimulating a revival of Aristotelianism in Europe that continued into the Renaissance.[173] These thinkers blended Aristotelian philosophy with Christianity, bringing the thought of Ancient Greece into the Middle Ages.\",\"title\":\"aristotle.txt\",\"user_id\":\"2acb499e-8428-543b-bd85-0d9098718220\",\"document_id\":\"93123a68-d668-51de-8291-92162730dc87\",\"extraction_id\":\"69431c4a-30cf-504f-8fab-7dcfc7580c63\",\"associatedQuery\":\"Aristotle's influence on later thinkers in philosophy\"}}","{\"id\":\"9689a804-5a95-5696-97da-a076a3eb8320\",\"score\":1.0,\"metadata\":{\"text\":\"Aristotle[A] (Greek: \u1f08\u03c1\u03b9\u03c3\u03c4\u03bf\u03c4\u03ad\u03bb\u03b7\u03c2 Aristot\u00e9l\u0113s, pronounced [aristot\u00e9l\u025b\u02d0s]; 384\u2013322 BC) was an Ancient Greek philosopher and polymath. His writings cover a broad range of subjects spanning the natural sciences, philosophy, linguistics, economics, politics, psychology, and the arts. 
As the founder of the Peripatetic school of philosophy in the Lyceum in Athens, he began the wider Aristotelian tradition that followed, which set the groundwork for the development of modern science.\",\"title\":\"aristotle.txt\",\"user_id\":\"2acb499e-8428-543b-bd85-0d9098718220\",\"document_id\":\"93123a68-d668-51de-8291-92162730dc87\",\"extraction_id\":\"69431c4a-30cf-504f-8fab-7dcfc7580c63\",\"associatedQuery\":\"Aristotle's influence on later thinkers in philosophy\"}}","{\"id\":\"dd19a0d6-4cef-590b-9721-35c26b1ee056\",\"score\":1.0,\"metadata\":{\"text\":\", individualism, teleology, and meteorology.[151] The scholar Taneli Kukkonen notes that \\\"in the best 20th-century scholarship Aristotle comes alive as a thinker wrestling with the full weight of the Greek philosophical tradition.\\\"[148] What follows is an overview of the transmission and influence of his texts and ideas into the modern era.\\n\\nHis successor, Theophrastus\\nMain articles: Theophrastus and Historia Plantarum (Theophrastus)\",\"title\":\"aristotle.txt\",\"user_id\":\"2acb499e-8428-543b-bd85-0d9098718220\",\"document_id\":\"93123a68-d668-51de-8291-92162730dc87\",\"extraction_id\":\"69431c4a-30cf-504f-8fab-7dcfc7580c63\",\"associatedQuery\":\"Aristotle's influence on later thinkers in philosophy\"}}","{\"id\":\"8d125c7a-0084-5adf-b094-c96c91611897\",\"score\":1.0,\"metadata\":{\"text\":\"man being has ever known as much as he did\\\".[145] Among countless other achievements, Aristotle was the founder of formal logic,[146] pioneered the study of zoology, and left every future scientist and philosopher in his debt through his contributions to the scientific method.[2][147][148] Taneli Kukkonen, observes that his achievement in founding two sciences is unmatched, and his reach in influencing \\\"every branch of intellectual enterprise\\\" including Western ethical and political theory, theology,\",\"title\":\"aristotle.txt\",\"user_id\":\"2acb499e-8428-543b-bd85-0d9098718220\",\"document_id\":\"93123a68-d668-51de-8291-92162730dc87\",\"extraction_id\":\"69431c4a-30cf-504f-8fab-7dcfc7580c63\",\"associatedQuery\":\"Aristotle's influence on later thinkers in philosophy\"}}","{\"id\":\"40d671b0-a412-5822-b088-461baf2324e6\",\"score\":1.0,\"metadata\":{\"text\":\"ight) in the Kit\u0101b na\u02bft al-\u1e25ayaw\u0101n, c.\u20091220.[166] Aristotle was one of the most revered Western thinkers in early Islamic theology. Most of the still extant works of Aristotle,[167] as well as a number of the original Greek commentaries, were translated into Arabic and studied by Muslim philosophers, scientists and scholars. Averroes, Avicenna and Alpharabius, who wrote on Aristotle in great depth, also influenced Thomas Aquinas and other Western Christian scholastic philosophers. Alkindus greatly admired\",\"title\":\"aristotle.txt\",\"user_id\":\"2acb499e-8428-543b-bd85-0d9098718220\",\"document_id\":\"93123a68-d668-51de-8291-92162730dc87\",\"extraction_id\":\"69431c4a-30cf-504f-8fab-7dcfc7580c63\",\"associatedQuery\":\"Aristotle's influence on later thinkers in philosophy\"}}"Aristotle's contributions to philosophy, particularly his development of formal logic, had a profound influence on later thinkers across various cultures and eras. 
Here are some key ways in which his work influenced subsequent intellectual traditions: + search{"query":"Aristotle's influence on later thinkers in philosophy"}"{\"id\":\"b234931e-0cfb-5644-8f23-560a3097f5fe\",\"score\":1.0,\"metadata\":{\"text\":\"ormation: List of writers influenced by Aristotle More than 2300 years after his death, Aristotle remains one of the most influential people who ever lived.[142][143][144] He contributed to almost every field of human knowledge then in existence, and he was the founder of many new fields. According to the philosopher Bryan Magee, \\\"it is doubtful whether any human being has ever known as much as he did\\\".[145]\",\"title\":\"aristotle.txt\",\"user_id\":\"2acb499e-8428-543b-bd85-0d9098718220\",\"document_id\":\"9fbe403b-c11c-5aae-8ade-ef22980c3ad1\",\"extraction_id\":\"69431c4a-30cf-504f-8fab-7dcfc7580c63\",\"associatedQuery\":\"Aristotle's influence on later thinkers in philosophy\"}}","{\"id\":\"1827ac2c-2a06-5bc2-ad29-aa14b4d99540\",\"score\":1.0,\"metadata\":{\"text\":\"subject of contemporary philosophical discussion. Aristotle's views profoundly shaped medieval scholarship. The influence of his physical science extended from late antiquity and the Early Middle Ages into the Renaissance, and was not replaced systematically until the Enlightenment and theories such as classical mechanics were developed. He influenced Judeo-Islamic philosophies during the Middle Ages, as well as Christian theology, especially the Neoplatonism of the Early Church and the scholastic tradition\",\"title\":\"aristotle.txt\",\"user_id\":\"2acb499e-8428-543b-bd85-0d9098718220\",\"document_id\":\"9fbe403b-c11c-5aae-8ade-ef22980c3ad1\",\"extraction_id\":\"69431c4a-30cf-504f-8fab-7dcfc7580c63\",\"associatedQuery\":\"Aristotle's influence on later thinkers in philosophy\"}}","{\"id\":\"94718936-ea92-5e29-a5ee-d4a6bc037384\",\"score\":1.0,\"metadata\":{\"text\":\"the scholastic tradition of the Catholic Church. Aristotle was revered among medieval Muslim scholars as \\\"The First Teacher\\\", and among medieval Christians like Thomas Aquinas as simply \\\"The Philosopher\\\", while the poet Dante called him \\\"the master of those who know\\\". His works contain the earliest known formal study of logic, and were studied by medieval scholars such as Peter Abelard and Jean Buridan. Aristotle's influence on logic continued well into the 19th century. In addition, his ethics, although\",\"title\":\"aristotle.txt\",\"user_id\":\"2acb499e-8428-543b-bd85-0d9098718220\",\"document_id\":\"9fbe403b-c11c-5aae-8ade-ef22980c3ad1\",\"extraction_id\":\"69431c4a-30cf-504f-8fab-7dcfc7580c63\",\"associatedQuery\":\"Aristotle's influence on later thinkers in philosophy\"}}","{\"id\":\"16483f14-f8a2-5c5c-8fcd-1bcbbd6603e4\",\"score\":1.0,\"metadata\":{\"text\":\"hilosophy\\nFurther information: Peripatetic school The immediate influence of Aristotle's work was felt as the Lyceum grew into the Peripatetic school. Aristotle's students included Aristoxenus, Dicaearchus, Demetrius of Phalerum, Eudemos of Rhodes, Harpalus, Hephaestion, Mnason of Phocis, Nicomachus, and Theophrastus. Aristotle's influence over Alexander the Great is seen in the latter's bringing with him on his expedition a host of zoologists, botanists, and researchers. 
He had also learned a great deal\",\"title\":\"aristotle.txt\",\"user_id\":\"2acb499e-8428-543b-bd85-0d9098718220\",\"document_id\":\"9fbe403b-c11c-5aae-8ade-ef22980c3ad1\",\"extraction_id\":\"69431c4a-30cf-504f-8fab-7dcfc7580c63\",\"associatedQuery\":\"Aristotle's influence on later thinkers in philosophy\"}}","{\"id\":\"26eb20ee-a203-5ad5-beaa-511cc526aa6e\",\"score\":1.0,\"metadata\":{\"text\":\"scholastic philosophers. Alkindus greatly admired Aristotle's philosophy,[168] and Averroes spoke of Aristotle as the \\\"exemplar\\\" for all future philosophers.[169] Medieval Muslim scholars regularly described Aristotle as the \\\"First Teacher\\\".[167] The title was later used by Western philosophers (as in the famous poem of Dante) who were influenced by the tradition of Islamic philosophy.[170]\\n\\nMedieval Europe\\nFurther information: Aristotelianism and Syllogism \u00a7 Medieval\",\"title\":\"aristotle.txt\",\"user_id\":\"2acb499e-8428-543b-bd85-0d9098718220\",\"document_id\":\"9fbe403b-c11c-5aae-8ade-ef22980c3ad1\",\"extraction_id\":\"69431c4a-30cf-504f-8fab-7dcfc7580c63\",\"associatedQuery\":\"Aristotle's influence on later thinkers in philosophy\"}}","{\"id\":\"a08fd1b4-4e6f-5487-9af6-df2f6cfe1048\",\"score\":1.0,\"metadata\":{\"text\":\"those by James of Venice and William of Moerbeke. After the Scholastic Thomas Aquinas wrote his Summa Theologica, working from Moerbeke's translations and calling Aristotle \\\"The Philosopher\\\",[172] the demand for Aristotle's writings grew, and the Greek manuscripts returned to the West, stimulating a revival of Aristotelianism in Europe that continued into the Renaissance.[173] These thinkers blended Aristotelian philosophy with Christianity, bringing the thought of Ancient Greece into the Middle Ages.\",\"title\":\"aristotle.txt\",\"user_id\":\"2acb499e-8428-543b-bd85-0d9098718220\",\"document_id\":\"9fbe403b-c11c-5aae-8ade-ef22980c3ad1\",\"extraction_id\":\"69431c4a-30cf-504f-8fab-7dcfc7580c63\",\"associatedQuery\":\"Aristotle's influence on later thinkers in philosophy\"}}","{\"id\":\"9689a804-5a95-5696-97da-a076a3eb8320\",\"score\":1.0,\"metadata\":{\"text\":\"Aristotle[A] (Greek: \u1f08\u03c1\u03b9\u03c3\u03c4\u03bf\u03c4\u03ad\u03bb\u03b7\u03c2 Aristot\u00e9l\u0113s, pronounced [aristot\u00e9l\u025b\u02d0s]; 384\u2013322 BC) was an Ancient Greek philosopher and polymath. His writings cover a broad range of subjects spanning the natural sciences, philosophy, linguistics, economics, politics, psychology, and the arts. 
As the founder of the Peripatetic school of philosophy in the Lyceum in Athens, he began the wider Aristotelian tradition that followed, which set the groundwork for the development of modern science.\",\"title\":\"aristotle.txt\",\"user_id\":\"2acb499e-8428-543b-bd85-0d9098718220\",\"document_id\":\"9fbe403b-c11c-5aae-8ade-ef22980c3ad1\",\"extraction_id\":\"69431c4a-30cf-504f-8fab-7dcfc7580c63\",\"associatedQuery\":\"Aristotle's influence on later thinkers in philosophy\"}}","{\"id\":\"dd19a0d6-4cef-590b-9721-35c26b1ee056\",\"score\":1.0,\"metadata\":{\"text\":\", individualism, teleology, and meteorology.[151] The scholar Taneli Kukkonen notes that \\\"in the best 20th-century scholarship Aristotle comes alive as a thinker wrestling with the full weight of the Greek philosophical tradition.\\\"[148] What follows is an overview of the transmission and influence of his texts and ideas into the modern era.\\n\\nHis successor, Theophrastus\\nMain articles: Theophrastus and Historia Plantarum (Theophrastus)\",\"title\":\"aristotle.txt\",\"user_id\":\"2acb499e-8428-543b-bd85-0d9098718220\",\"document_id\":\"9fbe403b-c11c-5aae-8ade-ef22980c3ad1\",\"extraction_id\":\"69431c4a-30cf-504f-8fab-7dcfc7580c63\",\"associatedQuery\":\"Aristotle's influence on later thinkers in philosophy\"}}","{\"id\":\"8d125c7a-0084-5adf-b094-c96c91611897\",\"score\":1.0,\"metadata\":{\"text\":\"man being has ever known as much as he did\\\".[145] Among countless other achievements, Aristotle was the founder of formal logic,[146] pioneered the study of zoology, and left every future scientist and philosopher in his debt through his contributions to the scientific method.[2][147][148] Taneli Kukkonen, observes that his achievement in founding two sciences is unmatched, and his reach in influencing \\\"every branch of intellectual enterprise\\\" including Western ethical and political theory, theology,\",\"title\":\"aristotle.txt\",\"user_id\":\"2acb499e-8428-543b-bd85-0d9098718220\",\"document_id\":\"9fbe403b-c11c-5aae-8ade-ef22980c3ad1\",\"extraction_id\":\"69431c4a-30cf-504f-8fab-7dcfc7580c63\",\"associatedQuery\":\"Aristotle's influence on later thinkers in philosophy\"}}","{\"id\":\"40d671b0-a412-5822-b088-461baf2324e6\",\"score\":1.0,\"metadata\":{\"text\":\"ight) in the Kit\u0101b na\u02bft al-\u1e25ayaw\u0101n, c.\u20091220.[166] Aristotle was one of the most revered Western thinkers in early Islamic theology. Most of the still extant works of Aristotle,[167] as well as a number of the original Greek commentaries, were translated into Arabic and studied by Muslim philosophers, scientists and scholars. Averroes, Avicenna and Alpharabius, who wrote on Aristotle in great depth, also influenced Thomas Aquinas and other Western Christian scholastic philosophers. Alkindus greatly admired\",\"title\":\"aristotle.txt\",\"user_id\":\"2acb499e-8428-543b-bd85-0d9098718220\",\"document_id\":\"9fbe403b-c11c-5aae-8ade-ef22980c3ad1\",\"extraction_id\":\"69431c4a-30cf-504f-8fab-7dcfc7580c63\",\"associatedQuery\":\"Aristotle's influence on later thinkers in philosophy\"}}"Aristotle's contributions to philosophy, particularly his development of formal logic, had a profound influence on later thinkers across various cultures and eras. Here are some key ways in which his work influenced subsequent intellectual traditions: 1. **Medieval Islamic Philosophy**: Aristotle was highly revered among medieval Muslim scholars, who referred to him as "The First Teacher." 
His works were extensively translated into Arabic and studied by philosophers such as Averroes and Avicenna. These scholars not only preserved Aristotle's works but also expanded upon them, influencing both Islamic and Western thought [5][10]. diff --git a/docs/documentation/quickstart.mdx b/docs/documentation/quickstart.mdx index 44142d674..65511a33e 100644 --- a/docs/documentation/quickstart.mdx +++ b/docs/documentation/quickstart.mdx @@ -97,7 +97,7 @@ Example output: { 'fragment_id': '34c32587-e2c9-529f-b0a7-884e9a3c3b2e', 'extraction_id': '8edf5123-0a5c-568c-bf97-654b6adaf8dc', - 'document_id': 'db02076e-989a-59cd-98d5-e24e15a0bd27', + 'document_id': '9fbe403b-c11c-5aae-8ade-ef22980c3ad1', 'user_id': '2acb499e-8428-543b-bd85-0d9098718220', 'collection_ids': [], 'score': 0.780314067545999, diff --git a/docs/images/chat.png b/docs/images/chat.png index 94bab6441..786fcc898 100644 Binary files a/docs/images/chat.png and b/docs/images/chat.png differ diff --git a/docs/images/login.png b/docs/images/login.png index b31eb232d..0848e2bce 100644 Binary files a/docs/images/login.png and b/docs/images/login.png differ diff --git a/docs/images/oss_collections_page.png b/docs/images/oss_collections_page.png new file mode 100644 index 000000000..8a94c028c Binary files /dev/null and b/docs/images/oss_collections_page.png differ diff --git a/docs/images/oss_dashboard_documents.png b/docs/images/oss_dashboard_documents.png index e0741c723..073eaaa38 100644 Binary files a/docs/images/oss_dashboard_documents.png and b/docs/images/oss_dashboard_documents.png differ diff --git a/docs/images/settings.png b/docs/images/settings.png deleted file mode 100644 index ee8fe5e63..000000000 Binary files a/docs/images/settings.png and /dev/null differ diff --git a/docs/images/settings_config.png b/docs/images/settings_config.png index 763f19f8d..a576e17e9 100644 Binary files a/docs/images/settings_config.png and b/docs/images/settings_config.png differ diff --git a/docs/images/settings_prompts.png b/docs/images/settings_prompts.png index 3723db46c..3316cb22e 100644 Binary files a/docs/images/settings_prompts.png and b/docs/images/settings_prompts.png differ diff --git a/docs/images/users.png b/docs/images/users.png index b14ec5310..b910af67a 100644 Binary files a/docs/images/users.png and b/docs/images/users.png differ diff --git a/docs/introduction/system.mdx b/docs/introduction/system.mdx index e0674f087..f47619380 100644 --- a/docs/introduction/system.mdx +++ b/docs/introduction/system.mdx @@ -47,7 +47,6 @@ graph TD subgraph "Storage" PG[(Postgres + pgvector)] - Neo[(Neo4j Graph DB)] FS[File Storage] end @@ -83,7 +82,7 @@ R2R is built on a modular, service-oriented architecture designed for scalabilit 3. **Orchestration**: Manages complex workflows and long-running tasks using a message queue system. -4. **Storage**: Utilizes PostgreSQL with pgvector for vector storage and search, Neo4j for graph data, and a file system for raw document storage. +4. **Storage**: Utilizes PostgreSQL with pgvector for vector storage, vector search, and graph search, alongside a file system for raw document storage. 5. **Providers**: Pluggable components for embedding, LLM, auth, and ingestion services, supporting multimodal ingestion and flexible model integration.
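With Postgres now backing both vector and graph retrieval, a single search request can combine the two. The sketch below is a hedged illustration rather than part of the diff: it mirrors the Python retrieval examples elsewhere in these docs, and the exact `kg_search_settings` keys and the local endpoint URL are assumptions that should be verified against your installed version.

```python
# Sketch: one request that performs hybrid vector search and local KG search,
# both served from the same Postgres-backed deployment.
# Assumes a running R2R server at localhost:7272 with a knowledge graph built.
from r2r import R2RClient

client = R2RClient("http://localhost:7272")

response = client.search(
    query="Who is Aristotle?",
    vector_search_settings={"use_hybrid_search": True},
    kg_search_settings={
        "use_kg_search": True,
        "kg_search_type": "local",  # or "global"
        "generation_config": {"model": "openai/gpt-4o-mini"},
    },
)
print(response["results"])
```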
diff --git a/docs/mint.json b/docs/mint.json index d3890efc5..dd03e1058 100644 --- a/docs/mint.json +++ b/docs/mint.json @@ -82,8 +82,23 @@ "icon": "bars-progress", "pages" : [ "documentation/installation/overview", - "documentation/installation/docker", - "documentation/installation/local-system" + { + "group": "Light", + "icon": "feather", + "pages": [ + "documentation/installation/light/local-system", + "documentation/installation/light/docker" + ] + }, + { + "group": "Full", + "icon": "gem", + "pages": [ + "documentation/installation/full/docker", + "documentation/installation/full/local-system" + ] + } + ] }, "documentation/quickstart", @@ -226,8 +241,6 @@ "pages": [ "documentation/deployment/troubleshooting/vector_store_issues", "documentation/deployment/troubleshooting/connection_strings", - "documentation/deployment/troubleshooting/graph_db", - "documentation/deployment/troubleshooting/neo4j", "documentation/deployment/troubleshooting/workflows" ] }, diff --git a/docs/other/neo4j-setup.mdx b/docs/other/neo4j-setup.mdx deleted file mode 100644 index 56658c99c..000000000 --- a/docs/other/neo4j-setup.mdx +++ /dev/null @@ -1,54 +0,0 @@ ---- -title: 'Neo4j' -description: 'Step by step guide for desktop installation' -icon: 'server' ---- - -We found it painful to setup a valid Neo4j instance on our machines, so we took the time to make the following step by step guide: - - - - - Start by downloading the Neo4j Desktop from the [neo4j website here](https://neo4j.com/download). - - - - - - Next, run start the desktop client and select `New` >> `Create Project`. - - - - - - Select a password (we chose `neo4j123`) and ensure you are using version `5.20.0`, then click `Create`. - - - - - - Navigate to the project and select it, your screen should look like the image below: - - - - - - Enable the `APOC` plugin for your project - - - - - A manual reboot may be necessary for this change to take effect. - - - - That's it, you can should now be able to connect your database using environment variables like those shown below -```bash -export NEO4J_USER=neo4j -export NEO4J_PASSWORD=neo4j123 -export NEO4J_URL=bolt://localhost:7687 -export NEO4J_DATABASE=neo4j -``` - - - diff --git a/docs/sciphi-cloud/projects.mdx b/docs/sciphi-cloud/projects.mdx index 7d912d267..131bc0967 100644 --- a/docs/sciphi-cloud/projects.mdx +++ b/docs/sciphi-cloud/projects.mdx @@ -74,7 +74,7 @@ Every search query and subsequent search results that flow through the R2R syste The configuration file for your R2R system as well as editable system prompts can be found at `https://sciphi-b30ed103-...YOUR-PROJECT...-qwpin2swwa/settings`. - + diff --git a/docs/sciphi-cloud/quickstart.mdx b/docs/sciphi-cloud/quickstart.mdx index af65f42cc..3332de493 100644 --- a/docs/sciphi-cloud/quickstart.mdx +++ b/docs/sciphi-cloud/quickstart.mdx @@ -109,7 +109,7 @@ r2r --base-url=$SCIPHI_CLOUD_URL rag --query="who was aristotle?" 
--stream --use rag_results = client.rag( query="Who is John?", vector_search_settings={"use_hybrid_search": True}, - rag_generation_config=GenerationConfig(model="gpt-4o-mini", temperature=0.0, stream=True) + rag_generation_config=GenerationConfig(model="openai/gpt-4o-mini", temperature=0.0, stream=True) ) for chunk in rag_results: diff --git a/js/sdk/src/r2rClient.ts b/js/sdk/src/r2rClient.ts index 392498e24..8bce88dfa 100644 --- a/js/sdk/src/r2rClient.ts +++ b/js/sdk/src/r2rClient.ts @@ -464,7 +464,7 @@ export class r2rClient { metadatas?: Record[]; document_ids?: string[]; user_ids?: (string | null)[]; - chunking_config?: Record; + ingestion_config?: Record; } = {}, ): Promise { this._ensureAuthenticated(); @@ -527,8 +527,8 @@ export class r2rClient { ? JSON.stringify(options.document_ids) : undefined, user_ids: options.user_ids ? JSON.stringify(options.user_ids) : undefined, - chunking_config: options.chunking_config - ? JSON.stringify(options.chunking_config) + ingestion_config: options.ingestion_config + ? JSON.stringify(options.ingestion_config) : undefined, }; @@ -566,7 +566,7 @@ export class r2rClient { options: { document_ids: string[]; metadatas?: Record[]; - chunking_config?: Record; + ingestion_config?: Record; }, ): Promise { this._ensureAuthenticated(); @@ -604,8 +604,8 @@ export class r2rClient { metadatas: options.metadatas ? JSON.stringify(options.metadatas) : undefined, - chunking_config: options.chunking_config - ? JSON.stringify(options.chunking_config) + ingestion_config: options.ingestion_config + ? JSON.stringify(options.ingestion_config) : undefined, }; @@ -894,28 +894,28 @@ export class r2rClient { }); } - /** - * Inspect the knowledge graph associated with your R2R deployment. - * @param limit The maximum number of nodes to return. Defaults to 100. - * @returns A promise that resolves to the response from the server. - */ - @feature("inspectKnowledgeGraph") - async inspectKnowledgeGraph( - offset?: number, - limit?: number, - ): Promise> { - this._ensureAuthenticated(); - - const params: Record = {}; - if (offset !== undefined) { - params.offset = offset; - } - if (limit !== undefined) { - params.limit = limit; - } - - return this._makeRequest("GET", "inspect_knowledge_graph", { params }); - } + // /** + // * Inspect the knowledge graph associated with your R2R deployment. + // * @param limit The maximum number of nodes to return. Defaults to 100. + // * @returns A promise that resolves to the response from the server. + // */ + // @feature("inspectKnowledgeGraph") + // async inspectKnowledgeGraph( + // offset?: number, + // limit?: number, + // ): Promise> { + // this._ensureAuthenticated(); + + // const params: Record = {}; + // if (offset !== undefined) { + // params.offset = offset; + // } + // if (limit !== undefined) { + // params.limit = limit; + // } + + // return this._makeRequest("GET", "inspect_knowledge_graph", { params }); + // } /** * Get an overview of existing collections. 
diff --git a/py/Dockerfile b/py/Dockerfile index 2fdd95e4f..17bbc3f2b 100644 --- a/py/Dockerfile +++ b/py/Dockerfile @@ -3,17 +3,21 @@ FROM python:3.10-slim AS builder # Install system dependencies RUN apt-get update && apt-get install -y --no-install-recommends \ gcc g++ musl-dev curl libffi-dev gfortran libopenblas-dev \ - && apt-get clean && rm -rf /var/lib/apt/lists/* + && apt-get clean && rm -rf /var/lib/apt/lists/* \ + && curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y RUN pip install --no-cache-dir poetry +# Add Rust to PATH +ENV PATH="/root/.cargo/bin:${PATH}" + RUN mkdir -p /app/py WORKDIR /app/py COPY pyproject.toml /app/py/pyproject.toml # Install dependencies RUN poetry config virtualenvs.create false \ - && poetry install --extras "core" --no-dev --no-root \ + && poetry install --extras "core ingestion-bundle" --no-dev --no-root \ && pip install --no-cache-dir gunicorn uvicorn # Create the final image diff --git a/py/Dockerfile.dev b/py/Dockerfile.dev deleted file mode 100644 index 6f997ca20..000000000 --- a/py/Dockerfile.dev +++ /dev/null @@ -1,63 +0,0 @@ -# Stage 1: Builder -FROM python:3.10-slim AS builder - -# Install system dependencies in a single RUN command to reduce layers -RUN apt-get update && apt-get install -y --no-install-recommends \ - gcc g++ musl-dev curl libffi-dev gfortran libopenblas-dev \ - && apt-get clean && rm -rf /var/lib/apt/lists/* - -# Set working directory -WORKDIR /app - -# Install Poetry -RUN pip install --no-cache-dir poetry - -# Copy only dependency files first to leverage caching -COPY pyproject.toml poetry.lock /app/ - -# Set working directory to /app/py for dependency installation -WORKDIR /app - -# Install Python dependencies without creating a virtual environment -RUN poetry config virtualenvs.create false \ - && poetry install --extras "core" --no-dev --no-root \ - && pip install --no-cache-dir gunicorn uvicorn - -# Copy only the necessary application files -COPY core /app/core -COPY r2r /app/r2r -COPY shared /app/shared -COPY r2r.toml /app/r2r.toml - -# Stage 2: Final Image -FROM python:3.10-slim - -# Install runtime dependencies -RUN apt-get update && apt-get install -y --no-install-recommends curl \ - && apt-get clean && rm -rf /var/lib/apt/lists/* - -# Set working directory -WORKDIR /app - -# Copy installed Python packages and binaries from the builder stage -COPY --from=builder /usr/local/lib/python3.10/site-packages /usr/local/lib/python3.10/site-packages -COPY --from=builder /usr/local/bin /usr/local/bin - -# Copy only the necessary application files from the builder -COPY --from=builder /app/core /app/core -COPY --from=builder /app/r2r /app/r2r -COPY --from=builder /app/shared /app/shared -COPY --from=builder /app/r2r.toml /app/r2r.toml - -COPY pyproject.toml /app/pyproject.toml - -# Set environment variables for port and host -ARG PORT=7272 -ARG HOST=0.0.0.0 -ENV PORT=${PORT} HOST=${HOST} - -# Expose the specified port -EXPOSE ${PORT} - -# Use the exec form of CMD for better signal handling -CMD ["uvicorn", "core.main.app_entry:app", "--host", "0.0.0.0", "--port", "7272"] diff --git a/py/cli/__init__.py b/py/cli/__init__.py index 2a831b242..11ca47716 100644 --- a/py/cli/__init__.py +++ b/py/cli/__init__.py @@ -11,7 +11,7 @@ "auth", "ingestion", "management", - "restructure", + "kg", "retrieval", "server", ] diff --git a/py/cli/commands/kg.py b/py/cli/commands/kg.py new file mode 100644 index 000000000..0e2a4c5d3 --- /dev/null +++ b/py/cli/commands/kg.py @@ -0,0 +1,73 @@ +import json + +import asyncclick as 
click +from asyncclick import pass_context + +from cli.command_group import cli +from cli.utils.timer import timer + + +@cli.command() +@click.option( + "--collection-id", + required=True, + help="Collection ID to create graph for.", +) +@click.option( + "--kg-creation-settings", + required=False, + help="Settings for the graph creation process.", +) +@pass_context +def create_graph(ctx, collection_id, kg_creation_settings): + """ + Create a new graph. + """ + client = ctx.obj + + if kg_creation_settings: + try: + kg_creation_settings = json.loads(kg_creation_settings) + except json.JSONDecodeError: + click.echo( + "Error: kg-creation-settings must be a valid JSON string" + ) + return + + with timer(): + response = client.create_graph(collection_id, kg_creation_settings) + + click.echo(json.dumps(response, indent=2)) + + +@cli.command() +@click.option( + "--collection-id", + required=True, + help="Collection ID to enrich graph for.", +) +@click.option( + "--kg-enrichment-settings", + required=False, + help="Settings for the graph enrichment process.", +) +@pass_context +def enrich_graph(ctx, collection_id, kg_enrichment_settings): + """ + Enrich an existing graph. + """ + client = ctx.obj + + if kg_enrichment_settings: + try: + kg_enrichment_settings = json.loads(kg_enrichment_settings) + except json.JSONDecodeError: + click.echo( + "Error: kg-enrichment-settings must be a valid JSON string" + ) + return + + with timer(): + response = client.enrich_graph(collection_id, kg_enrichment_settings) + + click.echo(json.dumps(response, indent=2)) diff --git a/py/cli/commands/management.py b/py/cli/commands/management.py index e12924e11..10210505a 100644 --- a/py/cli/commands/management.py +++ b/py/cli/commands/management.py @@ -150,29 +150,8 @@ def document_chunks(ctx, document_id, offset, limit): for index, chunk in enumerate(chunks, 1): click.echo(f"\nChunk {index}:") if isinstance(chunk, dict): - click.echo(f"Fragment ID: {chunk.get('fragment_id', 'N/A')}") + click.echo(f"Extraction ID: {chunk.get('id', 'N/A')}") click.echo(f"Text: {chunk.get('text', '')[:100]}...") click.echo(f"Metadata: {chunk.get('metadata', {})}") else: click.echo(f"Unexpected chunk format: {chunk}") - - -@cli.command() -@click.option( - "--offset", - default=None, - help="The offset to start from. Defaults to 0.", -) -@click.option( - "--limit", - default=None, - help="The maximum number of nodes to return. Defaults to 100.", -) -@pass_context -def inspect_knowledge_graph(ctx, offset, limit): - """Inspect the knowledge graph.""" - client = ctx.obj - with timer(): - response = client.inspect_knowledge_graph(offset, limit) - - click.echo(response["results"]) diff --git a/py/cli/commands/restructure.py b/py/cli/commands/restructure.py deleted file mode 100644 index 4c7e54d93..000000000 --- a/py/cli/commands/restructure.py +++ /dev/null @@ -1,55 +0,0 @@ -import json - -import asyncclick as click -from asyncclick import pass_context - -from cli.command_group import cli -from cli.utils.timer import timer - - -@cli.command() -@click.option( - "--document-ids", - required=False, - default=None, - help="Document IDs to create graph for (comma-separated)", -) -@pass_context -def create_graph(ctx, document_ids): - """ - Create a new graph. 
- """ - client = ctx.obj - with timer(): - if document_ids is None: - document_ids = [] - else: - document_ids = document_ids.split(",") - response = client.create_graph(document_ids) - - click.echo(json.dumps(response, indent=2)) - - -@cli.command() -@click.option( - "--force-enrichment", - required=False, - default=False, - help="Force enrichment of the graph even if graph creation is still in progress for some documents.", -) -@click.option( - "--skip-clustering", - required=False, - default=False, - help="Perform leiden clustering on the graph to create communities.", -) -@pass_context -def enrich_graph(ctx, force_enrichment, skip_clustering): - """ - Perform graph enrichment over the entire graph. - """ - client = ctx.obj - with timer(): - response = client.enrich_graph(force_enrichment, skip_clustering) - - click.echo(json.dumps(response, indent=2)) diff --git a/py/cli/commands/server.py b/py/cli/commands/server.py index 08f9c3fec..720361c83 100644 --- a/py/cli/commands/server.py +++ b/py/cli/commands/server.py @@ -87,14 +87,30 @@ def logs(ctx, run_type_filter, offset, limit): def docker_down(volumes, remove_orphans, project_name): """Bring down the Docker Compose setup and attempt to remove the network if necessary.""" result = bring_down_docker_compose(project_name, volumes, remove_orphans) + remove_r2r_network() if result != 0: click.echo( - "An error occurred while bringing down the Docker Compose setup. Attempting to remove the network..." + f"An error occurred while bringing down the {project_name} Docker Compose setup. Attempting to remove the network..." ) else: - click.echo("Docker Compose setup has been successfully brought down.") + click.echo( + f"{project_name} Docker Compose setup has been successfully brought down." + ) + + result = bring_down_docker_compose("r2r-full", volumes, remove_orphans) + + # TODO - Clean up the way we do this r2r-down + click.echo(f"Also attempting to bring down the full deployment") + if result != 0: + click.echo( + f"An error occurred while bringing down the r2r-full Docker Compose setup. Attempting to remove the network..." + ) + else: + click.echo( + f"r2r-full Docker Compose setup has been successfully brought down." + ) @cli.command() @@ -181,23 +197,13 @@ def generate_report(): @click.option("--port", default=7272, help="Port to run the server on") @click.option("--docker", is_flag=True, help="Run using Docker") @click.option( - "--exclude-neo4j", default=False, help="Exclude Neo4j from Docker setup" -) -@click.option( - "--exclude-ollama", default=True, help="Exclude Ollama from Docker setup" + "--full", + is_flag=True, + help="Run the full R2R compose? This includes Hatchet and Unstructured.", ) @click.option( - "--exclude-postgres", - default=False, - help="Exclude Postgres from Docker setup", + "--project-name", default="r2r", help="Project name for Docker deployment" ) -@click.option( - "--exclude-hatchet", - default=False, - help="Exclude Hatchet from Docker setup", -) -@click.option("--project-name", default="r2r", help="Project name for Docker") -@click.option("--image", help="Docker image to use") @click.option( "--config-name", default=None, help="Name of the R2R configuration to use" ) @@ -212,12 +218,7 @@ def generate_report(): default=False, help="Run in debug mode. 
Only for development.", ) -@click.option( - "--dev", - is_flag=True, - default=False, - help="Run in development mode", -) +@click.option("--image", help="Docker image to use") @click.option( "--image-env", default="prod", @@ -227,20 +228,37 @@ async def serve( host, port, docker, - exclude_neo4j, - exclude_ollama, - exclude_postgres, - exclude_hatchet, + full, project_name, - image, config_name, config_path, build, - dev, + image, image_env, ): """Start the R2R server.""" load_dotenv() + click.echo("Spinning up an R2R deployment...") + + click.echo(f"Running on {host}:{port}, with docker={docker}") + + if full: + click.echo( + "Running the full R2R compose which includes `Hatchet` and `Unstructured.io`." + ) + if project_name == "r2r": # overwrite project name if full compose + project_name = "r2r-full" + else: + click.echo("Running the lightweight R2R compose.") + + if config_path and config_name: + raise click.UsageError( + "Both `config-path` and `config-name` were provided. Please provide only one." + ) + if build: + click.echo( + "`build` flag detected. Building Docker image from local repository..." + ) if image and image_env: click.echo( "WARNING: Both `image` and `image_env` were provided. Using `image`." @@ -291,7 +309,7 @@ def image_exists(img): "-t", image, "-f", - f"Dockerfile{'.dev' if dev else ''}", + f"Dockerfile", ".", ], check=True, @@ -311,10 +329,7 @@ def image_exists(img): run_docker_serve( host, port, - exclude_neo4j, - exclude_ollama, - exclude_postgres, - exclude_hatchet, + full, project_name, image, config_name, @@ -344,7 +359,7 @@ def image_exists(img): click.secho(f"Navigating to R2R application at {url}.", fg="blue") webbrowser.open(url) else: - await run_local_serve(host, port, config_name, config_path) + await run_local_serve(host, port, config_name, config_path, full) @cli.command() diff --git a/py/cli/main.py b/py/cli/main.py index abe1080bc..0c4756d9f 100644 --- a/py/cli/main.py +++ b/py/cli/main.py @@ -2,8 +2,8 @@ from cli.commands import ( auth, ingestion, + kg, management, - restructure, retrieval, server, templates, @@ -31,11 +31,10 @@ def add_command_with_telemetry(command): add_command_with_telemetry(management.users_overview) add_command_with_telemetry(management.documents_overview) add_command_with_telemetry(management.document_chunks) -add_command_with_telemetry(management.inspect_knowledge_graph) # Restructure -add_command_with_telemetry(restructure.create_graph) -add_command_with_telemetry(restructure.enrich_graph) +add_command_with_telemetry(kg.create_graph) +add_command_with_telemetry(kg.enrich_graph) # Retrieval add_command_with_telemetry(retrieval.search) @@ -73,4 +72,4 @@ def main(): if __name__ == "__main__": - main() + main() \ No newline at end of file diff --git a/py/cli/utils/docker_utils.py b/py/cli/utils/docker_utils.py index 875d33fec..dcbe91ae1 100644 --- a/py/cli/utils/docker_utils.py +++ b/py/cli/utils/docker_utils.py @@ -15,7 +15,7 @@ def bring_down_docker_compose(project_name, volumes, remove_orphans): compose_files = get_compose_files() - docker_command = f"docker compose -f {compose_files['base']} -f {compose_files['neo4j']} -f {compose_files['ollama']} -f {compose_files['postgres']} -f {compose_files['hatchet']}" + docker_command = f"docker compose -f {compose_files['base']} -f {compose_files['full']}" docker_command += f" --project-name {project_name}" if volumes: @@ -26,7 +26,7 @@ def bring_down_docker_compose(project_name, volumes, remove_orphans): docker_command += " down" - click.echo("Bringing down Docker Compose 
setup...") + click.echo(f"Bringing down {project_name} Docker Compose setup...") return os.system(docker_command) @@ -76,19 +76,20 @@ async def run_local_serve( port: int, config_name: Optional[str] = None, config_path: Optional[str] = None, + full: bool = False, ) -> None: try: from r2r import R2RBuilder, R2RConfig except ImportError as e: click.echo( - f"Error: {e}\n\nNote, you must install the `r2r core` package to run the R2R server locally." + f"Error: You must install the `r2r core` package to run the R2R server locally." ) - sys.exit(1) + raise e if config_path and config_name: raise ValueError("Cannot specify both config_path and config_name") if not config_path and not config_name: - config_name = "default" + config_name = "default" if not full else "full" r2r_instance = await R2RBuilder( config=R2RConfig.load(config_name, config_path) @@ -105,23 +106,27 @@ async def run_local_serve( available_port = find_available_port(port) await r2r_instance.orchestration_provider.start_worker() + + # TODO: make this work with autoreload, currently due to hatchet, it causes a reload error + # import uvicorn + # uvicorn.run( + # "core.main.app_entry:app", host=host, port=available_port, reload=False + # ) + r2r_instance.serve(host, available_port) def run_docker_serve( host: str, port: int, - exclude_neo4j: bool, - exclude_ollama: bool, - exclude_postgres: bool, - exclude_hatchet: bool, + full: bool, project_name: str, image: str, config_name: Optional[str] = None, config_path: Optional[str] = None, ): check_docker_compose_version() - check_set_docker_env_vars(exclude_neo4j, exclude_ollama, exclude_postgres) + check_set_docker_env_vars() if config_path and config_name: raise ValueError("Cannot specify both config_path and config_name") @@ -139,10 +144,7 @@ def run_docker_serve( compose_files, host, port, - exclude_neo4j, - exclude_ollama, - exclude_postgres, - exclude_hatchet, + full, project_name, image, config_name, @@ -151,9 +153,11 @@ def run_docker_serve( click.secho("R2R now runs on port 7272 by default!", fg="yellow") click.echo("Pulling Docker images...") + click.echo(f"Calling `{pull_command}`") os.system(pull_command) click.echo("Starting Docker Compose setup...") + click.echo(f"Calling `{up_command}`") os.system(up_command) @@ -204,7 +208,6 @@ def check_llm_reqs(llm_provider, model_provider, include_ollama=False): def check_external_ollama(ollama_url="http://localhost:11434/api/version"): - try: response = requests.get(ollama_url, timeout=5) if response.status_code == 200: @@ -235,35 +238,16 @@ def check_external_ollama(ollama_url="http://localhost:11434/api/version"): sys.exit(1) -def check_set_docker_env_vars( - exclude_neo4j=False, exclude_ollama=True, exclude_postgres=False -): - env_vars = [] - if not exclude_neo4j: - neo4j_vars = [ - "NEO4J_USER", - "NEO4J_PASSWORD", - "NEO4J_URL", - "NEO4J_DATABASE", - ] - env_vars.extend(neo4j_vars) - - if not exclude_postgres: - postgres_vars = [ - "POSTGRES_HOST", - "POSTGRES_USER", - "POSTGRES_PASSWORD", - "POSTGRES_PORT", - "POSTGRES_DBNAME", - # "POSTGRES_PROJECT_NAME", TODO - uncomment in next release - ] - env_vars.extend(postgres_vars) +def check_set_docker_env_vars(): - if not exclude_ollama: - ollama_vars = [ - "OLLAMA_API_BASE", - ] - env_vars.extend(ollama_vars) + env_vars = { + "POSTGRES_PROJECT_NAME": "r2r", + "POSTGRES_HOST": "postgres", + "POSTGRES_PORT": "5432", + "POSTGRES_DBNAME": "postgres", + "POSTGRES_USER": "postgres", + "POSTGRES_PASSWORD": "postgres", + } is_test = ( "pytest" in sys.modules @@ -275,6 +259,10 @@ def 
check_set_docker_env_vars( for var in env_vars: if value := os.environ.get(var): warning_text = click.style("Warning:", fg="red", bold=True) + + if value == env_vars[var]: + continue + prompt = ( f"{warning_text} It's only necessary to set this environment variable when connecting to an instance not managed by R2R.\n" f"Environment variable {var} is set to '{value}'. Unset it?" @@ -286,14 +274,6 @@ def check_set_docker_env_vars( click.echo(f"Kept {var}") -def set_config_env_vars(obj): - if config_path := obj.get("config_path"): - os.environ["CONFIG_PATH"] = f'"{config_path}"' - else: - config_name = obj.get("config_name") or "default" - os.environ["CONFIG_NAME"] = f'"{config_name}"' - - def get_compose_files(): package_dir = os.path.join( os.path.dirname(os.path.abspath(__file__)), @@ -302,10 +282,7 @@ def get_compose_files(): ) compose_files = { "base": os.path.join(package_dir, "compose.yaml"), - "neo4j": os.path.join(package_dir, "compose.neo4j.yaml"), - "ollama": os.path.join(package_dir, "compose.ollama.yaml"), - "postgres": os.path.join(package_dir, "compose.postgres.yaml"), - "hatchet": os.path.join(package_dir, "compose.hatchet.yaml"), + "full": os.path.join(package_dir, "compose.full.yaml"), } for name, path in compose_files.items(): @@ -337,24 +314,16 @@ def build_docker_command( compose_files, host, port, - exclude_neo4j, - exclude_ollama, - exclude_postgres, - exclude_hatchet, + full, project_name, image, config_name, config_path, ): - base_command = f"docker compose -f {compose_files['base']}" - if not exclude_neo4j: - base_command += f" -f {compose_files['neo4j']}" - if not exclude_ollama: - base_command += f" -f {compose_files['ollama']}" - if not exclude_postgres: - base_command += f" -f {compose_files['postgres']}" - if not exclude_hatchet: - base_command += f" -f {compose_files['hatchet']}" + if not full: + base_command = f"docker compose -f {compose_files['base']}" + else: + base_command = f"docker compose -f {compose_files['full']}" base_command += f" --project-name {project_name}" @@ -374,6 +343,8 @@ def build_docker_command( os.environ["CONFIG_PATH"] = ( os.path.abspath(config_path) if config_path else "" ) + elif full: + os.environ["CONFIG_NAME"] = "full" pull_command = f"{base_command} pull" up_command = f"{base_command} up -d" diff --git a/py/cli/utils/telemetry.py b/py/cli/utils/telemetry.py index 56b5c1b74..cda8bfe59 100644 --- a/py/cli/utils/telemetry.py +++ b/py/cli/utils/telemetry.py @@ -4,6 +4,7 @@ import os import threading import uuid +from importlib.metadata import version from typing import Optional import asyncclick as click @@ -63,6 +64,7 @@ async def tracked_callback(*args, **kwargs): "status": "success", "args": command_args, "params": command_params, + "version": version("r2r"), }, ) @@ -79,6 +81,7 @@ async def tracked_callback(*args, **kwargs): "error_message": str(e), "args": command_args, "params": command_params, + "version": version("r2r"), }, ) raise @@ -115,6 +118,7 @@ def tracked_callback(*args, **kwargs): "status": "success", "args": command_args, "params": command_params, + "version": version("r2r"), }, ), daemon=True, @@ -136,6 +140,7 @@ def tracked_callback(*args, **kwargs): "error_message": str(e), "args": command_args, "params": command_params, + "version": version("r2r"), }, ), daemon=True, @@ -144,4 +149,4 @@ def tracked_callback(*args, **kwargs): raise command.callback = tracked_callback - return command + return command \ No newline at end of file diff --git a/py/compose.full.yaml b/py/compose.full.yaml new file mode 100644 index 
000000000..2290372b5 --- /dev/null +++ b/py/compose.full.yaml @@ -0,0 +1,432 @@ +version: '3.8' + +x-depends-on: + postgres: &postgres-dependency + condition: service_healthy + hatchet-engine: &hatchet-dependency + condition: service_healthy + +networks: + r2r-network: + name: r2r-network + driver: bridge + attachable: true + ipam: + driver: default + config: + - subnet: 172.28.0.0/16 + labels: + - "com.docker.compose.recreate=always" + +volumes: + hatchet_certs: + hatchet_config: + hatchet_api_key: + postgres_data: + hatchet_rabbitmq_data: + hatchet_rabbitmq.conf: + +services: + + postgres: + image: pgvector/pgvector:pg16 + environment: + - POSTGRES_USER=${POSTGRES_USER:-postgres} + - POSTGRES_PASSWORD=${POSTGRES_PASSWORD:-postgres} + - POSTGRES_DB=${POSTGRES_DBNAME:-postgres} + - POSTGRES_MAX_CONNECTIONS=${POSTGRES_MAX_CONNECTIONS:-1024} + volumes: + - postgres_data:/var/lib/postgresql/data + networks: + - r2r-network + ports: + - "5432:5432" + healthcheck: + test: ["CMD-SHELL", "pg_isready -U ${POSTGRES_USER:-postgres}"] + interval: 10s + timeout: 5s + retries: 5 + restart: on-failure + + hatchet-rabbitmq: + image: "rabbitmq:3-management" + hostname: "hatchet-rabbitmq" + ports: + - "5673:5672" + - "15673:15672" + environment: + RABBITMQ_DEFAULT_USER: "user" + RABBITMQ_DEFAULT_PASS: "password" + volumes: + - "hatchet_rabbitmq_data:/var/lib/rabbitmq" + - "hatchet_rabbitmq.conf:/etc/rabbitmq/rabbitmq.conf" + healthcheck: + test: ["CMD", "rabbitmqctl", "status"] + interval: 10s + timeout: 10s + retries: 5 + networks: + - r2r-network + + hatchet-create-db: + image: postgres:latest + command: > + sh -c " + set -e + echo 'Waiting for PostgreSQL to be ready...' + while ! pg_isready -h postgres -p 5432 -U postgres; do + sleep 1 + done + echo 'PostgreSQL is ready, checking if database exists...' + if ! PGPASSWORD=${POSTGRES_PASSWORD:-postgres} psql -h postgres -p 5432 -U ${POSTGRES_USER:-postgres} -lqt | grep -qw ${HATCHET_POSTGRES_DBNAME:-hatchet}; then + echo 'Database does not exist, creating it...' + PGPASSWORD=${POSTGRES_PASSWORD:-postgres} createdb -h postgres -p 5432 -U ${POSTGRES_USER:-postgres} -w ${HATCHET_POSTGRES_DBNAME:-hatchet} + else + echo 'Database already exists, skipping creation.' 
+ fi + " + environment: + POSTGRES_USER: "${POSTGRES_USER:-postgres}" + POSTGRES_PASSWORD: "${POSTGRES_PASSWORD:-postgres}" + POSTGRES_HOST: "${POSTGRES_HOST:-postgres}" + POSTGRES_PORT: "${POSTGRES_PORT:-5432}" + HATCHET_POSTGRES_DBNAME: "${HATCHET_POSTGRES_DBNAME:-hatchet}" + depends_on: + postgres: + condition: service_healthy + networks: + - r2r-network + + hatchet-migration: + image: ghcr.io/hatchet-dev/hatchet/hatchet-migrate:latest + environment: + POSTGRES_USER: "${POSTGRES_USER:-postgres}" + POSTGRES_PASSWORD: "${POSTGRES_PASSWORD:-postgres}" + POSTGRES_HOST: "${POSTGRES_HOST:-postgres}" + POSTGRES_PORT: "${POSTGRES_PORT:-5432}" + HATCHET_POSTGRES_DBNAME: "${HATCHET_POSTGRES_DBNAME:-hatchet}" + DATABASE_URL: "postgres://${POSTGRES_USER:-postgres}:${POSTGRES_PASSWORD:-postgres}@${POSTGRES_HOST:-postgres}:${POSTGRES_PORT:-5432}/${HATCHET_POSTGRES_DBNAME:-hatchet}?sslmode=disable" + DATABASE_POSTGRES_PORT: "${POSTGRES_PORT:-5432}" + DATABASE_POSTGRES_HOST: "${POSTGRES_HOST:-postgres}" + DATABASE_POSTGRES_USERNAME: "${POSTGRES_USER:-postgres}" + DATABASE_POSTGRES_PASSWORD: "${POSTGRES_PASSWORD:-postgres}" + HATCHET_DATABASE_POSTGRES_DB_NAME: "${HATCHET_POSTGRES_DBNAME:-hatchet}" + depends_on: + hatchet-create-db: + condition: service_completed_successfully + postgres: + condition: service_healthy + networks: + - r2r-network + + hatchet-setup-config: + image: ghcr.io/hatchet-dev/hatchet/hatchet-admin:latest + command: /hatchet/hatchet-admin quickstart --skip certs --generated-config-dir /hatchet/config --overwrite=false + environment: + POSTGRES_USER: "${POSTGRES_USER:-postgres}" + POSTGRES_PASSWORD: "${POSTGRES_PASSWORD:-postgres}" + POSTGRES_HOST: "${POSTGRES_HOST:-postgres}" + POSTGRES_PORT: "${POSTGRES_PORT:-5432}" + HATCHET_POSTGRES_DBNAME: "${HATCHET_POSTGRES_DBNAME:-hatchet}" + HATCHET_CLIENT_GRPC_MAX_RECV_MESSAGE_LENGTH: "${HATCHET_CLIENT_GRPC_MAX_RECV_MESSAGE_LENGTH:-13421772800}" + HATCHET_CLIENT_GRPC_MAX_SEND_MESSAGE_LENGTH: "${HATCHET_CLIENT_GRPC_MAX_SEND_MESSAGE_LENGTH:-13421772800}" + DATABASE_URL: "postgres://${POSTGRES_USER:-postgres}:${POSTGRES_PASSWORD:-postgres}@${POSTGRES_HOST:-postgres}:${POSTGRES_PORT:-5432}/${HATCHET_POSTGRES_DBNAME:-hatchet}?sslmode=disable" + DATABASE_POSTGRES_PORT: "${POSTGRES_PORT:-5432}" + DATABASE_POSTGRES_HOST: "${POSTGRES_HOST:-postgres}" + DATABASE_POSTGRES_USERNAME: "${POSTGRES_USER:-postgres}" + DATABASE_POSTGRES_PASSWORD: "${POSTGRES_PASSWORD:-postgres}" + HATCHET_DATABASE_POSTGRES_DB_NAME: "${HATCHET_POSTGRES_DBNAME:-hatchet}" + SERVER_TASKQUEUE_RABBITMQ_URL: amqp://user:password@hatchet-rabbitmq:5672/ + SERVER_AUTH_COOKIE_DOMAIN: "http://host.docker.internal:7274" + SERVER_URL: "http://host.docker.internal:7274" + SERVER_AUTH_COOKIE_INSECURE: "t" + SERVER_GRPC_BIND_ADDRESS: "0.0.0.0" + SERVER_GRPC_INSECURE: "t" + SERVER_GRPC_BROADCAST_ADDRESS: "host.docker.internal:7077" + SERVER_GRPC_MAX_MSG_SIZE: 13421772800 + volumes: + - hatchet_certs:/hatchet/certs + - hatchet_config:/hatchet/config + depends_on: + hatchet-migration: + condition: service_completed_successfully + hatchet-rabbitmq: + condition: service_healthy + postgres: + condition: service_healthy + networks: + - r2r-network + + hatchet-engine: + image: ghcr.io/hatchet-dev/hatchet/hatchet-engine:latest + command: /hatchet/hatchet-engine --config /hatchet/config + restart: on-failure + depends_on: + hatchet-setup-config: + condition: service_completed_successfully + ports: + - "7077:7077" + environment: + POSTGRES_USER: "${POSTGRES_USER:-postgres}" + POSTGRES_PASSWORD: 
"${POSTGRES_PASSWORD:-postgres}" + POSTGRES_HOST: "${POSTGRES_HOST:-postgres}" + POSTGRES_PORT: "${POSTGRES_PORT:-5432}" + HATCHET_POSTGRES_DBNAME: "${HATCHET_POSTGRES_DBNAME:-hatchet}" + DATABASE_URL: "postgres://${POSTGRES_USER:-postgres}:${POSTGRES_PASSWORD:-postgres}@${POSTGRES_HOST:-postgres}:${POSTGRES_PORT:-5432}/${HATCHET_POSTGRES_DBNAME:-hatchet}?sslmode=disable" + SERVER_GRPC_BROADCAST_ADDRESS: "host.docker.internal:7077" + SERVER_GRPC_BIND_ADDRESS: "0.0.0.0" + SERVER_GRPC_PORT: 7077 + SERVER_GRPC_INSECURE: "t" + SERVER_GRPC_MAX_MSG_SIZE: 13421772800 + volumes: + - hatchet_certs:/hatchet/certs + - hatchet_config:/hatchet/config + networks: + r2r-network: + aliases: + - host.docker.internal + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:7077/health"] + interval: 10s + timeout: 5s + retries: 5 + + hatchet-api: + image: ghcr.io/hatchet-dev/hatchet/hatchet-api:latest + command: /hatchet/hatchet-api --config /hatchet/config + restart: on-failure + depends_on: + hatchet-setup-config: + condition: service_completed_successfully + environment: + POSTGRES_USER: "${POSTGRES_USER:-postgres}" + POSTGRES_PASSWORD: "${POSTGRES_PASSWORD:-postgres}" + POSTGRES_HOST: "${POSTGRES_HOST:-postgres}" + POSTGRES_PORT: "${POSTGRES_PORT:-5432}" + HATCHET_POSTGRES_DBNAME: "${HATCHET_POSTGRES_DBNAME:-hatchet}" + DATABASE_URL: "postgres://${POSTGRES_USER:-postgres}:${POSTGRES_PASSWORD:-postgres}@${POSTGRES_HOST:-postgres}:${POSTGRES_PORT:-5432}/${HATCHET_POSTGRES_DBNAME:-hatchet}?sslmode=disable" + volumes: + - hatchet_certs:/hatchet/certs + - hatchet_config:/hatchet/config + networks: + - r2r-network + labels: + - "traefik.enable=true" + - "traefik.http.routers.hatchet-api.rule=PathPrefix(`/api`)" + - "traefik.http.services.hatchet-api.loadbalancer.server.port=8080" + - "traefik.http.routers.hatchet-api.entrypoints=hatchet" + + hatchet-dashboard: + image: ghcr.io/hatchet-dev/hatchet/hatchet-frontend:latest + networks: + - r2r-network + labels: + - "traefik.enable=true" + - "traefik.http.routers.hatchet-dashboard.rule=PathPrefix(`/`)" + - "traefik.http.services.hatchet-frontend.loadbalancer.server.port=80" + - "traefik.http.routers.hatchet-dashboard.entrypoints=hatchet" + + setup-token: + image: ghcr.io/hatchet-dev/hatchet/hatchet-admin:latest + command: > + sh -c " + set -e + echo 'Starting token creation process...' + + # Attempt to create token and capture both stdout and stderr + TOKEN_OUTPUT=$$(/hatchet/hatchet-admin token create --config /hatchet/config --tenant-id 707d0855-80ab-4e1f-a156-f1c4546cbf52 2>&1) + + # Extract the token (assuming it's the only part that looks like a JWT) + TOKEN=$$(echo \"$$TOKEN_OUTPUT\" | grep -Eo 'eyJ[A-Za-z0-9_-]*\.eyJ[A-Za-z0-9_-]*\.[A-Za-z0-9_-]*') + + if [ -z \"$$TOKEN\" ]; then + echo 'Error: Failed to extract token. 
Full command output:' >&2 + echo \"$$TOKEN_OUTPUT\" >&2 + exit 1 + fi + + echo \"$$TOKEN\" > /tmp/hatchet_api_key + echo 'Token created and saved to /tmp/hatchet_api_key' + + # Copy token to final destination + echo -n \"$$TOKEN\" > /hatchet_api_key/api_key.txt + echo 'Token copied to /hatchet_api_key/api_key.txt' + + # Verify token was copied correctly + if [ \"$$(cat /tmp/hatchet_api_key)\" != \"$(cat /hatchet_api_key/api_key.txt)\" ]; then + echo 'Error: Token copy failed, files do not match' >&2 + echo 'Content of /tmp/hatchet_api_key:' + cat /tmp/hatchet_api_key + echo 'Content of /hatchet_api_key/api_key.txt:' + cat /hatchet_api_key/api_key.txt + exit 1 + fi + + echo 'Hatchet API key has been saved successfully' + echo 'Token length:' $${#TOKEN} + echo 'Token (first 20 chars):' $${TOKEN:0:20} + echo 'Token structure:' $$(echo $$TOKEN | awk -F. '{print NF-1}') 'parts' + # Check each part of the token + for i in 1 2 3; do + PART=$$(echo $$TOKEN | cut -d. -f$$i) + echo 'Part' $$i 'length:' $${#PART} + echo 'Part' $$i 'base64 check:' $$(echo $$PART | base64 -d >/dev/null 2>&1 && echo 'Valid' || echo 'Invalid') + done + # Final validation attempt + if ! echo $$TOKEN | awk -F. '{print $$2}' | base64 -d 2>/dev/null | jq . >/dev/null 2>&1; then + echo 'Warning: Token payload is not valid JSON when base64 decoded' >&2 + else + echo 'Token payload appears to be valid JSON' + fi + " + networks: + - r2r-network + environment: + POSTGRES_USER: "${POSTGRES_USER:-postgres}" + POSTGRES_PASSWORD: "${POSTGRES_PASSWORD:-postgres}" + POSTGRES_HOST: "${POSTGRES_HOST:-postgres}" + POSTGRES_PORT: "${POSTGRES_PORT:-5432}" + POSTGRES_DBNAME: "${HATCHET_POSTGRES_DBNAME:-hatchet}" + DATABASE_URL: "postgres://${POSTGRES_USER:-postgres}:${POSTGRES_PASSWORD:-postgres}@${POSTGRES_HOST:-postgres}:${POSTGRES_PORT:-5432}/${POSTGRES_DBNAME:-postgres}?sslmode=disable" + DATABASE_POSTGRES_PORT: "${POSTGRES_PORT:-5432}" + DATABASE_POSTGRES_HOST: "${POSTGRES_HOST:-postgres}" + DATABASE_POSTGRES_USERNAME: "${POSTGRES_USER:-postgres}" + DATABASE_POSTGRES_PASSWORD: "${POSTGRES_PASSWORD:-postgres}" + DATABASE_POSTGRES_DB_NAME: "${HATCHET_POSTGRES_DBNAME:-hatchet}" + volumes: + - hatchet_certs:/hatchet/certs + - hatchet_config:/hatchet/config + - hatchet_api_key:/hatchet_api_key + depends_on: + postgres: + condition: service_healthy + hatchet-setup-config: + condition: service_completed_successfully + + unstructured: + image: ${UNSTRUCTURED_IMAGE:-ragtoriches/unst-prod} + ports: + - "7275:7275" + networks: + - r2r-network + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:7275/health"] + interval: 10s + timeout: 5s + retries: 5 + r2r: + image: ${R2R_IMAGE:-ragtoriches/prod:latest-unstructured} + build: + context: . 
+ args: + PORT: ${PORT:-7272} + ports: + - "${PORT:-7272}:${PORT:-7272}" + environment: + - PYTHONUNBUFFERED=1 + - PORT=${PORT:-7272} + - HOST=${HOST:-0.0.0.0} + - CONFIG_NAME=${CONFIG_NAME:-} + - CONFIG_PATH=${CONFIG_PATH:-} + - POSTGRES_USER=${POSTGRES_USER:-postgres} + - POSTGRES_PASSWORD=${POSTGRES_PASSWORD:-postgres} + - POSTGRES_HOST=${POSTGRES_HOST:-postgres} + - POSTGRES_PORT=${POSTGRES_PORT:-5432} + - POSTGRES_DBNAME=${POSTGRES_DBNAME:-postgres} + - POSTGRES_PROJECT_NAME=${POSTGRES_PROJECT_NAME:-r2r_default} + - OPENAI_API_KEY=${OPENAI_API_KEY:-} + - OPENAI_API_BASE=${OPENAI_API_BASE:-} + - ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY:-} + - AZURE_API_KEY=${AZURE_API_KEY:-} + - AZURE_API_BASE=${AZURE_API_BASE:-} + - AZURE_API_VERSION=${AZURE_API_VERSION:-} + - GOOGLE_APPLICATION_CREDENTIALS=${GOOGLE_APPLICATION_CREDENTIALS:-} + - VERTEX_PROJECT=${VERTEX_PROJECT:-} + - VERTEX_LOCATION=${VERTEX_LOCATION:-} + - AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID:-} + - AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY:-} + - AWS_REGION_NAME=${AWS_REGION_NAME:-} + - GROQ_API_KEY=${GROQ_API_KEY:-} + - COHERE_API_KEY=${COHERE_API_KEY:-} + - ANYSCALE_API_KEY=${ANYSCALE_API_KEY:-} + - OLLAMA_API_BASE=${OLLAMA_API_BASE:-http://host.docker.internal:11434} + - UNSTRUCTURED_API_KEY=${UNSTRUCTURED_API_KEY:-} + - UNSTRUCTURED_API_URL=${UNSTRUCTURED_API_URL:-https://api.unstructured.io/general/v0/general} + - UNSTRUCTURED_LOCAL_URL=${UNSTRUCTURED_LOCAL_URL:-http://unstructured:7275} + - UNSTRUCTURED_NUM_WORKERS=${UNSTRUCTURED_NUM_WORKERS:-10} + - HATCHET_CLIENT_TLS_STRATEGY=none + - HATCHET_CLIENT_GRPC_MAX_RECV_MESSAGE_LENGTH=${HATCHET_CLIENT_GRPC_MAX_RECV_MESSAGE_LENGTH:-13421772800} + - HATCHET_CLIENT_GRPC_MAX_SEND_MESSAGE_LENGTH=${HATCHET_CLIENT_GRPC_MAX_SEND_MESSAGE_LENGTH:-13421772800} + command: > + sh -c ' + if [ -z "$${HATCHET_CLIENT_TOKEN}" ]; then + export HATCHET_CLIENT_TOKEN=$$(cat /hatchet_api_key/api_key.txt) + fi + exec uvicorn core.main.app_entry:app --host $${HOST} --port $${PORT} + ' + networks: + - r2r-network + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:${PORT:-7272}/v2/health"] + interval: 6s + timeout: 5s + retries: 5 + restart: on-failure + volumes: + - ${CONFIG_PATH:-/}:${CONFIG_PATH:-/app/config} + - hatchet_api_key:/hatchet_api_key:ro + labels: + - "traefik.enable=true" + - "traefik.http.routers.r2r.rule=PathPrefix(`/r2r-api`)" + - "traefik.http.services.r2r.loadbalancer.server.port=${PORT:-7272}" + - "traefik.http.middlewares.r2r-strip-prefix.stripprefix.prefixes=/api" + - "traefik.http.middlewares.r2r-strip-prefix.stripprefix.prefixes=/r2r-api" + - "traefik.http.middlewares.r2r-add-v2.addprefix.prefix=/v2" + - "traefik.http.routers.r2r.middlewares=r2r-strip-prefix,r2r-add-v2,r2r-headers" + - "traefik.http.middlewares.r2r-headers.headers.customrequestheaders.Access-Control-Allow-Origin=*" + - "traefik.http.middlewares.r2r-headers.headers.customrequestheaders.Access-Control-Allow-Methods=GET,POST,OPTIONS" + - "traefik.http.middlewares.r2r-headers.headers.customrequestheaders.Access-Control-Allow-Headers=DNT,User-Agent,X-Requested-With,If-Modified-Since,Cache-Control,Content-Type,Range,Authorization" + - "traefik.http.middlewares.r2r-headers.headers.customresponseheaders.Access-Control-Expose-Headers=Content-Length,Content-Range" + extra_hosts: + - host.docker.internal:host-gateway + depends_on: + setup-token: + condition: service_completed_successfully + postgres: + condition: service_healthy + unstructured: + condition: service_healthy + + r2r-dashboard: + image: 
emrgntcmplxty/r2r-dashboard:latest + environment: + - NEXT_PUBLIC_API_URL=http://localhost:${R2R_DASHBOARD_PORT:-8001}/r2r-api + networks: + - r2r-network + labels: + - "traefik.enable=true" + - "traefik.http.routers.r2r-dashboard.rule=PathPrefix(`/`)" + - "traefik.http.services.r2r-dashboard.loadbalancer.server.port=3000" + - "traefik.http.routers.r2r-dashboard.entrypoints=r2r" + + traefik: + image: traefik:v2.9 + command: + - "--api.insecure=true" + - "--providers.docker=true" + - "--providers.docker.exposedbydefault=false" + - "--entrypoints.r2r.address=:${R2R_DASHBOARD_PORT:-8001}" + - "--entrypoints.hatchet.address=:${HATCHET_DASHBOARD_PORT:-8002}" + - "--accesslog=true" + - "--accesslog.filepath=/var/log/traefik/access.log" + ports: + - "${R2R_DASHBOARD_PORT:-8001}:${R2R_DASHBOARD_PORT:-8001}" + - "${HATCHET_DASHBOARD_PORT:-8002}:${HATCHET_DASHBOARD_PORT:-8002}" + - "${TRAEFIK_DASHBOARD_PORT:-8081}:${TRAEFIK_DASHBOARD_PORT:-8081}" + volumes: + - /var/run/docker.sock:/var/run/docker.sock:ro + networks: + - r2r-network + labels: + - "traefik.http.middlewares.no-cache-headers.headers.customResponseHeaders.Cache-Control=no-cache, no-store, must-revalidate" + - "traefik.http.routers.hatchet-api.middlewares=no-cache-headers" diff --git a/py/compose.hatchet.yaml b/py/compose.hatchet.yaml deleted file mode 100644 index 1032b34a8..000000000 --- a/py/compose.hatchet.yaml +++ /dev/null @@ -1,186 +0,0 @@ -networks: - r2r-network: - name: r2r-network - -services: - hatchet-rabbitmq: - image: "rabbitmq:3-management" - hostname: "hatchet-rabbitmq" - ports: - - "5673:5672" - - "15673:15672" - environment: - RABBITMQ_DEFAULT_USER: "user" - RABBITMQ_DEFAULT_PASS: "password" - volumes: - - "hatchet_rabbitmq_data:/var/lib/rabbitmq" - - "hatchet_rabbitmq.conf:/etc/rabbitmq/rabbitmq.conf" - healthcheck: - test: ["CMD", "rabbitmqctl", "status"] - interval: 10s - timeout: 10s - retries: 5 - networks: - - r2r-network - hatchet-create-db: - image: postgres:latest - command: > - sh -c " - set -e - echo 'Waiting for PostgreSQL to be ready...' - while ! pg_isready -h postgres -p 5432 -U postgres; do - sleep 1 - done - echo 'PostgreSQL is ready, checking if database exists...' - if ! PGPASSWORD=${POSTGRES_PASSWORD:-postgres} psql -h postgres -p 5432 -U ${POSTGRES_USER:-postgres} -lqt | grep -qw ${HATCHET_POSTGRES_DBNAME:-hatchet}; then - echo 'Database does not exist, creating it...' - PGPASSWORD=${POSTGRES_PASSWORD:-postgres} createdb -h postgres -p 5432 -U ${POSTGRES_USER:-postgres} -w ${HATCHET_POSTGRES_DBNAME:-hatchet} - else - echo 'Database already exists, skipping creation.' 
- fi - " - environment: - POSTGRES_USER: "${POSTGRES_USER:-postgres}" - POSTGRES_PASSWORD: "${POSTGRES_PASSWORD:-postgres}" - POSTGRES_HOST: "${POSTGRES_HOST:-postgres}" - POSTGRES_PORT: "${POSTGRES_PORT:-5432}" - HATCHET_POSTGRES_DBNAME: "${HATCHET_POSTGRES_DBNAME:-hatchet}" - depends_on: - postgres: - condition: service_healthy - networks: - - r2r-network - hatchet-migration: - image: ghcr.io/hatchet-dev/hatchet/hatchet-migrate:latest - environment: - POSTGRES_USER: "${POSTGRES_USER:-postgres}" - POSTGRES_PASSWORD: "${POSTGRES_PASSWORD:-postgres}" - POSTGRES_HOST: "${POSTGRES_HOST:-postgres}" - POSTGRES_PORT: "${POSTGRES_PORT:-5432}" - HATCHET_POSTGRES_DBNAME: "${HATCHET_POSTGRES_DBNAME:-hatchet}" - DATABASE_URL: "postgres://${POSTGRES_USER:-postgres}:${POSTGRES_PASSWORD:-postgres}@${POSTGRES_HOST:-postgres}:${POSTGRES_PORT:-5432}/${HATCHET_POSTGRES_DBNAME:-hatchet}?sslmode=disable" - DATABASE_POSTGRES_PORT: "${POSTGRES_PORT:-5432}" - DATABASE_POSTGRES_HOST: "${POSTGRES_HOST:-postgres}" - DATABASE_POSTGRES_USERNAME: "${POSTGRES_USER:-postgres}" - DATABASE_POSTGRES_PASSWORD: "${POSTGRES_PASSWORD:-postgres}" - HATCHET_DATABASE_POSTGRES_DB_NAME: "${HATCHET_POSTGRES_DBNAME:-hatchet}" - depends_on: - hatchet-create-db: - condition: service_completed_successfully - postgres: - condition: service_healthy - networks: - - r2r-network - - hatchet-setup-config: - image: ghcr.io/hatchet-dev/hatchet/hatchet-admin:latest - command: /hatchet/hatchet-admin quickstart --skip certs --generated-config-dir /hatchet/config --overwrite=false - environment: - POSTGRES_USER: "${POSTGRES_USER:-postgres}" - POSTGRES_PASSWORD: "${POSTGRES_PASSWORD:-postgres}" - POSTGRES_HOST: "${POSTGRES_HOST:-postgres}" - POSTGRES_PORT: "${POSTGRES_PORT:-5432}" - HATCHET_POSTGRES_DBNAME: "${HATCHET_POSTGRES_DBNAME:-hatchet}" - DATABASE_URL: "postgres://${POSTGRES_USER:-postgres}:${POSTGRES_PASSWORD:-postgres}@${POSTGRES_HOST:-postgres}:${POSTGRES_PORT:-5432}/${HATCHET_POSTGRES_DBNAME:-hatchet}?sslmode=disable" - DATABASE_POSTGRES_PORT: "${POSTGRES_PORT:-5432}" - DATABASE_POSTGRES_HOST: "${POSTGRES_HOST:-postgres}" - DATABASE_POSTGRES_USERNAME: "${POSTGRES_USER:-postgres}" - DATABASE_POSTGRES_PASSWORD: "${POSTGRES_PASSWORD:-postgres}" - HATCHET_DATABASE_POSTGRES_DB_NAME: "${HATCHET_POSTGRES_DBNAME:-hatchet}" - SERVER_TASKQUEUE_RABBITMQ_URL: amqp://user:password@hatchet-rabbitmq:5672/ - SERVER_AUTH_COOKIE_DOMAIN: "http://host.docker.internal:7274" - SERVER_URL: "http://host.docker.internal:7274" - SERVER_AUTH_COOKIE_INSECURE: "t" - SERVER_GRPC_BIND_ADDRESS: "0.0.0.0" - SERVER_GRPC_INSECURE: "t" - SERVER_GRPC_BROADCAST_ADDRESS: "host.docker.internal:7077" - SERVER_GRPC_MAX_MSG_SIZE: 13421772800 - - volumes: - - hatchet_certs:/hatchet/certs - - hatchet_config:/hatchet/config - depends_on: - hatchet-migration: - condition: service_completed_successfully - hatchet-rabbitmq: - condition: service_healthy - postgres: - condition: service_healthy - networks: - - r2r-network - - hatchet-engine: - image: ghcr.io/hatchet-dev/hatchet/hatchet-engine:latest - command: /hatchet/hatchet-engine --config /hatchet/config - restart: on-failure - depends_on: - hatchet-setup-config: - condition: service_completed_successfully - ports: - - "7077:7077" - environment: - POSTGRES_USER: "${POSTGRES_USER:-postgres}" - POSTGRES_PASSWORD: "${POSTGRES_PASSWORD:-postgres}" - POSTGRES_HOST: "${POSTGRES_HOST:-postgres}" - POSTGRES_PORT: "${POSTGRES_PORT:-5432}" - HATCHET_POSTGRES_DBNAME: "${HATCHET_POSTGRES_DBNAME:-hatchet}" - DATABASE_URL: 
"postgres://${POSTGRES_USER:-postgres}:${POSTGRES_PASSWORD:-postgres}@${POSTGRES_HOST:-postgres}:${POSTGRES_PORT:-5432}/${HATCHET_POSTGRES_DBNAME:-hatchet}?sslmode=disable" - SERVER_GRPC_BROADCAST_ADDRESS: "host.docker.internal:7077" - SERVER_GRPC_BIND_ADDRESS: "0.0.0.0" - SERVER_GRPC_PORT: 7077 - SERVER_GRPC_INSECURE: "t" - SERVER_GRPC_MAX_MSG_SIZE: 13421772800 - volumes: - - hatchet_certs:/hatchet/certs - - hatchet_config:/hatchet/config - networks: - r2r-network: - aliases: - - host.docker.internal - healthcheck: - test: ["CMD", "curl", "-f", "http://localhost:7077/health"] - interval: 10s - timeout: 5s - retries: 5 - - hatchet-api: - image: ghcr.io/hatchet-dev/hatchet/hatchet-api:latest - command: /hatchet/hatchet-api --config /hatchet/config - restart: on-failure - depends_on: - hatchet-setup-config: - condition: service_completed_successfully - environment: - POSTGRES_USER: "${POSTGRES_USER:-postgres}" - POSTGRES_PASSWORD: "${POSTGRES_PASSWORD:-postgres}" - POSTGRES_HOST: "${POSTGRES_HOST:-postgres}" - POSTGRES_PORT: "${POSTGRES_PORT:-5432}" - HATCHET_POSTGRES_DBNAME: "${HATCHET_POSTGRES_DBNAME:-hatchet}" - DATABASE_URL: "postgres://${POSTGRES_USER:-postgres}:${POSTGRES_PASSWORD:-postgres}@${POSTGRES_HOST:-postgres}:${POSTGRES_PORT:-5432}/${HATCHET_POSTGRES_DBNAME:-hatchet}?sslmode=disable" - volumes: - - hatchet_certs:/hatchet/certs - - hatchet_config:/hatchet/config - networks: - - r2r-network - labels: - - "traefik.enable=true" - - "traefik.http.routers.hatchet-api.rule=PathPrefix(`/api`)" - - "traefik.http.services.hatchet-api.loadbalancer.server.port=8080" - - "traefik.http.routers.hatchet-api.entrypoints=hatchet" - - hatchet-dashboard: - image: ghcr.io/hatchet-dev/hatchet/hatchet-frontend:latest - networks: - - r2r-network - labels: - - "traefik.enable=true" - - "traefik.http.routers.hatchet-dashboard.rule=PathPrefix(`/`)" - - "traefik.http.services.hatchet-frontend.loadbalancer.server.port=80" - - "traefik.http.routers.hatchet-dashboard.entrypoints=hatchet" - -volumes: - hatchet_rabbitmq_data: - hatchet_rabbitmq.conf: - hatchet_config: - hatchet_certs: - hatchet_api_key: diff --git a/py/compose.neo4j.yaml b/py/compose.neo4j.yaml deleted file mode 100644 index 689ef30d8..000000000 --- a/py/compose.neo4j.yaml +++ /dev/null @@ -1,41 +0,0 @@ -networks: - r2r-network: - name: r2r-network - -services: - r2r: - depends_on: - neo4j: - condition: service_healthy - - neo4j: - image: neo4j:5.21.0 - ports: - - "7474:7474" # HTTP - - "7687:7687" # Bolt - environment: - - NEO4J_AUTH=${NEO4J_AUTH:-neo4j/ineedastrongerpassword} - - NEO4J_server_memory_pagecache_size=${NEO4J_PAGECACHE_SIZE:-2G} - - NEO4J_server_memory_heap_max__size=${NEO4J_HEAP_SIZE:-2G} - - NEO4J_apoc_export_file_enabled=true - - NEO4J_apoc_import_file_enabled=true - - NEO4J_apoc_import_file_use__neo4j__config=true - - NEO4J_PLUGINS=["apoc", "graph-data-science"] - - NEO4J_dbms_security_procedures_unrestricted=apoc.*,gds.* - - NEO4J_dbms_security_procedures_allowlist=apoc.*,gds.* - volumes: - - neo4j_data:/data - - neo4j_logs:/logs - - neo4j_plugins:/plugins - networks: - - r2r-network - healthcheck: - test: ["CMD", "neo4j", "status"] - interval: 10s - timeout: 5s - retries: 5 - -volumes: - neo4j_data: - neo4j_logs: - neo4j_plugins: diff --git a/py/compose.ollama.yaml b/py/compose.ollama.yaml deleted file mode 100644 index 9765cc1cb..000000000 --- a/py/compose.ollama.yaml +++ /dev/null @@ -1,28 +0,0 @@ -networks: - r2r-network: - name: r2r-network - -services: - r2r: - depends_on: - ollama: - condition: service_healthy - - 
ollama: - image: ollama/ollama - ports: - - "11434:11434" - environment: - - OLLAMA_HOST=0.0.0.0 - volumes: - - ollama_data:/root/.ollama - networks: - - r2r-network - healthcheck: - test: ["CMD", "ollama", "ps"] - interval: 10s - timeout: 5s - retries: 5 - -volumes: - ollama_data: diff --git a/py/compose.postgres.yaml b/py/compose.postgres.yaml deleted file mode 100644 index 31d189f23..000000000 --- a/py/compose.postgres.yaml +++ /dev/null @@ -1,26 +0,0 @@ -networks: - r2r-network: - name: r2r-network - -services: - postgres: - image: pgvector/pgvector:pg16 - environment: - - POSTGRES_USER=${POSTGRES_USER:-postgres} - - POSTGRES_PASSWORD=${POSTGRES_PASSWORD:-postgres} - - POSTGRES_DB=${POSTGRES_DBNAME:-postgres} - volumes: - - postgres_data:/var/lib/postgresql/data - networks: - - r2r-network - ports: - - "5432:5432" - healthcheck: - test: ["CMD-SHELL", "pg_isready -U ${POSTGRES_USER:-postgres}"] - interval: 10s - timeout: 5s - retries: 5 - restart: on-failure - -volumes: - postgres_data: diff --git a/py/compose.yaml b/py/compose.yaml index 931b7b7a4..4b20c2d72 100644 --- a/py/compose.yaml +++ b/py/compose.yaml @@ -1,12 +1,6 @@ x-depends-on: - neo4j: &neo4j-dependency - condition: service_healthy - ollama: &ollama-dependency - condition: service_healthy postgres: &postgres-dependency condition: service_healthy - hatchet-engine: &hatchet-dependency - condition: service_healthy networks: r2r-network: @@ -21,85 +15,26 @@ networks: - "com.docker.compose.recreate=always" services: - - setup-token: - image: ghcr.io/hatchet-dev/hatchet/hatchet-admin:latest - - command: > - sh -c " - set -e - echo 'Starting token creation process...' - - # Attempt to create token and capture both stdout and stderr - TOKEN_OUTPUT=$$(/hatchet/hatchet-admin token create --config /hatchet/config --tenant-id 707d0855-80ab-4e1f-a156-f1c4546cbf52 2>&1) - - # Extract the token (assuming it's the only part that looks like a JWT) - TOKEN=$$(echo "$$TOKEN_OUTPUT" | grep -Eo 'eyJ[A-Za-z0-9_-]*\.eyJ[A-Za-z0-9_-]*\.[A-Za-z0-9_-]*') - - if [ -z "$$TOKEN" ]; then - echo 'Error: Failed to extract token. Full command output:' >&2 - echo "$$TOKEN_OUTPUT" >&2 - exit 1 - fi - - echo "$$TOKEN" > /tmp/hatchet_api_key - echo 'Token created and saved to /tmp/hatchet_api_key' - - # Copy token to final destination - echo -n "$$TOKEN" > /hatchet_api_key/api_key.txt - echo 'Token copied to /hatchet_api_key/api_key.txt' - - # Verify token was copied correctly - if [ "$$(cat /tmp/hatchet_api_key)" != "$$(cat /hatchet_api_key/api_key.txt)" ]; then - echo 'Error: Token copy failed, files do not match' >&2 - echo 'Content of /tmp/hatchet_api_key:' - cat /tmp/hatchet_api_key - echo 'Content of /hatchet_api_key/api_key.txt:' - cat /hatchet_api_key/api_key.txt - exit 1 - fi - - echo 'Hatchet API key has been saved successfully' - echo 'Token length:' $${#TOKEN} - echo 'Token (first 20 chars):' $${TOKEN:0:20} - echo 'Token structure:' $$(echo $$TOKEN | awk -F. '{print NF-1}') 'parts' - # Check each part of the token - for i in 1 2 3; do - PART=$$(echo $$TOKEN | cut -d. -f$$i) - echo 'Part' $$i 'length:' $${#PART} - echo 'Part' $$i 'base64 check:' $$(echo $$PART | base64 -d >/dev/null 2>&1 && echo 'Valid' || echo 'Invalid') - done - # Final validation attempt - if ! echo $$TOKEN | awk -F. '{print $$2}' | base64 -d 2>/dev/null | jq . 
>/dev/null 2>&1; then - echo 'Warning: Token payload is not valid JSON when base64 decoded' >&2 - else - echo 'Token payload appears to be valid JSON' - fi - " - - networks: - - r2r-network + postgres: + image: pgvector/pgvector:pg16 environment: - POSTGRES_USER: "${POSTGRES_USER:-postgres}" - POSTGRES_PASSWORD: "${POSTGRES_PASSWORD:-postgres}" - POSTGRES_HOST: "${POSTGRES_HOST:-postgres}" - POSTGRES_PORT: "${POSTGRES_PORT:-5432}" - POSTGRES_DBNAME: "${HATCHET_POSTGRES_DBNAME:-hatchet}" - DATABASE_URL: "postgres://${POSTGRES_USER:-postgres}:${POSTGRES_PASSWORD:-postgres}@${POSTGRES_HOST:-postgres}:${POSTGRES_PORT:-5432}/${POSTGRES_DBNAME:-postgres}?sslmode=disable" - DATABASE_POSTGRES_PORT: "${POSTGRES_PORT:-5432}" - DATABASE_POSTGRES_HOST: "${POSTGRES_HOST:-postgres}" - DATABASE_POSTGRES_USERNAME: "${POSTGRES_USER:-postgres}" - DATABASE_POSTGRES_PASSWORD: "${POSTGRES_PASSWORD:-postgres}" - DATABASE_POSTGRES_DB_NAME: "${HATCHET_POSTGRES_DBNAME:-hatchet}" + - POSTGRES_USER=${POSTGRES_USER:-postgres} + - POSTGRES_PASSWORD=${POSTGRES_PASSWORD:-postgres} + - POSTGRES_DB=${POSTGRES_DBNAME:-postgres} + - POSTGRES_MAX_CONNECTIONS=${POSTGRES_MAX_CONNECTIONS:-1024} volumes: - - hatchet_certs:/hatchet/certs - - hatchet_config:/hatchet/config - - hatchet_api_key:/hatchet_api_key - depends_on: - postgres: - condition: service_healthy - hatchet-setup-config: - condition: service_completed_successfully + - postgres_data:/var/lib/postgresql/data + networks: + - r2r-network + ports: + - "5432:5432" + healthcheck: + test: ["CMD-SHELL", "pg_isready -U ${POSTGRES_USER:-postgres}"] + interval: 10s + timeout: 5s + retries: 5 + restart: on-failure + r2r: image: ${R2R_IMAGE:-ragtoriches/prod:latest-unstructured} build: @@ -124,13 +59,7 @@ services: - POSTGRES_PORT=${POSTGRES_PORT:-5432} - POSTGRES_DBNAME=${POSTGRES_DBNAME:-postgres} - POSTGRES_PROJECT_NAME=${POSTGRES_PROJECT_NAME:-${CONFIG_NAME:-vecs}} - - POSTGRES_VECS_COLLECTION=${POSTGRES_VECS_COLLECTION:-${CONFIG_NAME:-vecs}} - - # Neo4j - - NEO4J_USER=${NEO4J_USER:-neo4j} - - NEO4J_PASSWORD=${NEO4J_PASSWORD:-ineedastrongerpassword} - - NEO4J_URL=${NEO4J_URL:-bolt://neo4j:7687} - - NEO4J_DATABASE=${NEO4J_DATABASE:-neo4j} + - POSTGRES_MAX_CONNECTIONS=${POSTGRES_MAX_CONNECTIONS:-} # OpenAI - OPENAI_API_KEY=${OPENAI_API_KEY:-} @@ -166,25 +95,6 @@ services: # Ollama - OLLAMA_API_BASE=${OLLAMA_API_BASE:-http://host.docker.internal:11434} - # Unstructured - - UNSTRUCTURED_API_KEY=${UNSTRUCTURED_API_KEY:-} - - UNSTRUCTURED_API_URL=${UNSTRUCTURED_API_URL:-https://api.unstructured.io/general/v0/general} - - UNSTRUCTURED_LOCAL_URL=${UNSTRUCTURED_LOCAL_URL:-http://unstructured:7275} - - UNSTRUCTURED_NUM_WORKERS=${UNSTRUCTURED_NUM_WORKERS:-10} - - # Hatchet - - HATCHET_CLIENT_TLS_STRATEGY=none - - HATCHET_CLIENT_TOKEN - - HATCHET_CLIENT_GRPC_MAX_RECV_MESSAGE_LENGTH=${HATCHET_CLIENT_GRPC_MAX_RECV_MESSAGE_LENGTH:-13421772800} - HATCHET_CLIENT_GRPC_MAX_SEND_MESSAGE_LENGTH=${HATCHET_CLIENT_GRPC_MAX_SEND_MESSAGE_LENGTH:-13421772800} - command: > - sh -c ' - if [ -z "$${HATCHET_CLIENT_TOKEN}" ]; then - export HATCHET_CLIENT_TOKEN=$$(cat /hatchet_api_key/api_key.txt) - fi - exec uvicorn core.main.app_entry:app --host $${HOST} --port $${PORT} - ' networks: - r2r-network healthcheck: @@ -195,7 +105,6 @@ services: restart: on-failure volumes: - ${CONFIG_PATH:-/}:${CONFIG_PATH:-/app/config} - - hatchet_api_key:/hatchet_api_key:ro labels: - "traefik.enable=true" - "traefik.http.routers.r2r.rule=PathPrefix(`/r2r-api`)" @@ -211,14 +120,8 @@ services: extra_hosts: - 
host.docker.internal:host-gateway depends_on: - setup-token: - condition: service_completed_successfully postgres: condition: service_healthy - neo4j: - condition: service_healthy - unstructured: - condition: service_healthy r2r-dashboard: image: emrgntcmplxty/r2r-dashboard:latest @@ -239,12 +142,10 @@ services: - "--providers.docker=true" - "--providers.docker.exposedbydefault=false" - "--entrypoints.r2r.address=:${R2R_DASHBOARD_PORT:-8001}" - - "--entrypoints.hatchet.address=:${HATCHET_DASHBOARD_PORT:-8002}" - "--accesslog=true" - "--accesslog.filepath=/var/log/traefik/access.log" ports: - "${R2R_DASHBOARD_PORT:-8001}:${R2R_DASHBOARD_PORT:-8001}" - - "${HATCHET_DASHBOARD_PORT:-8002}:${HATCHET_DASHBOARD_PORT:-8002}" - "${TRAEFIK_DASHBOARD_PORT:-8081}:${TRAEFIK_DASHBOARD_PORT:-8081}" volumes: - /var/run/docker.sock:/var/run/docker.sock:ro @@ -254,19 +155,5 @@ services: - "traefik.http.middlewares.no-cache-headers.headers.customResponseHeaders.Cache-Control=no-cache, no-store, must-revalidate" - "traefik.http.routers.hatchet-api.middlewares=no-cache-headers" - unstructured: - image: ${UNSTRUCTURED_IMAGE:-ragtoriches/unst-prod} - ports: - - "7275:7275" - networks: - - r2r-network - healthcheck: - test: ["CMD", "curl", "-f", "http://localhost:7275/health"] - interval: 10s - timeout: 5s - retries: 5 - volumes: - hatchet_certs: - hatchet_config: - hatchet_api_key: + postgres_data: \ No newline at end of file diff --git a/py/core/__init__.py b/py/core/__init__.py index b554282ce..be0ec7f7d 100644 --- a/py/core/__init__.py +++ b/py/core/__init__.py @@ -4,7 +4,6 @@ # corresponding flake8 error codes are F403, F405 from .agent import * from .base import * -from .integrations import * from .main import * from .parsers import * from .pipelines import * @@ -30,6 +29,9 @@ # Optional: Prevent propagation to the root logger logger.propagate = False +logging.getLogger("httpx").setLevel(logging.WARNING) +logging.getLogger("LiteLLM").setLevel(logging.WARNING) + __all__ = [ ## AGENT @@ -50,10 +52,10 @@ "DataType", "Document", "DocumentExtraction", - "DocumentFragment", "DocumentInfo", "IngestionStatus", - "RestructureStatus", + "KGCreationStatus", + "KGEnrichmentStatus", "DocumentType", # Embedding abstractions "EmbeddingPurpose", @@ -133,12 +135,6 @@ # Auth provider "AuthConfig", "AuthProvider", - # Chunking provider - "UnstructuredChunkingConfig", - "R2RChunkingConfig", - "ChunkingConfig", - "ChunkingProvider", - "Strategy", # Crypto provider "CryptoConfig", "CryptoProvider", @@ -156,10 +152,6 @@ # LLM provider "CompletionConfig", "CompletionProvider", - # Parsing provider - "ParsingConfig", - "ParsingProvider", - "OverrideParser", # Prompt provider "PromptConfig", "PromptProvider", @@ -175,9 +167,6 @@ "RelationshipType", "format_entity_types", "format_relations", - ## INTEGRATIONS - # Serper - "SerperClient", ## MAIN ## R2R ABSTRACTIONS "R2RProviders", @@ -203,19 +192,18 @@ "IngestionRouter", "ManagementRouter", "RetrievalRouter", - "RestructureRouter", + "KGRouter", ## R2R SERVICES "AuthService", "IngestionService", "ManagementService", "RetrievalService", - "RestructureService", + "KgService", ## PARSERS # Media parsers "AudioParser", "DOCXParser", "ImageParser", - "MovieParser", "PDFParser", "PDFParserUnstructured", "PDFParserMarker", @@ -238,7 +226,6 @@ "EmbeddingPipe", "KGTriplesExtractionPipe", "ParsingPipe", - "ChunkingPipe", "QueryTransformPipe", "SearchRAGPipe", "StreamingSearchRAGPipe", @@ -250,9 +237,6 @@ # Auth "SupabaseAuthProvider", "R2RAuthProvider", - # Chunking - "R2RChunkingProvider", - 
"UnstructuredChunkingProvider", # Crypto "BCryptProvider", "BCryptConfig", @@ -263,13 +247,14 @@ "OllamaEmbeddingProvider", "OpenAIEmbeddingProvider", # KG - "Neo4jKGProvider", + "PostgresKGProvider", # LLM "OpenAICompletionProvider", "LiteCompletionProvider", - # Parsing - "R2RParsingProvider", - "UnstructuredParsingProvider", + # Ingestion + "UnstructuredIngestionProvider", + "R2RIngestionProvider", + "ChunkingStrategy", # Prompts "R2RPromptProvider", ] diff --git a/py/core/base/__init__.py b/py/core/base/__init__.py index b3f34af79..4a42d0307 100644 --- a/py/core/base/__init__.py +++ b/py/core/base/__init__.py @@ -20,10 +20,10 @@ "DataType", "Document", "DocumentExtraction", - "DocumentFragment", "DocumentInfo", "IngestionStatus", - "RestructureStatus", + "KGCreationStatus", + "KGEnrichmentStatus", "DocumentType", # Embedding abstractions "EmbeddingPurpose", @@ -48,7 +48,7 @@ "KGSearchSettings", "VectorSearchResult", "VectorSearchSettings", - # Restructure abstractions + # KG abstractions "KGCreationSettings", "KGEnrichmentSettings", # User abstractions @@ -107,12 +107,6 @@ # Auth provider "AuthConfig", "AuthProvider", - # Chunking provider - "R2RChunkingConfig", - "UnstructuredChunkingConfig", - "ChunkingConfig", - "ChunkingProvider", - "Strategy", # Crypto provider "CryptoConfig", "CryptoProvider", @@ -121,22 +115,23 @@ "DatabaseProvider", "RelationalDBProvider", "VectorDBProvider", + "PostgresConfigurationSettings", # Embedding provider "EmbeddingConfig", "EmbeddingProvider", # File provider "FileConfig", "FileProvider", + # Ingestion provider + "IngestionConfig", + "IngestionProvider", + "ChunkingStrategy", # Knowledge Graph provider "KGConfig", "KGProvider", # LLM provider "CompletionConfig", "CompletionProvider", - # Parsing provider - "ParsingConfig", - "ParsingProvider", - "OverrideParser", # Prompt provider "PromptConfig", "PromptProvider", @@ -149,6 +144,7 @@ "format_search_results_for_stream", "generate_run_id", "generate_id_from_label", + "generate_default_user_collection_id", "increment_version", "EntityType", "RelationshipType", diff --git a/py/core/base/abstractions/__init__.py b/py/core/base/abstractions/__init__.py index f9947e8fb..74d2e2252 100644 --- a/py/core/base/abstractions/__init__.py +++ b/py/core/base/abstractions/__init__.py @@ -4,11 +4,11 @@ DataType, Document, DocumentExtraction, - DocumentFragment, DocumentInfo, DocumentType, IngestionStatus, - RestructureStatus, + KGCreationStatus, + KGEnrichmentStatus, ) from shared.abstractions.embedding import ( EmbeddingPurpose, @@ -27,6 +27,7 @@ RelationshipType, Triple, ) +from shared.abstractions.kg import KGCreationSettings, KGEnrichmentSettings from shared.abstractions.llm import ( GenerationConfig, LLMChatCompletion, @@ -35,10 +36,6 @@ RAGCompletion, ) from shared.abstractions.prompt import Prompt -from shared.abstractions.restructure import ( - KGCreationSettings, - KGEnrichmentSettings, -) from shared.abstractions.search import ( AggregateSearchResult, HybridSearchSettings, @@ -73,10 +70,10 @@ "DataType", "Document", "DocumentExtraction", - "DocumentFragment", "DocumentInfo", "IngestionStatus", - "RestructureStatus", + "KGCreationStatus", + "KGEnrichmentStatus", "DocumentType", # Embedding abstractions "EmbeddingPurpose", diff --git a/py/core/base/api/models/__init__.py b/py/core/base/api/models/__init__.py index 777ad2ee5..b3d350afd 100644 --- a/py/core/base/api/models/__init__.py +++ b/py/core/base/api/models/__init__.py @@ -11,6 +11,12 @@ WrappedIngestionResponse, WrappedUpdateResponse, ) +from 
shared.api.models.kg.responses import ( + KGCreationResponse, + KGEnrichmentResponse, + WrappedKGCreationResponse, + WrappedKGEnrichmentResponse, +) from shared.api.models.management.responses import ( AnalyticsResponse, AppSettingsResponse, @@ -42,12 +48,6 @@ WrappedUserOverviewResponse, WrappedUsersInCollectionResponse, ) -from shared.api.models.restructure.responses import ( - KGCreationResponse, - KGEnrichmentResponse, - WrappedKGCreationResponse, - WrappedKGEnrichmentResponse, -) from shared.api.models.retrieval.responses import ( RAGAgentResponse, RAGResponse, diff --git a/py/core/base/logging/base.py b/py/core/base/logging/base.py index 3a947478d..9b9aeed93 100644 --- a/py/core/base/logging/base.py +++ b/py/core/base/logging/base.py @@ -9,4 +9,4 @@ class RunType(str, Enum): INGESTION = "INGESTION" AUTH = "AUTH" UNSPECIFIED = "UNSPECIFIED" - RESTRUCTURE = "RESTRUCTURE" + KG = "KG" diff --git a/py/core/base/logging/run_logger.py b/py/core/base/logging/run_logger.py index 0c84a48fa..f768c516f 100644 --- a/py/core/base/logging/run_logger.py +++ b/py/core/base/logging/run_logger.py @@ -33,7 +33,7 @@ def validate_config(self) -> None: @property def supported_providers(self) -> list[str]: - return ["local", "postgres", "redis"] + return ["local", "postgres"] class RunLoggingProvider(Provider): @@ -88,6 +88,10 @@ class LocalRunLoggingProvider(RunLoggingProvider): def __init__(self, config: LoggingConfig): self.log_table = config.log_table self.log_info_table = config.log_info_table + # TODO - Should we re-consider this naming convention? + # e.g. it is confusing to have `POSTGRES_PROJECT_NAME` refer + # to a global project name that is used in non-Postgres contexts + self.project_name = os.getenv("POSTGRES_PROJECT_NAME", "default") self.logging_path = config.logging_path or os.getenv( "LOCAL_DB_PATH", "local.sqlite" ) @@ -107,9 +111,10 @@ def __init__(self, config: LoggingConfig): async def _init(self): self.conn = await self.aiosqlite.connect(self.logging_path) + await self.conn.execute( f""" - CREATE TABLE IF NOT EXISTS {self.log_table} ( + CREATE TABLE IF NOT EXISTS {self.project_name}_{self.log_table} ( timestamp DATETIME, run_id TEXT, key TEXT, @@ -119,7 +124,7 @@ async def _init(self): ) await self.conn.execute( f""" - CREATE TABLE IF NOT EXISTS {self.log_info_table} ( + CREATE TABLE IF NOT EXISTS {self.project_name}_{self.log_info_table} ( timestamp DATETIME, run_id TEXT UNIQUE, run_type TEXT, @@ -155,7 +160,7 @@ async def log( await self.conn.execute( f""" - INSERT INTO {self.log_table} (timestamp, run_id, key, value) + INSERT INTO {self.project_name}_{self.log_table} (timestamp, run_id, key, value) VALUES (datetime('now'), ?, ?, ?) """, (str(run_id), key, value), @@ -175,7 +180,7 @@ async def info_log( await self.conn.execute( f""" - INSERT INTO {self.log_info_table} (timestamp, run_id, run_type, user_id) + INSERT INTO {self.project_name}_{self.log_info_table} (timestamp, run_id, run_type, user_id) VALUES (datetime('now'), ?, ?, ?) ON CONFLICT(run_id) DO UPDATE SET timestamp = datetime('now'), @@ -200,7 +205,7 @@ async def get_info_logs( cursor = await self.conn.cursor() query = "SELECT run_id, run_type, timestamp, user_id" - query += f" FROM {self.log_info_table}" + query += f" FROM {self.project_name}_{self.log_info_table}" conditions = [] params = [] if run_type_filter: @@ -241,7 +246,7 @@ async def get_logs( placeholders = ",".join(["?" 
for _ in run_ids]) query = f""" SELECT run_id, key, value, timestamp - FROM {self.log_table} + FROM {self.project_name}_{self.log_table} WHERE run_id IN ({placeholders}) ORDER BY timestamp DESC """ @@ -276,7 +281,7 @@ async def score_completion( cursor = await self.conn.cursor() await cursor.execute( - f"SELECT value FROM {self.log_table} WHERE run_id = ? AND key = 'completion_record'", + f"SELECT value FROM {self.project_name}_{self.log_table} WHERE run_id = ? AND key = 'completion_record'", (str(run_id),), ) row = await cursor.fetchone() @@ -302,7 +307,7 @@ async def score_completion( ] await cursor.execute( - f"UPDATE {self.log_table} SET value = ? WHERE run_id = ? AND key = 'completion_record'", + f"UPDATE {self.project_name}_{self.log_table} SET value = ? WHERE run_id = ? AND key = 'completion_record'", (json.dumps(completion_record), str(run_id)), ) @@ -339,6 +344,7 @@ def __init__(self, config: PostgresLoggingConfig): self.log_table = config.log_table self.log_info_table = config.log_info_table self.config = config + self.project_name = os.getenv("POSTGRES_PROJECT_NAME", "default") self.pool = None if not os.getenv("POSTGRES_DBNAME"): raise ValueError( @@ -371,9 +377,10 @@ async def _init(self): statement_cache_size=0, # Disable statement caching ) async with self.pool.acquire() as conn: + await conn.execute( f""" - CREATE TABLE IF NOT EXISTS {self.log_table} ( + CREATE TABLE IF NOT EXISTS {self.project_name}.{self.log_table} ( timestamp TIMESTAMPTZ, run_id UUID, key TEXT, @@ -383,7 +390,7 @@ async def _init(self): ) await conn.execute( f""" - CREATE TABLE IF NOT EXISTS {self.log_info_table} ( + CREATE TABLE IF NOT EXISTS {self.project_name}.{self.log_info_table} ( timestamp TIMESTAMPTZ, run_id UUID UNIQUE, run_type TEXT, @@ -418,7 +425,7 @@ async def log( async with self.pool.acquire() as conn: await conn.execute( - f"INSERT INTO {self.log_table} (timestamp, run_id, key, value) VALUES (NOW(), $1, $2, $3)", + f"INSERT INTO {self.project_name}.{self.log_table} (timestamp, run_id, key, value) VALUES (NOW(), $1, $2, $3)", run_id, key, value, @@ -437,7 +444,7 @@ async def info_log( async with self.pool.acquire() as conn: await conn.execute( - f"INSERT INTO {self.log_info_table} (timestamp, run_id, run_type, user_id) VALUES (NOW(), $1, $2, $3)", + f"INSERT INTO {self.project_name}.{self.log_info_table} (timestamp, run_id, run_type, user_id) VALUES (NOW(), $1, $2, $3)", run_id, run_type, user_id, @@ -455,7 +462,7 @@ async def get_info_logs( "Initialize the connection pool before attempting to log." 
) - query = f"SELECT run_id, run_type, timestamp, user_id FROM {self.log_info_table}" + query = f"SELECT run_id, run_type, timestamp, user_id FROM {self.project_name}.{self.log_info_table}" conditions = [] params = [] param_count = 1 @@ -502,7 +509,7 @@ async def get_logs( query = f""" SELECT * FROM ( SELECT *, ROW_NUMBER() OVER (PARTITION BY run_id ORDER BY timestamp DESC) as rn - FROM {self.log_table} + FROM {self.project_name}.{self.log_table} WHERE run_id::text IN ({placeholders}) ) sub WHERE sub.rn <= ${len(run_ids) + 1} @@ -523,7 +530,7 @@ async def score_completion( async with self.pool.acquire() as conn: row = await conn.fetchrow( - f"SELECT value FROM {self.log_table} WHERE run_id = $1 AND key = 'completion_record'", + f"SELECT value FROM {self.project_name}.{self.log_table} WHERE run_id = $1 AND key = 'completion_record'", run_id, ) @@ -550,7 +557,7 @@ async def score_completion( ] await conn.execute( - f"UPDATE {self.log_table} SET value = $1 WHERE run_id = $2 AND key = 'completion_record'", + f"UPDATE {self.project_name}.{self.log_table} SET value = $1 WHERE run_id = $2 AND key = 'completion_record'", json.dumps(completion_record), run_id, ) diff --git a/py/core/base/pipeline/base_pipeline.py b/py/core/base/pipeline/base_pipeline.py index 0414ef90f..e4e6d2fa5 100644 --- a/py/core/base/pipeline/base_pipeline.py +++ b/py/core/base/pipeline/base_pipeline.py @@ -67,18 +67,21 @@ async def run( **kwargs, ) self.futures[config_name].set_result(current_input) - return ( - current_input - if stream - else await self._consume_all(current_input) - ) + except Exception as error: + # TODO: improve error handling here error_trace = traceback.format_exc() logger.error( f"Pipeline failed with error: {error}\n\nStack trace:\n{error_trace}" ) raise error + return ( + current_input + if stream + else await self._consume_all(current_input) + ) + async def _consume_all(self, gen: AsyncGenerator) -> list[Any]: result = [] async for item in gen: diff --git a/py/core/base/providers/__init__.py b/py/core/base/providers/__init__.py index 3318f1c8f..7fa7341e5 100644 --- a/py/core/base/providers/__init__.py +++ b/py/core/base/providers/__init__.py @@ -1,45 +1,38 @@ from .auth import AuthConfig, AuthProvider from .base import Provider, ProviderConfig -from .chunking import ( - ChunkingConfig, - ChunkingProvider, - R2RChunkingConfig, - Strategy, - UnstructuredChunkingConfig, -) from .crypto import CryptoConfig, CryptoProvider from .database import ( DatabaseConfig, DatabaseProvider, + PostgresConfigurationSettings, RelationalDBProvider, VectorDBProvider, ) from .embedding import EmbeddingConfig, EmbeddingProvider from .file import FileConfig, FileProvider +from .ingestion import ChunkingStrategy, IngestionConfig, IngestionProvider from .kg import KGConfig, KGProvider from .llm import CompletionConfig, CompletionProvider -from .orchestration import OrchestrationConfig, OrchestrationProvider -from .parsing import OverrideParser, ParsingConfig, ParsingProvider +from .orchestration import OrchestrationConfig, OrchestrationProvider, Workflow from .prompt import PromptConfig, PromptProvider __all__ = [ - # Base provider classes - "Provider", - "ProviderConfig", # Auth provider "AuthConfig", "AuthProvider", - # Chunking provider - "UnstructuredChunkingConfig", - "ChunkingConfig", - "R2RChunkingConfig", - "ChunkingProvider", - "Strategy", + # Base provider classes + "Provider", + "ProviderConfig", + # Ingestion provider + "IngestionConfig", + "IngestionProvider", + "ChunkingStrategy", # Crypto provider 
"CryptoConfig", "CryptoProvider", # Database providers "DatabaseConfig", + "PostgresConfigurationSettings", "DatabaseProvider", "RelationalDBProvider", "VectorDBProvider", @@ -55,10 +48,7 @@ # Orchestration provider "OrchestrationConfig", "OrchestrationProvider", - # Parsing provider - "ParsingConfig", - "ParsingProvider", - "OverrideParser", + "Workflow", # Prompt provider "PromptConfig", "PromptProvider", diff --git a/py/core/base/providers/chunking.py b/py/core/base/providers/chunking.py deleted file mode 100644 index 231b14e4f..000000000 --- a/py/core/base/providers/chunking.py +++ /dev/null @@ -1,188 +0,0 @@ -import logging -from abc import ABC, abstractmethod -from enum import Enum -from typing import AsyncGenerator, Optional, Union - -from ..abstractions import DocumentExtraction -from .base import Provider, ProviderConfig - -logger = logging.getLogger(__name__) - - -class Strategy(str, Enum): - # Unstructured methods - BY_TITLE = "by_title" - BASIC = "basic" - # R2R methods - RECURSIVE = "recursive" - CHARACTER = "character" - - -class ChunkingConfig(ProviderConfig): - provider: str = "unstructured_local" - - def validate_config(self) -> None: - if self.provider not in self.supported_providers: - raise ValueError(f"Provider {self.provider} is not supported.") - - @property - def supported_providers(self) -> list[str]: - return ["r2r", "unstructured_local", "unstructured_api"] - - class Config: - json_schema_extra = { - "type": "object", - "properties": { - "provider": {"type": "string"}, - }, - "example": { - "provider": "unstructured_local", - "strategy": "auto", - "chunking_strategy": "by_title", - "new_after_n_chars": 512, - "max_characters": 1_024, - "combine_under_n_chars": 128, - "overlap": 20, - }, - } - - -class R2RChunkingConfig(ChunkingConfig): - provider: str = "r2r" - method: Strategy = Strategy.RECURSIVE - chunk_size: int = 512 - chunk_overlap: int = 20 - max_chunk_size: Optional[int] = None - - def validate_config(self) -> None: - if self.provider not in self.supported_providers: - raise ValueError(f"Provider {self.provider} is not supported.") - if self.chunk_size <= 0: - raise ValueError("chunk_size must be greater than 0") - if self.chunk_overlap < 0: - raise ValueError("chunk_overlap must be non-negative") - - @property - def supported_providers(self) -> list[str]: - return ["r2r", "unstructured_local", "unstructured_api"] - - class Config: - json_schema_extra = { - "type": "object", - "properties": { - "provider": {"type": "string"}, - "method": {"type": "string"}, - "chunk_size": {"type": "integer"}, - "chunk_overlap": {"type": "integer"}, - "max_chunk_size": {"type": "integer"}, - }, - "required": ["provider", "method", "chunk_size", "chunk_overlap"], - "example": { - "provider": "r2r", - "method": "recursive", - "chunk_size": 512, - "chunk_overlap": 20, - "max_chunk_size": 1024, - }, - } - - -class UnstructuredChunkingConfig(ChunkingConfig): - provider: str = "unstructured_local" # or unstructured_api - - combine_under_n_chars: Optional[int] = 128 - max_characters: Optional[int] = 500 - coordinates: bool = False - encoding: Optional[str] = "utf-8" - extract_image_block_types: Optional[list[str]] = None - gz_uncompressed_content_type: Optional[str] = None - hi_res_model_name: Optional[str] = None - include_orig_elements: Optional[bool] = True - include_page_breaks: bool = False - - languages: Optional[list[str]] = None - multipage_sections: bool = True - new_after_n_chars: Optional[int] = 1500 - ocr_languages: Optional[list[str]] = None - output_format: str = 
"application/json" - overlap: int = 0 - overlap_all: bool = False - pdf_infer_table_structure: bool = True - - similarity_threshold: Optional[float] = None - skip_infer_table_types: Optional[list[str]] = None - split_pdf_concurrency_level: int = 5 - split_pdf_page: bool = True - starting_page_number: Optional[int] = None - strategy: str = "auto" - chunking_strategy: Strategy = Strategy.BY_TITLE - unique_element_ids: bool = False - xml_keep_tags: bool = False - - def validate_config(self) -> None: - super().validate_config() - if self.strategy not in ["auto", "fast", "hi_res"]: - raise ValueError("strategy must be 'auto', 'fast', or 'hi_res'") - - -class ChunkingProvider(Provider, ABC): - def __init__(self, config: ChunkingConfig): - super().__init__(config) - self.config = config - - @abstractmethod - async def chunk( - self, parsed_document: Union[str, DocumentExtraction] - ) -> AsyncGenerator[str, None]: - """Chunk the parsed document using the configured chunking strategy.""" - pass - - multipage_sections: bool = True - new_after_n_chars: Optional[int] = 1500 - ocr_languages: Optional[list[str]] = None - output_format: str = "application/json" - overlap: int = 0 - overlap_all: bool = False - pdf_infer_table_structure: bool = True - - similarity_threshold: Optional[float] = None - skip_infer_table_types: Optional[list[str]] = None - split_pdf_concurrency_level: int = 5 - split_pdf_page: bool = True - starting_page_number: Optional[int] = None - strategy: str = "auto" - chunking_strategy: Strategy = Strategy.BY_TITLE - unique_element_ids: bool = False - xml_keep_tags: bool = False - - def validate_config(self) -> None: - if self.strategy not in ["auto", "fast", "hi_res"]: - raise ValueError("strategy must be 'auto', 'fast', or 'hi_res'") - - -__all__ = [ - "GenerationConfig", - "KGSearchSettings", - "MessageType", - "Message", - "ChunkingConfig", - "KGSearchResultType", - "KGSearchMethod", - "KGEntityResult", - "KGRelationshipResult", - "KGCommunityResult", - "KGGlobalResult", - "KGSearchResult", - "R2RException", - "Token", - "HybridSearchSettings", - "VectorSearchSettings", - "KGCreationSettings", - "KGEnrichmentSettings", - "KGCreationResponse", - "KGEnrichmentResponse", - "UserResponse", - "VectorSearchResult", - "SearchResponse", - "RAGResponse", -] diff --git a/py/core/base/providers/database.py b/py/core/base/providers/database.py index 8f167a11b..d9b300e0c 100644 --- a/py/core/base/providers/database.py +++ b/py/core/base/providers/database.py @@ -2,11 +2,40 @@ from abc import ABC, abstractmethod from typing import Any, Optional +from pydantic import BaseModel + from .base import Provider, ProviderConfig logger = logging.getLogger(__name__) +class PostgresConfigurationSettings(BaseModel): + """ + Configuration settings with defaults defined by the PGVector docker image. + + These settings are helpful in managing the connections to the database. 
+ To tune these settings for a specific deployment, see https://pgtune.leopard.in.ua/ + """ + + max_connections: Optional[int] = 100 + shared_buffers: Optional[int] = 16384 + effective_cache_size: Optional[int] = 524288 + maintenance_work_mem: Optional[int] = 65536 + checkpoint_completion_target: Optional[float] = 0.9 + wal_buffers: Optional[int] = 512 + default_statistics_target: Optional[int] = 100 + random_page_cost: Optional[float] = 4 + effective_io_concurrency: Optional[int] = 1 + work_mem: Optional[int] = 4096 + huge_pages: Optional[str] = "try" + min_wal_size: Optional[int] = 80 + max_wal_size: Optional[int] = 1024 + max_worker_processes: Optional[int] = 8 + max_parallel_workers_per_gather: Optional[int] = 2 + max_parallel_workers: Optional[int] = 8 + max_parallel_maintenance_workers: Optional[int] = 2 + + class DatabaseConfig(ProviderConfig): """A base database configuration class""" @@ -18,6 +47,11 @@ class DatabaseConfig(ProviderConfig): db_name: Optional[str] = None vecs_collection: Optional[str] = None project_name: Optional[str] = None + postgres_configuration_settings: Optional[ + PostgresConfigurationSettings + ] = None + default_collection_name: str = "Default" + default_collection_description: str = "Your default collection." def __post_init__(self): self.validate_config() @@ -66,3 +100,7 @@ def _initialize_vector_db(self) -> VectorDBProvider: @abstractmethod async def _initialize_relational_db(self) -> RelationalDBProvider: pass + + @abstractmethod + def _get_table_name(self, base_name: str) -> str: + pass diff --git a/py/core/base/providers/ingestion.py b/py/core/base/providers/ingestion.py new file mode 100644 index 000000000..b11ec5c84 --- /dev/null +++ b/py/core/base/providers/ingestion.py @@ -0,0 +1,31 @@ +import logging +from abc import ABC +from enum import Enum + +from .base import Provider, ProviderConfig + +logger = logging.getLogger(__name__) + + +class IngestionConfig(ProviderConfig): + provider: str = "r2r" + excluded_parsers: list[str] = ["mp4"] + + @property + def supported_providers(self) -> list[str]: + return ["r2r", "unstructured_local", "unstructured_api"] + + def validate_config(self) -> None: + if self.provider not in self.supported_providers: + raise ValueError(f"Provider {self.provider} is not supported.") + + +class IngestionProvider(Provider, ABC): + pass + + +class ChunkingStrategy(str, Enum): + RECURSIVE = "recursive" + CHARACTER = "character" + BASIC = "basic" + BY_TITLE = "by_title" diff --git a/py/core/base/providers/kg.py b/py/core/base/providers/kg.py index a52a367c8..92ddcfa3f 100644 --- a/py/core/base/providers/kg.py +++ b/py/core/base/providers/kg.py @@ -3,10 +3,10 @@ import logging from abc import ABC, abstractmethod from typing import Any, Optional, Tuple +from uuid import UUID from ..abstractions import ( Entity, - GenerationConfig, KGCreationSettings, KGEnrichmentSettings, KGExtraction, @@ -19,6 +19,7 @@ logger = logging.getLogger(__name__) +# TODO - Bolt down types for KGConfig class KGConfig(ProviderConfig): """A base KG config class""" @@ -40,7 +41,7 @@ def validate_config(self) -> None: @property def supported_providers(self) -> list[str]: - return ["neo4j", "local"] + return ["local", "postgres"] class KGProvider(ABC): @@ -58,39 +59,27 @@ def __init__(self, config: KGConfig) -> None: def validate_config(self) -> None: self.config.validate_config() - @property - @abstractmethod - def client(self) -> Any: - """Get client.""" - pass - - @abstractmethod - def get(self, subj: str) -> list[list[str]]: - """Abstract method to get 
triplets.""" - pass - @abstractmethod - def get_rel_map( - self, - subjs: Optional[list[str]] = None, - depth: int = 2, - limit: int = 30, - ) -> dict[str, list[list[str]]]: - """Abstract method to get depth-aware rel map.""" + async def add_entities( + self, entities: list[Entity], *args, **kwargs + ) -> None: + """Abstract method to add entities.""" pass @abstractmethod - def upsert_entities(self, entities: list[Entity], *args, **kwargs) -> None: - """Abstract method to add triplet.""" + async def add_triples(self, triples: list[Triple]) -> None: + """Abstract method to add triples.""" pass @abstractmethod - def upsert_triples(self, triples: list[Triple]) -> None: - """Abstract method to add triplet.""" + async def add_kg_extractions( + self, kg_extractions: list[KGExtraction] + ) -> None: + """Abstract method to add KG extractions.""" pass @abstractmethod - def get_entities( + async def get_entities( self, entity_ids: list[str] | None = None, with_description: bool = False, @@ -99,29 +88,24 @@ def get_entities( pass @abstractmethod - def get_triples(self, triple_ids: list[str] | None = None) -> list[Triple]: + async def get_triples( + self, triple_ids: list[str] | None = None + ) -> list[Triple]: """Abstract method to get triples.""" pass @abstractmethod - def upsert_nodes_and_relationships( - self, kg_extractions: list[KGExtraction] - ) -> None: - """Abstract method to add triplet.""" - pass - - @abstractmethod - def delete(self, subj: str, rel: str, obj: str) -> None: - """Abstract method to delete triplet.""" + async def delete_triples(self, triple_ids: list[int]) -> None: + """Abstract method to delete triples.""" pass @abstractmethod - def get_schema(self, refresh: bool = False) -> str: + async def get_schema(self, refresh: bool = False) -> str: """Abstract method to get the schema of the graph store.""" pass @abstractmethod - def structured_query( + async def structured_query( self, query: str, param_map: Optional[dict[str, Any]] = None ) -> Any: """Abstract method to query the graph store with statement and parameters.""" @@ -136,7 +120,7 @@ async def vector_query( # TODO - Type this method. @abstractmethod - def update_extraction_prompt( + async def update_extraction_prompt( self, prompt_provider: Any, entity_types: list[Any], @@ -147,7 +131,7 @@ def update_extraction_prompt( # TODO - Type this method. 
@abstractmethod - def update_kg_search_prompt( + async def update_kg_search_prompt( self, prompt_provider: Any, entity_types: list[Any], @@ -157,12 +141,33 @@ def update_kg_search_prompt( pass @abstractmethod - def create_vector_index( + async def create_vector_index( self, node_type: str, node_property: str, dimension: int ) -> None: """Abstract method to create a vector index.""" pass + @abstractmethod + async def perform_graph_clustering( + self, + collection_id: UUID, + leiden_params: dict, # TODO - Add typing for leiden_params + ) -> Tuple[int, int, set[tuple[int, Any]]]: + """Abstract method to perform graph clustering.""" + pass + + @abstractmethod + async def get_entity_map( + self, offset: int, limit: int, document_id: str + ) -> dict[str, Any]: + """Abstract method to get the entity map.""" + pass + + @abstractmethod + async def get_community_details(self, community_number: int): + """Abstract method to get community details.""" + pass + def escape_braces(s: str) -> str: """ diff --git a/py/core/base/providers/orchestration.py b/py/core/base/providers/orchestration.py index a676bf650..b9dbefa0d 100644 --- a/py/core/base/providers/orchestration.py +++ b/py/core/base/providers/orchestration.py @@ -1,9 +1,15 @@ from abc import abstractmethod -from typing import Any, Callable +from enum import Enum +from typing import Any from .base import Provider, ProviderConfig +class Workflow(Enum): + INGESTION = "ingestion" + KG = "kg" + + class OrchestrationConfig(ProviderConfig): provider: str max_threads: int = 256 @@ -14,7 +20,7 @@ def validate_config(self) -> None: @property def supported_providers(self) -> list[str]: - return ["hatchet"] + return ["hatchet", "simple"] class OrchestrationProvider(Provider): @@ -24,7 +30,7 @@ def __init__(self, config: OrchestrationConfig): self.worker = None @abstractmethod - def register_workflow(self, workflow: Any) -> None: + async def start_worker(self): pass @abstractmethod @@ -32,13 +38,28 @@ def get_worker(self, name: str, max_threads: int) -> Any: pass @abstractmethod - def workflow(self, *args, **kwargs) -> Callable: + def step(self, *args, **kwargs) -> Any: + pass + + @abstractmethod + def workflow(self, *args, **kwargs) -> Any: + pass + + @abstractmethod + def failure(self, *args, **kwargs) -> Any: pass @abstractmethod - def step(self, *args, **kwargs) -> Callable: + def register_workflows(self, workflow: Workflow, service: Any) -> None: pass @abstractmethod - def start_worker(self): + async def run_workflow( + self, + workflow_name: str, + parameters: dict, + options: dict, + *args, + **kwargs, + ) -> dict[str, str]: pass diff --git a/py/core/base/providers/parsing.py b/py/core/base/providers/parsing.py deleted file mode 100644 index 4b18bc619..000000000 --- a/py/core/base/providers/parsing.py +++ /dev/null @@ -1,46 +0,0 @@ -from abc import ABC, abstractmethod -from typing import Any, AsyncGenerator - -from pydantic import BaseModel, Field - -from ..abstractions import Document, DocumentType -from .base import Provider, ProviderConfig -from .chunking import ChunkingConfig - - -class OverrideParser(BaseModel): - document_type: DocumentType - parser: str - - -class ParsingConfig(ProviderConfig): - provider: str = "r2r" - excluded_parsers: list[DocumentType] = Field(default_factory=list) - override_parsers: list[OverrideParser] = Field(default_factory=list) - chunking_config: ChunkingConfig = Field(default_factory=ChunkingConfig) - - @property - def supported_providers(self) -> list[str]: - return ["r2r", "unstructured_local", "unstructured_api"] - 
- def validate_config(self) -> None: - if self.provider not in self.supported_providers: - raise ValueError(f"Provider {self.provider} is not supported.") - - -class ParsingProvider(Provider, ABC): - def __init__(self, config: ParsingConfig): - super().__init__(config) - self.config = config - - @abstractmethod - async def parse( - self, file_content: bytes, document: Document - ) -> AsyncGenerator[Any, None]: - """Parse the document using the configured parsing strategy.""" - pass - - @abstractmethod - def get_parser_for_document_type(self, doc_type: DocumentType) -> str: - """Get the appropriate parser for a given document type.""" - pass diff --git a/py/core/base/utils/__init__.py b/py/core/base/utils/__init__.py index 5ebe8efd2..3a488f6e4 100644 --- a/py/core/base/utils/__init__.py +++ b/py/core/base/utils/__init__.py @@ -6,6 +6,7 @@ format_relations, format_search_results_for_llm, format_search_results_for_stream, + generate_default_user_collection_id, generate_id_from_label, generate_run_id, generate_user_document_id, @@ -22,6 +23,7 @@ "format_search_results_for_llm", "generate_run_id", "generate_user_document_id", + "generate_default_user_collection_id", "increment_version", "decrement_version", "run_pipeline", diff --git a/py/core/configs/full.toml b/py/core/configs/full.toml new file mode 100644 index 000000000..86d8b80e5 --- /dev/null +++ b/py/core/configs/full.toml @@ -0,0 +1,11 @@ +[ingestion] +provider = "unstructured_local" +strategy = "auto" +chunking_strategy = "by_title" +new_after_n_chars = 512 +max_characters = 1_024 +combine_under_n_chars = 128 +overlap = 20 + +[orchestration] +provider = "hatchet" diff --git a/py/core/configs/full_local_llm.toml b/py/core/configs/full_local_llm.toml new file mode 100644 index 000000000..1414b51f4 --- /dev/null +++ b/py/core/configs/full_local_llm.toml @@ -0,0 +1,38 @@ +[agent] +system_instruction_name = "rag_agent" +tool_names = ["search"] + + [agent.generation_config] + model = "ollama/llama3.1" + +[completion] +provider = "litellm" +concurrent_request_limit = 1 + + [completion.generation_config] + model = "ollama/llama3.1" + temperature = 0.1 + top_p = 1 + max_tokens_to_sample = 1_024 + stream = false + add_generation_kwargs = { } + +[embedding] +provider = "ollama" +base_model = "mxbai-embed-large" +base_dimension = 1_024 +batch_size = 128 +add_title_as_prefix = true +concurrent_request_limit = 2 + +[ingestion] +provider = "unstructured_local" +strategy = "auto" +chunking_strategy = "by_title" +new_after_n_chars = 512 +max_characters = 1_024 +combine_under_n_chars = 128 +overlap = 20 + +[orchestration] +provider = "hatchet" diff --git a/py/core/configs/local_llm.toml b/py/core/configs/local_llm.toml index c268ff468..d16bbe951 100644 --- a/py/core/configs/local_llm.toml +++ b/py/core/configs/local_llm.toml @@ -17,9 +17,6 @@ concurrent_request_limit = 1 stream = false add_generation_kwargs = { } -[database] -provider = "postgres" - [embedding] provider = "ollama" base_model = "mxbai-embed-large" @@ -28,31 +25,5 @@ batch_size = 128 add_title_as_prefix = true concurrent_request_limit = 2 - -[kg] -provider = "neo4j" # set this to "None" if you don't want to use a knowledge graph -batch_size = 256 - - [kg.kg_creation_settings] - kg_extraction_prompt = "graphrag_triplet_extraction_zero_shot" - entity_types = [] # if empty, all entities are extracted - relation_types = [] # if empty, all relations are extracted - fragment_merge_count = 4 # number of fragments to merge into a single extraction - max_knowledge_triples = 100 - 
generation_config = { model = "ollama/llama3.1" } # and other params, model used for triplet extraction - - [kg.kg_enrichment_settings] - community_reports_prompt = "graphrag_community_reports_prompt" - max_description_input_length = 8192 - max_summary_input_length = 65536 - generation_config = { model = "ollama/llama3.1" } # and other params, model used for node description and graph clustering - leiden_params = { max_levels = 10 } # more params in https://neo4j.com/docs/graph-data-science/current/algorithms/leiden/ - - [kg.kg_search_settings] - map_system_prompt = "graphrag_map_system_prompt" - reduce_system_prompt = "graphrag_reduce_system_prompt" - generation_config = { model = "ollama/llama3.1" } - -[parsing] -provider = "unstructured_local" -excluded_parsers = [ "gif", "jpeg", "jpg", "png", "svg", "mp3", "mp4" ] +[orchestration] +provider = "simple" diff --git a/py/core/configs/neo4j_kg_no_vector_postgres.toml b/py/core/configs/neo4j_kg_no_vector_postgres.toml deleted file mode 100644 index 4a7386d4f..000000000 --- a/py/core/configs/neo4j_kg_no_vector_postgres.toml +++ /dev/null @@ -1,24 +0,0 @@ -[auth] -provider = "None" - -[database] -provider = "None" - -[kg] -provider = "neo4j" -batch_size = 256 - - [kg.kg_creation_settings] - kg_extraction_prompt = "graphrag_triplet_extraction_zero_shot" - entity_types = [] # if empty, all entities are extracted - relation_types = [] # if empty, all relations are extracted - fragment_merge_count = 4 # number of fragments to merge into a single extraction - max_knowledge_triples = 100 - generation_config = { model = "gpt-4o-mini" } # and other params, model used for triplet extraction - - [kg.kg_enrichment_settings] - generation_config = { model = "gpt-4o-mini" } # and other params, model used for node description and graph clustering - leiden_params = { max_levels = 10 } # more params here: https://neo4j.com/docs/graph-data-science/current/algorithms/leiden/ - - [kg.kg_search_settings] - generation_config = { model = "gpt-4o-mini" } diff --git a/py/core/configs/r2r_with_local_parsing.toml b/py/core/configs/r2r_with_local_parsing.toml deleted file mode 100644 index a32c89229..000000000 --- a/py/core/configs/r2r_with_local_parsing.toml +++ /dev/null @@ -1,60 +0,0 @@ -[auth] -provider = "r2r" -access_token_lifetime_in_minutes = 60 -refresh_token_lifetime_in_days = 7 -require_authentication = false -require_email_verification = false -default_admin_email = "admin@example.com" -default_admin_password = "change_me_immediately" - -[chunking] -provider = "r2r" -method = "recursive" -chunk_size = 512 -chunk_overlap = 50 - -[completion] -provider = "litellm" -concurrent_request_limit = 16 - - [completion.generation_config] - model = "openai/gpt-4o" - temperature = 0.1 - top_p = 1 - max_tokens_to_sample = 1_024 - stream = false - add_generation_kwargs = { } - -[crypto] -provider = "bcrypt" - -[database] -provider = "postgres" - -[embedding] -provider = "litellm" -base_model = "openai/text-embedding-3-small" -base_dimension = 512 -batch_size = 128 -add_title_as_prefix = false -rerank_model = "None" -concurrent_request_limit = 256 - -[kg] -provider = "None" - -[logging] -provider = "local" -log_table = "logs" -log_info_table = "log_info" - -[parsing] -provider = "r2r" -excluded_parsers = ["mp4"] - -[prompt] -provider = "r2r" - -[agent] -system_instruction_name = "rag_agent" -tool_names = ["search"] diff --git a/py/core/configs/unstructured_api.toml b/py/core/configs/unstructured_api.toml deleted file mode 100644 index d3942e627..000000000 --- 
a/py/core/configs/unstructured_api.toml +++ /dev/null @@ -1,33 +0,0 @@ -[parsing] -provider = "unstructured_api" # or unstructured_local - -[chunking] -provider = "unstructured_api" # or unstructured_local -strategy = "hi_res" # fast or hi_res -chunking_strategy = "by_title" # by_title or recursive - -# combine_under_n_chars= -# coordinates= -# encoding= -# extract_image_block_types= -# gz_uncompressed_content_type= -# hi_res_model_name= -# include_orig_elements= -# include_page_breaks= -# languages= -# max_characters= -# multipage_sections= -# new_after_n_chars= -# ocr_languages= -# output_format= -# overlap= -# overlap_all= -# pdf_infer_table_structure= -# similarity_threshold= -# skip_infer_table_types= -# split_pdf_concurrency_level= -# split_pdf_page= -# starting_page_number= -# strategy= -# unique_element_ids= -# xml_keep_tags= diff --git a/py/core/examples/hello_r2r.py b/py/core/examples/hello_r2r.py index 06041e1ca..979f80333 100644 --- a/py/core/examples/hello_r2r.py +++ b/py/core/examples/hello_r2r.py @@ -18,6 +18,6 @@ # RAG Results: # Search Results: -# AggregateSearchResult(vector_search_results=[VectorSearchResult(id=2d71e689-0a0e-5491-a50b-4ecb9494c832, score=0.6848798582029441, metadata={'text': 'John is a person that works at Google.', 'version': 'v0', 'chunk_order': 0, 'document_id': 'ed76b6ee-dd80-5172-9263-919d493b439a', 'extraction_id': '1ba494d7-cb2f-5f0e-9f64-76c31da11381', 'associatedQuery': 'Who is john'})], kg_search_results=None) +# AggregateSearchResult(vector_search_results=[VectorSearchResult(id=2d71e689-0a0e-5491-a50b-4ecb9494c832, score=0.6848798582029441, metadata={'text': 'John is a person that works at Google.', 'version': 'v0', 'chunk_order': 0, 'document_id': 'ed76b6ee-dd80-5172-9263-919d493b439a', 'id': '1ba494d7-cb2f-5f0e-9f64-76c31da11381', 'associatedQuery': 'Who is john'})], kg_search_results=None) # Completion: # ChatCompletion(id='chatcmpl-9g0HnjGjyWDLADe7E2EvLWa35cMkB', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content='John is a person that works at Google [1].', role='assistant', function_call=None, tool_calls=None))], created=1719797903, model='gpt-4o-mini', object='chat.completion', service_tier=None, system_fingerprint=None, usage=CompletionUsage(completion_tokens=11, prompt_tokens=145, total_tokens=156)) diff --git a/py/core/examples/scripts/advanced_kg_cookbook.py b/py/core/examples/scripts/advanced_kg_cookbook.py index e13e75232..058d1f5c2 100644 --- a/py/core/examples/scripts/advanced_kg_cookbook.py +++ b/py/core/examples/scripts/advanced_kg_cookbook.py @@ -85,7 +85,6 @@ def main( local_mode=True, base_url="http://localhost:7272", ): - # Specify the entity types for the KG extraction prompt entity_types = [ "COMPANY", @@ -128,7 +127,7 @@ def main( client = R2RClient(base_url=base_url) r2r_prompts = R2RPromptProvider() - prompt = "graphrag_triplet_extraction_zero_shot" + prompt = "graphrag_triples_extraction_few_shot" r2r_prompts.update_prompt( prompt, @@ -159,12 +158,6 @@ def main( except: continue - print( - client.inspect_knowledge_graph(0, 1000, print_descriptions=True)[ - "results" - ] - ) - if __name__ == "__main__": fire.Fire(main) diff --git a/py/core/examples/scripts/run_auth_workflow.py b/py/core/examples/scripts/run_auth_workflow.py index 09e588440..53841769b 100644 --- a/py/core/examples/scripts/run_auth_workflow.py +++ b/py/core/examples/scripts/run_auth_workflow.py @@ -43,7 +43,7 @@ # # Uncomment to delete the user account # # Delete account (requires password confirmation) - 
# delete_result = client.delete_user("password123") + # delete_result = client.delete_user(login_result["id"], "password123") # print(delete_result) logout_result = client.logout() diff --git a/py/core/integrations/__init__.py b/py/core/integrations/__init__.py deleted file mode 100644 index 0830f40ca..000000000 --- a/py/core/integrations/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -from .serper import SerperClient - -__all__ = ["SerperClient"] diff --git a/py/core/integrations/serper.py b/py/core/integrations/serper.py deleted file mode 100644 index 14333d1a5..000000000 --- a/py/core/integrations/serper.py +++ /dev/null @@ -1,103 +0,0 @@ -import http.client -import json -import os - - -# TODO - Move process json to dedicated data processing module -def process_json(json_object, indent=0): - """ - Recursively traverses the JSON object (dicts and lists) to create an unstructured text blob. - """ - text_blob = "" - if isinstance(json_object, dict): - for key, value in json_object.items(): - padding = " " * indent - if isinstance(value, (dict, list)): - text_blob += ( - f"{padding}{key}:\n{process_json(value, indent + 1)}" - ) - else: - text_blob += f"{padding}{key}: {value}\n" - elif isinstance(json_object, list): - for index, item in enumerate(json_object): - padding = " " * indent - if isinstance(item, (dict, list)): - text_blob += f"{padding}Item {index + 1}:\n{process_json(item, indent + 1)}" - else: - text_blob += f"{padding}Item {index + 1}: {item}\n" - return text_blob - - -# TODO - Introduce abstract "Integration" ABC. -class SerperClient: - def __init__(self, api_base: str = "google.serper.dev") -> None: - api_key = os.getenv("SERPER_API_KEY") - if not api_key: - raise ValueError( - "Please set the `SERPER_API_KEY` environment variable to use `SerperClient`." 
- ) - - self.api_base = api_base - self.headers = { - "X-API-KEY": api_key, - "Content-Type": "application/json", - } - - @staticmethod - def _extract_results(result_data: dict) -> list: - formatted_results = [] - - for key, value in result_data.items(): - # Skip searchParameters as it's not a result entry - if key == "searchParameters": - continue - - # Handle 'answerBox' as a single item - if key == "answerBox": - value["type"] = key # Add the type key to the dictionary - formatted_results.append(value) - # Handle lists of results - elif isinstance(value, list): - for item in value: - item["type"] = key # Add the type key to the dictionary - formatted_results.append(item) - # Handle 'peopleAlsoAsk' and potentially other single item formats - elif isinstance(value, dict): - value["type"] = key # Add the type key to the dictionary - formatted_results.append(value) - - return formatted_results - - # TODO - Add explicit typing for the return value - def get_raw(self, query: str, limit: int = 10) -> list: - connection = http.client.HTTPSConnection(self.api_base) - payload = json.dumps({"q": query, "num_outputs": limit}) - connection.request("POST", "/search", payload, self.headers) - response = connection.getresponse() - data = response.read() - json_data = json.loads(data.decode("utf-8")) - return SerperClient._extract_results(json_data) - - @staticmethod - def construct_context(results: list) -> str: - # Organize results by type - organized_results = {} - for result in results: - result_type = result.metadata.pop( - "type", "Unknown" - ) # Pop the type and use as key - if result_type not in organized_results: - organized_results[result_type] = [result.metadata] - else: - organized_results[result_type].append(result.metadata) - - context = "" - # Iterate over each result type - for result_type, items in organized_results.items(): - context += f"# {result_type} Results:\n" - for index, item in enumerate(items, start=1): - # Process each item under the current type - context += f"Item {index}:\n" - context += process_json(item) + "\n" - - return context diff --git a/py/core/integrations/unstructured/main.py b/py/core/integrations/unstructured/main.py deleted file mode 100644 index 3c73ccfe6..000000000 --- a/py/core/integrations/unstructured/main.py +++ /dev/null @@ -1,59 +0,0 @@ -import asyncio -import base64 -import concurrent.futures -import logging -import os -from io import BytesIO -from typing import Dict, List - -from fastapi import FastAPI, HTTPException -from pydantic import BaseModel -from unstructured.partition.auto import partition - -logger = logging.getLogger(__name__) - -app = FastAPI() - - -class PartitionRequestModel(BaseModel): - file_content: str - chunking_config: Dict - - -class PartitionResponseModel(BaseModel): - elements: List[Dict] - - -executor = concurrent.futures.ThreadPoolExecutor( - max_workers=int(os.environ.get("MAX_INGESTION_WORKERS", 10)) -) - - -def run_partition(file_content: str, chunking_config: Dict) -> List[Dict]: - file_content_bytes = base64.b64decode(file_content) - file_io = BytesIO(file_content_bytes) - elements = partition(file=file_io, **chunking_config) - return [element.to_dict() for element in elements] - - -@app.get("/health") -async def health_endpoint(): - return {"status": "ok"} - - -@app.post("/partition", response_model=PartitionResponseModel) -async def partition_endpoint(request: PartitionRequestModel): - try: - logger.info(f"Partitioning request received") - loop = asyncio.get_event_loop() - elements = await loop.run_in_executor( - 
executor, - run_partition, - request.file_content, - request.chunking_config, - ) - logger.info(f"Partitioning completed") - return PartitionResponseModel(elements=elements) - except Exception as e: - logger.error(f"Error partitioning file: {str(e)}") - raise HTTPException(status_code=500, detail=str(e)) diff --git a/py/core/main/__init__.py b/py/core/main/__init__.py index cec677483..7fb2dea34 100644 --- a/py/core/main/__init__.py +++ b/py/core/main/__init__.py @@ -4,7 +4,7 @@ # from .app_entry import r2r_app from .assembly import * -from .hatchet import * +from .orchestration import * from .services import * __all__ = [ @@ -19,13 +19,9 @@ "IngestionRouter", "ManagementRouter", "RetrievalRouter", - "RestructureRouter", + "KGRouter", ## R2R APP "R2RApp", - ## R2R APP ENTRY - # "r2r_app", - ## R2R HATCHET - "r2r_hatchet", ## R2R ASSEMBLY # Builder "R2RBuilder", @@ -41,5 +37,5 @@ "IngestionService", "ManagementService", "RetrievalService", - "RestructureService", + "KgService", ] diff --git a/py/core/main/abstractions.py b/py/core/main/abstractions.py index 7647a70f9..fa93036e8 100644 --- a/py/core/main/abstractions.py +++ b/py/core/main/abstractions.py @@ -4,14 +4,13 @@ from core.base.pipes import AsyncPipe from core.base.providers import ( AuthProvider, - ChunkingProvider, CompletionProvider, DatabaseProvider, EmbeddingProvider, FileProvider, + IngestionProvider, KGProvider, OrchestrationProvider, - ParsingProvider, PromptProvider, ) from core.pipelines import RAGPipeline, SearchPipeline @@ -19,15 +18,14 @@ class R2RProviders(BaseModel): auth: AuthProvider - chunking: ChunkingProvider database: DatabaseProvider + ingestion: IngestionProvider + embedding: EmbeddingProvider + file: FileProvider kg: KGProvider llm: CompletionProvider - embedding: EmbeddingProvider orchestration: OrchestrationProvider prompt: PromptProvider - parsing: ParsingProvider - file: FileProvider class Config: arbitrary_types_allowed = True @@ -35,19 +33,17 @@ class Config: class R2RPipes(BaseModel): parsing_pipe: AsyncPipe - chunking_pipe: AsyncPipe embedding_pipe: AsyncPipe - vector_storage_pipe: AsyncPipe - vector_search_pipe: AsyncPipe - rag_pipe: AsyncPipe - streaming_rag_pipe: AsyncPipe kg_search_pipe: AsyncPipe - kg_extraction_pipe: AsyncPipe + kg_triples_extraction_pipe: AsyncPipe kg_storage_pipe: AsyncPipe - kg_node_extraction_pipe: AsyncPipe - kg_node_description_pipe: AsyncPipe + kg_entity_description_pipe: AsyncPipe kg_clustering_pipe: AsyncPipe kg_community_summary_pipe: AsyncPipe + rag_pipe: AsyncPipe + streaming_rag_pipe: AsyncPipe + vector_storage_pipe: AsyncPipe + vector_search_pipe: AsyncPipe class Config: arbitrary_types_allowed = True diff --git a/py/core/main/api/__init__.py b/py/core/main/api/__init__.py index 4762399c2..6b50c605a 100644 --- a/py/core/main/api/__init__.py +++ b/py/core/main/api/__init__.py @@ -1,8 +1,8 @@ from .auth_router import AuthRouter from .base_router import BaseRouter from .ingestion_router import IngestionRouter +from .kg_router import KGRouter from .management_router import ManagementRouter -from .restructure_router import RestructureRouter from .retrieval_router import RetrievalRouter __all__ = [ @@ -12,5 +12,5 @@ "ManagementRouter", "RetrievalRouter", "BaseRouter", - "RestructureRouter", + "KGRouter", ] diff --git a/py/core/main/api/auth_router.py b/py/core/main/api/auth_router.py index 1a3bd7851..f906a1830 100644 --- a/py/core/main/api/auth_router.py +++ b/py/core/main/api/auth_router.py @@ -22,18 +22,12 @@ class AuthRouter(BaseRouter): def __init__( self, - 
auth_service: AuthService, - run_type: RunType = RunType.INGESTION, - orchestration_provider: Optional[OrchestrationProvider] = None, + service: AuthService, + orchestration_provider: OrchestrationProvider, + run_type: RunType = RunType.UNSPECIFIED, ): - super().__init__(auth_service, run_type, orchestration_provider) - self.service: AuthService = auth_service # for type hinting - - def _register_workflows(self): - pass - - def _load_openapi_extras(self): - return {} + super().__init__(service, orchestration_provider, run_type) + self.service: AuthService = service # for type hinting def _setup_routes(self): @self.router.post("/register", response_model=WrappedUserResponse) diff --git a/py/core/main/api/base_router.py b/py/core/main/api/base_router.py index 58265e529..de33d03f5 100644 --- a/py/core/main/api/base_router.py +++ b/py/core/main/api/base_router.py @@ -19,8 +19,8 @@ class BaseRouter: def __init__( self, service: "Service", + orchestration_provider: OrchestrationProvider, run_type: RunType = RunType.UNSPECIFIED, - orchestration_provider: Optional[OrchestrationProvider] = None, ): self.service = service self.run_type = run_type @@ -59,6 +59,7 @@ async def wrapper(*args, **kwargs): if isinstance(results, StreamingResponse): return results return {"results": results, **outer_kwargs} + except R2RException as re: raise HTTPException( status_code=re.status_code, @@ -67,16 +68,20 @@ async def wrapper(*args, **kwargs): "error_type": type(re).__name__, }, ) + except Exception as e: + await self.service.logging_connection.log( run_id=run_id, key="error", value=str(e), ) + logger.error( f"Error in base endpoint {func.__name__}() - \n\n{str(e)}", exc_info=True, ) + raise HTTPException( status_code=500, detail={ @@ -96,10 +101,8 @@ def build_router(cls, engine): def _setup_routes(self): pass - @abstractmethod def _register_workflows(self): pass - @abstractmethod def _load_openapi_extras(self): - pass + return {} diff --git a/py/core/main/api/data/ingestion_router_openapi.yml b/py/core/main/api/data/ingestion_router_openapi.yml index 94257f8c0..08a419ff1 100644 --- a/py/core/main/api/data/ingestion_router_openapi.yml +++ b/py/core/main/api/data/ingestion_router_openapi.yml @@ -30,7 +30,7 @@ ingest_files: metadatas: "An optional list of JSON metadata to affix to each file" document_ids: "An optional list of document ids for each file. If not provided, the system will generate a unique document id via the `generate_user_document_id` method." versions: "An optional list of versions for each file. If not provided, the system will default to the tag `v1`." - chunking_config: "An optional dictionary to override the default chunking configuration for the ingestion process. If not provided, the system will use the default server-side chunking configuration." + ingestion_config: "An optional dictionary to override the default chunking configuration for the ingestion process. If not provided, the system will use the default server-side chunking configuration." update_files: openapi_extra: @@ -58,4 +58,4 @@ update_files: files: "The list of paths of input files to update in the system." document_ids: "An optional list of document ids for each file. If not provided, the system will attempt to generate the corresponding unique from the `generate_user_document_id` method." 
metadatas: "An optional list of JSON metadata to affix to each file" - chunking_config: "JSON string for chunking configuration override" + ingestion_config: "JSON string for chunking configuration override" diff --git a/py/core/main/api/data/restructure_router_openapi.yml b/py/core/main/api/data/kg_router_openapi.yml similarity index 100% rename from py/core/main/api/data/restructure_router_openapi.yml rename to py/core/main/api/data/kg_router_openapi.yml diff --git a/py/core/main/api/ingestion_router.py b/py/core/main/api/ingestion_router.py index 58276d30e..bad85f518 100644 --- a/py/core/main/api/ingestion_router.py +++ b/py/core/main/api/ingestion_router.py @@ -1,5 +1,4 @@ import base64 -import json import logging from io import BytesIO from pathlib import Path @@ -10,15 +9,13 @@ from fastapi import Depends, File, Form, UploadFile from pydantic import Json -from core.base import ChunkingConfig, R2RException, generate_user_document_id +from core.base import R2RException, generate_user_document_id from core.base.api.models import ( WrappedIngestionResponse, WrappedUpdateResponse, ) -from core.base.providers import OrchestrationProvider +from core.base.providers import OrchestrationProvider, Workflow -from ...main.hatchet import r2r_hatchet -from ..hatchet import IngestFilesWorkflow, UpdateFilesWorkflow from ..services.ingestion_service import IngestionService from .base_router import BaseRouter, RunType @@ -29,22 +26,28 @@ class IngestionRouter(BaseRouter): def __init__( self, service: IngestionService, + orchestration_provider: OrchestrationProvider, run_type: RunType = RunType.INGESTION, - orchestration_provider: Optional[OrchestrationProvider] = None, ): - if not orchestration_provider: - raise ValueError( - "IngestionRouter requires an orchestration provider." - ) - super().__init__(service, run_type, orchestration_provider) + super().__init__(service, orchestration_provider, run_type) self.service: IngestionService = service def _register_workflows(self): - self.orchestration_provider.register_workflow( - IngestFilesWorkflow(self.service) - ) - self.orchestration_provider.register_workflow( - UpdateFilesWorkflow(self.service) + self.orchestration_provider.register_workflows( + Workflow.INGESTION, + self.service, + { + "ingest-file-changed": ( + "Ingestion task queued successfully." + if self.orchestration_provider.config.provider != "simple" + else "Ingestion task completed successfully." + ), + "update-files": ( + "Update task queued successfully." + if self.orchestration_provider.config.provider != "simple" + else "Update task queued successfully." + ), + }, ) def _load_openapi_extras(self): @@ -79,9 +82,9 @@ async def ingest_files_app( metadatas: Optional[Json[list[dict]]] = Form( None, description=ingest_files_descriptions.get("metadatas") ), - chunking_config: Optional[str] = Form( + ingestion_config: Optional[Json[dict]] = Form( None, - description=ingest_files_descriptions.get("chunking_config"), + description=ingest_files_descriptions.get("ingestion_config"), ), auth_user=Depends(self.service.providers.auth.auth_wrapper), response_model=WrappedIngestionResponse, @@ -93,11 +96,7 @@ async def ingest_files_app( A valid user authentication token is required to access this endpoint, as regular users can only ingest files for their own access. More expansive collection permissioning is under development. 
""" - if chunking_config: - chunking_config = ( - json.loads(chunking_config) if chunking_config else None - ) - # self._validate_chunking_config(chunking_config) + # Check if the user is a superuser if not auth_user.is_superuser: for metadata in metadatas or []: @@ -132,7 +131,7 @@ async def ingest_files_app( "file_data": file_data, "document_id": str(document_id), "metadata": metadatas[it] if metadatas else None, - "chunking_config": chunking_config, + "ingestion_config": ingestion_config, "user": auth_user.model_dump_json(), "size_in_bytes": content_length, "is_update": False, @@ -145,9 +144,8 @@ async def ingest_files_app( file_content, file_data["content_type"], ) - - task_id = r2r_hatchet.admin.run_workflow( - "ingest-file", + raw_message = await self.orchestration_provider.run_workflow( + "ingest-file-changed", {"request": workflow_input}, options={ "additional_metadata": { @@ -155,60 +153,11 @@ async def ingest_files_app( } }, ) + raw_message["document_id"] = str(document_id) + messages.append(raw_message) - messages.append( - { - "message": "Ingestion task queued successfully.", - "task_id": str(task_id), - "document_id": str(document_id), - } - ) return messages - @self.router.post( - "/retry_ingest_files", - openapi_extra=ingest_files_extras.get("openapi_extra"), - ) - @self.base_endpoint - async def retry_ingest_files( - document_ids: list[UUID] = Form( - ..., - description=ingest_files_descriptions.get("document_ids"), - ), - auth_user=Depends(self.service.providers.auth.auth_wrapper), - response_model=WrappedIngestionResponse, - ): - """ - Retry the ingestion of files into the system. - - This endpoint allows you to retry the ingestion of files that have previously failed to ingest into R2R. - - A valid user authentication token is required to access this endpoint, as regular users can only retry the ingestion of their own files. More expansive collection permissioning is under development. - """ - if not auth_user.is_superuser: - documents_overview = await self.service.providers.database.relational.get_documents_overview( - filter_document_ids=document_ids, - filter_user_ids=[auth_user.id], - )[ - "results" - ] - if len(documents_overview) != len(document_ids): - raise R2RException( - status_code=404, - message="One or more documents not found.", - ) - - # FIXME: This is throwing an aiohttp.client_exceptions.ClientConnectionError: Cannot connect to host localhost:8080 ssl:default… can we whitelist the host? 
- workflow_list = await r2r_hatchet.rest.workflow_run_list() - - # TODO: we want to extract the hatchet run ids for the document ids, and then retry them - - return { - "message": "Retry tasks queued successfully.", - "task_ids": [str(task_id) for task_id in workflow_list], - "document_ids": [str(doc_id) for doc_id in document_ids], - } - update_files_extras = self.openapi_extras.get("update_files", {}) update_files_descriptions = update_files_extras.get( "input_descriptions", {} @@ -229,9 +178,9 @@ async def update_files_app( metadatas: Optional[Json[list[dict]]] = Form( None, description=ingest_files_descriptions.get("metadatas") ), - chunking_config: Optional[Json[ChunkingConfig]] = Form( + ingestion_config: Optional[Json[dict]] = Form( None, - description=ingest_files_descriptions.get("chunking_config"), + description=ingest_files_descriptions.get("ingestion_config"), ), auth_user=Depends(self.service.providers.auth.auth_wrapper), response_model=WrappedUpdateResponse, @@ -243,7 +192,7 @@ async def update_files_app( A valid user authentication token is required to access this endpoint, as regular users can only update their own files. More expansive collection permissioning is under development. """ - self._validate_chunking_config(chunking_config) + self._validate_ingestion_config(ingestion_config) if not auth_user.is_superuser: for metadata in metadatas or []: if "user_id" in metadata and metadata["user_id"] != str( @@ -292,32 +241,24 @@ async def update_files_app( item["document_id"] for item in processed_data ], "metadatas": metadatas, - "chunking_config": ( - chunking_config.model_dump_json() - if chunking_config - else None - ), + "ingestion_config": ingestion_config, "user": auth_user.model_dump_json(), "is_update": True, } - task_id = r2r_hatchet.admin.run_workflow( - "update-files", {"request": workflow_input} + raw_message = await self.orchestration_provider.run_workflow( + "update-files", {"request": workflow_input}, {} ) - - return { - "message": "Update task queued successfully.", - "task_id": str(task_id), - "document_ids": workflow_input["document_ids"], - } + raw_message["message"] = "Update task queued successfully." + raw_message["document_ids"] = workflow_input["document_ids"] + return raw_message @staticmethod - def _validate_chunking_config(chunking_config): + def _validate_ingestion_config(ingestion_config): from ..assembly.factory import R2RProviderFactory - if chunking_config: - chunking_config.validate_config() - R2RProviderFactory.create_chunking_provider(chunking_config) + if ingestion_config: + R2RProviderFactory.create_ingestion_provider(ingestion_config) else: logger.info("No chunking config override provided. 
Using default.") diff --git a/py/core/main/api/kg_router.py b/py/core/main/api/kg_router.py new file mode 100644 index 000000000..bab4e5131 --- /dev/null +++ b/py/core/main/api/kg_router.py @@ -0,0 +1,133 @@ +import logging +from pathlib import Path +from typing import Optional + +import yaml +from fastapi import Body, Depends +from pydantic import Json + +from core.base.api.models import ( + WrappedKGCreationResponse, + WrappedKGEnrichmentResponse, +) +from core.base.providers import OrchestrationProvider, Workflow + +from ..services.kg_service import KgService +from .base_router import BaseRouter, RunType + +logger = logging.getLogger(__name__) + + +class KGRouter(BaseRouter): + def __init__( + self, + service: KgService, + orchestration_provider: Optional[OrchestrationProvider] = None, + run_type: RunType = RunType.KG, + ): + if not orchestration_provider: + raise ValueError("KGRouter requires an orchestration provider.") + super().__init__(service, orchestration_provider, run_type) + self.service: KgService = service + + def _load_openapi_extras(self): + yaml_path = Path(__file__).parent / "data" / "kg_router_openapi.yml" + with open(yaml_path, "r") as yaml_file: + yaml_content = yaml.safe_load(yaml_file) + return yaml_content + + def _register_workflows(self): + self.orchestration_provider.register_workflows( + Workflow.KG, + self.service, + { + "create-graph": "Graph creation task queued successfully.", + "enrich-graph": "Graph enrichment task queued successfully.", + }, + ) + + def _setup_routes(self): + @self.router.post( + "/create_graph", + ) + @self.base_endpoint + async def create_graph( + collection_id: str = Body( + description="Collection ID to create graph for.", + ), + kg_creation_settings: Optional[Json[dict]] = Body( + default=None, + description="Settings for the graph creation process.", + ), + auth_user=Depends(self.service.providers.auth.auth_wrapper), + response_model=WrappedKGCreationResponse, + ): + """ + Creating a graph on your documents. This endpoint takes input a list of document ids and KGCreationSettings. If document IDs are not provided, the graph will be created on all documents in the system. + + This step extracts the relevant entities and relationships from the documents and creates a graph based on the extracted information. + + In order to do GraphRAG, you will need to run the enrich_graph endpoint. + """ + + if not auth_user.is_superuser: + logger.warning("Implement permission checks here.") + + server_kg_creation_settings = ( + self.service.providers.kg.config.kg_creation_settings + ) + if kg_creation_settings: + for key, value in kg_creation_settings.items(): + if value is not None: + setattr(server_kg_creation_settings, key, value) + + workflow_input = { + "collection_id": collection_id, + "kg_creation_settings": server_kg_creation_settings.model_dump_json(), + "user": auth_user.json(), + } + + return await self.orchestration_provider.run_workflow( + "create-graph", {"request": workflow_input}, {} + ) + + @self.router.post( + "/enrich_graph", + ) + @self.base_endpoint + async def enrich_graph( + collection_id: str = Body( + description="Collection name to enrich graph for.", + ), + kg_enrichment_settings: Optional[Json[dict]] = Body( + default=None, + description="Settings for the graph enrichment process.", + ), + auth_user=Depends(self.service.providers.auth.auth_wrapper), + response_model=WrappedKGEnrichmentResponse, + ): + """ + This endpoint enriches the graph with additional information. 
It creates communities of nodes based on their similarity and adds embeddings to the graph. This step is necessary for GraphRAG to work. + """ + + if not auth_user.is_superuser: + logger.warning("Implement permission checks here.") + + server_kg_enrichment_settings = ( + self.service.providers.kg.config.kg_enrichment_settings + ) + + if kg_enrichment_settings: + for key, value in kg_enrichment_settings.items(): + if value is not None: + setattr(server_kg_enrichment_settings, key, value) + + workflow_input = { + "collection_id": collection_id, + "kg_enrichment_settings": server_kg_enrichment_settings.model_dump_json(), + "user": auth_user.json(), + } + + return await self.orchestration_provider.run_workflow( + "enrich-graph", {"request": workflow_input}, {} + ) diff --git a/py/core/main/api/management_router.py b/py/core/main/api/management_router.py index cb37b0315..f3fb34fa5 100644 --- a/py/core/main/api/management_router.py +++ b/py/core/main/api/management_router.py @@ -41,19 +41,13 @@ class ManagementRouter(BaseRouter): def __init__( self, service: ManagementService, + orchestration_provider: OrchestrationProvider, run_type: RunType = RunType.MANAGEMENT, - orchestration_provider: Optional[OrchestrationProvider] = None, ): - super().__init__(service, run_type, orchestration_provider) + super().__init__(service, orchestration_provider, run_type) self.service: ManagementService = service # for type hinting self.start_time = datetime.now(timezone.utc) - def _register_workflows(self): - pass - - def _load_openapi_extras(self): - return {} - # TODO: remove this from the management route, it should be at the base of the server def _setup_routes(self): @self.router.get("/health") @@ -374,19 +368,21 @@ async def document_chunks_app( ): document_uuid = UUID(document_id) - document_chunks_result = await self.service.document_chunks( + document_chunks = await self.service.document_chunks( document_uuid, offset, limit ) + document_chunks_result = document_chunks["results"] + if not document_chunks_result: raise R2RException( "No chunks found for the given document ID.", 404, ) - is_owner = str( - document_chunks_result["results"][0].get("user_id") - ) == str(auth_user.id) + is_owner = str(document_chunks_result[0].get("user_id")) == str( + auth_user.id + ) if not is_owner and not auth_user.is_superuser: raise R2RException( @@ -394,30 +390,10 @@ async def document_chunks_app( 403, ) - return document_chunks_result["results"], { - "total_entries": document_chunks_result["total_entries"] + return document_chunks_result, { + "total_entries": document_chunks["total_entries"] } - @self.router.get("/inspect_knowledge_graph") - @self.base_endpoint - async def inspect_knowledge_graph( - offset: int = 0, - limit: int = 100, - print_descriptions: bool = False, - auth_user=Depends(self.service.providers.auth.auth_wrapper), - response_model=WrappedKnowledgeGraphResponse, - ): - if not auth_user.is_superuser: - raise R2RException( - "Only a superuser can call the `inspect_knowledge_graph` endpoint.", - 403, - ) - return await self.service.inspect_knowledge_graph( - offset=offset, - limit=limit, - print_descriptions=print_descriptions, - ) - @self.router.get("/collections_overview") @self.base_endpoint async def collections_overview_app( diff --git a/py/core/main/api/restructure_router.py b/py/core/main/api/restructure_router.py deleted file mode 100644 index d65ee80fb..000000000 --- a/py/core/main/api/restructure_router.py +++ /dev/null @@ -1,167 +0,0 @@ -import logging -from pathlib import Path -from typing 
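A hedged sketch of driving the two new KG routes, which app.py below mounts under /v2; the collection ID and bearer token are placeholders.

import requests

# Hypothetical calls to the new KG endpoints; paths follow the route decorators above.
headers = {"Authorization": "Bearer <access_token>"}

# Extract entities and relationships for a collection.
requests.post(
    "http://localhost:7272/v2/create_graph",
    json={"collection_id": "<collection_id>"},
    headers=headers,
)

# After extraction completes, build communities and summaries for GraphRAG.
requests.post(
    "http://localhost:7272/v2/enrich_graph",
    json={"collection_id": "<collection_id>"},
    headers=headers,
)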
import Optional - -import yaml -from fastapi import Body, Depends -from pydantic import Json - -from core.base import KGCreationSettings, KGEnrichmentSettings -from core.base.api.models import ( - WrappedKGCreationResponse, - WrappedKGEnrichmentResponse, -) -from core.base.providers import OrchestrationProvider - -from ...main.hatchet import r2r_hatchet -from ..hatchet import ( - CreateGraphWorkflow, - EnrichGraphWorkflow, - KGCommunitySummaryWorkflow, - KgExtractAndStoreWorkflow, -) -from ..services.restructure_service import RestructureService -from .base_router import BaseRouter, RunType - -logger = logging.getLogger(__name__) - - -class RestructureRouter(BaseRouter): - def __init__( - self, - service: RestructureService, - run_type: RunType = RunType.RESTRUCTURE, - orchestration_provider: Optional[OrchestrationProvider] = None, - ): - if not orchestration_provider: - raise ValueError( - "RestructureRouter requires an orchestration provider." - ) - super().__init__(service, run_type, orchestration_provider) - self.service: RestructureService = service - - def _load_openapi_extras(self): - yaml_path = ( - Path(__file__).parent / "data" / "restructure_router_openapi.yml" - ) - with open(yaml_path, "r") as yaml_file: - yaml_content = yaml.safe_load(yaml_file) - return yaml_content - - def _register_workflows(self): - self.orchestration_provider.register_workflow( - EnrichGraphWorkflow(self.service) - ) - self.orchestration_provider.register_workflow( - KgExtractAndStoreWorkflow(self.service) - ) - self.orchestration_provider.register_workflow( - CreateGraphWorkflow(self.service) - ) - self.orchestration_provider.register_workflow( - KGCommunitySummaryWorkflow(self.service) - ) - - def _setup_routes(self): - @self.router.post( - "/create_graph", - ) - @self.base_endpoint - async def create_graph( - document_ids: Optional[list[str]] = Body( - default=None, - description="List of document IDs to create the graph on.", - ), - kg_creation_settings: Optional[Json[KGCreationSettings]] = Body( - default=None, - description="Settings for the graph creation process.", - ), - auth_user=Depends(self.service.providers.auth.auth_wrapper), - response_model=WrappedKGCreationResponse, - ): - """ - Creating a graph on your documents. This endpoint takes input a list of document ids and KGCreationSettings. If document IDs are not provided, the graph will be created on all documents in the system. - - This step extracts the relevant entities and relationships from the documents and creates a graph based on the extracted information. You can view the graph through the neo4j browser. - - In order to do GraphRAG, you will need to run the enrich_graph endpoint. - """ - # Check if the user is a superuser - if not auth_user.is_superuser: - # Add any necessary permission checks here - pass - - if kg_creation_settings is None: - kg_creation_settings = ( - self.service.providers.kg.config.kg_creation_settings - ) - - workflow_input = { - "document_ids": document_ids, - "kg_creation_settings": kg_creation_settings.json(), - "user": auth_user.json(), - } - - task_id = r2r_hatchet.admin.run_workflow( - "create-graph", {"request": workflow_input} - ) - - return { - "message": f"Graph creation task queued successfully. 
Please check http:// for completion status.", - "task_id": str(task_id), - } - - @self.router.post( - "/enrich_graph", - ) - @self.base_endpoint - async def enrich_graph( - skip_clustering: bool = Body( - default=False, - description="Whether to skip leiden clustering on the graph or not.", - ), - force_enrichment: bool = Body( - default=False, - description="Force Enrichment step even if graph creation is still in progress for some documents.", - ), - kg_enrichment_settings: Optional[ - Json[KGEnrichmentSettings] - ] = Body( - default=None, - description="Settings for the graph enrichment process.", - ), - auth_user=Depends(self.service.providers.auth.auth_wrapper), - response_model=WrappedKGEnrichmentResponse, - ): - """ - This endpoint enriches the graph with additional information. It creates communities of nodes based on their similarity and adds embeddings to the graph. This step is necessary for GraphRAG to work. - """ - # Check if the user is a superuser - if not auth_user.is_superuser: - # Add any necessary permission checks here - pass - - if kg_enrichment_settings is None: - kg_enrichment_settings = ( - self.service.providers.kg.config.kg_enrichment_settings - ) - - workflow_input = { - "skip_clustering": skip_clustering, - "force_enrichment": force_enrichment, - "generation_config": kg_enrichment_settings.generation_config.to_dict(), - "max_description_input_length": kg_enrichment_settings.max_description_input_length, - "max_summary_input_length": kg_enrichment_settings.max_summary_input_length, - "max_description_input_length": kg_enrichment_settings.max_description_input_length, - "leiden_params": kg_enrichment_settings.leiden_params, - "user": auth_user.json(), - } - - task_id = r2r_hatchet.admin.run_workflow( - "enrich-graph", {"request": workflow_input} - ) - - return { - "message": "Graph enrichment task queued successfully. 
Please check http:// for completion status.", - "task_id": str(task_id), - } diff --git a/py/core/main/api/retrieval_router.py b/py/core/main/api/retrieval_router.py index c84b4ae87..5c9a5edfb 100644 --- a/py/core/main/api/retrieval_router.py +++ b/py/core/main/api/retrieval_router.py @@ -1,6 +1,6 @@ import asyncio from pathlib import Path -from typing import Optional +from typing import Any, Optional import yaml from fastapi import Body, Depends @@ -29,10 +29,10 @@ class RetrievalRouter(BaseRouter): def __init__( self, service: RetrievalService, + orchestration_provider: OrchestrationProvider, run_type: RunType = RunType.RETRIEVAL, - orchestration_provider: Optional[OrchestrationProvider] = None, ): - super().__init__(service, run_type, orchestration_provider) + super().__init__(service, orchestration_provider, run_type) self.service: RetrievalService = service # for type hinting def _load_openapi_extras(self): @@ -46,6 +46,48 @@ def _load_openapi_extras(self): def _register_workflows(self): pass + def _select_filters( + self, auth_user: Any, vector_search_settings: VectorSearchSettings + ) -> dict[str, Any]: + selected_collections = { + str(cid) + for cid in set(vector_search_settings.selected_collection_ids) + } + + if auth_user.is_superuser: + if selected_collections: + # For superusers, we only filter by selected collections + filters = { + "collection_ids": {"$overlap": list(selected_collections)} + } + else: + filters = {} + else: + user_collections = set(auth_user.collection_ids) + + if selected_collections: + allowed_collections = user_collections.intersection( + selected_collections + ) + else: + allowed_collections = user_collections + # for non-superusers, we filter by user_id and selected & allowed collections + filters = { + "$or": [ + {"user_id": {"$eq": auth_user.id}}, + { + "collection_ids": { + "$overlap": list(allowed_collections) + } + }, + ] # type: ignore + } + + if vector_search_settings.filters != {}: + filters = {"$and": [filters, vector_search_settings.filters]} # type: ignore + + return filters + def _setup_routes(self): search_extras = self.openapi_extras.get("search", {}) search_descriptions = search_extras.get("input_descriptions", {}) @@ -78,35 +120,12 @@ async def search_app( Allowed operators include `eq`, `neq`, `gt`, `gte`, `lt`, `lte`, `like`, `ilike`, `in`, and `nin`. - """ - user_collections = set(auth_user.collection_ids) - selected_collections = set( - vector_search_settings.selected_collection_ids - ) - allowed_collections = user_collections.intersection( - selected_collections - ) - if selected_collections - allowed_collections != set(): - raise ValueError( - "User does not have access to the specified collection(s): " - f"{selected_collections - allowed_collections}" - ) - filters = { - "$or": [ - {"user_id": {"$eq": str(auth_user.id)}}, - { - "collection_ids": { - "$overlap": list(allowed_collections) - } - }, - ] - } - if vector_search_settings.filters != {}: - filters = {"$and": [filters, vector_search_settings.filters]} + vector_search_settings.filters = self._select_filters( + auth_user, vector_search_settings + ) - vector_search_settings.filters = filters results = await self.service.search( query=query, vector_search_settings=vector_search_settings, @@ -155,21 +174,10 @@ async def rag_app( The generation process can be customized using the rag_generation_config parameter. 
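To make the consolidated filter logic concrete, a sketch of what _select_filters above yields for a non-superuser; the IDs are illustrative placeholders.

# Non-superuser with collection_ids={"c1", "c2"}, selected_collection_ids=["c2"],
# and vector_search_settings.filters={"document_id": {"$eq": "<document_id>"}}:
effective_filters = {
    "$and": [
        {
            "$or": [
                {"user_id": {"$eq": "<auth_user.id>"}},
                {"collection_ids": {"$overlap": ["c2"]}},
            ]
        },
        {"document_id": {"$eq": "<document_id>"}},
    ]
}
# A superuser with the same selection gets {"collection_ids": {"$overlap": ["c2"]}}
# in place of the $or clause; with no selection and no extra filters the result is {}.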
""" - allowed_collections = set(auth_user.collection_ids) - filters = { - "$or": [ - {"user_id": str(auth_user.id)}, - { - "collection_ids": { - "$overlap": list(allowed_collections) - } - }, - ] - } - if vector_search_settings.filters != {}: - filters = {"$and": [filters, vector_search_settings.filters]} - vector_search_settings.filters = filters + vector_search_settings.filters = self._select_filters( + auth_user, vector_search_settings + ) response = await self.service.rag( query=query, @@ -240,23 +248,10 @@ async def agent_app( The agent's behavior can be customized using the rag_generation_config and task_prompt_override parameters. """ - # TODO - Don't just copy paste the same code, refactor this - user = auth_user - allowed_collections = set(user.collection_ids) - filters = { - "$or": [ - {"user_id": str(user.id)}, - { - "collection_ids": { - "$overlap": list(allowed_collections) - } - }, - ] - } - if vector_search_settings.filters != {}: - filters = {"$and": [filters, vector_search_settings.filters]} - vector_search_settings.filters = filters + vector_search_settings.filters = self._select_filters( + auth_user, vector_search_settings + ) try: response = await self.service.agent( diff --git a/py/core/main/app.py b/py/core/main/app.py index be4eefbde..7ad558069 100644 --- a/py/core/main/app.py +++ b/py/core/main/app.py @@ -6,8 +6,8 @@ from .api.auth_router import AuthRouter from .api.ingestion_router import IngestionRouter +from .api.kg_router import KGRouter from .api.management_router import ManagementRouter -from .api.restructure_router import RestructureRouter from .api.retrieval_router import RetrievalRouter from .config import R2RConfig @@ -20,28 +20,27 @@ def __init__( auth_router: AuthRouter, ingestion_router: IngestionRouter, management_router: ManagementRouter, - retrieval_router: RestructureRouter, - restructure_router: RetrievalRouter, + retrieval_router: RetrievalRouter, + kg_router: KGRouter, ): self.config = config self.ingestion_router = ingestion_router self.management_router = management_router self.retrieval_router = retrieval_router self.auth_router = auth_router - self.restructure_router = restructure_router + self.kg_router = kg_router self.orchestration_provider = orchestration_provider self.app = FastAPI() self._setup_routes() self._apply_cors() def _setup_routes(self): - # Include routers in the app self.app.include_router(self.ingestion_router, prefix="/v2") self.app.include_router(self.management_router, prefix="/v2") self.app.include_router(self.retrieval_router, prefix="/v2") self.app.include_router(self.auth_router, prefix="/v2") - self.app.include_router(self.restructure_router, prefix="/v2") + self.app.include_router(self.kg_router, prefix="/v2") @self.app.get("/v2/openapi_spec") async def openapi_spec(): diff --git a/py/core/main/app_entry.py b/py/core/main/app_entry.py index 300f5b8f9..3b6a8a4d6 100644 --- a/py/core/main/app_entry.py +++ b/py/core/main/app_entry.py @@ -38,7 +38,7 @@ async def lifespan(app: FastAPI): yield - # Shutdown + # # Shutdown scheduler.shutdown() diff --git a/py/core/main/assembly/builder.py b/py/core/main/assembly/builder.py index be2e0e700..1d5a42cf8 100644 --- a/py/core/main/assembly/builder.py +++ b/py/core/main/assembly/builder.py @@ -20,15 +20,15 @@ from ..api.auth_router import AuthRouter from ..api.ingestion_router import IngestionRouter +from ..api.kg_router import KGRouter from ..api.management_router import ManagementRouter -from ..api.restructure_router import RestructureRouter from ..api.retrieval_router import 
RetrievalRouter from ..app import R2RApp from ..config import R2RConfig from ..services.auth_service import AuthService from ..services.ingestion_service import IngestionService +from ..services.kg_service import KgService from ..services.management_service import ManagementService -from ..services.restructure_service import RestructureService from ..services.retrieval_service import RetrievalService from .factory import ( R2RAgentFactory, @@ -45,9 +45,9 @@ class ProviderOverrides: auth: Optional[AuthProvider] = None database: Optional[DatabaseProvider] = None embedding: Optional[EmbeddingProvider] = None + kg: Optional[KGProvider] = None llm: Optional[CompletionProvider] = None prompt: Optional[PromptProvider] = None - kg: Optional[KGProvider] = None crypto: Optional[CryptoProvider] = None orchestration: Optional[OrchestrationProvider] = None @@ -56,15 +56,14 @@ class ProviderOverrides: class PipeOverrides: parsing: Optional[AsyncPipe] = None embedding: Optional[AsyncPipe] = None - vector_storage: Optional[AsyncPipe] = None - vector_search: Optional[AsyncPipe] = None rag: Optional[AsyncPipe] = None streaming_rag: Optional[AsyncPipe] = None + vector_storage: Optional[AsyncPipe] = None + vector_search: Optional[AsyncPipe] = None kg: Optional[AsyncPipe] = None kg_storage: Optional[AsyncPipe] = None kg_search: Optional[AsyncPipe] = None - kg_node_extraction: Optional[AsyncPipe] = None - kg_node_description: Optional[AsyncPipe] = None + kg_entity_description: Optional[AsyncPipe] = None kg_clustering: Optional[AsyncPipe] = None kg_community_summary: Optional[AsyncPipe] = None @@ -83,7 +82,7 @@ class ServiceOverrides: ingestion: Optional["IngestionService"] = None management: Optional["ManagementService"] = None retrieval: Optional["RetrievalService"] = None - restructure: Optional["RestructureService"] = None + kg: Optional["KgService"] = None class R2RBuilder: @@ -183,6 +182,7 @@ def _create_services( ) -> Dict[str, Any]: services = {} for service_type, override in vars(self.service_overrides).items(): + logger.info(f"Creating {service_type} service") service_class = globals()[f"{service_type.capitalize()}Service"] services[service_type] = override or service_class( **service_params @@ -233,19 +233,23 @@ async def build(self, *args, **kwargs) -> R2RApp: orchestration_provider = providers.orchestration routers = { - "auth_router": AuthRouter(services["auth"]).get_router(), + "auth_router": AuthRouter( + services["auth"], orchestration_provider=orchestration_provider + ).get_router(), "ingestion_router": IngestionRouter( services["ingestion"], orchestration_provider=orchestration_provider, ).get_router(), "management_router": ManagementRouter( - services["management"] + services["management"], + orchestration_provider=orchestration_provider, ).get_router(), "retrieval_router": RetrievalRouter( - services["retrieval"] + services["retrieval"], + orchestration_provider=orchestration_provider, ).get_router(), - "restructure_router": RestructureRouter( - services["restructure"], + "kg_router": KGRouter( + services["kg"], orchestration_provider=orchestration_provider, ).get_router(), } diff --git a/py/core/main/assembly/factory.py b/py/core/main/assembly/factory.py index 450f5a035..ec61dda16 100644 --- a/py/core/main/assembly/factory.py +++ b/py/core/main/assembly/factory.py @@ -7,8 +7,6 @@ AsyncPipe, AuthConfig, AuthProvider, - ChunkingConfig, - ChunkingProvider, CompletionConfig, CompletionProvider, CryptoConfig, @@ -19,9 +17,10 @@ EmbeddingProvider, FileConfig, FileProvider, + IngestionConfig, + 
IngestionProvider, KGProvider, - ParsingConfig, - ParsingProvider, + OrchestrationConfig, PromptConfig, PromptProvider, RunLoggingSingleton, @@ -80,66 +79,54 @@ def create_crypto_provider( ) @staticmethod - def create_parsing_provider( - parsing_config: ParsingConfig, *args, **kwargs - ) -> ParsingProvider: - if parsing_config.provider == "r2r": - from core.providers import R2RParsingProvider - - return R2RParsingProvider(parsing_config) - elif parsing_config.provider in [ - "unstructured_local", - "unstructured_api", - ]: - from core.providers import UnstructuredParsingProvider + def create_ingestion_provider( + ingestion_config: IngestionConfig, *args, **kwargs + ) -> IngestionProvider: + config_dict = ingestion_config.model_dump() + extra_fields = config_dict.pop("extra_fields", {}) - return UnstructuredParsingProvider( - parsing_config.provider == "unstructured_api", parsing_config - ) - else: - raise ValueError( - f"Parsing provider {parsing_config.provider} not supported" - ) + if ingestion_config.provider == "r2r": + from core.providers import R2RIngestionConfig, R2RIngestionProvider - @staticmethod - def create_chunking_provider( - chunking_config: ChunkingConfig, *args, **kwargs - ) -> ChunkingProvider: - chunking_config.validate_config() - if chunking_config.provider == "r2r": - from core.base import R2RChunkingConfig - from core.providers import R2RChunkingProvider - - chunking_config_r2r = R2RChunkingConfig( - **chunking_config.extra_fields + r2r_ingestion_config = R2RIngestionConfig( + **config_dict, **extra_fields ) - return R2RChunkingProvider(chunking_config_r2r) - elif chunking_config.provider in [ + return R2RIngestionProvider(r2r_ingestion_config) + elif ingestion_config.provider in [ "unstructured_local", "unstructured_api", ]: - from core.base import UnstructuredChunkingConfig - from core.providers import UnstructuredChunkingProvider + from core.providers import ( + UnstructuredIngestionConfig, + UnstructuredIngestionProvider, + ) + + unstructured_ingestion_config = UnstructuredIngestionConfig( + **config_dict, **extra_fields + ) - chunking_config_unst = UnstructuredChunkingConfig( - **chunking_config.extra_fields + return UnstructuredIngestionProvider( + unstructured_ingestion_config, ) - return UnstructuredChunkingProvider(chunking_config_unst) else: raise ValueError( - f"Chunking provider {chunking_config.provider} not supported" + f"Ingestion provider {ingestion_config.provider} not supported" ) @staticmethod - def create_orchestration_provider(*args, **kwargs): - from core.base.providers import OrchestrationConfig - from core.providers import HatchetOrchestrationProvider + def create_orchestration_provider( + config: OrchestrationConfig, *args, **kwargs + ): + if config.provider == "hatchet": + from core.providers import HatchetOrchestrationProvider - orchestration_provider = HatchetOrchestrationProvider( - OrchestrationConfig(provider="hatchet") - ) - orchestration_provider.get_worker("r2r-worker") - return orchestration_provider + orchestration_provider = HatchetOrchestrationProvider(config) + orchestration_provider.get_worker("r2r-worker") + return orchestration_provider + elif config.provider == "simple": + from core.providers import SimpleOrchestrationProvider + + return SimpleOrchestrationProvider(config) async def create_database_provider( self, @@ -265,11 +252,18 @@ async def create_prompt_provider( return prompt_provider @staticmethod - def create_kg_provider(kg_config, *args, **kwargs): - if kg_config.provider == "neo4j": - from core.providers import 
Neo4jKGProvider + async def create_kg_provider( + kg_config, database_provider, embedding_provider, *args, **kwargs + ): + if kg_config.provider == "postgres": + from core.providers import PostgresKGProvider + + provider = PostgresKGProvider( + kg_config, database_provider, embedding_provider + ) + await provider.initialize() + return provider - return Neo4jKGProvider(kg_config) elif kg_config.provider is None: return None else: @@ -279,21 +273,19 @@ def create_kg_provider(kg_config, *args, **kwargs): async def create_providers( self, - embedding_provider_override: Optional[EmbeddingProvider] = None, - llm_provider_override: Optional[CompletionProvider] = None, - prompt_provider_override: Optional[PromptProvider] = None, - kg_provider_override: Optional[KGProvider] = None, - crypto_provider_override: Optional[CryptoProvider] = None, auth_provider_override: Optional[AuthProvider] = None, + crypto_provider_override: Optional[CryptoProvider] = None, database_provider_override: Optional[DatabaseProvider] = None, - parsing_provider_override: Optional[ParsingProvider] = None, - chunking_config: Optional[ChunkingProvider] = None, + embedding_provider_override: Optional[EmbeddingProvider] = None, file_provider_override: Optional[FileProvider] = None, + ingestion_provider_override: Optional[IngestionProvider] = None, + kg_provider_override: Optional[KGProvider] = None, + llm_provider_override: Optional[CompletionProvider] = None, + prompt_provider_override: Optional[PromptProvider] = None, orchestration_provider_override: Optional[Any] = None, *args, **kwargs, ) -> R2RProviders: - embedding_provider = ( embedding_provider_override or self.create_embedding_provider( @@ -301,14 +293,16 @@ async def create_providers( ) ) - llm_provider = llm_provider_override or self.create_llm_provider( - self.config.completion, *args, **kwargs + ingestion_provider = ( + ingestion_provider_override + or self.create_ingestion_provider( + self.config.ingestion, *args, **kwargs + ) ) - kg_provider = kg_provider_override or self.create_kg_provider( - self.config.kg, *args, **kwargs + llm_provider = llm_provider_override or self.create_llm_provider( + self.config.completion, *args, **kwargs ) - crypto_provider = ( crypto_provider_override or self.create_crypto_provider(self.config.crypto, *args, **kwargs) @@ -321,6 +315,14 @@ async def create_providers( ) ) + kg_provider = kg_provider_override or await self.create_kg_provider( + self.config.kg, + database_provider, + embedding_provider, + *args, + **kwargs, + ) + auth_provider = ( auth_provider_override or await self.create_auth_provider( @@ -339,33 +341,21 @@ async def create_providers( ) ) - parsing_provider = ( - parsing_provider_override - or self.create_parsing_provider( - self.config.parsing, *args, **kwargs - ) - ) - - chunking_provider = chunking_config or self.create_chunking_provider( - self.config.chunking, *args, **kwargs - ) - file_provider = file_provider_override or await self.create_file_provider( self.config.file, database_provider, *args, **kwargs # type: ignore ) orchestration_provider = ( orchestration_provider_override - or self.create_orchestration_provider() + or self.create_orchestration_provider(self.config.orchestration) ) return R2RProviders( auth=auth_provider, - chunking=chunking_provider, database=database_provider, embedding=embedding_provider, + ingestion=ingestion_provider, llm=llm_provider, - parsing=parsing_provider, prompt=prompt_provider, kg=kg_provider, orchestration=orchestration_provider, @@ -382,33 +372,30 @@ def create_pipes( 
self, parsing_pipe_override: Optional[AsyncPipe] = None, embedding_pipe_override: Optional[AsyncPipe] = None, - kg_extraction_pipe_override: Optional[AsyncPipe] = None, + kg_triples_extraction_pipe_override: Optional[AsyncPipe] = None, kg_storage_pipe_override: Optional[AsyncPipe] = None, kg_search_pipe_override: Optional[AsyncPipe] = None, vector_storage_pipe_override: Optional[AsyncPipe] = None, vector_search_pipe_override: Optional[AsyncPipe] = None, rag_pipe_override: Optional[AsyncPipe] = None, streaming_rag_pipe_override: Optional[AsyncPipe] = None, - kg_node_extraction_pipe: Optional[AsyncPipe] = None, - kg_node_description_pipe: Optional[AsyncPipe] = None, + kg_entity_description_pipe: Optional[AsyncPipe] = None, kg_clustering_pipe: Optional[AsyncPipe] = None, kg_community_summary_pipe: Optional[AsyncPipe] = None, - chunking_pipe_override: Optional[AsyncPipe] = None, *args, **kwargs, ) -> R2RPipes: return R2RPipes( parsing_pipe=parsing_pipe_override or self.create_parsing_pipe( - self.config.parsing.excluded_parsers, - self.config.parsing.override_parsers, + self.config.ingestion.excluded_parsers, *args, **kwargs, ), embedding_pipe=embedding_pipe_override or self.create_embedding_pipe(*args, **kwargs), - kg_extraction_pipe=kg_extraction_pipe_override - or self.create_kg_extraction_pipe(*args, **kwargs), + kg_triples_extraction_pipe=kg_triples_extraction_pipe_override + or self.create_kg_triples_extraction_pipe(*args, **kwargs), kg_storage_pipe=kg_storage_pipe_override or self.create_kg_storage_pipe(*args, **kwargs), vector_storage_pipe=vector_storage_pipe_override @@ -421,35 +408,23 @@ def create_pipes( or self.create_rag_pipe(*args, **kwargs), streaming_rag_pipe=streaming_rag_pipe_override or self.create_rag_pipe(True, *args, **kwargs), - kg_node_extraction_pipe=kg_node_extraction_pipe - or self.create_kg_node_extraction_pipe(*args, **kwargs), - kg_node_description_pipe=kg_node_description_pipe - or self.create_kg_node_description_pipe(*args, **kwargs), + kg_entity_description_pipe=kg_entity_description_pipe + or self.create_kg_entity_description_pipe(*args, **kwargs), kg_clustering_pipe=kg_clustering_pipe or self.create_kg_clustering_pipe(*args, **kwargs), kg_community_summary_pipe=kg_community_summary_pipe or self.create_kg_community_summary_pipe(*args, **kwargs), - chunking_pipe=chunking_pipe_override - or self.create_chunking_pipe(*args, **kwargs), ) def create_parsing_pipe(self, *args, **kwargs) -> Any: from core.pipes import ParsingPipe return ParsingPipe( - parsing_provider=self.providers.parsing, + ingestion_provider=self.providers.ingestion, file_provider=self.providers.file, config=AsyncPipe.PipeConfig(name="parsing_pipe"), ) - def create_chunking_pipe(self, *args, **kwargs) -> Any: - from core.pipes import ChunkingPipe - - return ChunkingPipe( - chunking_provider=self.providers.chunking, - config=AsyncPipe.PipeConfig(name="chunking_pipe"), - ) - def create_embedding_pipe(self, *args, **kwargs) -> Any: if self.config.embedding.provider is None: return None @@ -550,7 +525,7 @@ def create_vector_search_pipe(self, *args, **kwargs) -> Any: config=AsyncPipe.PipeConfig(name="routing_search_pipe"), ) - def create_kg_extraction_pipe(self, *args, **kwargs) -> Any: + def create_kg_triples_extraction_pipe(self, *args, **kwargs) -> Any: if self.config.kg.provider is None: return None @@ -561,8 +536,7 @@ def create_kg_extraction_pipe(self, *args, **kwargs) -> Any: llm_provider=self.providers.llm, database_provider=self.providers.database, prompt_provider=self.providers.prompt, - 
chunking_provider=self.providers.chunking, - config=AsyncPipe.PipeConfig(name="kg_extraction_pipe"), + config=AsyncPipe.PipeConfig(name="kg_triples_extraction_pipe"), ) def create_kg_storage_pipe(self, *args, **kwargs) -> Any: @@ -573,7 +547,6 @@ def create_kg_storage_pipe(self, *args, **kwargs) -> Any: return KGStoragePipe( kg_provider=self.providers.kg, - embedding_provider=self.providers.embedding, config=AsyncPipe.PipeConfig(name="kg_storage_pipe"), ) @@ -615,25 +588,15 @@ def create_rag_pipe(self, stream: bool = False, *args, **kwargs) -> Any: ), ) - def create_kg_node_extraction_pipe(self, *args, **kwargs) -> Any: - from core.pipes import KGNodeExtractionPipe - - return KGNodeExtractionPipe( - kg_provider=self.providers.kg, - llm_provider=self.providers.llm, - prompt_provider=self.providers.prompt, - config=AsyncPipe.PipeConfig(name="kg_node_extraction_pipe"), - ) - - def create_kg_node_description_pipe(self, *args, **kwargs) -> Any: - from core.pipes import KGNodeDescriptionPipe + def create_kg_entity_description_pipe(self, *args, **kwargs) -> Any: + from core.pipes import KGEntityDescriptionPipe - return KGNodeDescriptionPipe( + return KGEntityDescriptionPipe( kg_provider=self.providers.kg, llm_provider=self.providers.llm, prompt_provider=self.providers.prompt, embedding_provider=self.providers.embedding, - config=AsyncPipe.PipeConfig(name="kg_node_description_pipe"), + config=AsyncPipe.PipeConfig(name="kg_entity_description_pipe"), ) def create_kg_clustering_pipe(self, *args, **kwargs) -> Any: @@ -680,7 +643,7 @@ def create_search_pipeline(self, *args, **kwargs) -> SearchPipeline: # Add KG pipes if provider is set if self.config.kg.provider is not None: search_pipeline.add_pipe( - self.pipes.kg_search_pipe, kg_extraction_pipe=True + self.pipes.kg_search_pipe, kg_triples_extraction_pipe=True ) return search_pipeline diff --git a/py/core/main/config.py b/py/core/main/config.py index 8069b660f..37798af1c 100644 --- a/py/core/main/config.py +++ b/py/core/main/config.py @@ -11,14 +11,14 @@ from ..base.agent.agent import AgentConfig from ..base.logging.run_logger import LoggingConfig from ..base.providers.auth import AuthConfig -from ..base.providers.chunking import ChunkingConfig from ..base.providers.crypto import CryptoConfig from ..base.providers.database import DatabaseConfig from ..base.providers.embedding import EmbeddingConfig from ..base.providers.file import FileConfig +from ..base.providers.ingestion import IngestionConfig from ..base.providers.kg import KGConfig from ..base.providers.llm import CompletionConfig -from ..base.providers.parsing import ParsingConfig +from ..base.providers.orchestration import OrchestrationConfig from ..base.providers.prompt import PromptConfig logger = logging.getLogger(__name__) @@ -40,6 +40,7 @@ class R2RConfig: CONFIG_OPTIONS["default"] = None REQUIRED_KEYS: dict[str, list] = { + "completion": ["provider"], "crypto": ["provider"], "auth": ["provider"], "embedding": [ @@ -49,32 +50,32 @@ class R2RConfig: "batch_size", "add_title_as_prefix", ], + "ingestion": ["provider"], "kg": [ "provider", "batch_size", "kg_enrichment_settings", ], - "parsing": ["provider", "excluded_parsers"], - "chunking": ["provider"], - "completion": ["provider"], "logging": ["provider", "log_table"], "prompt": ["provider"], "database": ["provider"], "agent": ["generation_config"], "file": ["provider"], + "orchestration": ["provider"], } + auth: AuthConfig - chunking: ChunkingConfig completion: CompletionConfig crypto: CryptoConfig database: DatabaseConfig embedding: 
EmbeddingConfig + ingestion: IngestionConfig kg: KGConfig logging: LoggingConfig - parsing: ParsingConfig prompt: PromptConfig agent: AgentConfig file: FileConfig + orchestration: OrchestrationConfig def __init__( self, config_data: dict[str, Any], base_path: Optional[Path] = None @@ -124,18 +125,16 @@ def __init__( ) self.auth = AuthConfig.create(**self.auth) # type: ignore - self.chunking = ChunkingConfig.create(**self.chunking) # type: ignore self.crypto = CryptoConfig.create(**self.crypto) # type: ignore self.database = DatabaseConfig.create(**self.database) # type: ignore self.embedding = EmbeddingConfig.create(**self.embedding) # type: ignore + self.ingestion = IngestionConfig.create(**self.ingestion) # type: ignore self.kg = KGConfig.create(**self.kg) # type: ignore self.logging = LoggingConfig.create(**self.logging) # type: ignore - if "chunking_config" not in self.parsing: # type: ignore - self.parsing["chunking_config"] = self.chunking # type: ignore - self.parsing = ParsingConfig.create(**self.parsing) # type: ignore self.prompt = PromptConfig.create(**self.prompt) # type: ignore self.agent = AgentConfig.create(**self.agent) # type: ignore self.file = FileConfig.create(**self.file) # type: ignore + self.orchestration = OrchestrationConfig.create(**self.orchestration) # type: ignore def _validate_config_section( self, config_data: dict[str, Any], section: str, keys: list @@ -167,19 +166,6 @@ def to_toml(self): } return toml.dumps(config_data) - def save_to_redis(self, redis_client: Any, key: str): - redis_client.set(f"R2RConfig:{key}", self.to_toml()) - - @classmethod - def load_from_redis(cls, redis_client: Any, key: str) -> "R2RConfig": - config_data = redis_client.get(f"R2RConfig:{key}") - if config_data is None: - raise ValueError( - f"Configuration not found in Redis with key '{key}'" - ) - config_data = toml.loads(config_data) - return cls(config_data) - @classmethod def load_default_config(cls) -> dict: with open(R2RConfig.default_config_path) as f: diff --git a/py/core/main/hatchet/__init__.py b/py/core/main/hatchet/__init__.py deleted file mode 100644 index b63ef5de4..000000000 --- a/py/core/main/hatchet/__init__.py +++ /dev/null @@ -1,18 +0,0 @@ -from .base import r2r_hatchet -from .ingestion_workflow import IngestFilesWorkflow, UpdateFilesWorkflow -from .restructure_workflow import ( - CreateGraphWorkflow, - EnrichGraphWorkflow, - KGCommunitySummaryWorkflow, - KgExtractAndStoreWorkflow, -) - -__all__ = [ - "r2r_hatchet", - "IngestFilesWorkflow", - "UpdateFilesWorkflow", - "EnrichGraphWorkflow", - "CreateGraphWorkflow", - "KgExtractAndStoreWorkflow", - "KGCommunitySummaryWorkflow", -] diff --git a/py/core/main/hatchet/base.py b/py/core/main/hatchet/base.py deleted file mode 100644 index d38a51d52..000000000 --- a/py/core/main/hatchet/base.py +++ /dev/null @@ -1,6 +0,0 @@ -from hatchet_sdk import Hatchet - -try: - r2r_hatchet = Hatchet() -except ImportError: - r2r_hatchet = None diff --git a/py/core/main/hatchet/ingestion_workflow.py b/py/core/main/hatchet/ingestion_workflow.py deleted file mode 100644 index 107a6cea5..000000000 --- a/py/core/main/hatchet/ingestion_workflow.py +++ /dev/null @@ -1,278 +0,0 @@ -import asyncio -import logging - -from hatchet_sdk import Context - -from core.base import IngestionStatus, increment_version -from core.base.abstractions import DocumentInfo, R2RException - -from ..services import IngestionService, IngestionServiceAdapter -from .base import r2r_hatchet - -logger = logging.getLogger(__name__) - - -@r2r_hatchet.workflow( - 
name="ingest-file", - timeout="60m", -) -class IngestFilesWorkflow: - def __init__(self, ingestion_service: IngestionService): - self.ingestion_service = ingestion_service - - @r2r_hatchet.step(timeout="60m") - async def parse(self, context: Context) -> dict: - input_data = context.workflow_input()["request"] - print("input_data = ", input_data) - parsed_data = IngestionServiceAdapter.parse_ingest_file_input( - input_data - ) - - ingestion_result = await self.ingestion_service.ingest_file_ingress( - **parsed_data - ) - - document_info = ingestion_result["info"] - - await self.ingestion_service.update_document_status( - document_info, - status=IngestionStatus.PARSING, - ) - - extractions_generator = await self.ingestion_service.parse_file( - document_info - ) - - extractions = [] - async for extraction in extractions_generator: - extractions.append(extraction) - - serializable_extractions = [ - fragment.to_dict() for fragment in extractions - ] - - return { - "status": "Successfully extracted data", - "extractions": serializable_extractions, - "document_info": document_info.to_dict(), - } - - @r2r_hatchet.step(parents=["parse"], timeout="60m") - async def chunk(self, context: Context) -> dict: - document_info_dict = context.step_output("parse")["document_info"] - document_info = DocumentInfo(**document_info_dict) - - await self.ingestion_service.update_document_status( - document_info, - status=IngestionStatus.CHUNKING, - ) - - extractions = context.step_output("parse")["extractions"] - chunking_config = context.workflow_input()["request"].get( - "chunking_config" - ) - - chunk_generator = await self.ingestion_service.chunk_document( - extractions, - chunking_config, - ) - - chunks = [] - async for chunk in chunk_generator: - chunks.append(chunk) - - serializable_chunks = [chunk.to_dict() for chunk in chunks] - - return { - "status": "Successfully chunked data", - "chunks": serializable_chunks, - "document_info": document_info.to_dict(), - } - - @r2r_hatchet.step(parents=["chunk"], timeout="60m") - async def embed(self, context: Context) -> dict: - document_info_dict = context.step_output("chunk")["document_info"] - document_info = DocumentInfo(**document_info_dict) - - await self.ingestion_service.update_document_status( - document_info, - status=IngestionStatus.EMBEDDING, - ) - - chunks = context.step_output("chunk")["chunks"] - - embedding_generator = await self.ingestion_service.embed_document( - chunks - ) - - embeddings = [] - async for embedding in embedding_generator: - embeddings.append(embedding) - - await self.ingestion_service.update_document_status( - document_info, - status=IngestionStatus.STORING, - ) - - storage_generator = await self.ingestion_service.store_embeddings( # type: ignore - embeddings - ) - - async for _ in storage_generator: - pass - - return { - "document_info": document_info.to_dict(), - } - - @r2r_hatchet.step(parents=["embed"], timeout="60m") - async def finalize(self, context: Context) -> dict: - document_info_dict = context.step_output("embed")["document_info"] - document_info = DocumentInfo(**document_info_dict) - - is_update = context.workflow_input()["request"].get("is_update") - - await self.ingestion_service.finalize_ingestion( - document_info, is_update=is_update - ) - - await self.ingestion_service.update_document_status( - document_info, - status=IngestionStatus.SUCCESS, - ) - - return { - "status": "Successfully finalized ingestion", - "document_info": document_info.to_dict(), - } - - @r2r_hatchet.on_failure_step() - async def on_failure(self, 
context: Context) -> None: - request = context.workflow_input().get("request", {}) - document_id = request.get("document_id") - - if not document_id: - logger.error( - "No document id was found in workflow input to mark a failure." - ) - return - - try: - documents_overview = await self.ingestion_service.providers.database.relational.get_documents_overview( - filter_document_ids=[document_id] - ) - - if not documents_overview: - logger.error( - f"Document with id {document_id} not found in database to mark failure." - ) - return - - document_info = documents_overview[0] - - # Update the document status to FAILURE - await self.ingestion_service.update_document_status( - document_info, - status=IngestionStatus.FAILURE, - ) - - except Exception as e: - logger.error( - f"Failed to update document status for {document_id}: {e}" - ) - - -# TODO: Implement a check to see if the file is actually changed before updating -@r2r_hatchet.workflow(name="update-files", timeout="60m") -class UpdateFilesWorkflow: - def __init__(self, ingestion_service: IngestionService): - self.ingestion_service = ingestion_service - - @r2r_hatchet.step(retries=0, timeout="60m") - async def update_files(self, context: Context) -> None: - data = context.workflow_input()["request"] - parsed_data = IngestionServiceAdapter.parse_update_files_input(data) - - file_datas = parsed_data["file_datas"] - user = parsed_data["user"] - document_ids = parsed_data["document_ids"] - metadatas = parsed_data["metadatas"] - chunking_config = parsed_data["chunking_config"] - file_sizes_in_bytes = parsed_data["file_sizes_in_bytes"] - - if not file_datas: - raise R2RException( - status_code=400, message="No files provided for update." - ) - if len(document_ids) != len(file_datas): - raise R2RException( - status_code=400, - message="Number of ids does not match number of files.", - ) - - documents_overview = ( - await self.ingestion_service.providers.database.relational.get_documents_overview( - filter_document_ids=document_ids, - filter_user_ids=None if user.is_superuser else [user.id], - ) - )["results"] - if len(documents_overview) != len(document_ids): - raise R2RException( - status_code=404, - message="One or more documents not found.", - ) - - results = [] - - for idx, ( - file_data, - doc_id, - doc_info, - file_size_in_bytes, - ) in enumerate( - zip( - file_datas, - document_ids, - documents_overview, - file_sizes_in_bytes, - ) - ): - new_version = increment_version(doc_info.version) - - updated_metadata = ( - metadatas[idx] if metadatas else doc_info.metadata - ) - updated_metadata["title"] = ( - updated_metadata.get("title") - or file_data["filename"].split("/")[-1] - ) - - # Prepare input for ingest_file workflow - ingest_input = { - "file_data": file_data, - "user": data.get("user"), - "metadata": updated_metadata, - "document_id": str(doc_id), - "version": new_version, - "chunking_config": ( - chunking_config.model_dump_json() - if chunking_config - else None - ), - "size_in_bytes": file_size_in_bytes, - "is_update": True, - } - - # Spawn ingest_file workflow as a child workflow - child_result = ( - await context.aio.spawn_workflow( - "ingest-file", - {"request": ingest_input}, - key=f"ingest_file_{doc_id}", - ) - ).result() - results.append(child_result) - - await asyncio.gather(*results) - - return None diff --git a/py/core/main/hatchet/restructure_workflow.py b/py/core/main/hatchet/restructure_workflow.py deleted file mode 100644 index 539624e19..000000000 --- a/py/core/main/hatchet/restructure_workflow.py +++ /dev/null @@ -1,350 
+0,0 @@ -import asyncio -import json -import logging -import uuid - -from hatchet_sdk import ConcurrencyLimitStrategy, Context - -from core import GenerationConfig, IngestionStatus, KGCreationSettings -from core.base import R2RDocumentProcessingError -from core.base.abstractions import RestructureStatus - -from ..services import RestructureService -from .base import r2r_hatchet - -logger = logging.getLogger(__name__) - - -@r2r_hatchet.workflow(name="kg-extract-and-store", timeout="60m") -class KgExtractAndStoreWorkflow: - def __init__(self, restructure_service: RestructureService): - self.restructure_service = restructure_service - - @r2r_hatchet.step(retries=3, timeout="60m") - async def kg_extract_and_store(self, context: Context) -> dict: - input_data = context.workflow_input()["request"] - document_id = uuid.UUID(input_data["document_id"]) - fragment_merge_count = input_data["fragment_merge_count"] - max_knowledge_triples = input_data["max_knowledge_triples"] - entity_types = input_data["entity_types"] - relation_types = input_data["relation_types"] - - document_overview = await self.restructure_service.providers.database.relational.get_documents_overview( - filter_document_ids=[document_id] - ) - document_overview = document_overview["results"][0] - - try: - - # Set restructure status to 'processing' - document_overview.restructuring_status = ( - RestructureStatus.PROCESSING - ) - - await self.restructure_service.providers.database.relational.upsert_documents_overview( - document_overview - ) - - errors = await self.restructure_service.kg_extract_and_store( - document_id=document_id, - generation_config=GenerationConfig( - **input_data["generation_config"] - ), - fragment_merge_count=fragment_merge_count, - max_knowledge_triples=max_knowledge_triples, - entity_types=entity_types, - relation_types=relation_types, - ) - # Set restructure status to 'success' if completed successfully - if len(errors) == 0: - document_overview.restructuring_status = ( - RestructureStatus.SUCCESS - ) - await self.restructure_service.providers.database.relational.upsert_documents_overview( - document_overview - ) - else: - - document_overview.restructuring_status = ( - RestructureStatus.FAILURE - ) - await self.restructure_service.providers.database.relational.upsert_documents_overview( - document_overview - ) - raise R2RDocumentProcessingError( - error_message=f"Error in kg_extract_and_store, list of errors: {errors}", - document_id=document_id, - ) - - except Exception as e: - # Set restructure status to 'failure' if an error occurred - document_overview.restructuring_status = RestructureStatus.FAILURE - await self.restructure_service.providers.database.relational.upsert_documents_overview( - document_overview - ) - raise R2RDocumentProcessingError( - error_message=e, - document_id=document_id, - ) - - return {"result": None} - - -@r2r_hatchet.workflow(name="create-graph", timeout="60m") -class CreateGraphWorkflow: - def __init__(self, restructure_service: RestructureService): - self.restructure_service = restructure_service - - @r2r_hatchet.step(retries=1) - async def kg_extraction_ingress(self, context: Context) -> dict: - input_data = context.workflow_input()["request"] - - kg_creation_settings = KGCreationSettings( - **json.loads(input_data["kg_creation_settings"]) - ) - - documents_overview = ( - await self.restructure_service.providers.database.relational.get_documents_overview() - ) - documents_overview = documents_overview["results"] - - document_ids = [ - doc.id - for doc in documents_overview - 
if doc.restructuring_status != IngestionStatus.SUCCESS - ] - - document_ids = [str(doc_id) for doc_id in document_ids] - - documents_overviews = await self.restructure_service.providers.database.relational.get_documents_overview( - filter_document_ids=document_ids - ) - documents_overviews = documents_overviews["results"] - - # Only run if restructuring_status is pending or failure - filtered_document_ids = [] - for document_overview in documents_overviews: - restructuring_status = document_overview.restructuring_status - if restructuring_status in [ - RestructureStatus.PENDING, - RestructureStatus.FAILURE, - RestructureStatus.ENRICHMENT_FAILURE, - ]: - filtered_document_ids.append(document_overview.id) - elif restructuring_status == RestructureStatus.SUCCESS: - logger.warning( - f"Graph already created for document ID: {document_overview.id}" - ) - elif restructuring_status == RestructureStatus.PROCESSING: - logger.warning( - f"Graph creation is already in progress for document ID: {document_overview.id}" - ) - elif restructuring_status == RestructureStatus.ENRICHED: - logger.warning( - f"Graph is already enriched for document ID: {document_overview.id}" - ) - else: - logger.warning( - f"Unknown restructuring status for document ID: {document_overview.id}" - ) - - results = [] - for document_id in filtered_document_ids: - logger.info( - f"Running Graph Creation Workflow for document ID: {document_id}" - ) - results.append( - ( - context.aio.spawn_workflow( - "kg-extract-and-store", - { - "request": { - "document_id": str(document_id), - "fragment_merge_count": kg_creation_settings.fragment_merge_count, - "max_knowledge_triples": kg_creation_settings.max_knowledge_triples, - "generation_config": kg_creation_settings.generation_config.to_dict(), - "entity_types": kg_creation_settings.entity_types, - "relation_types": kg_creation_settings.relation_types, - } - }, - key=f"kg-extract-and-store_{document_id}", - ) - ) - ) - - if not filtered_document_ids: - logger.info( - "No documents to process, either all graphs were created or in progress, or no documents were provided. Skipping graph creation." 
- ) - return {"result": "success"} - - logger.info(f"Ran {len(results)} workflows for graph creation") - results = await asyncio.gather(*results) - return {"result": "success"} - - -@r2r_hatchet.workflow(name="enrich-graph", timeout="60m") -class EnrichGraphWorkflow: - def __init__(self, restructure_service: RestructureService): - self.restructure_service = restructure_service - - @r2r_hatchet.step(retries=3, timeout="60m") - async def kg_node_creation(self, context: Context) -> dict: - input_data = context.workflow_input()["request"] - max_description_input_length = input_data[ - "max_description_input_length" - ] - await self.restructure_service.kg_node_creation( - max_description_input_length=max_description_input_length - ) - return {"result": None} - - @r2r_hatchet.step(retries=3, parents=["kg_node_creation"], timeout="60m") - async def kg_clustering(self, context: Context) -> dict: - input_data = context.workflow_input()["request"] - skip_clustering = input_data["skip_clustering"] - force_enrichment = input_data["force_enrichment"] - leiden_params = input_data["leiden_params"] - max_summary_input_length = input_data["max_summary_input_length"] - generation_config = GenerationConfig(**input_data["generation_config"]) - - # todo: check if documets are already being clustered - # check if any documents are still being restructured, need to explicitly set the force_clustering flag to true to run clustering if documents are still being restructured - - documents_overview = ( - await self.restructure_service.providers.database.relational.get_documents_overview() - ) - documents_overview = documents_overview["results"] - - if not force_enrichment: - if any( - document_overview.restructuring_status - == RestructureStatus.PROCESSING - for document_overview in documents_overview - ): - logger.error( - "Graph creation is still in progress for some documents, skipping enrichment, please set force_enrichment to true if you want to run enrichment anyway" - ) - return {"result": None} - - if any( - document_overview.restructuring_status - == RestructureStatus.ENRICHING - for document_overview in documents_overview - ): - logger.error( - "Graph enrichment is still in progress for some documents, skipping enrichment, please set force_enrichment to true if you want to run enrichment anyway" - ) - return {"result": None} - - for document_overview in documents_overview: - if document_overview.restructuring_status in [ - RestructureStatus.SUCCESS, - RestructureStatus.ENRICHMENT_FAILURE, - ]: - document_overview.restructuring_status = ( - RestructureStatus.ENRICHING - ) - - await self.restructure_service.providers.database.relational.upsert_documents_overview( - documents_overview - ) - - try: - if not skip_clustering: - results = await self.restructure_service.kg_clustering( - leiden_params, generation_config - ) - - result = results[0] - - # Run community summary workflows - workflows = [] - for level, community_id in result["intermediate_communities"]: - logger.info( - f"Running KG Community Summary Workflow for community ID: {community_id} at level {level}" - ) - workflows.append( - context.aio.spawn_workflow( - "kg-community-summary", - { - "request": { - "community_id": str(community_id), - "level": level, - "generation_config": generation_config.to_dict(), - "max_summary_input_length": max_summary_input_length, - } - }, - key=f"kg-community-summary_{community_id}_{level}", - ) - ) - - results = await asyncio.gather(*workflows) - else: - logger.info( - "Skipping Leiden clustering as skip_clustering 
is True, also skipping community summary workflows" - ) - return {"result": None} - - except Exception as e: - logger.error(f"Error in kg_clustering: {str(e)}", exc_info=True) - documents_overview = ( - await self.restructure_service.providers.database.relational.get_documents_overview() - ) - documents_overview = documents_overview["results"] - for document_overview in documents_overview: - if ( - document_overview.restructuring_status - == RestructureStatus.ENRICHING - ): - document_overview.restructuring_status = ( - RestructureStatus.ENRICHMENT_FAILURE - ) - await self.restructure_service.providers.database.relational.upsert_documents_overview( - document_overview - ) - logger.error( - f"Error in kg_clustering for document {document_overview.id}: {str(e)}" - ) - raise e - - finally: - - documents_overview = ( - await self.restructure_service.providers.database.relational.get_documents_overview() - ) - documents_overview = documents_overview["results"] - for document_overview in documents_overview: - if ( - document_overview.restructuring_status - == RestructureStatus.ENRICHING - ): - document_overview.restructuring_status = ( - RestructureStatus.ENRICHED - ) - - await self.restructure_service.providers.database.relational.upsert_documents_overview( - documents_overview - ) - return {"result": None} - - -@r2r_hatchet.workflow(name="kg-community-summary", timeout="60m") -class KGCommunitySummaryWorkflow: - def __init__(self, restructure_service: RestructureService): - self.restructure_service = restructure_service - - @r2r_hatchet.step(retries=1, timeout="60m") - async def kg_community_summary(self, context: Context) -> dict: - input_data = context.workflow_input()["request"] - community_id = input_data["community_id"] - level = input_data["level"] - generation_config = GenerationConfig(**input_data["generation_config"]) - max_summary_input_length = input_data["max_summary_input_length"] - await self.restructure_service.kg_community_summary( - community_id=community_id, - level=level, - max_summary_input_length=max_summary_input_length, - generation_config=generation_config, - ) - return {"result": None} diff --git a/py/core/main/orchestration/__init__.py b/py/core/main/orchestration/__init__.py new file mode 100644 index 000000000..c75d17b8d --- /dev/null +++ b/py/core/main/orchestration/__init__.py @@ -0,0 +1,11 @@ +from .hatchet.ingestion_workflow import hatchet_ingestion_factory +from .hatchet.kg_workflow import hatchet_kg_factory +from .simple.ingestion_workflow import simple_ingestion_factory +from .simple.kg_workflow import simple_kg_factory + +__all__ = [ + "hatchet_ingestion_factory", + "hatchet_kg_factory", + "simple_ingestion_factory", + "simple_kg_factory", +] diff --git a/py/shared/api/models/restructure/__init__.py b/py/core/main/orchestration/hatchet/__init__.py similarity index 100% rename from py/shared/api/models/restructure/__init__.py rename to py/core/main/orchestration/hatchet/__init__.py diff --git a/py/core/main/orchestration/hatchet/ingestion_workflow.py b/py/core/main/orchestration/hatchet/ingestion_workflow.py new file mode 100644 index 000000000..8199f352a --- /dev/null +++ b/py/core/main/orchestration/hatchet/ingestion_workflow.py @@ -0,0 +1,284 @@ +import asyncio +import logging +from typing import TYPE_CHECKING + +from hatchet_sdk import Context + +from core.base import ( + IngestionStatus, + OrchestrationProvider, + generate_id_from_label, + increment_version, +) +from core.base.abstractions import DocumentInfo, R2RException + +from ...services import 
IngestionService, IngestionServiceAdapter + +if TYPE_CHECKING: + from hatchet_sdk import Hatchet + +logger = logging.getLogger(__name__) + + +def hatchet_ingestion_factory( + orchestration_provider: OrchestrationProvider, service: IngestionService +) -> dict[str, "Hatchet.Workflow"]: + @orchestration_provider.workflow( + name="ingest-file-changed", + timeout="60m", + ) + class HatchetIngestFilesWorkflow: + def __init__(self, ingestion_service: IngestionService): + self.ingestion_service = ingestion_service + + @orchestration_provider.step(timeout="60m") + async def parse(self, context: Context) -> dict: + input_data = context.workflow_input()["request"] + parsed_data = IngestionServiceAdapter.parse_ingest_file_input( + input_data + ) + + ingestion_result = ( + await self.ingestion_service.ingest_file_ingress(**parsed_data) + ) + + document_info = ingestion_result["info"] + + await self.ingestion_service.update_document_status( + document_info, + status=IngestionStatus.PARSING, + ) + + ingestion_config = parsed_data["ingestion_config"] or {} + extractions_generator = await self.ingestion_service.parse_file( + document_info, ingestion_config + ) + + extractions = [] + async for extraction in extractions_generator: + extractions.append(extraction) + + serializable_extractions = [ + extraction.to_dict() for extraction in extractions + ] + + return { + "status": "Successfully extracted data", + "extractions": serializable_extractions, + "document_info": document_info.to_dict(), + } + + @orchestration_provider.step(parents=["parse"], timeout="60m") + async def embed(self, context: Context) -> dict: + document_info_dict = context.step_output("parse")["document_info"] + document_info = DocumentInfo(**document_info_dict) + + await self.ingestion_service.update_document_status( + document_info, + status=IngestionStatus.EMBEDDING, + ) + + extractions = context.step_output("parse")["extractions"] + + embedding_generator = await self.ingestion_service.embed_document( + extractions + ) + + embeddings = [] + async for embedding in embedding_generator: + embeddings.append(embedding) + + await self.ingestion_service.update_document_status( + document_info, + status=IngestionStatus.STORING, + ) + + storage_generator = await self.ingestion_service.store_embeddings( # type: ignore + embeddings + ) + + async for _ in storage_generator: + pass + + # return { + # "document_info": document_info.to_dict(), + # } + + # @orchestration_provider.step(parents=["embed"], timeout="60m") + # async def finalize(self, context: Context) -> dict: + # document_info_dict = context.step_output("embed")["document_info"] + # print("Calling finalize for document_info_dict = ", document_info_dict) + # document_info = DocumentInfo(**document_info_dict) + + is_update = context.workflow_input()["request"].get("is_update") + + await self.ingestion_service.finalize_ingestion( + document_info, is_update=is_update + ) + + await self.ingestion_service.update_document_status( + document_info, + status=IngestionStatus.SUCCESS, + ) + + collection_id = await service.providers.database.relational.assign_document_to_collection( + document_id=document_info.id, + collection_id=generate_id_from_label( + str(document_info.user_id) + ), + ) + + service.providers.database.vector.assign_document_to_collection( + document_id=document_info.id, collection_id=collection_id + ) + + return { + "status": "Successfully finalized ingestion", + "document_info": document_info.to_dict(), + } + + @orchestration_provider.failure() + async def on_failure(self, 
context: Context) -> None: + request = context.workflow_input().get("request", {}) + document_id = request.get("document_id") + + if not document_id: + logger.error( + "No document id was found in workflow input to mark a failure." + ) + return + + try: + documents_overview = ( + await self.ingestion_service.providers.database.relational.get_documents_overview( + filter_document_ids=[document_id] + ) + )["results"] + + if not documents_overview: + logger.error( + f"Document with id {document_id} not found in database to mark failure." + ) + return + + document_info = documents_overview[0] + + # Update the document status to FAILED + if ( + not document_info.ingestion_status + == IngestionStatus.SUCCESS + ): + await self.ingestion_service.update_document_status( + document_info, + status=IngestionStatus.FAILED, + ) + + except Exception as e: + logger.error( + f"Failed to update document status for {document_id}: {e}" + ) + + # TODO: Implement a check to see if the file is actually changed before updating + @orchestration_provider.workflow(name="update-files", timeout="60m") + class HatchetUpdateFilesWorkflow: + def __init__(self, ingestion_service: IngestionService): + self.ingestion_service = ingestion_service + + @orchestration_provider.step(retries=0, timeout="60m") + async def update_files(self, context: Context) -> None: + data = context.workflow_input()["request"] + parsed_data = IngestionServiceAdapter.parse_update_files_input( + data + ) + + file_datas = parsed_data["file_datas"] + user = parsed_data["user"] + document_ids = parsed_data["document_ids"] + metadatas = parsed_data["metadatas"] + ingestion_config = parsed_data["ingestion_config"] + file_sizes_in_bytes = parsed_data["file_sizes_in_bytes"] + + if not file_datas: + raise R2RException( + status_code=400, message="No files provided for update." 
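Taken together, the parse/embed steps and the on_failure hook above act as a small status machine; the following is only a minimal restatement for orientation, using nothing beyond the statuses already referenced in this diff:

    from core.base import IngestionStatus  # same import used by this module

    # Happy path, in the order the steps above set it:
    happy_path = [
        IngestionStatus.PARSING,    # set before parse_file
        IngestionStatus.EMBEDDING,  # set before embed_document
        IngestionStatus.STORING,    # set before store_embeddings
        IngestionStatus.SUCCESS,    # set after finalize_ingestion
    ]
    # On any step failure, on_failure() re-reads the document overview and sets
    # IngestionStatus.FAILED, unless the document already reached SUCCESS.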
+ ) + if len(document_ids) != len(file_datas): + raise R2RException( + status_code=400, + message="Number of ids does not match number of files.", + ) + + documents_overview = ( + await self.ingestion_service.providers.database.relational.get_documents_overview( + filter_document_ids=document_ids, + filter_user_ids=None if user.is_superuser else [user.id], + ) + )["results"] + + if len(documents_overview) != len(document_ids): + raise R2RException( + status_code=404, + message="One or more documents not found.", + ) + + results = [] + + for idx, ( + file_data, + doc_id, + doc_info, + file_size_in_bytes, + ) in enumerate( + zip( + file_datas, + document_ids, + documents_overview, + file_sizes_in_bytes, + ) + ): + new_version = increment_version(doc_info.version) + + updated_metadata = ( + metadatas[idx] if metadatas else doc_info.metadata + ) + updated_metadata["title"] = ( + updated_metadata.get("title") + or file_data["filename"].split("/")[-1] + ) + + # Prepare input for ingest_file workflow + ingest_input = { + "file_data": file_data, + "user": data.get("user"), + "metadata": updated_metadata, + "document_id": str(doc_id), + "version": new_version, + "ingestion_config": ( + ingestion_config.model_dump_json() + if ingestion_config + else None + ), + "size_in_bytes": file_size_in_bytes, + "is_update": True, + } + + # Spawn ingest_file workflow as a child workflow + child_result = ( + await context.aio.spawn_workflow( + "ingest-file-changed", + {"request": ingest_input}, + key=f"ingest_file_{doc_id}", + ) + ).result() + results.append(child_result) + + await asyncio.gather(*results) + + return None + + ingest_files_workflow = HatchetIngestFilesWorkflow(service) + update_files_workflow = HatchetUpdateFilesWorkflow(service) + return { + "ingest_files": ingest_files_workflow, + "update_files": update_files_workflow, + } diff --git a/py/core/main/orchestration/hatchet/kg_workflow.py b/py/core/main/orchestration/hatchet/kg_workflow.py new file mode 100644 index 000000000..ebb425f69 --- /dev/null +++ b/py/core/main/orchestration/hatchet/kg_workflow.py @@ -0,0 +1,261 @@ +import asyncio +import json +import logging +import math +import uuid + +from hatchet_sdk import Context + +from core import GenerationConfig +from core.base import OrchestrationProvider + +from ...services import KgService + +logger = logging.getLogger(__name__) +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from hatchet_sdk import Hatchet + + +def hatchet_kg_factory( + orchestration_provider: OrchestrationProvider, service: KgService +) -> dict[str, "Hatchet.Workflow"]: + + def get_input_data_dict(input_data): + for key, value in input_data.items(): + if key == "kg_creation_settings": + input_data[key] = json.loads(value) + input_data[key]["generation_config"] = GenerationConfig( + **input_data[key]["generation_config"] + ) + if key == "kg_enrichment_settings": + input_data[key] = json.loads(value) + + if key == "generation_config": + input_data[key] = GenerationConfig(**input_data[key]) + return input_data + + @orchestration_provider.workflow(name="kg-extract", timeout="360m") + class KGExtractDescribeEmbedWorkflow: + def __init__(self, kg_service: KgService): + self.kg_service = kg_service + + @orchestration_provider.step(retries=1, timeout="360m") + async def kg_extract(self, context: Context) -> dict: + + context.log( + f"Running KG Extraction for input: {context.workflow_input()['request']}" + ) + + input_data = get_input_data_dict( + context.workflow_input()["request"] + ) + + # context.log(f"Running KG 
Extraction for collection ID: {input_data['collection_id']}") + document_id = input_data["document_id"] + + await self.kg_service.kg_triples_extraction( + document_id=uuid.UUID(document_id), + logger=context.log, + **input_data["kg_creation_settings"], + ) + + return { + "result": f"successfully ran kg triples extraction for document {document_id}" + } + + @orchestration_provider.step( + retries=1, timeout="360m", parents=["kg_extract"] + ) + async def kg_entity_description(self, context: Context) -> dict: + + input_data = get_input_data_dict( + context.workflow_input()["request"] + ) + document_id = input_data["document_id"] + + await self.kg_service.kg_entity_description( + document_id=uuid.UUID(document_id), + **input_data["kg_creation_settings"], + ) + + return { + "result": f"successfully ran kg node description for document {document_id}" + } + + @orchestration_provider.workflow(name="create-graph", timeout="360m") + class CreateGraphWorkflow: + def __init__(self, kg_service: KgService): + self.kg_service = kg_service + + @orchestration_provider.step(retries=1) + async def get_document_ids_for_create_graph( + self, context: Context + ) -> dict: + + input_data = get_input_data_dict( + context.workflow_input()["request"] + ) + collection_id = input_data["collection_id"] + + return_val = { + "document_ids": [ + str(doc_id) + for doc_id in await self.kg_service.get_document_ids_for_create_graph( + collection_id=collection_id, + **input_data["kg_creation_settings"], + ) + ] + } + + if len(return_val["document_ids"]) == 0: + raise ValueError( + "No documents to process, either all documents to create the graph were already created or in progress, or the collection is empty." + ) + + return return_val + + @orchestration_provider.step( + retries=1, parents=["get_document_ids_for_create_graph"] + ) + async def kg_extraction_ingress(self, context: Context) -> dict: + + document_ids = [ + uuid.UUID(doc_id) + for doc_id in context.step_output( + "get_document_ids_for_create_graph" + )["document_ids"] + ] + results = [] + for cnt, document_id in enumerate(document_ids): + context.log( + f"Running Graph Creation Workflow for document ID: {document_id}" + ) + results.append( + ( + context.aio.spawn_workflow( + "kg-extract", + { + "request": { + "document_id": str(document_id), + "kg_creation_settings": context.workflow_input()[ + "request" + ][ + "kg_creation_settings" + ], + } + }, + key=f"kg-extract-{cnt}/{len(document_ids)}", + ) + ) + ) + + if not document_ids: + context.log( + "No documents to process, either all graphs were created or in progress, or no documents were provided. Skipping graph creation." 
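To make the request parsing used by these steps concrete: get_input_data_dict expects the settings to arrive JSON-encoded and rehydrates the nested generation config before the values are splatted into the KG service calls. A minimal sketch with placeholder values (the document id and the fields inside kg_creation_settings other than generation_config are illustrative only):

    import json
    from core import GenerationConfig  # as imported at the top of this module

    # Hypothetical payload, shaped like context.workflow_input()["request"]
    request = {
        "document_id": "c3f0f8de-0000-4000-8000-000000000000",  # placeholder UUID
        "kg_creation_settings": json.dumps(
            {
                "generation_config": {"model": "gpt-4o-mini"},  # placeholder fields
                "max_knowledge_triples": 100,
            }
        ),
    }

    # What get_input_data_dict does with it:
    settings = json.loads(request["kg_creation_settings"])
    settings["generation_config"] = GenerationConfig(**settings["generation_config"])
    # `settings` is then passed as **kwargs into
    # kg_service.kg_triples_extraction(document_id=..., **settings).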
+ ) + return {"result": "No documents to process"} + + context.log(f"Ran {len(results)} workflows for graph creation") + results = await asyncio.gather(*results) + return { + "result": f"successfully ran graph creation workflows for {len(results)} documents" + } + + @orchestration_provider.workflow(name="enrich-graph", timeout="360m") + class EnrichGraphWorkflow: + def __init__(self, kg_service: KgService): + self.kg_service = kg_service + + @orchestration_provider.step(retries=1, parents=[], timeout="360m") + async def kg_clustering(self, context: Context) -> dict: + + logger.info("Running KG Clustering") + input_data = get_input_data_dict( + context.workflow_input()["request"] + ) + collection_id = input_data["collection_id"] + + kg_clustering_results = await self.kg_service.kg_clustering( + collection_id=collection_id, + **input_data["kg_enrichment_settings"], + ) + + context.log( + f"Successfully ran kg clustering for collection {collection_id}: {json.dumps(kg_clustering_results)}" + ) + logger.info( + f"Successfully ran kg clustering for collection {collection_id}: {json.dumps(kg_clustering_results)}" + ) + + return { + "result": f"successfully ran kg clustering for collection {collection_id}", + "kg_clustering": kg_clustering_results, + } + + @orchestration_provider.step(retries=1, parents=["kg_clustering"]) + async def kg_community_summary(self, context: Context) -> dict: + + input_data = get_input_data_dict( + context.workflow_input()["request"] + ) + collection_id = input_data["collection_id"] + num_communities = context.step_output("kg_clustering")[ + "kg_clustering" + ][0]["num_communities"] + + parallel_communities = min(100, num_communities) + total_workflows = math.ceil(num_communities / parallel_communities) + workflows = [] + for i, offset in enumerate( + range(0, num_communities, parallel_communities) + ): + workflows.append( + context.aio.spawn_workflow( + "kg-community-summary", + { + "request": { + "offset": offset, + "limit": parallel_communities, + "collection_id": collection_id, + **input_data["kg_enrichment_settings"], + } + }, + key=f"{i}/{total_workflows}_community_summary", + ) + ) + await asyncio.gather(*workflows) + return { + "result": f"Successfully spawned summary workflows for {num_communities} communities." 
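The community-summary fan-out above is plain offset/limit batching; a worked example of the arithmetic, assuming an illustrative community count of 250:

    import math

    num_communities = 250  # assumed value for illustration
    parallel_communities = min(100, num_communities)                      # 100
    total_workflows = math.ceil(num_communities / parallel_communities)   # 3

    batches = [
        (i, offset, parallel_communities)
        for i, offset in enumerate(range(0, num_communities, parallel_communities))
    ]
    # -> [(0, 0, 100), (1, 100, 100), (2, 200, 100)]
    # Each tuple becomes one "kg-community-summary" child workflow with that
    # offset/limit; the final batch simply yields fewer than `limit` communities.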
+ } + + @orchestration_provider.workflow( + name="kg-community-summary", timeout="360m" + ) + class KGCommunitySummaryWorkflow: + def __init__(self, kg_service: KgService): + self.kg_service = kg_service + + @orchestration_provider.step(retries=1, timeout="360m") + async def kg_community_summary(self, context: Context) -> dict: + input_data = get_input_data_dict( + context.workflow_input()["request"] + ) + + community_summary = await self.kg_service.kg_community_summary( + **input_data + ) + context.log( + f"Successfully ran kg community summary for communities {input_data['offset']} to {input_data['offset'] + len(community_summary)}" + ) + return { + "result": f"successfully ran kg community summary for communities {input_data['offset']} to {input_data['offset'] + len(community_summary)}" + } + + return { + "kg-extract": KGExtractDescribeEmbedWorkflow(service), + "create-graph": CreateGraphWorkflow(service), + "enrich-graph": EnrichGraphWorkflow(service), + "kg-community-summary": KGCommunitySummaryWorkflow(service), + } diff --git a/py/tests/core/parsers/__init__.py b/py/core/main/orchestration/simple/__init__.py similarity index 100% rename from py/tests/core/parsers/__init__.py rename to py/core/main/orchestration/simple/__init__.py diff --git a/py/core/main/orchestration/simple/ingestion_workflow.py b/py/core/main/orchestration/simple/ingestion_workflow.py new file mode 100644 index 000000000..e7f9a0956 --- /dev/null +++ b/py/core/main/orchestration/simple/ingestion_workflow.py @@ -0,0 +1,170 @@ +import asyncio +import logging + +from core.base import R2RException, generate_id_from_label, increment_version + +from ...services import IngestionService + +logger = logging.getLogger(__name__) + + +def simple_ingestion_factory(service: IngestionService): + async def ingest_files(input_data): + document_info = None + try: + from core.base import IngestionStatus + from core.main import IngestionServiceAdapter + + parsed_data = IngestionServiceAdapter.parse_ingest_file_input( + input_data + ) + is_update = parsed_data["is_update"] + + ingestion_result = await service.ingest_file_ingress(**parsed_data) + document_info = ingestion_result["info"] + + await service.update_document_status( + document_info, status=IngestionStatus.PARSING + ) + + ingestion_config = parsed_data["ingestion_config"] + extractions_generator = await service.parse_file( + document_info, ingestion_config + ) + extractions = [ + extraction.model_dump() + async for extraction in extractions_generator + ] + + await service.update_document_status( + document_info, status=IngestionStatus.EMBEDDING + ) + embedding_generator = await service.embed_document(extractions) + embeddings = [ + embedding.model_dump() + async for embedding in embedding_generator + ] + + await service.update_document_status( + document_info, status=IngestionStatus.STORING + ) + storage_generator = await service.store_embeddings(embeddings) + async for _ in storage_generator: + pass + + await service.finalize_ingestion( + document_info, is_update=is_update + ) + + await service.update_document_status( + document_info, status=IngestionStatus.SUCCESS + ) + + try: + collection_id = await service.providers.database.relational.assign_document_to_collection( + document_id=document_info.id, + collection_id=generate_id_from_label( + str(document_info.user_id) + ), + ) + service.providers.database.vector.assign_document_to_collection( + document_id=document_info.id, collection_id=collection_id + ) + except Exception as e: + logger.error( + f"Error during assigning 
document to collection: {str(e)}" + ) + + except Exception as e: + if document_info is not None: + await service.update_document_status( + document_info, status=IngestionStatus.FAILED + ) + raise R2RException( + status_code=500, message=f"Error during ingestion: {str(e)}" + ) + + async def update_files(input_data): + from core.base import IngestionStatus + from core.main import IngestionServiceAdapter + + parsed_data = IngestionServiceAdapter.parse_update_files_input( + input_data + ) + + file_datas = parsed_data["file_datas"] + user = parsed_data["user"] + document_ids = parsed_data["document_ids"] + metadatas = parsed_data["metadatas"] + ingestion_config = parsed_data["ingestion_config"] + file_sizes_in_bytes = parsed_data["file_sizes_in_bytes"] + + if not file_datas: + raise R2RException( + status_code=400, message="No files provided for update." + ) + if len(document_ids) != len(file_datas): + raise R2RException( + status_code=400, + message="Number of ids does not match number of files.", + ) + + documents_overview = ( + await service.providers.database.relational.get_documents_overview( + filter_document_ids=document_ids, + filter_user_ids=None if user.is_superuser else [user.id], + ) + )["results"] + + if len(documents_overview) != len(document_ids): + raise R2RException( + status_code=404, + message="One or more documents not found.", + ) + + results = [] + + for idx, ( + file_data, + doc_id, + doc_info, + file_size_in_bytes, + ) in enumerate( + zip( + file_datas, + document_ids, + documents_overview, + file_sizes_in_bytes, + ) + ): + new_version = increment_version(doc_info.version) + + updated_metadata = ( + metadatas[idx] if metadatas else doc_info.metadata + ) + updated_metadata["title"] = ( + updated_metadata.get("title") + or file_data["filename"].split("/")[-1] + ) + + ingest_input = { + "file_data": file_data, + "user": user.model_dump(), + "metadata": updated_metadata, + "document_id": str(doc_id), + "version": new_version, + "ingestion_config": ( + ingestion_config.model_dump_json() + if ingestion_config + else None + ), + "size_in_bytes": file_size_in_bytes, + "is_update": True, + } + + result = ingest_files(ingest_input) + results.append(result) + + await asyncio.gather(*results) + + return {"ingest-file-changed": ingest_files, "update-files": update_files} diff --git a/py/core/main/orchestration/simple/kg_workflow.py b/py/core/main/orchestration/simple/kg_workflow.py new file mode 100644 index 000000000..b5bca7d39 --- /dev/null +++ b/py/core/main/orchestration/simple/kg_workflow.py @@ -0,0 +1,101 @@ +import json +import logging +import math + +from core import GenerationConfig + +from ...services import KgService + +logger = logging.getLogger(__name__) + + +def simple_kg_factory(service: KgService): + + def get_input_data_dict(input_data): + for key, value in input_data.items(): + if key == "kg_creation_settings": + input_data[key] = json.loads(value) + input_data[key]["generation_config"] = GenerationConfig( + **input_data[key]["generation_config"] + ) + if key == "kg_enrichment_settings": + input_data[key] = json.loads(value) + input_data[key]["generation_config"] = GenerationConfig( + **input_data[key]["generation_config"] + ) + return input_data + + async def create_graph(input_data): + + input_data = get_input_data_dict(input_data) + + document_ids = await service.get_document_ids_for_create_graph( + collection_id=input_data["collection_id"], + **input_data["kg_creation_settings"], + ) + + logger.info( + f"Creating graph for {len(document_ids)} documents with IDs: 
{document_ids}" + ) + + for _, document_id in enumerate(document_ids): + # Extract triples from the document + await service.kg_triples_extraction( + document_id=document_id, + **input_data["kg_creation_settings"], + ) + # Describe the entities in the graph + await service.kg_entity_description( + document_id=document_id, + **input_data["kg_creation_settings"], + ) + + async def enrich_graph(input_data): + + input_data = get_input_data_dict(input_data) + + num_communities = await service.kg_clustering( + collection_id=input_data["collection_id"], + **input_data["kg_enrichment_settings"], + ) + num_communities = num_communities[0]["num_communities"] + # TODO - Do not hardcode the number of parallel communities, + # make it a configurable parameter at runtime & add server-side defaults + parallel_communities = min(100, num_communities) + + total_workflows = math.ceil(num_communities / parallel_communities) + for i in range(total_workflows): + input_data_copy = input_data.copy() + input_data_copy["offset"] = i * parallel_communities + input_data_copy["limit"] = min( + parallel_communities, + num_communities - i * parallel_communities, + ) + # running i'th workflow out of total_workflows + logger.info( + f"Running kg community summary for {i+1}'th workflow out of total {total_workflows} workflows" + ) + await kg_community_summary( + input_data=input_data_copy, + ) + + return {"result": "successfully ran kg community summary workflows"} + + async def kg_community_summary(input_data): + + logger.info( + f"Running kg community summary for offset: {input_data['offset']}, limit: {input_data['limit']}" + ) + + await service.kg_community_summary( + offset=input_data["offset"], + limit=input_data["limit"], + collection_id=input_data["collection_id"], + **input_data["kg_enrichment_settings"], + ) + + return { + "create-graph": create_graph, + "enrich-graph": enrich_graph, + "kg-community-summary": kg_community_summary, + } diff --git a/py/core/main/services/__init__.py b/py/core/main/services/__init__.py index 6b5795b43..a244835b3 100644 --- a/py/core/main/services/__init__.py +++ b/py/core/main/services/__init__.py @@ -1,7 +1,7 @@ from .auth_service import AuthService from .ingestion_service import IngestionService, IngestionServiceAdapter +from .kg_service import KgService from .management_service import ManagementService -from .restructure_service import RestructureService from .retrieval_service import RetrievalService __all__ = [ @@ -9,6 +9,6 @@ "IngestionService", "IngestionServiceAdapter", "ManagementService", - "RestructureService", + "KgService", "RetrievalService", ] diff --git a/py/core/main/services/auth_service.py b/py/core/main/services/auth_service.py index 57029bcbc..46d7093ed 100644 --- a/py/core/main/services/auth_service.py +++ b/py/core/main/services/auth_service.py @@ -40,7 +40,6 @@ async def register(self, email: str, password: str) -> dict[str, str]: async def verify_email( self, email: str, verification_code: str ) -> dict[str, str]: - if not self.config.auth.require_email_verification: raise R2RException( status_code=400, message="Email verification is not required" diff --git a/py/core/main/services/ingestion_service.py b/py/core/main/services/ingestion_service.py index f19c3be0f..48bfa9d79 100644 --- a/py/core/main/services/ingestion_service.py +++ b/py/core/main/services/ingestion_service.py @@ -7,9 +7,9 @@ from core.base import ( Document, DocumentExtraction, - DocumentFragment, DocumentInfo, DocumentType, + IngestionConfig, IngestionStatus, R2RException, 
RunLoggingSingleton, @@ -18,7 +18,6 @@ decrement_version, ) from core.base.api.models import UserResponse -from core.base.providers import ChunkingConfig from core.telemetry.telemetry_decorator import telemetry_event from ..abstractions import R2RAgents, R2RPipelines, R2RPipes, R2RProviders @@ -93,10 +92,11 @@ async def ingest_file_ingress( filter_user_ids=[user.id], filter_document_ids=[document_id], ) - ) - if documents := existing_document_info.get("documents", []): - existing_doc = documents[0] - if is_update: + )["results"] + + if len(existing_document_info) > 0: + existing_doc = existing_document_info[0] + if not is_update: if ( existing_doc.version >= version and existing_doc.ingestion_status @@ -104,13 +104,13 @@ async def ingest_file_ingress( ): raise R2RException( status_code=409, - message=f"Must increment version number before attempting to overwrite document {document_id}.", + message=f"Must increment version number before attempting to overwrite document {document_id}. Use the `update_files` endpoint if you are looking to update the existing version.", + ) + elif existing_doc.ingestion_status != IngestionStatus.FAILED: + raise R2RException( + status_code=409, + message=f"Document {document_id} was already ingested and is not in a failed state.", ) - elif existing_doc.ingestion_status != IngestionStatus.FAILURE: - raise R2RException( - status_code=409, - message=f"Document {document_id} was already ingested and is not in a failed state.", - ) await self.providers.database.relational.upsert_documents_overview( document_info @@ -154,9 +154,8 @@ def _create_document_info( ) async def parse_file( - self, - document_info: DocumentInfo, - ) -> AsyncGenerator[DocumentFragment, None]: + self, document_info: DocumentInfo, ingestion_config: dict + ) -> AsyncGenerator[DocumentExtraction, None]: return await self.pipes.parsing_pipe.run( input=self.pipes.parsing_pipe.Input( message=Document( @@ -172,24 +171,7 @@ async def parse_file( ), state=None, run_manager=self.run_manager, - ) - - async def chunk_document( - self, - parsed_documents: list[dict], - chunking_config: ChunkingConfig, - ) -> AsyncGenerator[DocumentFragment, None]: - - return await self.pipes.chunking_pipe.run( - input=self.pipes.chunking_pipe.Input( - message=[ - DocumentExtraction.from_dict(chunk) - for chunk in parsed_documents - ] - ), - state=None, - run_manager=self.run_manager, - chunking_config=chunking_config, + ingestion_config=ingestion_config, ) async def embed_document( @@ -199,7 +181,7 @@ async def embed_document( return await self.pipes.embedding_pipe.run( input=self.pipes.embedding_pipe.Input( message=[ - DocumentFragment.from_dict(chunk) + DocumentExtraction.from_dict(chunk) for chunk in chunked_documents ] ), @@ -289,7 +271,6 @@ def _parse_user_data(user_data) -> UserResponse: @staticmethod def parse_ingest_file_input(data: dict) -> dict: - print('data["chunking_config"] = ', data["chunking_config"]) return { "user": IngestionServiceAdapter._parse_user_data(data["user"]), "metadata": data["metadata"], @@ -297,11 +278,7 @@ def parse_ingest_file_input(data: dict) -> dict: UUID(data["document_id"]) if data["document_id"] else None ), "version": data.get("version"), - "chunking_config": ( - ChunkingConfig.from_dict(data["chunking_config"]) - if data["chunking_config"] - else None - ), + "ingestion_config": data["ingestion_config"] or {}, "is_update": data.get("is_update", False), "file_data": data["file_data"], "size_in_bytes": data["size_in_bytes"], @@ -313,11 +290,7 @@ def parse_update_files_input(data: 
dict) -> dict: "user": IngestionServiceAdapter._parse_user_data(data["user"]), "document_ids": [UUID(doc_id) for doc_id in data["document_ids"]], "metadatas": data["metadatas"], - "chunking_config": ( - ChunkingConfig.from_dict(data["chunking_config"]) - if data["chunking_config"] - else None - ), + "ingestion_config": data["ingestion_config"], "file_sizes_in_bytes": data["file_sizes_in_bytes"], "file_datas": data["file_datas"], } diff --git a/py/core/main/services/kg_service.py b/py/core/main/services/kg_service.py new file mode 100644 index 000000000..fbe1ebfc8 --- /dev/null +++ b/py/core/main/services/kg_service.py @@ -0,0 +1,213 @@ +import logging +import math +from typing import Any, AsyncGenerator, Optional +from uuid import UUID + +from core.base import KGCreationStatus, RunLoggingSingleton, RunManager +from core.base.abstractions import GenerationConfig +from core.telemetry.telemetry_decorator import telemetry_event + +from ..abstractions import R2RAgents, R2RPipelines, R2RPipes, R2RProviders +from ..config import R2RConfig +from .base import Service + +logger = logging.getLogger(__name__) + + +async def _collect_results(result_gen: AsyncGenerator) -> list[dict]: + results = [] + async for res in result_gen: + results.append(res.json() if hasattr(res, "json") else res) + return results + + +# TODO - Fix naming convention to read `KGService` instead of `KgService` +# this will require a minor change in how services are registered. +class KgService(Service): + def __init__( + self, + config: R2RConfig, + providers: R2RProviders, + pipes: R2RPipes, + pipelines: R2RPipelines, + agents: R2RAgents, + run_manager: RunManager, + logging_connection: RunLoggingSingleton, + ): + super().__init__( + config, + providers, + pipes, + pipelines, + agents, + run_manager, + logging_connection, + ) + + @telemetry_event("kg_triples_extraction") + async def kg_triples_extraction( + self, + document_id: UUID, + generation_config: GenerationConfig, + extraction_merge_count: int, + max_knowledge_triples: int, + entity_types: list[str], + relation_types: list[str], + **kwargs, + ): + try: + + logger.info(f"Processing document {document_id} for KG extraction") + + await self.providers.database.relational.set_workflow_status( + id=document_id, + status_type="kg_extraction_status", + status=KGCreationStatus.PROCESSING, + ) + + triples = await self.pipes.kg_triples_extraction_pipe.run( + input=self.pipes.kg_triples_extraction_pipe.Input( + message={ + "document_id": document_id, + "generation_config": generation_config, + "extraction_merge_count": extraction_merge_count, + "max_knowledge_triples": max_knowledge_triples, + "entity_types": entity_types, + "relation_types": relation_types, + } + ), + state=None, + run_manager=self.run_manager, + ) + + result_gen = await self.pipes.kg_storage_pipe.run( + input=self.pipes.kg_storage_pipe.Input(message=triples), + state=None, + run_manager=self.run_manager, + ) + + await self.providers.database.relational.set_workflow_status( + id=document_id, + status_type="kg_extraction_status", + status=KGCreationStatus.SUCCESS, + ) + + except Exception as e: + logger.error(f"Error in kg_extraction: {e}") + await self.providers.database.relational.set_workflow_status( + id=document_id, + status_type="kg_extraction_status", + status=KGCreationStatus.FAILED, + ) + + return await _collect_results(result_gen) + + @telemetry_event("get_document_ids_for_create_graph") + async def get_document_ids_for_create_graph( + self, + collection_id: UUID, + force_kg_creation: bool, + **kwargs, + 
): + + document_status_filter = [ + KGCreationStatus.PENDING, + KGCreationStatus.FAILED, + ] + if force_kg_creation: + document_status_filter += [ + KGCreationStatus.SUCCESS, + KGCreationStatus.PROCESSING, + ] + + document_ids = await self.providers.database.relational.get_document_ids_by_status( + status_type="kg_extraction_status", + status=document_status_filter, + collection_id=collection_id, + ) + + return document_ids + + @telemetry_event("kg_entity_description") + async def kg_entity_description( + self, + document_id: UUID, + max_description_input_length: int, + **kwargs, + ): + + entity_count = await self.providers.kg.get_entity_count(document_id) + + # TODO - Do not hardcode the batch size, + # make it a configurable parameter at runtime & server-side defaults + + # process 256 entities at a time + num_batches = math.ceil(entity_count / 256) + all_results = [] + for i in range(num_batches): + logger.info( + f"Running kg_entity_description for batch {i+1}/{num_batches} for document {document_id}" + ) + + node_descriptions = await self.pipes.kg_entity_description_pipe.run( + input=self.pipes.kg_entity_description_pipe.Input( + message={ + "offset": i * 256, + "limit": 256, + "max_description_input_length": max_description_input_length, + "document_id": document_id, + } + ), + state=None, + run_manager=self.run_manager, + ) + + all_results.append(await _collect_results(node_descriptions)) + + return all_results + + @telemetry_event("kg_clustering") + async def kg_clustering( + self, + collection_id: UUID, + generation_config: GenerationConfig, + leiden_params: dict, + **kwargs, + ): + clustering_result = await self.pipes.kg_clustering_pipe.run( + input=self.pipes.kg_clustering_pipe.Input( + message={ + "collection_id": collection_id, + "generation_config": generation_config, + "leiden_params": leiden_params, + } + ), + state=None, + run_manager=self.run_manager, + ) + return await _collect_results(clustering_result) + + @telemetry_event("kg_community_summary") + async def kg_community_summary( + self, + offset: int, + limit: int, + max_summary_input_length: int, + generation_config: GenerationConfig, + collection_id: UUID, + **kwargs, + ): + summary_results = await self.pipes.kg_community_summary_pipe.run( + input=self.pipes.kg_community_summary_pipe.Input( + message={ + "offset": offset, + "limit": limit, + "generation_config": generation_config, + "max_summary_input_length": max_summary_input_length, + "collection_id": collection_id, + } + ), + state=None, + run_manager=self.run_manager, + ) + return await _collect_results(summary_results) diff --git a/py/core/main/services/management_service.py b/py/core/main/services/management_service.py index f8aa19255..f062e67b8 100644 --- a/py/core/main/services/management_service.py +++ b/py/core/main/services/management_service.py @@ -310,9 +310,11 @@ async def delete( ] try: - documents_overview = await self.providers.database.relational.get_documents_overview( - **relational_filters - ) + documents_overview = ( + await self.providers.database.relational.get_documents_overview( + **relational_filters + ) + )["results"] except Exception as e: logger.error( f"Error fetching documents from relational database: {e}" @@ -320,9 +322,7 @@ async def delete( documents_overview = [] if documents_overview: - document_ids_to_purge.update( - doc.id for doc in documents_overview["results"] - ) + document_ids_to_purge.update(doc.id for doc in documents_overview) if not document_ids_to_purge: raise R2RException( @@ -384,97 +384,10 @@ async def 
document_chunks( document_id, offset=offset, limit=limit ) - @telemetry_event("InspectKnowledgeGraph") - async def inspect_knowledge_graph( - self, - offset: int = 0, - limit=1000, - print_descriptions: bool = False, - *args: Any, - **kwargs: Any, - ): - if self.providers.kg is None: - raise R2RException( - status_code=404, message="Knowledge Graph provider not found." - ) - - rel_query = f""" - MATCH (n1)-[r]->(n2) - return n1.name AS subject, n1.description AS subject_description, n2.name AS object, n2.description AS object_description, type(r) AS relation, r.description AS relation_description - SKIP {offset} - LIMIT {limit} - """ - - try: - neo4j_results = self.providers.kg.structured_query( - rel_query - ).records - - relationships_raw = [ - { - "subject": { - "name": record["subject"], - "description": record["subject_description"], - }, - "relation": { - "name": record["relation"], - "description": record["relation_description"], - }, - "object": { - "name": record["object"], - "description": record["object_description"], - }, - } - for record in neo4j_results - ] - - descriptions_dict = {} - relationships = [] - - for relationship in relationships_raw: - if print_descriptions: - descriptions_dict[relationship["subject"]["name"]] = ( - relationship["subject"]["description"] - ) - descriptions_dict[relationship["object"]["name"]] = ( - relationship["object"]["description"] - ) - - relationships.append( - ( - relationship["subject"]["name"], - relationship["relation"]["name"], - relationship["object"]["name"], - ) - ) - - # Create graph representation and group relationships - graph, grouped_relationships = self._process_relationships( - relationships - ) - - # Generate output - output = self.generate_output( - grouped_relationships, - graph, - descriptions_dict, - print_descriptions, - ) - - return "\n".join(output) - - except Exception as e: - logger.error("Error printing relationships", exc_info=True) - raise R2RException( - status_code=500, - message=f"An error occurred while fetching relationships: {str(e)}", - ) - @telemetry_event("AssignDocumentToCollection") async def assign_document_to_collection( self, document_id: str, collection_id: UUID ): - await self.providers.database.relational.assign_document_to_collection( document_id, collection_id ) diff --git a/py/core/main/services/restructure_service.py b/py/core/main/services/restructure_service.py deleted file mode 100644 index e38f912f2..000000000 --- a/py/core/main/services/restructure_service.py +++ /dev/null @@ -1,131 +0,0 @@ -import logging -from typing import AsyncGenerator -from uuid import UUID - -from core.base import RunLoggingSingleton, RunManager -from core.base.abstractions import GenerationConfig -from core.telemetry.telemetry_decorator import telemetry_event - -from ..abstractions import R2RAgents, R2RPipelines, R2RPipes, R2RProviders -from ..config import R2RConfig -from .base import Service - -logger = logging.getLogger(__name__) - - -async def _collect_results(result_gen: AsyncGenerator) -> list[dict]: - results = [] - async for res in result_gen: - results.append(res.json() if hasattr(res, "json") else res) - return results - - -class RestructureService(Service): - def __init__( - self, - config: R2RConfig, - providers: R2RProviders, - pipes: R2RPipes, - pipelines: R2RPipelines, - agents: R2RAgents, - run_manager: RunManager, - logging_connection: RunLoggingSingleton, - ): - super().__init__( - config, - providers, - pipes, - pipelines, - agents, - run_manager, - logging_connection, - ) - - 
@telemetry_event("kg_extract_and_store") - async def kg_extract_and_store( - self, - document_id: UUID, - generation_config: GenerationConfig, - fragment_merge_count: int, - max_knowledge_triples: int, - entity_types: list[str], - relation_types: list[str], - ): - triples = await self.pipes.kg_extraction_pipe.run( - input=self.pipes.kg_extraction_pipe.Input( - message={ - "document_id": document_id, - "generation_config": generation_config, - "fragment_merge_count": fragment_merge_count, - "max_knowledge_triples": max_knowledge_triples, - "entity_types": entity_types, - "relation_types": relation_types, - } - ), - state=None, - run_manager=self.run_manager, - ) - - result_gen = await self.pipes.kg_storage_pipe.run( - input=self.pipes.kg_storage_pipe.Input(message=triples), - state=None, - run_manager=self.run_manager, - ) - - return await _collect_results(result_gen) - - @telemetry_event("kg_node_creation") - async def kg_node_creation(self, max_description_input_length: int): - node_extractions = await self.pipes.kg_node_extraction_pipe.run( - input=self.pipes.kg_node_extraction_pipe.Input(message=None), - state=None, - run_manager=self.run_manager, - ) - result_gen = await self.pipes.kg_node_description_pipe.run( - input=self.pipes.kg_node_description_pipe.Input( - message={ - "node_extractions": node_extractions, - "max_description_input_length": max_description_input_length, - } - ), - state=None, - run_manager=self.run_manager, - ) - return await _collect_results(result_gen) - - @telemetry_event("kg_clustering") - async def kg_clustering(self, leiden_params, generation_config): - clustering_result = await self.pipes.kg_clustering_pipe.run( - input=self.pipes.kg_clustering_pipe.Input( - message={ - "leiden_params": leiden_params, - "generation_config": generation_config, - } - ), - state=None, - run_manager=self.run_manager, - ) - - return await _collect_results(clustering_result) - - @telemetry_event("kg_community_summary") - async def kg_community_summary( - self, - community_id: str, - level: int, - max_summary_input_length: int, - generation_config: GenerationConfig, - ): - summary_results = await self.pipes.kg_community_summary_pipe.run( - input=self.pipes.kg_community_summary_pipe.Input( - message={ - "community_id": community_id, - "level": level, - "generation_config": generation_config, - "max_summary_input_length": max_summary_input_length, - } - ), - state=None, - run_manager=self.run_manager, - ) - return await _collect_results(summary_results) diff --git a/py/core/parsers/__init__.py b/py/core/parsers/__init__.py index 228f7085f..0439c320b 100644 --- a/py/core/parsers/__init__.py +++ b/py/core/parsers/__init__.py @@ -7,7 +7,6 @@ "AudioParser", "DOCXParser", "ImageParser", - "MovieParser", "PDFParser", "PDFParserUnstructured", "PDFParserMarker", diff --git a/py/core/parsers/media/__init__.py b/py/core/parsers/media/__init__.py index a56ec64a8..075276e33 100644 --- a/py/core/parsers/media/__init__.py +++ b/py/core/parsers/media/__init__.py @@ -1,7 +1,6 @@ from .audio_parser import AudioParser from .docx_parser import DOCXParser from .img_parser import ImageParser -from .movie_parser import MovieParser from .pdf_parser import ( # type: ignore PDFParser, PDFParserMarker, @@ -13,7 +12,6 @@ "AudioParser", "DOCXParser", "ImageParser", - "MovieParser", "PDFParser", "PDFParserUnstructured", "PDFParserMarker", diff --git a/py/core/parsers/media/movie_parser.py b/py/core/parsers/media/movie_parser.py deleted file mode 100644 index b34df4cc5..000000000 --- 
a/py/core/parsers/media/movie_parser.py +++ /dev/null @@ -1,114 +0,0 @@ -import base64 -import os -from typing import AsyncGenerator - -from core.base.parsers.base_parser import AsyncParser -from core.parsers.media.openai_helpers import ( - process_audio_with_openai, - process_frame_with_openai, -) - - -class MovieParser(AsyncParser): - """A parser for movie data.""" - - def __init__( - self, - model: str = "gpt-4o", - max_tokens: int = 2048, - seconds_per_frame: int = 2, - max_frames: int = 10, - ): - try: - import cv2 - - self.cv2 = cv2 - except ImportError: - raise ValueError( - "Error, `opencv-python` is required to run `MovieParser`. Please install it using `pip install opencv-python`." - ) - try: - import moviepy.editor as mp - - self.mp = mp - except ImportError: - raise ValueError( - "Error, `moviepy` is required to run `MovieParser`. Please install it using `pip install moviepy`." - ) - - self.model = model - self.max_tokens = max_tokens - self.seconds_per_frame = seconds_per_frame - self.max_frames = max_frames - self.openai_api_key = os.environ.get("OPENAI_API_KEY") - if not self.openai_api_key: - raise ValueError( - "Error, environment variable `OPENAI_API_KEY` is required to run `MovieParser`." - ) - - async def ingest( # type: ignore - self, data: bytes, chunk_size: int = 1024 - ) -> AsyncGenerator[str, None]: - """Ingest movie data and yield a description.""" - temp_video_path = "temp_movie.mp4" - with open(temp_video_path, "wb") as f: - f.write(data) - try: - raw_frames, audio_file = self.process_video(temp_video_path) - for frame in raw_frames: - frame_text = process_frame_with_openai( - frame, self.openai_api_key # type: ignore - ) - yield frame_text - - if audio_file: - transcription_text = process_audio_with_openai( - audio_file, self.openai_api_key # type: ignore - ) - # split text into small chunks and yield them - for i in range(0, len(transcription_text), chunk_size): - text = transcription_text[i : i + chunk_size] - if text and text != "": - yield text - finally: - os.remove(temp_video_path) - - def process_video(self, video_path): - base64Frames = [] - base_video_path, _ = os.path.splitext(video_path) - - video = self.cv2.VideoCapture(video_path) - total_frames = int(video.get(self.cv2.CAP_PROP_FRAME_COUNT)) - fps = video.get(self.cv2.CAP_PROP_FPS) - frames_to_skip = int(fps * self.seconds_per_frame) - curr_frame = 0 - - # Calculate frames to skip based on max_frames if it is set - if self.max_frames and self.max_frames < total_frames / frames_to_skip: - frames_to_skip = max(total_frames // self.max_frames, 1) - - frame_count = 0 - while curr_frame < total_frames - 1 and ( - not self.max_frames or frame_count < self.max_frames - ): - video.set(self.cv2.CAP_PROP_POS_FRAMES, curr_frame) - success, frame = video.read() - if not success: - break - _, buffer = self.cv2.imencode(".jpg", frame) - base64Frames.append(base64.b64encode(buffer).decode("utf-8")) - curr_frame += frames_to_skip - frame_count += 1 - video.release() - - audio_path = f"{base_video_path}.wav" - audio_file = None - with self.mp.VideoFileClip(video_path) as clip: - if clip.audio is not None: - clip.audio.write_audiofile( - audio_path, codec="pcm_s16le", fps=16000 - ) - audio_file = open(audio_path, "rb") - os.remove(audio_path) - - return base64Frames, audio_file diff --git a/py/core/parsers/media/pdf_parser.py b/py/core/parsers/media/pdf_parser.py index e79eca1a1..9f3e7ad27 100644 --- a/py/core/parsers/media/pdf_parser.py +++ b/py/core/parsers/media/pdf_parser.py @@ -129,7 +129,6 @@ async def 
ingest( partition_strategy: str = "hi_res", chunking_strategy="by_title", ) -> AsyncGenerator[str, None]: - # partition the pdf elements = self.partition_pdf( file=BytesIO(data), diff --git a/py/core/parsers/structured/csv_parser.py b/py/core/parsers/structured/csv_parser.py index 2a075eda8..41d2ec980 100644 --- a/py/core/parsers/structured/csv_parser.py +++ b/py/core/parsers/structured/csv_parser.py @@ -39,7 +39,6 @@ def __init__(self): def get_delimiter( self, file_path: Optional[str] = None, file: Optional[IO[bytes]] = None ): - sniffer = self.csv.Sniffer() num_bytes = 65536 diff --git a/py/core/parsers/structured/json_parser.py b/py/core/parsers/structured/json_parser.py index f2e18e81d..003061d5c 100644 --- a/py/core/parsers/structured/json_parser.py +++ b/py/core/parsers/structured/json_parser.py @@ -1,6 +1,6 @@ # type: ignore import json -from typing import AsyncGenerator +from typing import Any, AsyncGenerator from core.base.abstractions import DataType from core.base.parsers.base_parser import AsyncParser @@ -9,11 +9,28 @@ class JSONParser(AsyncParser[DataType]): """A parser for JSON data.""" - async def ingest(self, data: DataType) -> AsyncGenerator[str, None]: - """Ingest JSON data and yield a formatted text representation.""" + async def ingest( + self, data: DataType, **kwargs: Any + ) -> AsyncGenerator[str, None]: + """ + Ingest JSON data and yield a formatted text representation. + + :param data: The JSON data to parse. + :param kwargs: Additional keyword arguments. + """ if isinstance(data, bytes): data = data.decode("utf-8") - yield self._parse_json(json.loads(data)) + parsed_json = json.loads(data) + formatted_text = self._parse_json(parsed_json) + + chunk_size = kwargs.get("chunk_size") + if chunk_size and isinstance(chunk_size, int): + # If chunk_size is provided and is an integer, yield the formatted text in chunks + for i in range(0, len(formatted_text), chunk_size): + yield formatted_text[i : i + chunk_size] + else: + # If no valid chunk_size is provided, yield the entire formatted text + yield formatted_text def _parse_json(self, data: dict) -> str: def remove_objects_with_null(obj): diff --git a/py/core/parsers/structured/xlsx_parser.py b/py/core/parsers/structured/xlsx_parser.py index 502370708..33d2c4aa2 100644 --- a/py/core/parsers/structured/xlsx_parser.py +++ b/py/core/parsers/structured/xlsx_parser.py @@ -76,7 +76,6 @@ async def ingest( [[cell.value for cell in row] for row in ws.iter_rows()] ) for table in self.connected_components(ws_data): - # parse like a csv parser, assumes that the first row has column names if len(table) <= 1: continue diff --git a/py/core/parsers/text/html_parser.py b/py/core/parsers/text/html_parser.py index 61b59cf20..c2e893120 100644 --- a/py/core/parsers/text/html_parser.py +++ b/py/core/parsers/text/html_parser.py @@ -10,7 +10,9 @@ class HTMLParser(AsyncParser[DataType]): """A parser for HTML data.""" - async def ingest(self, data: DataType) -> AsyncGenerator[str, None]: + async def ingest( + self, data: DataType, *args, **kwargs + ) -> AsyncGenerator[str, None]: """Ingest HTML data and yield text.""" soup = BeautifulSoup(data, "html.parser") yield soup.get_text() diff --git a/py/core/pipelines/search_pipeline.py b/py/core/pipelines/search_pipeline.py index eb5f9adef..b6c14b0d7 100644 --- a/py/core/pipelines/search_pipeline.py +++ b/py/core/pipelines/search_pipeline.py @@ -116,14 +116,14 @@ def add_pipe( self, pipe: AsyncPipe, add_upstream_outputs: Optional[list[dict[str, str]]] = None, - kg_extraction_pipe: bool = False, + 
kg_triples_extraction_pipe: bool = False, vector_search_pipe: bool = False, *args, **kwargs, ) -> None: logger.debug(f"Adding pipe {pipe.config.name} to the SearchPipeline") - if kg_extraction_pipe: + if kg_triples_extraction_pipe: if not self._kg_search_pipeline: self._kg_search_pipeline = AsyncPipeline() if not self._kg_search_pipeline: diff --git a/py/core/pipes/__init__.py b/py/core/pipes/__init__.py index 88bd1149e..510879e05 100644 --- a/py/core/pipes/__init__.py +++ b/py/core/pipes/__init__.py @@ -1,14 +1,13 @@ from .abstractions.generator_pipe import GeneratorPipe from .abstractions.search_pipe import SearchPipe -from .ingestion.chunking_pipe import ChunkingPipe from .ingestion.embedding_pipe import EmbeddingPipe from .ingestion.parsing_pipe import ParsingPipe from .ingestion.vector_storage_pipe import VectorStoragePipe from .kg.clustering import KGClusteringPipe from .kg.community_summary import KGCommunitySummaryPipe -from .kg.extraction import KGTriplesExtractionPipe -from .kg.node_extraction import KGNodeDescriptionPipe, KGNodeExtractionPipe +from .kg.entity_description import KGEntityDescriptionPipe from .kg.storage import KGStoragePipe +from .kg.triples_extraction import KGTriplesExtractionPipe from .retrieval.kg_search_pipe import KGSearchSearchPipe from .retrieval.multi_search import MultiSearchPipe from .retrieval.query_transform_pipe import QueryTransformPipe @@ -22,11 +21,9 @@ "GeneratorPipe", "EmbeddingPipe", "KGTriplesExtractionPipe", - "KGNodeExtractionPipe", "KGSearchSearchPipe", - "KGNodeDescriptionPipe", + "KGEntityDescriptionPipe", "ParsingPipe", - "ChunkingPipe", "QueryTransformPipe", "SearchRAGPipe", "StreamingSearchRAGPipe", diff --git a/py/core/pipes/ingestion/chunking_pipe.py b/py/core/pipes/ingestion/chunking_pipe.py deleted file mode 100644 index f5777d988..000000000 --- a/py/core/pipes/ingestion/chunking_pipe.py +++ /dev/null @@ -1,78 +0,0 @@ -import logging -from typing import Any, AsyncGenerator, Optional -from uuid import UUID - -from core.base import ( - AsyncState, - ChunkingProvider, - DocumentExtraction, - DocumentFragment, - PipeType, - R2RChunkingConfig, - RunLoggingSingleton, - generate_id_from_label, -) -from core.base.pipes.base_pipe import AsyncPipe -from core.providers import R2RChunkingProvider - -logger = logging.getLogger(__name__) - - -class ChunkingPipe(AsyncPipe[DocumentFragment]): - class Input(AsyncPipe.Input): - message: list[DocumentExtraction] - - def __init__( - self, - chunking_provider: ChunkingProvider, - config: AsyncPipe.PipeConfig, - pipe_logger: Optional[RunLoggingSingleton] = None, - type: PipeType = PipeType.INGESTOR, - *args, - **kwargs, - ): - super().__init__( - config, - type, - pipe_logger, - *args, - **kwargs, - ) - self.default_chunking_provider = ( - chunking_provider or R2RChunkingProvider(R2RChunkingConfig()) - ) - - async def _run_logic( # type: ignore - self, - input: AsyncPipe.Input, - state: AsyncState, - run_id: UUID, - *args: Any, - **kwargs: Any, - ) -> AsyncGenerator[DocumentFragment, None]: - - chunking_provider = ( - kwargs.get("chunking_provider", None) - or self.default_chunking_provider - ) - - unstr_iteration = 0 # unstructured already chunks - for item in input.message: - iteration = 0 - async for chunk in chunking_provider.chunk(item): # type: ignore - if item.metadata.get("partitioned_by_unstructured", False): - item.metadata["chunk_order"] = unstr_iteration - unstr_iteration += 1 - else: - item.metadata["chunk_order"] = iteration - iteration += 1 - - yield DocumentFragment( - 
id=generate_id_from_label(f"{item.id}-{iteration}"), - extraction_id=item.id, - document_id=item.document_id, - user_id=item.user_id, - collection_ids=item.collection_ids, - data=chunk, - metadata=item.metadata, - ) diff --git a/py/core/pipes/ingestion/embedding_pipe.py b/py/core/pipes/ingestion/embedding_pipe.py index 03a543608..9ba5c113d 100644 --- a/py/core/pipes/ingestion/embedding_pipe.py +++ b/py/core/pipes/ingestion/embedding_pipe.py @@ -4,7 +4,7 @@ from core.base import ( AsyncState, - DocumentFragment, + DocumentExtraction, EmbeddingProvider, PipeType, R2RDocumentProcessingError, @@ -19,11 +19,11 @@ class EmbeddingPipe(AsyncPipe[VectorEntry]): """ - Embeds fragments using a specified embedding model. + Embeds extractions using a specified embedding model. """ class Input(AsyncPipe.Input): - message: list[DocumentFragment] + message: list[DocumentExtraction] def __init__( self, @@ -43,30 +43,31 @@ def __init__( self.embedding_provider = embedding_provider self.embedding_batch_size = embedding_batch_size - async def embed(self, fragments: list[DocumentFragment]) -> list[float]: + async def embed( + self, extractions: list[DocumentExtraction] + ) -> list[float]: return await self.embedding_provider.async_get_embeddings( - [fragment.data for fragment in fragments], # type: ignore + [extraction.data for extraction in extractions], # type: ignore EmbeddingProvider.PipeStage.BASE, ) async def _process_batch( - self, fragment_batch: list[DocumentFragment] + self, extraction_batch: list[DocumentExtraction] ) -> list[VectorEntry]: - vectors = await self.embed(fragment_batch) + vectors = await self.embed(extraction_batch) return [ VectorEntry( - fragment_id=fragment.id, - extraction_id=fragment.extraction_id, - document_id=fragment.document_id, - user_id=fragment.user_id, - collection_ids=fragment.collection_ids, + extraction_id=extraction.id, + document_id=extraction.document_id, + user_id=extraction.user_id, + collection_ids=extraction.collection_ids, vector=Vector(data=raw_vector), - text=fragment.data, # type: ignore + text=extraction.data, # type: ignore metadata={ - **fragment.metadata, + **extraction.metadata, }, ) - for raw_vector, fragment in zip(vectors, fragment_batch) + for raw_vector, extraction in zip(vectors, extraction_batch) ] async def _run_logic( # type: ignore @@ -77,12 +78,11 @@ async def _run_logic( # type: ignore *args: Any, **kwargs: Any, ) -> AsyncGenerator[VectorEntry, None]: - if not isinstance(input, EmbeddingPipe.Input): raise ValueError( f"Invalid input type for embedding pipe: {type(input)}" ) - fragment_batch = [] + extraction_batch = [] batch_size = self.embedding_batch_size concurrent_limit = ( self.embedding_provider.config.concurrent_request_limit @@ -94,13 +94,13 @@ async def process_batch(batch): try: for item in input.message: - fragment_batch.append(item) + extraction_batch.append(item) - if len(fragment_batch) >= batch_size: + if len(extraction_batch) >= batch_size: tasks.add( - asyncio.create_task(process_batch(fragment_batch)) + asyncio.create_task(process_batch(extraction_batch)) ) - fragment_batch = [] + extraction_batch = [] while len(tasks) >= concurrent_limit: done, tasks = await asyncio.wait( @@ -110,8 +110,8 @@ async def process_batch(batch): for vector_entry in await task: yield vector_entry - if fragment_batch: - tasks.add(asyncio.create_task(process_batch(fragment_batch))) + if extraction_batch: + tasks.add(asyncio.create_task(process_batch(extraction_batch))) for future_task in asyncio.as_completed(tasks): for vector_entry in await 
future_task: @@ -132,33 +132,32 @@ async def process_batch(batch): await self.log_queue.get() self.log_queue.task_done() - async def _process_fragment( - self, fragment: DocumentFragment + async def _process_extraction( + self, extraction: DocumentExtraction ) -> Union[VectorEntry, R2RDocumentProcessingError]: try: - if isinstance(fragment.data, bytes): + if isinstance(extraction.data, bytes): raise ValueError( - "Fragment data is in bytes format, which is not supported by the embedding provider." + "extraction data is in bytes format, which is not supported by the embedding provider." ) vectors = await self.embedding_provider.async_get_embeddings( - [fragment.data], + [extraction.data], EmbeddingProvider.PipeStage.BASE, ) return VectorEntry( - fragment_id=fragment.id, - extraction_id=fragment.extraction_id, - document_id=fragment.document_id, - user_id=fragment.user_id, - collection_ids=fragment.collection_ids, + extraction_id=extraction.id, + document_id=extraction.document_id, + user_id=extraction.user_id, + collection_ids=extraction.collection_ids, vector=Vector(data=vectors[0]), - text=fragment.data, - metadata={**fragment.metadata}, + text=extraction.data, + metadata={**extraction.metadata}, ) except Exception as e: - logger.error(f"Error processing fragment: {e}") + logger.error(f"Error processing extraction: {e}") return R2RDocumentProcessingError( error_message=str(e), - document_id=fragment.document_id, + document_id=extraction.document_id, ) diff --git a/py/core/pipes/ingestion/parsing_pipe.py b/py/core/pipes/ingestion/parsing_pipe.py index 539186c11..75f8d3ab8 100644 --- a/py/core/pipes/ingestion/parsing_pipe.py +++ b/py/core/pipes/ingestion/parsing_pipe.py @@ -7,13 +7,14 @@ Document, DocumentExtraction, FileProvider, - ParsingProvider, + IngestionConfig, PipeType, RunLoggingSingleton, generate_id_from_label, ) from core.base.abstractions import R2RDocumentProcessingError from core.base.pipes.base_pipe import AsyncPipe +from core.base.providers.ingestion import IngestionProvider logger = logging.getLogger(__name__) @@ -24,7 +25,7 @@ class Input(AsyncPipe.Input): def __init__( self, - parsing_provider: ParsingProvider, + ingestion_provider: IngestionProvider, file_provider: FileProvider, config: AsyncPipe.PipeConfig, type: PipeType = PipeType.INGESTOR, @@ -39,7 +40,7 @@ def __init__( *args, **kwargs, ) - self.parsing_provider = parsing_provider + self.ingestion_provider = ingestion_provider self.file_provider = file_provider async def _parse( @@ -47,21 +48,30 @@ async def _parse( document: Document, run_id: UUID, version: str, + ingestion_config_override: Optional[dict], ) -> AsyncGenerator[DocumentExtraction, None]: try: + ingestion_config_override = ingestion_config_override or {} + override_provider = ingestion_config_override.pop("provider", None) + if ( + override_provider + and override_provider + != self.ingestion_provider.config.provider + ): + raise ValueError( + f"Provider '{override_provider}' does not match ingestion provider '{self.ingestion_provider.config.provider}'." 
+ ) if result := await self.file_provider.retrieve_file(document.id): file_name, file_wrapper, file_size = result with file_wrapper as file_content_stream: file_content = file_content_stream.read() - async for extraction in self.parsing_provider.parse( # type: ignore - file_content, document + async for extraction in self.ingestion_provider.parse( # type: ignore + file_content, document, ingestion_config_override ): - extraction_id = generate_id_from_label( - f"{extraction.id}-{version}" - ) - extraction.id = extraction_id + id = generate_id_from_label(f"{extraction.id}-{version}") + extraction.id = id extraction.metadata["version"] = version yield extraction except Exception as e: @@ -78,7 +88,12 @@ async def _run_logic( # type: ignore *args, **kwargs, ) -> AsyncGenerator[DocumentExtraction, None]: + ingestion_config = kwargs.get("ingestion_config") + async for result in self._parse( - input.message, run_id, input.message.metadata.get("version", "v0") + input.message, + run_id, + input.message.metadata.get("version", "v0"), + ingestion_config_override=ingestion_config, ): yield result diff --git a/py/core/pipes/ingestion/vector_storage_pipe.py b/py/core/pipes/ingestion/vector_storage_pipe.py index b7c8ead03..09706401f 100644 --- a/py/core/pipes/ingestion/vector_storage_pipe.py +++ b/py/core/pipes/ingestion/vector_storage_pipe.py @@ -91,7 +91,7 @@ async def _run_logic( # type: ignore for document_id, count in document_counts.items(): logger.info( - f"Successful ingestion for document_id: {document_id}, with vector: {count}" + f"Successful ingestion for document_id: {document_id}, with vector count: {count}" ) yield StorageResult( document_id=document_id, num_chunks=count, success=True diff --git a/py/core/pipes/kg/clustering.py b/py/core/pipes/kg/clustering.py index 9aa8ad312..65dba50e7 100644 --- a/py/core/pipes/kg/clustering.py +++ b/py/core/pipes/kg/clustering.py @@ -7,7 +7,6 @@ AsyncState, CompletionProvider, EmbeddingProvider, - GenerationConfig, KGProvider, PipeType, PromptProvider, @@ -49,25 +48,24 @@ def __init__( async def cluster_kg( self, + collection_id: UUID, leiden_params: dict, - generation_config: GenerationConfig, ): """ - Clusters the knowledge graph triples into communities using hierarchical Leiden algorithm. Uses neo4j's graph data science library. + Clusters the knowledge graph triples into communities using hierarchical Leiden algorithm. Uses graspologic library. """ - num_communities, num_hierarchies, intermediate_communities = ( - self.kg_provider.perform_graph_clustering(leiden_params) # type: ignore - ) + num_communities = await self.kg_provider.perform_graph_clustering( + collection_id, + leiden_params, + ) # type: ignore logger.info( - f"Clustering completed. Generated {num_communities} communities with {num_hierarchies} hierarchies with intermediate communities: {intermediate_communities}." + f"Clustering completed. Generated {num_communities} communities." ) return { "num_communities": num_communities, - "num_hierarchies": num_hierarchies, - "intermediate_communities": intermediate_communities, } async def _run_logic( # type: ignore @@ -82,18 +80,7 @@ async def _run_logic( # type: ignore Executes the KG clustering pipe: clustering entities and triples into communities. 
""" + collection_id = input.message["collection_id"] leiden_params = input.message["leiden_params"] - if not leiden_params: - raise ValueError("Leiden parameters not provided.") - generation_config = input.message["generation_config"] - if not generation_config: - raise ValueError("Generation config not provided.") - - base_dimension = self.embedding_provider.config.base_dimension - vector_index_fn = self.kg_provider.create_vector_index - vector_index_fn("__ENTITY__", "name_embedding", base_dimension) - vector_index_fn("__ENTITY__", "description_embedding", base_dimension) - vector_index_fn("__RELATIONSHIP__", "description", base_dimension) - vector_index_fn("__Community__", "summary_embedding", base_dimension) - yield await self.cluster_kg(leiden_params, generation_config) + yield await self.cluster_kg(collection_id, leiden_params) diff --git a/py/core/pipes/kg/community_summary.py b/py/core/pipes/kg/community_summary.py index d93adfaef..d9e652014 100644 --- a/py/core/pipes/kg/community_summary.py +++ b/py/core/pipes/kg/community_summary.py @@ -1,3 +1,4 @@ +import asyncio import json import logging from typing import Any, AsyncGenerator, Optional @@ -6,16 +7,14 @@ from core.base import ( AsyncPipe, AsyncState, - Community, + CommunityReport, CompletionProvider, EmbeddingProvider, - Entity, GenerationConfig, KGProvider, PipeType, PromptProvider, RunLoggingSingleton, - Triple, ) logger = logging.getLogger(__name__) @@ -54,8 +53,8 @@ def __init__( def community_summary_prompt( self, - entities: list[Entity], - triples: list[Triple], + entities: list, + triples: list, max_summary_input_length: int, ): """ @@ -63,14 +62,14 @@ def community_summary_prompt( """ entities_info = "\n".join( [ - f"{entity.id}, {entity.name}, {entity.description}" + f"{entity['id']}, {entity['name']}, {entity['description']}" for entity in entities ] ) triples_info = "\n".join( [ - f"{triple.id}, {triple.subject}, {triple.object}, {triple.predicate}, {triple.description}" + f"{triple['id']}, {triple['subject']}, {triple['object']}, {triple['predicate']}, {triple['description']}" for triple in triples ] ) @@ -93,73 +92,90 @@ def community_summary_prompt( async def process_community( self, - level: int, - community_id: str, + community_number: str, max_summary_input_length: int, generation_config: GenerationConfig, + collection_id: UUID, ) -> dict: """ - Process a community by summarizing it and creating a summary embedding and storing it to a neo4j database. + Process a community by summarizing it and creating a summary embedding and storing it to a database. """ - logger.info( - f"Processing community {community_id} at level {level} with max summary input length {max_summary_input_length}." - ) - - entities, triples = ( - self.kg_provider.get_community_entities_and_triples( # type: ignore - level=level, community_id=community_id + community_level, entities, triples = ( + await self.kg_provider.get_community_details( + community_number=community_number ) ) - if entities == [] or triples == []: - # TODO - Does this logic work well with the full workflow? + if entities == [] and triples == []: raise ValueError( - f"Community {community_id} at level {level} has no entities or triples." + f"Community {community_number} has no entities or triples." 
) - description = ( - ( - await self.llm_provider.aget_completion( - messages=self.prompt_provider._get_message_payload( - task_prompt_name=self.kg_provider.config.kg_enrichment_settings.community_reports_prompt, - task_inputs={ - "input_text": self.community_summary_prompt( - entities, - triples, - max_summary_input_length, - ), - }, - ), - generation_config=generation_config, + for attempt in range(3): + + description = ( + ( + await self.llm_provider.aget_completion( + messages=self.prompt_provider._get_message_payload( + task_prompt_name=self.kg_provider.config.kg_enrichment_settings.community_reports_prompt, + task_inputs={ + "input_text": self.community_summary_prompt( + entities, + triples, + max_summary_input_length, + ), + }, + ), + generation_config=generation_config, + ) ) + .choices[0] + .message.content ) - .choices[0] - .message.content - ) - if not description: - raise ValueError( - f"Failed to generate a summary for community {community_id} at level {level}." - ) - - community = Community( - id=str(community_id), - level=str(level), - summary=description, - summary_embedding=await self.embedding_provider.async_get_embedding( - description + try: + if description.startswith("```json"): + description = ( + description.strip("```json").strip("```").strip() + ) + + description = json.loads(description) + name = description["name"] + summary = description["summary"] + findings = description["findings"] + rating = description["rating"] + rating_explanation = description["rating_explanation"] + break + except Exception as e: + if attempt == 2: + raise ValueError( + f"Failed to generate a summary for community {community_number} at level {community_level}." + ) from e + + community = CommunityReport( + community_number=community_number, + collection_id=collection_id, + level=community_level, + name=name, + summary=summary, + rating=rating, + rating_explanation=rating_explanation, + findings=findings, + embedding=await self.embedding_provider.async_get_embedding( + "Summary:\n" + + summary + + "\n\nFindings:\n" + + "\n".join(findings) ), ) - self.kg_provider.upsert_communities([community]) # type: ignore - - try: - summary = json.loads(community.summary) - except: - summary = {"title": ""} + await self.kg_provider.add_community_report(community) - return {"id": community.id, "title": summary["title"]} + return { + "community_number": community.community_number, + "name": community.name, + } async def _run_logic( # type: ignore self, @@ -173,21 +189,21 @@ async def _run_logic( # type: ignore Executes the KG community summary pipe: summarizing communities. 
""" - community_id = input.message["community_id"] - level = input.message["level"] + offset = input.message["offset"] + limit = input.message["limit"] generation_config = input.message["generation_config"] max_summary_input_length = input.message["max_summary_input_length"] - - try: - community_summary = await self.process_community( - level=level, - community_id=community_id, - max_summary_input_length=max_summary_input_length, - generation_config=generation_config, + collection_id = input.message["collection_id"] + community_summary_jobs = [] + for community_number in range(offset, offset + limit): + community_summary_jobs.append( + self.process_community( + community_number=community_number, + max_summary_input_length=max_summary_input_length, + generation_config=generation_config, + collection_id=collection_id, + ) ) - yield community_summary - except Exception as e: - error_message = f"Failed to process community {community_id} at level {level}: {e}" - logger.error(error_message) - raise ValueError(error_message) + for community_summary in asyncio.as_completed(community_summary_jobs): + yield await community_summary diff --git a/py/core/pipes/kg/entity_description.py b/py/core/pipes/kg/entity_description.py new file mode 100644 index 000000000..6a85eccb7 --- /dev/null +++ b/py/core/pipes/kg/entity_description.py @@ -0,0 +1,180 @@ +# pipe to extract nodes/triples etc + +import asyncio +import logging +import random +from typing import Any, AsyncGenerator, Optional +from uuid import UUID + +from core.base import ( + AsyncState, + CompletionProvider, + EmbeddingProvider, + KGProvider, + PipeType, + RunLoggingSingleton, +) +from core.base.pipes.base_pipe import AsyncPipe + +logger = logging.getLogger(__name__) + + +class KGEntityDescriptionPipe(AsyncPipe): + """ + The pipe takes input a list of nodes and extracts description from them. + """ + + class Input(AsyncPipe.Input): + message: dict[str, Any] + + def __init__( + self, + kg_provider: KGProvider, + llm_provider: CompletionProvider, + embedding_provider: EmbeddingProvider, + config: AsyncPipe.PipeConfig, + pipe_logger: Optional[RunLoggingSingleton] = None, + type: PipeType = PipeType.OTHER, + *args, + **kwargs, + ): + super().__init__( + pipe_logger=pipe_logger, + type=type, + config=config, + ) + self.kg_provider = kg_provider + self.llm_provider = llm_provider + self.embedding_provider = embedding_provider + + async def _run_logic( # type: ignore + self, + input: AsyncPipe.Input, + state: AsyncState, + run_id: UUID, + *args: Any, + **kwargs: Any, + ) -> AsyncGenerator[Any, None]: + """ + Extracts description from the input. + """ + + # TODO - Move this to a .yaml file and load it as we do in triples extraction + summarization_content = """ + Provide a comprehensive yet concise summary of the given entity, incorporating its description and associated triples: + + Entity Info: + {entity_info} + Triples: + {triples_txt} + + Your summary should: + 1. Clearly define the entity's core concept or purpose + 2. Highlight key relationships or attributes from the triples + 3. Integrate any relevant information from the existing description + 4. Maintain a neutral, factual tone + 5. Be approximately 2-3 sentences long + + Ensure the summary is coherent, informative, and captures the essence of the entity within the context of the provided information. 
+ """ + + def truncate_info(info_list, max_length): + random.shuffle(info_list) + truncated_info = "" + current_length = 0 + for info in info_list: + if current_length + len(info) > max_length: + break + truncated_info += info + "\n" + current_length += len(info) + + return truncated_info + + async def process_entity( + entities, triples, max_description_input_length + ): + + entity_info = [ + f"{entity['name']}, {entity['description']}" + for entity in entities + ] + + triples_txt = [ + f"{i+1}: {triple['subject']}, {triple['object']}, {triple['predicate']} - Summary: {triple['description']}" + for i, triple in enumerate(triples) + ] + + out_entity = {"name": entities[0]["name"]} + out_entity["description"] = ( + ( + await self.llm_provider.aget_completion( + messages=[ + { + "role": "user", + "content": summarization_content.format( + entity_info=truncate_info( + entity_info, + max_description_input_length, + ), + triples_txt=truncate_info( + triples_txt, + max_description_input_length, + ), + ), + } + ], + generation_config=self.kg_provider.config.kg_enrichment_settings.generation_config, + ) + ) + .choices[0] + .message.content + ) + + # will do more requests, but it is simpler + out_entity["description_embedding"] = ( + await self.embedding_provider.async_get_embeddings( + [out_entity["description"]] + ) + )[0] + + # upsert the entity and its embedding + await self.kg_provider.upsert_embeddings( + [ + ( + out_entity["name"], + out_entity["description"], + str(out_entity["description_embedding"]), + ) + ], + "entity_embedding", + ) + + return out_entity["name"] + + offset = input.message["offset"] + limit = input.message["limit"] + document_id = input.message["document_id"] + entity_map = await self.kg_provider.get_entity_map( + offset, limit, document_id + ) + + total_entities = len(entity_map) + logger.info( + f"Processing {total_entities} entities for document {document_id}" + ) + + workflows = [] + for i, (entity_name, entity_info) in enumerate(entity_map.items()): + try: + workflows.append( + process_entity( + entity_info["entities"], + entity_info["triples"], + input.message["max_description_input_length"], + ) + ) + except Exception as e: + logger.error(f"Error processing entity {entity_name}: {e}") + + for result in asyncio.as_completed(workflows): + yield await result diff --git a/py/core/pipes/kg/node_extraction.py b/py/core/pipes/kg/node_extraction.py deleted file mode 100644 index 1e98963fa..000000000 --- a/py/core/pipes/kg/node_extraction.py +++ /dev/null @@ -1,228 +0,0 @@ -# pipe to extract nodes/triples etc - -import asyncio -import logging -import random -from typing import Any, AsyncGenerator, Optional -from uuid import UUID - -from core.base import ( - AsyncState, - CompletionProvider, - EmbeddingProvider, - KGProvider, - PipeType, - PromptProvider, - RunLoggingSingleton, -) -from core.base.pipes.base_pipe import AsyncPipe - -logger = logging.getLogger(__name__) - - -class KGNodeExtractionPipe(AsyncPipe): - """ - The pipe takes input a list of documents (optional) and extracts nodes and triples from them. 
- """ - - class Input(AsyncPipe.Input): - message: Any - - def __init__( - self, - kg_provider: KGProvider, - llm_provider: CompletionProvider, - prompt_provider: PromptProvider, - config: AsyncPipe.PipeConfig, - pipe_logger: Optional[RunLoggingSingleton] = None, - type: PipeType = PipeType.OTHER, - *args, - **kwargs, - ): - super().__init__( - pipe_logger=pipe_logger, - type=type, - config=config, - ) - self.kg_provider = kg_provider - self.llm_provider = llm_provider - self.prompt_provider = prompt_provider - - async def _run_logic( # type: ignore - self, - input: Input, - state: AsyncState, - run_id: UUID, - *args, - **kwargs, - ) -> AsyncGenerator[Any, None]: - - nodes = self.kg_provider.get_entity_map() # type: ignore - - for _, node_info in nodes.items(): - for entity in node_info["entities"]: - yield entity, node_info[ - "triples" - ] # the entity and its associated triples - - -class KGNodeDescriptionPipe(AsyncPipe): - """ - The pipe takes input a list of nodes and extracts description from them. - """ - - class Input(AsyncPipe.Input): - message: dict[str, Any] - - def __init__( - self, - kg_provider: KGProvider, - llm_provider: CompletionProvider, - embedding_provider: EmbeddingProvider, - config: AsyncPipe.PipeConfig, - pipe_logger: Optional[RunLoggingSingleton] = None, - type: PipeType = PipeType.OTHER, - *args, - **kwargs, - ): - super().__init__( - pipe_logger=pipe_logger, - type=type, - config=config, - ) - self.kg_provider = kg_provider - self.llm_provider = llm_provider - self.embedding_provider = embedding_provider - - async def _run_logic( # type: ignore - self, - input: AsyncPipe.Input, - state: AsyncState, - run_id: UUID, - *args: Any, - **kwargs: Any, - ) -> AsyncGenerator[Any, None]: - """ - Extracts description from the input. - """ - - summarization_content = """ - Provide a comprehensive yet concise summary of the given entity, incorporating its description and associated triples: - - Entity: {entity_info} - Description: {description} - Triples: {triples_txt} - - Your summary should: - 1. Clearly define the entity's core concept or purpose - 2. Highlight key relationships or attributes from the triples - 3. Integrate any relevant information from the existing description - 4. Maintain a neutral, factual tone - 5. Be approximately 2-3 sentences long - - Ensure the summary is coherent, informative, and captures the essence of the entity within the context of the provided information. 
- """ - - async def process_entity( - entity, triples, max_description_input_length - ): - - # if embedding is present in the entity, just return it - # in the future disable this to override and recompute the descriptions for all entities - if entity.description_embedding: - return entity - - entity_info = f"{entity.name}, {entity.description}" - triples_txt = [ - f"{i+1}: {triple.subject}, {triple.object}, {triple.predicate} - Summary: {triple.description}" - for i, triple in enumerate(triples) - ] - - # truncate the descriptions to the max_description_input_length - # randomly shuffle the triples - # randomly select elements from the triples_txt until the length is less than max_description_input_length - random.shuffle(triples_txt) - truncated_triples_txt = "" - current_length = 0 - for triple in triples_txt: - if current_length + len(triple) > max_description_input_length: - break - truncated_triples_txt += triple + "\n" - current_length += len(triple) - - messages = [ - { - "role": "user", - "content": summarization_content.format( - entity_info=entity_info, - description=entity.description, - triples_txt=triples_txt, - ), - } - ] - - out_entity = self.kg_provider.retrieve_cache( - "entities_with_description", f"{entity.name}_{entity.category}" - ) - if out_entity: - logger.info(f"Hit cache for entity {entity.name}") - else: - completion = await self.llm_provider.aget_completion( - messages, - self.kg_provider.config.kg_enrichment_settings.generation_config, - ) - entity.description = completion.choices[0].message.content - - # embedding - description_embedding = ( - await self.embedding_provider.async_get_embeddings( - [entity.description] - ) - ) - entity.description_embedding = description_embedding[0] - - # name embedding - # turned it off because we aren't using it for now - # name_embedding = ( - # await self.embedding_provider.async_get_embeddings( - # [entity.name] - # ) - # ) - # entity.name_embedding = name_embedding[0] - - out_entity = entity - - return out_entity - - max_description_input_length = input.message[ - "max_description_input_length" - ] - node_extractions = input.message["node_extractions"] - - tasks = [] - count = 0 - async for entity, triples in node_extractions: - tasks.append( - asyncio.create_task( - process_entity( - entity, triples, max_description_input_length - ) - ) - ) - count += 1 - - logger.info(f"KG Node Description pipe: Created {count} tasks") - # do gather because we need to wait for all descriptions before kicking off the next step - processed_entities = await asyncio.gather(*tasks) - - # upsert to the database - self.kg_provider.upsert_entities( - processed_entities, with_embeddings=True - ) - - logger.info( - "KG Node Description pipe: Upserted entities to the database" - ) - - for entity in processed_entities: - yield entity diff --git a/py/core/pipes/kg/storage.py b/py/core/pipes/kg/storage.py index 1f51a68dc..b9ac01ed4 100644 --- a/py/core/pipes/kg/storage.py +++ b/py/core/pipes/kg/storage.py @@ -18,14 +18,14 @@ class KGStoragePipe(AsyncPipe): + # TODO - Apply correct type hints to storage messages class Input(AsyncPipe.Input): - message: AsyncGenerator[List[R2RDocumentProcessingError], None] + message: AsyncGenerator[List[Any], None] def __init__( self, kg_provider: KGProvider, config: AsyncPipe.PipeConfig, - embedding_provider: Optional[EmbeddingProvider] = None, storage_batch_size: int = 1, pipe_logger: Optional[RunLoggingSingleton] = None, type: PipeType = PipeType.INGESTOR, @@ -47,7 +47,6 @@ def __init__( **kwargs, ) 
self.kg_provider = kg_provider - self.embedding_provider = embedding_provider self.storage_batch_size = storage_batch_size async def store( @@ -58,9 +57,7 @@ async def store( Stores a batch of knowledge graph extractions in the graph database. """ try: - return self.kg_provider.upsert_nodes_and_relationships( - kg_extractions - ) + return await self.kg_provider.add_kg_extractions(kg_extractions) except Exception as e: error_message = f"Failed to store knowledge graph extractions in the database: {e}" logger.error(error_message) diff --git a/py/core/pipes/kg/extraction.py b/py/core/pipes/kg/triples_extraction.py similarity index 70% rename from py/core/pipes/kg/extraction.py rename to py/core/pipes/kg/triples_extraction.py index 80b47bd46..62b74b739 100644 --- a/py/core/pipes/kg/extraction.py +++ b/py/core/pipes/kg/triples_extraction.py @@ -2,15 +2,13 @@ import json import logging import re -import uuid from typing import Any, AsyncGenerator, Optional, Union from core.base import ( AsyncState, - ChunkingProvider, CompletionProvider, DatabaseProvider, - DocumentFragment, + DocumentExtraction, Entity, GenerationConfig, KGExtraction, @@ -36,13 +34,12 @@ class ClientError(Exception): pass -class KGTriplesExtractionPipe( - AsyncPipe[Union[KGExtraction, R2RDocumentProcessingError]] -): +class KGTriplesExtractionPipe(AsyncPipe[dict]): """ Extracts knowledge graph information from document extractions. """ + # TODO - Apply correct type hints to storage messages class Input(AsyncPipe.Input): message: dict @@ -52,7 +49,6 @@ def __init__( self, database_provider: DatabaseProvider, llm_provider: CompletionProvider, prompt_provider: PromptProvider, - chunking_provider: ChunkingProvider, config: AsyncPipe.PipeConfig, kg_batch_size: int = 1, graph_rag: bool = True, @@ -66,13 +62,12 @@ def __init__( pipe_logger=pipe_logger, type=type, config=config - or AsyncPipe.PipeConfig(name="default_kg_extraction_pipe"), + or AsyncPipe.PipeConfig(name="default_kg_triples_extraction_pipe"), ) self.kg_provider = kg_provider self.prompt_provider = prompt_provider self.database_provider = database_provider self.llm_provider = llm_provider - self.chunking_provider = chunking_provider self.kg_batch_size = kg_batch_size self.id_prefix = id_prefix self.pipe_run_info = None @@ -80,7 +75,7 @@ def __init__( async def extract_kg( self, - fragments: list[DocumentFragment], + extractions: list[DocumentExtraction], generation_config: GenerationConfig, max_knowledge_triples: int, entity_types: list[str], @@ -89,16 +84,16 @@ async def extract_kg( delay: int = 2, ) -> KGExtraction: """ - Extracts NER triples from a fragment with retries. + Extracts NER triples from an extraction with retries.
""" - # combine all fragments into a single string - combined_fragment: str = " ".join([fragment.data for fragment in fragments]) # type: ignore + # combine all extractions into a single string + combined_extraction: str = " ".join([extraction.data for extraction in extractions]) # type: ignore messages = self.prompt_provider._get_message_payload( - task_prompt_name=self.kg_provider.config.kg_creation_settings.kg_extraction_prompt, + task_prompt_name=self.kg_provider.config.kg_creation_settings.kg_triples_extraction_prompt, task_inputs={ - "input": combined_fragment, + "input": combined_extraction, "max_knowledge_triples": max_knowledge_triples, "entity_types": "\n".join(entity_types), "relation_types": "\n".join(relation_types), @@ -137,28 +132,28 @@ def parse_fn(response_str: str) -> Any: f"No entities found in the response string, the selected LLM likely failed to format it's response correctly. {response_str}", 400, ) - # logger.warning( - # f"No entities found in the response string, the selected LLM likely failed to format it's response correctly. {response_str}", - # ) relationships = re.findall( relationship_pattern, response_str ) - entities_dict = {} + entities_arr = [] for entity in entities: entity_value = entity[0] entity_category = entity[1] entity_description = entity[2] - entities_dict[entity_value] = Entity( - category=entity_category, - description=entity_description, - name=entity_value, - document_ids=[str(fragments[0].document_id)], - text_unit_ids=[ - str(fragment.id) for fragment in fragments - ], - attributes={"fragment_text": combined_fragment}, + entities_arr.append( + Entity( + category=entity_category, + description=entity_description, + name=entity_value, + document_id=str(extractions[0].document_id), + extraction_ids=[ + str(extraction.id) + for extraction in extractions + ], + attributes={}, + ) ) relations_arr = [] @@ -177,22 +172,23 @@ def parse_fn(response_str: str) -> Any: object=object, description=description, weight=weight, - document_ids=[str(fragments[0].document_id)], - text_unit_ids=[ - str(fragment.id) for fragment in fragments + document_id=str(extractions[0].document_id), + extraction_ids=[ + str(extraction.id) + for extraction in extractions ], - attributes={ - "fragment_text": combined_fragment - }, + attributes={}, ) ) - return entities_dict, relations_arr + return entities_arr, relations_arr entities, triples = parse_fn(kg_extraction) return KGExtraction( - fragment_ids=[fragment.id for fragment in fragments], - document_id=fragments[0].document_id, + extraction_ids=[ + extraction.id for extraction in extractions + ], + document_id=extractions[0].document_id, entities=entities, triples=triples, ) @@ -208,15 +204,15 @@ def parse_fn(response_str: str) -> Any: await asyncio.sleep(delay) else: logger.error( - f"Failed after retries with for fragment {fragments[0].id} of document {fragments[0].document_id}: {e}" + f"Failed after retries with for extraction {extractions[0].id} of document {extractions[0].document_id}: {e}" ) # raise e # you should raise an error. 
# add metadata to entities and triples return KGExtraction( - fragment_ids=[fragment.id for fragment in fragments], - document_id=fragments[0].document_id, - entities={}, + extraction_ids=[extraction.id for extraction in extractions], + document_id=extractions[0].document_id, + entities=[], triples=[], ) @@ -228,20 +224,17 @@ async def _run_logic( # type: ignore *args: Any, **kwargs: Any, ) -> AsyncGenerator[Union[KGExtraction, R2RDocumentProcessingError], None]: - logger.info("Running KG Extraction Pipe") document_id = input.message["document_id"] generation_config = input.message["generation_config"] - fragment_merge_count = input.message["fragment_merge_count"] + extraction_merge_count = input.message["extraction_merge_count"] max_knowledge_triples = input.message["max_knowledge_triples"] entity_types = input.message["entity_types"] relation_types = input.message["relation_types"] - - fragments = [ - DocumentFragment( - id=extraction["fragment_id"], - extraction_id=extraction["extraction_id"], + extractions = [ + DocumentExtraction( + id=extraction["extraction_id"], document_id=extraction["document_id"], user_id=extraction["user_id"], collection_ids=extraction["collection_ids"], @@ -255,35 +248,40 @@ async def _run_logic( # type: ignore ] ] - # sort the fragments accroding to chunk_order field in metadata in ascending order - fragments = sorted(fragments, key=lambda x: x.metadata["chunk_order"]) + # sort the extractions according to chunk_order field in metadata in ascending order + extractions = sorted( + extractions, key=lambda x: x.metadata["chunk_order"] + ) - # group these extractions into groups of fragment_merge_count - fragments_groups = [ - fragments[i : i + fragment_merge_count] - for i in range(0, len(fragments), fragment_merge_count) + # group these extractions into groups of extraction_merge_count + extractions_groups = [ + extractions[i : i + extraction_merge_count] + for i in range(0, len(extractions), extraction_merge_count) ] - logger.info( - f"Extracting KG Triples from {len(fragments_groups)} fragment groups from originally {len(fragments)} fragments for document {document_id}" - ) - tasks = [ asyncio.create_task( self.extract_kg( - fragments=fragments_group, + extractions=extractions_group, generation_config=generation_config, max_knowledge_triples=max_knowledge_triples, entity_types=entity_types, relation_types=relation_types, ) ) - for fragments_group in fragments_groups + for extractions_group in extractions_groups ] + completed_tasks = 0 + total_tasks = len(tasks) + for completed_task in asyncio.as_completed(tasks): try: yield await completed_task + completed_tasks += 1 + logger.info( + f"Completed {completed_tasks}/{total_tasks} KG extraction tasks for document {document_id}" + ) except Exception as e: logger.error(f"Error in Extracting KG Triples: {e}") yield R2RDocumentProcessingError( diff --git a/py/core/pipes/retrieval/kg_search_pipe.py b/py/core/pipes/retrieval/kg_search_pipe.py index 461edca76..cfaf74d8c 100644 --- a/py/core/pipes/retrieval/kg_search_pipe.py +++ b/py/core/pipes/retrieval/kg_search_pipe.py @@ -130,8 +130,8 @@ async def local_search( property_names=[ "name", "description", - "fragment_ids", - "document_ids", + # "extraction_ids", + # "document_ids", ], ): yield KGSearchResult( @@ -141,38 +141,38 @@ async def local_search( ), method=KGSearchMethod.LOCAL, result_type=KGSearchResultType.ENTITY, - fragment_ids=search_result["fragment_ids"], - document_ids=search_result["document_ids"], + # extraction_ids=search_result["extraction_ids"], + #
document_ids=search_result["document_ids"], metadata={"associated_query": message}, ) # relationship search - search_type = "__Relationship__" - async for search_result in self.kg_provider.vector_query( # type: ignore - input, - search_type=search_type, - search_type_limits=kg_search_settings.local_search_limits[ - search_type - ], - query_embedding=query_embedding, - property_names=[ - "name", - "description", - "fragment_ids", - "document_ids", - ], - ): - yield KGSearchResult( - content=KGRelationshipResult( - name=search_result["name"], - description=search_result["description"], - ), - method=KGSearchMethod.LOCAL, - result_type=KGSearchResultType.RELATIONSHIP, - fragment_ids=search_result["fragment_ids"], - document_ids=search_result["document_ids"], - metadata={"associated_query": message}, - ) + # search_type = "__Relationship__" + # async for search_result in self.kg_provider.vector_query( # type: ignore + # input, + # search_type=search_type, + # search_type_limits=kg_search_settings.local_search_limits[ + # search_type + # ], + # query_embedding=query_embedding, + # property_names=[ + # "name", + # "description", + # "extraction_ids", + # "document_ids", + # ], + # ): + # yield KGSearchResult( + # content=KGRelationshipResult( + # name=search_result["name"], + # description=search_result["description"], + # ), + # method=KGSearchMethod.LOCAL, + # result_type=KGSearchResultType.RELATIONSHIP, + # # extraction_ids=search_result["extraction_ids"], + # # document_ids=search_result["document_ids"], + # metadata={"associated_query": message}, + # ) # community search search_type = "__Community__" @@ -182,47 +182,29 @@ async def local_search( search_type_limits=kg_search_settings.local_search_limits[ search_type ], - embedding_type="summary_embedding", + embedding_type="embedding", query_embedding=query_embedding, - property_names=["title", "summary"], + property_names=[ + "community_number", + "name", + "findings", + "rating", + "rating_explanation", + "summary", + ], ): - - summary = search_result["summary"] - - # try loading it as a json - try: - - if "```json" in summary: - summary = ( - summary.strip() - .removeprefix("```json") - .removesuffix("```") - .strip() - ) - - summary_json = json.loads(summary) - description = summary_json.get("summary", "") - name = summary_json.get("title", "") - - def get_str(finding): - if isinstance(finding, dict): - return f"{finding['summary']} => {finding['explanation']}" - else: - return str(finding) - - except json.JSONDecodeError: - logger.warning(f"Summary is not valid JSON") - continue - yield KGSearchResult( content=KGCommunityResult( - name=name, description=description + name=search_result["name"], + summary=search_result["summary"], + rating=search_result["rating"], + rating_explanation=search_result["rating_explanation"], + findings=search_result["findings"], ), method=KGSearchMethod.LOCAL, result_type=KGSearchResultType.COMMUNITY, metadata={ "associated_query": message, - "findings": summary_json.get("findings", ""), }, ) @@ -336,7 +318,6 @@ async def _run_logic( # type: ignore *args: Any, **kwargs: Any, ) -> AsyncGenerator[KGSearchResult, None]: - kg_search_type = kg_search_settings.kg_search_type # runs local and/or global search diff --git a/py/core/pipes/retrieval/multi_search.py b/py/core/pipes/retrieval/multi_search.py index ae93ea542..f2e38d139 100644 --- a/py/core/pipes/retrieval/multi_search.py +++ b/py/core/pipes/retrieval/multi_search.py @@ -117,7 +117,7 @@ def reciprocal_rank_fusion( document_queries: dict[UUID, set[str]] = 
{} for query, results in all_results.items(): for rank, result in enumerate(results, 1): - doc_id = result.fragment_id + doc_id = result.extraction_id if doc_id not in document_scores: document_scores[doc_id] = 0 document_results[doc_id] = result diff --git a/py/core/pipes/retrieval/vector_search_pipe.py b/py/core/pipes/retrieval/vector_search_pipe.py index b9b0038a5..ffcfe61c3 100644 --- a/py/core/pipes/retrieval/vector_search_pipe.py +++ b/py/core/pipes/retrieval/vector_search_pipe.py @@ -62,6 +62,7 @@ async def search( # type: ignore message, purpose=EmbeddingPurpose.QUERY, ) + search_results = ( self.database_provider.vector.hybrid_search( query_vector=query_vector, diff --git a/py/core/providers/__init__.py b/py/core/providers/__init__.py index 6f8281f6b..6fc556a22 100644 --- a/py/core/providers/__init__.py +++ b/py/core/providers/__init__.py @@ -1,8 +1,4 @@ from .auth import R2RAuthProvider, SupabaseAuthProvider -from .chunking import ( # type: ignore - R2RChunkingProvider, - UnstructuredChunkingProvider, -) from .crypto import BCryptConfig, BCryptProvider from .database import PostgresDBProvider from .embeddings import ( @@ -11,19 +7,29 @@ OpenAIEmbeddingProvider, ) from .file import PostgresFileProvider -from .kg import Neo4jKGProvider +from .ingestion import ( # type: ignore + R2RIngestionConfig, + R2RIngestionProvider, + UnstructuredIngestionConfig, + UnstructuredIngestionProvider, +) +from .kg import PostgresKGProvider from .llm import LiteCompletionProvider, OpenAICompletionProvider -from .orchestration import HatchetOrchestrationProvider -from .parsing import R2RParsingProvider, UnstructuredParsingProvider +from .orchestration import ( + HatchetOrchestrationProvider, + SimpleOrchestrationProvider, +) from .prompts import R2RPromptProvider __all__ = [ # Auth "R2RAuthProvider", "SupabaseAuthProvider", - # Chunking - "R2RChunkingProvider", # type: ignore - "UnstructuredChunkingProvider", # type: ignore + # Ingestion + "R2RIngestionProvider", + "R2RIngestionConfig", + "UnstructuredIngestionProvider", + "UnstructuredIngestionConfig", # Crypto "BCryptProvider", "BCryptConfig", @@ -36,15 +42,13 @@ # File "PostgresFileProvider", # KG - "Neo4jKGProvider", + "PostgresKGProvider", # Orchestration "HatchetOrchestrationProvider", + "SimpleOrchestrationProvider", # LLM "OpenAICompletionProvider", "LiteCompletionProvider", - # Parsing - "R2RParsingProvider", - "UnstructuredParsingProvider", # Prompts "R2RPromptProvider", ] diff --git a/py/core/providers/auth/r2r_auth.py b/py/core/providers/auth/r2r_auth.py index b665fabd5..550e0cdfc 100644 --- a/py/core/providers/auth/r2r_auth.py +++ b/py/core/providers/auth/r2r_auth.py @@ -131,10 +131,20 @@ def get_current_active_user( return current_user async def register(self, email: str, password: str) -> Dict[str, str]: - # Create new user + # Create new user and give them a default collection new_user = await self.db_provider.relational.create_user( email, password ) + default_collection = ( + await self.db_provider.relational.create_default_collection( + new_user.id, + ) + ) + + await self.db_provider.relational.add_user_to_collection( + new_user.id, default_collection.collection_id + ) + if self.config.require_email_verification: # Generate verification code and send email verification_code = ( diff --git a/py/core/providers/auth/supabase.py b/py/core/providers/auth/supabase.py index c345be08e..704865bc2 100644 --- a/py/core/providers/auth/supabase.py +++ b/py/core/providers/auth/supabase.py @@ -132,8 +132,6 @@ async def user(self, token: str = 
Depends(oauth2_scheme)) -> UserResponse: # Use Supabase client to get user details from token user = self.supabase.auth.get_user(token).user if user: - # print('user = ', user) - # import pdb; pdb.set_trace() return UserResponse( id=user.id, email=user.email, diff --git a/py/core/providers/chunking/__init__.py b/py/core/providers/chunking/__init__.py deleted file mode 100644 index f954e5c21..000000000 --- a/py/core/providers/chunking/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -# type: ignore -from .r2r_chunking import R2RChunkingProvider -from .unstructured_chunking import UnstructuredChunkingProvider - -__all__ = ["R2RChunkingProvider", "UnstructuredChunkingProvider"] diff --git a/py/core/providers/chunking/r2r_chunking.py b/py/core/providers/chunking/r2r_chunking.py deleted file mode 100644 index 1c2d3115d..000000000 --- a/py/core/providers/chunking/r2r_chunking.py +++ /dev/null @@ -1,108 +0,0 @@ -# type: ignore -import logging -from typing import Any, AsyncGenerator, Union - -from core.base import ( - ChunkingProvider, - R2RChunkingConfig, - RecursiveCharacterTextSplitter, - Strategy, - TextSplitter, -) -from core.base.abstractions import DocumentExtraction - -logger = logging.getLogger(__name__) - - -class R2RChunkingProvider(ChunkingProvider): - def __init__(self, config: R2RChunkingConfig): - super().__init__(config) - self.text_splitter = self._initialize_text_splitter() - logger.info( - f"R2RChunkingProvider initialized with config: {self.config}" - ) - - def _initialize_text_splitter(self) -> TextSplitter: - logger.info( - f"Initializing text splitter with method: {self.config.method}" - ) # Debug log - if self.config.method == Strategy.RECURSIVE: - return RecursiveCharacterTextSplitter( - chunk_size=self.config.chunk_size, - chunk_overlap=self.config.chunk_overlap, - ) - elif self.config.method == Strategy.CHARACTER: - from core.base.utils.splitter.text import CharacterTextSplitter - - separator = CharacterTextSplitter.DEFAULT_SEPARATOR - if self.config.extra_fields: - separator = self.config.extra_fields.get( - "separator", CharacterTextSplitter.DEFAULT_SEPARATOR - ) - print("self.config = ", self.config) - return CharacterTextSplitter( - chunk_size=self.config.chunk_size, - chunk_overlap=self.config.chunk_overlap, - separator=separator, - keep_separator=False, - strip_whitespace=True, - ) - elif self.config.method == Strategy.BASIC: - raise NotImplementedError( - "Basic chunking method not implemented. Please use Recursive." 
- ) - elif self.config.method == Strategy.BY_TITLE: - raise NotImplementedError("By title method not implemented") - else: - raise ValueError(f"Unsupported method type: {self.config.method}") - - def validate_config(self) -> bool: - return self.config.chunk_size > 0 and self.config.chunk_overlap >= 0 - - def update_config(self, config_override: R2RChunkingConfig): - if self.config != config_override: - self.config = config_override - self.text_splitter = self._initialize_text_splitter() - - async def chunk( - self, parsed_document: Union[str, DocumentExtraction] - ) -> AsyncGenerator[Any, None]: - - if isinstance(parsed_document, DocumentExtraction): - parsed_document = parsed_document.data - - if isinstance(parsed_document, str): - chunks = self.text_splitter.create_documents([parsed_document]) - else: - # Assuming parsed_document is already a list of text chunks - chunks = parsed_document - - for chunk in chunks: - yield ( - chunk.page_content if hasattr(chunk, "page_content") else chunk - ) - - async def chunk_with_override( - self, - parsed_document: Union[str, DocumentExtraction], - config_override: R2RChunkingConfig, - ) -> AsyncGenerator[Any, None]: - original_config = self.config - original_splitter = self.text_splitter - try: - self.update_config(config_override) - async for chunk in self.chunk(parsed_document): - yield chunk - finally: - self.config = original_config - self.text_splitter = original_splitter - - @classmethod - def with_override( - cls, - original_provider: "R2RChunkingProvider", - config_override: R2RChunkingConfig, - ) -> "R2RChunkingProvider": - new_config = R2RChunkingConfig(**original_provider.config.model_dump()) - new_config.update(config_override.model_dump(exclude_unset=True)) - return cls(new_config) diff --git a/py/core/providers/chunking/unstructured_chunking.py b/py/core/providers/chunking/unstructured_chunking.py deleted file mode 100644 index 0d976b694..000000000 --- a/py/core/providers/chunking/unstructured_chunking.py +++ /dev/null @@ -1,65 +0,0 @@ -# type: ignore -import logging -from typing import AsyncGenerator, Union - -from core.base import ChunkingProvider, Strategy, UnstructuredChunkingConfig -from core.base.abstractions import DocumentExtraction - -logger = logging.getLogger(__name__) - - -class UnstructuredChunkingProvider(ChunkingProvider): - def __init__(self, config: UnstructuredChunkingConfig): - try: - from unstructured.chunking.basic import chunk_elements - from unstructured.chunking.title import chunk_by_title - from unstructured.documents.elements import Text - - self.chunk_by_title = chunk_by_title - self.chunk_elements = chunk_elements - self.Text = Text - - except ImportError: - self.chunk_elements = None - self.chunk_by_title = None - self.Text = None - - super().__init__(config) - - async def chunk( - self, parsed_document: Union[str, DocumentExtraction] - ) -> AsyncGenerator[str, None]: - - # as unstructured has already partitioned the document, we can yield the text directly - if parsed_document.metadata.get("partitioned_by_unstructured", False): - yield parsed_document.data - - else: - - try: - if self.config.strategy == Strategy.BY_TITLE: - chunks = self.chunk_by_title( - [self.Text(text=parsed_document.data)], - max_characters=self.config.extra_fields["chunk_size"], - new_after_n_chars=self.config.extra_fields[ - "max_chunk_size" - ] - or self.config.extra_fields["chunk_size"], - overlap=self.config.extra_fields["chunk_overlap"], - ) - else: - chunks = self.chunk_elements( - [self.Text(text=parsed_document.data)], - 
max_characters=self.config.chunk_size, - new_after_n_chars=self.config.max_chunk_size - or self.config.chunk_size, - overlap=self.config.chunk_overlap, - ) - for chunk in chunks: - yield chunk.text - - except Exception as e: - logger.error( - f"If you are trying to use r2r for parsing and unstructured for chunking, please use the r2r-unstructured docker. You can do that using --docker flag with `r2r serve` command. Error: {e}" - ) - raise e diff --git a/py/core/providers/database/base.py b/py/core/providers/database/base.py index d52231f91..32ae6a334 100644 --- a/py/core/providers/database/base.py +++ b/py/core/providers/database/base.py @@ -76,6 +76,11 @@ def build(self): class DatabaseMixin: + def get_config(self): + if hasattr(self, "config"): + return self.config + raise AttributeError("Config not set in the provider class") + def _get_table_name(self, base_name: str) -> str: raise NotImplementedError("Subclasses must implement this method") diff --git a/py/core/providers/database/collection.py b/py/core/providers/database/collection.py index 5366812a3..cc30736a2 100644 --- a/py/core/providers/database/collection.py +++ b/py/core/providers/database/collection.py @@ -2,9 +2,13 @@ import logging from datetime import datetime from typing import Optional, Union -from uuid import UUID +from uuid import UUID, uuid4 -from core.base import R2RException +from core.base import ( + R2RException, + generate_default_user_collection_id, + generate_id_from_label, +) from core.base.abstractions import DocumentInfo, DocumentType, IngestionStatus from core.base.api.models import CollectionOverviewResponse, CollectionResponse @@ -26,25 +30,59 @@ async def create_table(self) -> None: """ await self.execute_query(query) + async def create_default_collection( + self, user_id: Optional[UUID] = None + ) -> CollectionResponse: + """Create a default collection if it doesn't exist.""" + config = self.get_config() + + if user_id: + default_collection_uuid = generate_default_user_collection_id( + user_id + ) + else: + default_collection_uuid = generate_id_from_label( + config.default_collection_name + ) + + if not await self.collection_exists(default_collection_uuid): + logger.info("Initializing a new default collection...") + return await self.create_collection( + name=config.default_collection_name, + description=config.default_collection_description, + collection_id=default_collection_uuid, + ) + + return await self.get_collection(default_collection_uuid) + async def collection_exists(self, collection_id: UUID) -> bool: """Check if a collection exists.""" query = f""" SELECT 1 FROM {self._get_table_name('collections')} WHERE collection_id = $1 """ - result = await self.execute_query(query, [collection_id]) - return bool(result) + result = await self.fetchrow_query(query, [collection_id]) + return result is not None async def create_collection( - self, name: str, description: str = "" + self, + name: str, + description: str = "", + collection_id: Optional[UUID] = None, ) -> CollectionResponse: current_time = datetime.utcnow() query = f""" - INSERT INTO {self._get_table_name('collections')} (name, description, created_at, updated_at) - VALUES ($1, $2, $3, $4) + INSERT INTO {self._get_table_name('collections')} (collection_id, name, description, created_at, updated_at) + VALUES ($1, $2, $3, $4, $5) RETURNING collection_id, name, description, created_at, updated_at """ - params = [name, description, current_time, current_time] + params = [ + collection_id or uuid4(), + name, + description, + current_time, + 
current_time, + ] try: async with self.pool.acquire() as conn: # type: ignore @@ -137,23 +175,53 @@ async def update_collection( ) async def delete_collection(self, collection_id: UUID) -> None: - # Remove collection_id from users - user_update_query = f""" - UPDATE {self._get_table_name('users')} - SET collection_ids = array_remove(collection_ids, $1) - WHERE $1 = ANY(collection_ids) - """ - await self.execute_query(user_update_query, [collection_id]) - - # Delete the collection - delete_query = f""" - DELETE FROM {self._get_table_name('collections')} - WHERE collection_id = $1 - """ - result = await self.execute_query(delete_query, [collection_id]) - - if result == "DELETE 0": - raise R2RException(status_code=404, message="Collection not found") + async with self.pool.acquire() as conn: # type: ignore + async with conn.transaction(): + try: + # Remove collection_id from users + user_update_query = f""" + UPDATE {self._get_table_name('users')} + SET collection_ids = array_remove(collection_ids, $1) + WHERE $1 = ANY(collection_ids) + """ + await conn.execute(user_update_query, collection_id) + + # Remove collection_id from documents + document_update_query = f""" + WITH updated AS ( + UPDATE {self._get_table_name('document_info')} + SET collection_ids = array_remove(collection_ids, $1) + WHERE $1 = ANY(collection_ids) + RETURNING 1 + ) + SELECT COUNT(*) AS affected_rows FROM updated + """ + result = await conn.fetchrow( + document_update_query, collection_id + ) + affected_rows = result["affected_rows"] + + # Delete the collection + delete_query = f""" + DELETE FROM {self._get_table_name('collections')} + WHERE collection_id = $1 + RETURNING collection_id + """ + deleted = await conn.fetchrow(delete_query, collection_id) + + if not deleted: + raise R2RException( + status_code=404, message="Collection not found" + ) + + except Exception as e: + logger.error( + f"Error deleting collection {collection_id}: {str(e)}" + ) + raise R2RException( + status_code=500, + message=f"An error occurred while deleting the collection: {str(e)}", + ) async def list_collections( self, offset: int = 0, limit: int = -1 @@ -357,8 +425,10 @@ async def get_collections_for_user( return {"results": collections, "total_entries": total_entries} async def assign_document_to_collection( - self, document_id: UUID, collection_id: UUID - ) -> None: + self, + document_id: UUID, + collection_id: UUID, + ) -> UUID: """ Assign a document to a collection. 
@@ -408,6 +478,8 @@ async def assign_document_to_collection( message="Document is already assigned to the collection", ) + return collection_id + except R2RException: # Re-raise R2RExceptions as they are already handled raise diff --git a/py/core/providers/database/document.py b/py/core/providers/database/document.py index e3409b097..a4878319e 100644 --- a/py/core/providers/database/document.py +++ b/py/core/providers/database/document.py @@ -21,8 +21,9 @@ DocumentInfo, DocumentType, IngestionStatus, + KGCreationStatus, + KGEnrichmentStatus, R2RException, - RestructureStatus, ) from .base import DatabaseMixin @@ -46,7 +47,7 @@ def __init__(self, *args, **kwargs): Column("version", String), Column("size_in_bytes", Integer), Column("ingestion_status", String), - Column("restructuring_status", String), + Column("kg_extraction_status", String), Column("created_at", DateTime), Column("updated_at", DateTime), Column("ingestion_attempt_number", Integer, default=0), @@ -64,7 +65,7 @@ async def create_table(self): version TEXT, size_in_bytes INT, ingestion_status TEXT DEFAULT 'pending', - restructuring_status TEXT DEFAULT 'pending', + kg_extraction_status TEXT DEFAULT 'pending', created_at TIMESTAMPTZ DEFAULT NOW(), updated_at TIMESTAMPTZ DEFAULT NOW(), ingestion_attempt_number INT DEFAULT 0 @@ -76,21 +77,21 @@ async def create_table(self): # TODO - Remove this after the next release # Additional query to check and add the column if it doesn't exist - add_column_query = f""" - DO $$ - BEGIN - IF NOT EXISTS ( - SELECT 1 - FROM information_schema.columns - WHERE table_name = '{self._get_table_name("document_info")}' - AND column_name = 'ingestion_attempt_number' - ) THEN - ALTER TABLE {self._get_table_name("document_info")} - ADD COLUMN ingestion_attempt_number INT DEFAULT 0; - END IF; - END $$; - """ - await self.execute_query(add_column_query) + # add_column_query = f""" + # DO $$ + # BEGIN + # IF NOT EXISTS ( + # SELECT 1 + # FROM information_schema.columns + # WHERE table_name = '{self._get_table_name("document_info")}' + # AND column_name = 'ingestion_attempt_number' + # ) THEN + # ALTER TABLE {self._get_table_name("document_info")} + # ADD COLUMN ingestion_attempt_number INT DEFAULT 0; + # END IF; + # END $$; + # """ + # await self.execute_query(add_column_query) async def upsert_documents_overview( self, documents_overview: Union[DocumentInfo, list[DocumentInfo]] @@ -144,7 +145,7 @@ async def upsert_documents_overview( UPDATE {self._get_table_name('document_info')} SET collection_ids = $1, user_id = $2, type = $3, metadata = $4, title = $5, version = $6, size_in_bytes = $7, ingestion_status = $8, - restructuring_status = $9, updated_at = $10, ingestion_attempt_number = $11 + kg_extraction_status = $9, updated_at = $10, ingestion_attempt_number = $11 WHERE document_id = $12 """ await conn.execute( @@ -157,7 +158,7 @@ async def upsert_documents_overview( db_entry["version"], db_entry["size_in_bytes"], db_entry["ingestion_status"], - db_entry["restructuring_status"], + db_entry["kg_extraction_status"], db_entry["updated_at"], new_attempt_number, document_info.id, @@ -166,7 +167,7 @@ async def upsert_documents_overview( insert_query = f""" INSERT INTO {self._get_table_name('document_info')} (document_id, collection_ids, user_id, type, metadata, title, version, - size_in_bytes, ingestion_status, restructuring_status, created_at, + size_in_bytes, ingestion_status, kg_extraction_status, created_at, updated_at, ingestion_attempt_number) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13) 
""" @@ -181,7 +182,7 @@ async def upsert_documents_overview( db_entry["version"], db_entry["size_in_bytes"], db_entry["ingestion_status"], - db_entry["restructuring_status"], + db_entry["kg_extraction_status"], db_entry["created_at"], db_entry["updated_at"], db_entry["ingestion_attempt_number"], @@ -218,6 +219,157 @@ async def delete_from_documents_overview( await self.execute_query(query, params) + async def _get_status_from_table( + self, ids: list[UUID], table_name: str, status_type: str + ): + """ + Get the workflow status for a given document or list of documents. + + Args: + ids (list[UUID]): The document IDs. + table_name (str): The table name. + status_type (str): The type of status to retrieve. + + Returns: + The workflow status for the given document or list of documents. + """ + query = f""" + SELECT {status_type} FROM {self._get_table_name(table_name)} + WHERE document_id = ANY($1) + """ + return await self.fetch_query(query, [ids]) + + async def _get_ids_from_table( + self, + status: list[str], + table_name: str, + status_type: str, + collection_id: Optional[UUID] = None, + ): + """ + Get the IDs from a given table. + + Args: + status (Union[str, list[str]]): The status or list of statuses to retrieve. + table_name (str): The table name. + status_type (str): The type of status to retrieve. + """ + query = f""" + SELECT document_id FROM {self._get_table_name(table_name)} + WHERE {status_type} = ANY($1) and $2 = ANY(collection_ids) + """ + records = await self.fetch_query(query, [status, collection_id]) + document_ids = [record["document_id"] for record in records] + return document_ids + + async def _set_status_in_table( + self, ids: list[UUID], status: str, table_name: str, status_type: str + ): + """ + Set the workflow status for a given document or list of documents. + + Args: + ids (list[UUID]): The document IDs. + status (str): The status to set. + table_name (str): The table name. + status_type (str): The type of status to set. + """ + query = f""" + UPDATE {self._get_table_name(table_name)} + SET {status_type} = $1 + WHERE document_id = Any($2) + """ + await self.execute_query(query, [status, ids]) + + def _get_status_model_and_table_name(self, status_type: str): + """ + Get the status model and table name for a given status type. + + Args: + status_type (str): The type of status to retrieve. + + Returns: + The status model and table name for the given status type. + """ + if status_type == "ingestion": + return IngestionStatus, "document_info" + elif status_type == "kg_extraction_status": + return KGCreationStatus, "document_info" + elif status_type == "kg_enrichment_status": + return KGEnrichmentStatus, "collection_info" + else: + raise R2RException( + status_code=400, message=f"Invalid status type: {status_type}" + ) + + async def get_workflow_status( + self, id: Union[UUID, list[UUID]], status_type: str + ): + """ + Get the workflow status for a given document or list of documents. + + Args: + id (Union[UUID, list[UUID]]): The document ID or list of document IDs. + status_type (str): The type of status to retrieve. + + Returns: + The workflow status for the given document or list of documents. 
+ """ + ids = [id] if isinstance(id, UUID) else id + out_model, table_name = self._get_status_model_and_table_name( + status_type + ) + result = map( + (await self._get_status_from_table(ids, table_name, status_type)), + out_model, + ) + return result[0] if isinstance(id, UUID) else result + + async def set_workflow_status( + self, id: Union[UUID, list[UUID]], status_type: str, status: str + ): + """ + Set the workflow status for a given document or list of documents. + + Args: + id (Union[UUID, list[UUID]]): The document ID or list of document IDs. + status_type (str): The type of status to set. + status (str): The status to set. + """ + ids = [id] if isinstance(id, UUID) else id + out_model, table_name = self._get_status_model_and_table_name( + status_type + ) + return await self._set_status_in_table( + ids, status, table_name, status_type + ) + + async def get_document_ids_by_status( + self, + status_type: str, + status: Union[str, list[str]], + collection_id: UUID = None, + ): + """ + Get the IDs for a given status. + + Args: + ids_key (str): The key to retrieve the IDs. + status_type (str): The type of status to retrieve. + status (Union[str, list[str]]): The status or list of statuses to retrieve. + """ + + if isinstance(status, str): + status = [status] + + out_model, table_name = self._get_status_model_and_table_name( + status_type + ) + result = await self._get_ids_from_table( + status, table_name, status_type, collection_id + ) + return result + async def get_documents_overview( self, filter_user_ids: Optional[list[UUID]] = None, @@ -254,7 +406,7 @@ async def get_documents_overview( query = f""" SELECT document_id, collection_ids, user_id, type, metadata, title, version, - size_in_bytes, ingestion_status, created_at, updated_at, restructuring_status, + size_in_bytes, ingestion_status, created_at, updated_at, kg_extraction_status, COUNT(*) OVER() AS total_entries {base_query} ORDER BY created_at DESC @@ -285,8 +437,8 @@ async def get_documents_overview( ingestion_status=IngestionStatus(row["ingestion_status"]), created_at=row["created_at"], updated_at=row["updated_at"], - restructuring_status=RestructureStatus( - row["restructuring_status"] + kg_extraction_status=KGCreationStatus( + row["kg_extraction_status"] ), ) for row in results diff --git a/py/core/providers/database/postgres.py b/py/core/providers/database/postgres.py index b23aee747..0109f17e4 100644 --- a/py/core/providers/database/postgres.py +++ b/py/core/providers/database/postgres.py @@ -8,6 +8,7 @@ CryptoProvider, DatabaseConfig, DatabaseProvider, + PostgresConfigurationSettings, RelationalDBProvider, VectorDBProvider, ) @@ -74,9 +75,6 @@ def __init__( config.project_name or config.vecs_collection # remove after deprecation or os.getenv("POSTGRES_PROJECT_NAME") - or os.getenv( - "POSTGRES_VECS_COLLECTION" - ) # remove after deprecation ) if not project_name: raise ValueError( @@ -106,6 +104,16 @@ def __init__( self.conn = None self.config: DatabaseConfig = config self.crypto_provider = crypto_provider + self.postgres_configuration_settings: PostgresConfigurationSettings = ( + self._get_postgres_configuration_settings(config) + ) + self.default_collection_name = config.default_collection_name + self.default_collection_description = ( + config.default_collection_description + ) + + def _get_table_name(self, base_name: str) -> str: + return f"{self.project_name}.{base_name}" async def initialize(self): self.vector = self._initialize_vector_db() @@ -125,6 +133,48 @@ async def _initialize_relational_db(self) -> 
RelationalDBProvider: connection_string=self.connection_string, crypto_provider=self.crypto_provider, project_name=self.project_name, + postgres_configuration_settings=self.postgres_configuration_settings, ) await relational_db.initialize() return relational_db + + def _get_postgres_configuration_settings( + self, config: DatabaseConfig + ) -> PostgresConfigurationSettings: + settings = PostgresConfigurationSettings() + + env_mapping = { + "max_connections": "POSTGRES_MAX_CONNECTIONS", + "shared_buffers": "POSTGRES_SHARED_BUFFERS", + "effective_cache_size": "POSTGRES_EFFECTIVE_CACHE_SIZE", + "maintenance_work_mem": "POSTGRES_MAINTENANCE_WORK_MEM", + "checkpoint_completion_target": "POSTGRES_CHECKPOINT_COMPLETION_TARGET", + "wal_buffers": "POSTGRES_WAL_BUFFERS", + "default_statistics_target": "POSTGRES_DEFAULT_STATISTICS_TARGET", + "random_page_cost": "POSTGRES_RANDOM_PAGE_COST", + "effective_io_concurrency": "POSTGRES_EFFECTIVE_IO_CONCURRENCY", + "work_mem": "POSTGRES_WORK_MEM", + "huge_pages": "POSTGRES_HUGE_PAGES", + "min_wal_size": "POSTGRES_MIN_WAL_SIZE", + "max_wal_size": "POSTGRES_MAX_WAL_SIZE", + "max_worker_processes": "POSTGRES_MAX_WORKER_PROCESSES", + "max_parallel_workers_per_gather": "POSTGRES_MAX_PARALLEL_WORKERS_PER_GATHER", + "max_parallel_workers": "POSTGRES_MAX_PARALLEL_WORKERS", + "max_parallel_maintenance_workers": "POSTGRES_MAX_PARALLEL_MAINTENANCE_WORKERS", + } + + for setting, env_var in env_mapping.items(): + value = getattr( + config.postgres_configuration_settings, setting, None + ) or os.getenv(env_var) + + if value is not None and value != "": + field_type = settings.__annotations__[setting] + if field_type == Optional[int]: + value = int(value) + elif field_type == Optional[float]: + value = float(value) + + setattr(settings, setting, value) + + return settings diff --git a/py/core/providers/database/relational.py b/py/core/providers/database/relational.py index 9a8291544..3d097b48e 100644 --- a/py/core/providers/database/relational.py +++ b/py/core/providers/database/relational.py @@ -21,7 +21,12 @@ class PostgresRelationalDBProvider( UserMixin, ): def __init__( - self, config, connection_string, crypto_provider, project_name + self, + config, + connection_string, + crypto_provider, + project_name, + postgres_configuration_settings, ): super().__init__(config) self.config = config @@ -29,10 +34,14 @@ def __init__( self.crypto_provider = crypto_provider self.project_name = project_name self.pool = None + self.postgres_configuration_settings = postgres_configuration_settings async def initialize(self): try: - self.pool = await asyncpg.create_pool(self.connection_string) + self.pool = await asyncpg.create_pool( + self.connection_string, + max_size=self.postgres_configuration_settings.max_connections, + ) logger.info( "Successfully connected to Postgres database and created connection pool." 
) @@ -44,7 +53,7 @@ async def initialize(self): await self._initialize_relational_db() def _get_table_name(self, base_name: str) -> str: - return f"{base_name}_{self.project_name}" + return f"{self.project_name}.{base_name}" @asynccontextmanager async def get_connection(self): @@ -59,6 +68,16 @@ async def execute_query(self, query, params=None): else: return await conn.execute(query) + async def execute_many(self, query, params=None, batch_size=1000): + async with self.get_connection() as conn: + async with conn.transaction(): + if params: + for i in range(0, len(params), batch_size): + param_batch = params[i : i + batch_size] + await conn.executemany(query, param_batch) + else: + await conn.executemany(query) + async def fetch_query(self, query, params=None): async with self.get_connection() as conn: async with conn.transaction(): @@ -76,9 +95,14 @@ async def fetchrow_query(self, query, params=None): else: return await conn.fetchrow(query) + # async def copy_records_to_table(self, table_name, records): + # async with self.get_connection() as conn: + # async with conn.transaction(): + # await conn.copy_records_to_table(table_name, records) + async def _initialize_relational_db(self): async with self.get_connection() as conn: - await conn.execute('CREATE EXTENSION IF NOT EXISTS "uuid-ossp";') + await conn.execute(f'CREATE EXTENSION IF NOT EXISTS "uuid-ossp";') # Call create_table for each mixin for base_class in self.__class__.__bases__: diff --git a/py/core/providers/database/user.py b/py/core/providers/database/user.py index d4bde81ed..ebf39d17e 100644 --- a/py/core/providers/database/user.py +++ b/py/core/providers/database/user.py @@ -10,6 +10,7 @@ class UserMixin(DatabaseMixin): + async def create_table(self): query = f""" CREATE TABLE IF NOT EXISTS {self._get_table_name('users')} ( diff --git a/py/core/providers/database/vecs/adapter/noop.py b/py/core/providers/database/vecs/adapter/noop.py index 3e4056860..149617e14 100644 --- a/py/core/providers/database/vecs/adapter/noop.py +++ b/py/core/providers/database/vecs/adapter/noop.py @@ -43,8 +43,7 @@ def __call__( ) -> Generator[Record, None, None]: for record in records: ( - fragment_id, - extraction_id, + id, document_id, user_id, collection_ids, @@ -53,8 +52,7 @@ def __call__( metadata, ) = record yield ( - str(fragment_id), - str(extraction_id), + str(id), str(document_id), str(user_id), [str(gid) for gid in collection_ids], diff --git a/py/core/providers/database/vecs/client.py b/py/core/providers/database/vecs/client.py index c0d04af87..dfb67d388 100644 --- a/py/core/providers/database/vecs/client.py +++ b/py/core/providers/database/vecs/client.py @@ -59,6 +59,7 @@ def __init__( pool_size: int = 1, max_retries: int = 3, retry_delay: int = 1, + project_name: str = "vecs", ): self.engine = create_engine( connection_string, @@ -66,10 +67,11 @@ def __init__( poolclass=QueuePool, pool_recycle=300, # Recycle connections after 5 min ) - self.meta = MetaData(schema="vecs") + self.meta = MetaData(schema=project_name) self.Session = sessionmaker(self.engine) self.max_retries = max_retries self.retry_delay = retry_delay + self.project_name = project_name self.vector_version: Optional[str] = None self._initialize_database() @@ -98,15 +100,19 @@ def _initialize_database(self): def _create_schema(self, sess): try: - sess.execute(text("CREATE SCHEMA IF NOT EXISTS vecs;")) + sess.execute( + text(f'CREATE SCHEMA IF NOT EXISTS "{self.project_name}";') + ) except Exception as e: logger.warning(f"Failed to create schema: {str(e)}") def 
_create_extension(self, sess): try: - sess.execute(text("CREATE EXTENSION IF NOT EXISTS vector;")) - sess.execute(text("CREATE EXTENSION IF NOT EXISTS pg_trgm;")) - sess.execute(text("CREATE EXTENSION IF NOT EXISTS fuzzystrmatch;")) + sess.execute(text(f"CREATE EXTENSION IF NOT EXISTS vector;")) + sess.execute(text(f"CREATE EXTENSION IF NOT EXISTS pg_trgm;")) + sess.execute( + text(f"CREATE EXTENSION IF NOT EXISTS fuzzystrmatch;") + ) except Exception as e: logger.warning(f"Failed to create extension: {str(e)}") @@ -217,7 +223,7 @@ def get_collection(self, name: str) -> Collection: from core.providers.database.vecs.collection import Collection query = text( - """ + f""" select relname as table_name, atttypmod as embedding_dim @@ -226,7 +232,7 @@ def get_collection(self, name: str) -> Collection: join pg_attribute pa on pc.oid = pa.attrelid where - pc.relnamespace = 'vecs'::regnamespace + pc.relnamespace = "{self.project_name}"::regnamespace and pc.relkind = 'r' and pa.attname = 'vec' and not pc.relname ^@ '_' diff --git a/py/core/providers/database/vecs/collection.py b/py/core/providers/database/vecs/collection.py index 83866944f..45a990447 100644 --- a/py/core/providers/database/vecs/collection.py +++ b/py/core/providers/database/vecs/collection.py @@ -11,7 +11,7 @@ import warnings from dataclasses import dataclass from enum import Enum -from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Optional, Union +from typing import TYPE_CHECKING, Any, Iterable, Optional, Union from uuid import UUID, uuid4 from flupy import flu @@ -193,7 +193,6 @@ class Collection: """ COLUMN_VARS = [ - "fragment_id", "extraction_id", "document_id", "user_id", @@ -223,7 +222,9 @@ def __init__( self.client = client self.name = name self.dimension = dimension - self.table = _build_table(name, client.meta, dimension) + self.table = _build_table( + client.project_name, name, client.meta, dimension + ) self._index: Optional[str] = None self.adapter = adapter or Adapter(steps=[NoOp(dimension=dimension)]) @@ -289,7 +290,7 @@ def _create_if_not_exists(self): join pg_attribute pa on pc.oid = pa.attrelid where - pc.relnamespace = 'vecs'::regnamespace + pc.relnamespace = '{self.client.project_name}'::regnamespace and pc.relkind = 'r' and pa.attname = 'vec' and not pc.relname ^@ '_' @@ -347,7 +348,7 @@ def _create(self): text( f""" create index ix_meta_{unique_string} - on vecs."{self.table.name}" + on {self.client.project_name}."{self.table.name}" using gin ( metadata jsonb_path_ops ) """ ) @@ -358,7 +359,7 @@ def _create(self): text( f""" CREATE TRIGGER tsvector_update_{unique_string} BEFORE INSERT OR UPDATE - ON vecs."{self.table.name}" FOR EACH ROW EXECUTE FUNCTION + ON {self.client.project_name}."{self.table.name}" FOR EACH ROW EXECUTE FUNCTION tsvector_update_trigger(fts, 'pg_catalog.english', text); """ ) @@ -397,23 +398,21 @@ def upsert( stmt = postgresql.insert(self.table).values( [ { - "fragment_id": record[0], - "extraction_id": record[1], - "document_id": record[2], - "user_id": record[3], - "collection_ids": record[4], - "vec": record[5], - "text": record[6], - "metadata": record[7], - "fts": func.to_tsvector(record[6]), + "extraction_id": record[0], + "document_id": record[1], + "user_id": record[2], + "collection_ids": record[3], + "vec": record[4], + "text": record[5], + "metadata": record[6], + "fts": func.to_tsvector(record[5]), } for record in chunk ] ) stmt = stmt.on_conflict_do_update( - index_elements=[self.table.c.fragment_id], + index_elements=[self.table.c.extraction_id], set_=dict( - 
extraction_id=stmt.excluded.extraction_id, document_id=stmt.excluded.document_id, user_id=stmt.excluded.user_id, collection_ids=stmt.excluded.collection_ids, @@ -426,29 +425,29 @@ def upsert( sess.execute(stmt) return None - def fetch(self, fragment_ids: Iterable[UUID]) -> List[Record]: + def fetch(self, ids: Iterable[UUID]) -> list[Record]: """ - Fetches vectors from the collection by their fragment identifiers. + Fetches vectors from the collection by their identifiers. Args: - fragment_ids (Iterable[UUID]): An iterable of vector fragment identifiers. + ids (Iterable[UUID]): An iterable of vector identifiers. Returns: - List[Record]: A list of the fetched vectors. + list[Record]: A list of the fetched vectors. Raises: - ArgError: If fragment_ids is not an iterable of UUIDs. + ArgError: If ids is not an iterable of UUIDs. """ - if isinstance(fragment_ids, (str, UUID)): - raise ArgError("fragment_ids must be an iterable of UUIDs") + if isinstance(ids, (str, UUID)): + raise ArgError("ids must be an iterable of UUIDs") chunk_size = 12 records = [] with self.client.Session() as sess: with sess.begin(): - for id_chunk in flu(fragment_ids).chunk(chunk_size): + for id_chunk in flu(ids).chunk(chunk_size): stmt = select(self.table).where( - self.table.c.fragment_id.in_(id_chunk) + self.table.c.extraction_id.in_(id_chunk) ) chunk_records = sess.execute(stmt) records.extend(chunk_records) @@ -456,58 +455,54 @@ def fetch(self, fragment_ids: Iterable[UUID]) -> List[Record]: def delete( self, - fragment_ids: Optional[Iterable[UUID]] = None, - filters: Optional[Dict[str, Any]] = None, - ) -> Dict[str, Dict[str, str]]: + ids: Optional[Iterable[UUID]] = None, + filters: Optional[dict[str, Any]] = None, + ) -> dict[str, dict[str, str]]: """ - Deletes vectors from the collection by matching filters or fragment_ids. + Deletes vectors from the collection by matching filters or ids. Args: - fragment_ids (Optional[Iterable[UUID]], optional): An iterable of vector fragment identifiers. - filters (Optional[Dict], optional): Filters to apply to the search. Defaults to None. + extraction_ids (Optional[Iterable[UUID]], optional): An iterable of vector fragment identifiers. + filters (Optional[dict], optional): Filters to apply to the search. Defaults to None. Returns: - Dict[str, Dict[str, str]]: A dictionary of deleted records, where the key is the fragment_id - and the value is a dictionary containing 'document_id', 'extraction_id', 'fragment_id', and 'text'. + dict[str, dict[str, str]]: A dictionary of deleted records, where the key is the extraction_id + and the value is a dictionary containing 'document_id', 'extraction_id', and 'text'. Raises: - ArgError: If neither fragment_ids nor filters are provided, or if both are provided. + ArgError: If neither ids nor filters are provided, or if both are provided. """ - if fragment_ids is None and filters is None: - raise ArgError("Either fragment_ids or filters must be provided.") + if ids is None and filters is None: + raise ArgError("Either ids or filters must be provided.") - if fragment_ids is not None and filters is not None: - raise ArgError( - "Either fragment_ids or filters must be provided, not both." 
- ) + if ids is not None and filters is not None: + raise ArgError("Either ids or filters must be provided, not both.") - if isinstance(fragment_ids, (str, UUID)): - raise ArgError("fragment_ids must be an iterable of UUIDs") + if isinstance(ids, (str, UUID)): + raise ArgError("ids must be an iterable of UUIDs") deleted_records = {} with self.client.Session() as sess: with sess.begin(): - if fragment_ids: - for id_chunk in flu(fragment_ids).chunk(12): + if ids: + for id_chunk in flu(ids).chunk(12): delete_stmt = ( delete(self.table) - .where(self.table.c.fragment_id.in_(id_chunk)) + .where(self.table.c.extraction_id.in_(id_chunk)) .returning( - self.table.c.fragment_id, - self.table.c.document_id, self.table.c.extraction_id, + self.table.c.document_id, self.table.c.text, ) ) result = sess.execute(delete_stmt) for row in result: - fragment_id = str(row[0]) - deleted_records[fragment_id] = { - "fragment_id": fragment_id, + extraction_id = str(row[0]) + deleted_records[extraction_id] = { + "extraction_id": extraction_id, "document_id": str(row[1]), - "extraction_id": str(row[2]), - "text": row[3], + "text": row[2], } if filters: @@ -516,20 +511,18 @@ def delete( delete(self.table) .where(meta_filter) .returning( - self.table.c.fragment_id, - self.table.c.document_id, self.table.c.extraction_id, + self.table.c.document_id, self.table.c.text, ) ) result = sess.execute(delete_stmt) for row in result: - fragment_id = str(row[0]) - deleted_records[fragment_id] = { - "fragment_id": fragment_id, + extraction_id = str(row[0]) + deleted_records[extraction_id] = { + "extraction_id": extraction_id, "document_id": str(row[1]), - "extraction_id": str(row[2]), - "text": row[3], + "text": row[2], } return deleted_records @@ -556,7 +549,7 @@ def semantic_search( self, vector: list[float], search_settings: VectorSearchSettings, - ) -> Union[List[Record], List[str]]: + ) -> Union[list[Record], list[str]]: """ Executes a similarity search in the collection. @@ -567,7 +560,7 @@ def semantic_search( search_settings (VectorSearchSettings): The search settings to use. Returns: - Union[List[Record], List[str]]: The result of the similarity search. + Union[list[Record], list[str]]: The result of the similarity search. 
""" try: @@ -590,7 +583,6 @@ def semantic_search( distance_clause = distance_lambda(self.table.c.vec)(vector) cols = [ - self.table.c.fragment_id, self.table.c.extraction_id, self.table.c.document_id, self.table.c.user_id, @@ -605,8 +597,9 @@ def semantic_search( stmt = select(*cols) - # if filters: - stmt = stmt.filter(self.build_filters(search_settings.filters)) # type: ignore + if search_settings.filters: + stmt = stmt.filter(self.build_filters(search_settings.filters)) + stmt = stmt.order_by(distance_clause) stmt = stmt.offset(search_settings.offset) stmt = stmt.limit(search_settings.search_limit) @@ -632,11 +625,12 @@ def semantic_search( ) if len(cols) == 1: return [str(x) for x in sess.scalars(stmt).fetchall()] - return sess.execute(stmt).fetchall() or [] + result = sess.execute(stmt).fetchall() + return result or [] def full_text_search( self, query_text: str, search_settings: VectorSearchSettings - ) -> List[VectorSearchResult]: + ) -> list[VectorSearchResult]: # Create a tsquery from the input query ts_query = func.websearch_to_tsquery("english", query_text) @@ -648,7 +642,6 @@ def full_text_search( # Build the main query stmt = ( select( - self.table.c.fragment_id, self.table.c.extraction_id, self.table.c.document_id, self.table.c.user_id, @@ -671,7 +664,6 @@ def full_text_search( # Convert the results to VectorSearchResult objects return [ VectorSearchResult( - fragment_id=str(r.fragment_id), extraction_id=str(r.extraction_id), document_id=str(r.document_id), user_id=str(r.user_id), @@ -683,15 +675,14 @@ def full_text_search( for r in results ] - def build_filters(self, filters: Dict): + def build_filters(self, filters: dict): """ PUBLIC Builds filters for SQL query based on provided dictionary. Args: - table: The SQLAlchemy table object. - filters (Dict): The dictionary specifying filter conditions. + filters (dict): The dictionary specifying filter conditions. Raises: FilterError: If filter conditions are not correctly formatted. @@ -709,6 +700,7 @@ def parse_condition(key, value): column = getattr(self.table.c, key) if isinstance(value, dict): op, clause = next(iter(value.items())) + if op == "$eq": return column == clause elif op == "$ne": @@ -734,12 +726,15 @@ def parse_condition(key, value): f"Unsupported operator for column {key}: {op}" ) else: + # Handle direct equality + if isinstance(value, str): + value = UUID(value) return column == value else: # Handle JSON-based filters json_col = self.table.c.metadata if key.startswith("metadata."): - key.split("metadata.")[1] + key = key.split("metadata.")[1] if isinstance(value, dict): if len(value) > 1: raise FilterError("only one operator permitted") @@ -814,7 +809,7 @@ def parse_filter(filter_dict): return parse_filter(filters) @classmethod - def _list_collections(cls, client: "Client") -> List["Collection"]: + def _list_collections(cls, client: "Client") -> list["Collection"]: """ PRIVATE @@ -824,7 +819,7 @@ def _list_collections(cls, client: "Client") -> List["Collection"]: client (Client): The database client. Returns: - List[Collection]: A list of all existing collections. + list[Collection]: A list of all existing collections. 
""" query = text( @@ -837,7 +832,7 @@ def _list_collections(cls, client: "Client") -> List["Collection"]: join pg_attribute pa on pc.oid = pa.attrelid where - pc.relnamespace = 'vecs'::regnamespace + pc.relnamespace = '{client.project_name}'::regnamespace and pc.relkind = 'r' and pa.attname = 'vec' and not pc.relname ^@ '_' @@ -888,13 +883,13 @@ def index(self) -> Optional[str]: if self._index is None: query = text( - """ + f""" select relname as table_name from pg_class pc where - pc.relnamespace = 'vecs'::regnamespace + pc.relnamespace = '{self.client.project_name}'::regnamespace and relname ilike 'ix_vector%' and pc.relkind = 'i' """ @@ -1028,7 +1023,11 @@ def create_index( with sess.begin(): if self.index is not None: if replace: - sess.execute(text(f'drop index vecs."{self.index}";')) + sess.execute( + text( + f'drop index {self.client.project_name}."{self.index}";' + ) + ) self._index = None else: raise ArgError( @@ -1037,7 +1036,7 @@ def create_index( if method == IndexMethod.ivfflat: if not index_arguments: - n_records: int = sess.execute(func.count(self.table.c.id)).scalar() # type: ignore + n_records: int = sess.execute(func.count(self.table.c.extraction_id)).scalar() # type: ignore n_lists = ( int(max(n_records / 1000, 30)) @@ -1057,7 +1056,7 @@ def create_index( text( f""" create index ix_{ops}_ivfflat_nl{n_lists}_{unique_string} - on vecs."{self.table.name}" + on {self.client.project_name}."{self.table.name}" using ivfflat (vec {ops}) with (lists={n_lists}) """ ) @@ -1076,7 +1075,7 @@ def create_index( text( f""" create index ix_{ops}_hnsw_m{m}_efc{ef_construction}_{unique_string} - on vecs."{self.table.name}" + on {self.client.project_name}."{self.table.name}" using hnsw (vec {ops}) WITH (m={m}, ef_construction={ef_construction}); """ ) @@ -1085,12 +1084,13 @@ def create_index( return None -def _build_table(name: str, meta: MetaData, dimension: int) -> Table: +def _build_table( + project_name: str, name: str, meta: MetaData, dimension: int +) -> Table: table = Table( name, meta, - Column("fragment_id", postgresql.UUID, primary_key=True), - Column("extraction_id", postgresql.UUID, nullable=False), + Column("extraction_id", postgresql.UUID, primary_key=True), Column("document_id", postgresql.UUID, nullable=False), Column("user_id", postgresql.UUID, nullable=False), Column( diff --git a/py/core/providers/database/vector.py b/py/core/providers/database/vector.py index ae63aa6a0..b54cddab5 100644 --- a/py/core/providers/database/vector.py +++ b/py/core/providers/database/vector.py @@ -5,6 +5,7 @@ from typing import Any, Optional from sqlalchemy import text +from sqlalchemy.exc import NoResultFound, SQLAlchemyError from core.base import ( DatabaseConfig, @@ -30,12 +31,15 @@ class PostgresVectorDBProvider(VectorDBProvider): def __init__(self, config: DatabaseConfig, *args, **kwargs): super().__init__(config) self.collection: Optional[Collection] = None + self.project_name = kwargs.get("project_name", None) connection_string = kwargs.get("connection_string", None) if not connection_string: raise ValueError( "Please provide a valid `connection_string` to the `PostgresVectorDBProvider`." ) - self.vx: Client = create_client(connection_string=connection_string) + self.vx: Client = create_client( + connection_string=connection_string, project_name=self.project_name + ) if not self.vx: raise ValueError( "Error occurred while attempting to connect to the pgvector provider." 
@@ -59,8 +63,8 @@ def __init__(self, config: DatabaseConfig, *args, **kwargs): def _initialize_vector_db(self, dimension: int) -> None: # Create extension for trigram similarity with self.vx.Session() as sess: - sess.execute(text("CREATE EXTENSION IF NOT EXISTS pg_trgm;")) - sess.execute(text("CREATE EXTENSION IF NOT EXISTS btree_gin;")) + sess.execute(text(f"CREATE EXTENSION IF NOT EXISTS pg_trgm;")) + sess.execute(text(f"CREATE EXTENSION IF NOT EXISTS btree_gin;")) sess.commit() self.collection = self.vx.get_or_create_vector_table( @@ -77,7 +81,6 @@ def upsert(self, entry: VectorEntry) -> None: self.collection.upsert( records=[ ( - entry.fragment_id, entry.extraction_id, entry.document_id, entry.user_id, @@ -94,11 +97,9 @@ def upsert_entries(self, entries: list[VectorEntry]) -> None: raise ValueError( "Please call `initialize_collection` before attempting to run `upsert_entries`." ) - self.collection.upsert( records=[ ( - entry.fragment_id, entry.extraction_id, entry.document_id, entry.user_id, @@ -123,14 +124,13 @@ def semantic_search( ) return [ VectorSearchResult( - fragment_id=result[0], # type: ignore - extraction_id=result[1], # type: ignore - document_id=result[2], # type: ignore - user_id=result[3], # type: ignore - collection_ids=result[4], # type: ignore - text=result[5], # type: ignore - score=1 - float(result[6]), # type: ignore - metadata=result[7], # type: ignore + extraction_id=result[0], # type: ignore + document_id=result[1], # type: ignore + user_id=result[2], # type: ignore + collection_ids=result[3], # type: ignore + text=result[4], # type: ignore + score=1 - float(result[5]), # type: ignore + metadata=result[6], # type: ignore ) for result in results ] @@ -203,7 +203,7 @@ def hybrid_search( # Combine results using RRF combined_results = { - result.fragment_id: { + result.extraction_id: { "semantic_rank": rank, "full_text_rank": full_text_limit, "data": result, @@ -224,7 +224,7 @@ def hybrid_search( rrf_k = search_settings.hybrid_search_settings.rrf_k # Combine results using RRF combined_results = { - result.fragment_id: { + result.extraction_id: { "semantic_rank": rank, "full_text_rank": full_text_limit, "data": result, @@ -233,10 +233,10 @@ def hybrid_search( } for rank, result in enumerate(full_text_results, 1): - if result.fragment_id in combined_results: - combined_results[result.fragment_id]["full_text_rank"] = rank + if result.extraction_id in combined_results: + combined_results[result.extraction_id]["full_text_rank"] = rank else: - combined_results[result.fragment_id] = { + combined_results[result.extraction_id] = { "semantic_rank": semantic_limit, "full_text_rank": rank, "data": result, @@ -272,7 +272,6 @@ def hybrid_search( return [ VectorSearchResult( - fragment_id=result["data"].fragment_id, # type: ignore extraction_id=result["data"].extraction_id, # type: ignore document_id=result["data"].document_id, # type: ignore user_id=result["data"].user_id, # type: ignore @@ -341,7 +340,7 @@ def assign_document_to_collection( table_name = self.collection.table.name query = text( f""" - UPDATE vecs."{table_name}" + UPDATE {self.project_name}."{table_name}" SET collection_ids = array_append(collection_ids, :collection_id) WHERE document_id = :document_id AND NOT (:collection_id = ANY(collection_ids)) RETURNING document_id @@ -381,7 +380,7 @@ def remove_document_from_collection( table_name = self.collection.table.name query = text( f""" - UPDATE vecs."{table_name}" + UPDATE {self.project_name}."{table_name}" SET collection_ids = array_remove(collection_ids, 
:collection_id) WHERE document_id = :document_id AND :collection_id = ANY(collection_ids) RETURNING document_id @@ -407,7 +406,7 @@ def remove_collection_from_documents(self, collection_id: str) -> None: table_name = self.collection.table.name query = text( f""" - UPDATE vecs."{table_name}" + UPDATE {self.project_name}."{table_name}" SET collection_ids = array_remove(collection_ids, :collection_id) WHERE :collection_id = ANY(collection_ids) """ @@ -424,7 +423,7 @@ def delete_user(self, user_id: str) -> None: table_name = self.collection.table.name query = text( f""" - UPDATE vecs."{table_name}" + UPDATE {self.project_name}."{table_name}" SET user_id = NULL WHERE user_id = :user_id """ @@ -435,42 +434,44 @@ def delete_user(self, user_id: str) -> None: sess.commit() def delete_collection(self, collection_id: str) -> None: - """ - Remove the specified collection ID from all documents in the vector database. - - Args: - collection_id (str): The ID of the collection to remove from all documents. - - Raises: - ValueError: If the collection is not initialized. - """ if self.collection is None: raise ValueError("Collection is not initialized.") table_name = self.collection.table.name + query = text( f""" - UPDATE vecs."{table_name}" - SET collection_ids = array_remove(collection_ids, :collection_id) - WHERE :collection_id = ANY(collection_ids) + WITH updated AS ( + UPDATE {self.project_name}."{table_name}" + SET collection_ids = array_remove(collection_ids, :collection_id) + WHERE :collection_id = ANY(collection_ids) + RETURNING 1 + ) + SELECT COUNT(*) AS affected_rows FROM updated """ ) with self.vx.Session() as sess: - result = sess.execute( - query, {"collection_id": collection_id} - ).fetchone() - sess.commit() - - if not result: - raise ValueError( - f"Collection {collection_id} not found in any documents." - ) - - affected_rows = result.rowcount - logger.info( - f"Removed collection {collection_id} from {affected_rows} documents." - ) + try: + result = sess.execute(query, {"collection_id": collection_id}) + row = result.one() + affected_rows = row.affected_rows + sess.commit() + + if affected_rows == 0: + raise ValueError( + f"Collection {collection_id} not found in any documents." 
+ ) + except NoResultFound: + raise ValueError( + f"Unexpected error: No result returned for collection {collection_id}" + ) + except SQLAlchemyError as e: + sess.rollback() + logger.error( + f"Error deleting collection {collection_id}: {str(e)}" + ) + raise def get_document_chunks( self, document_id: str, offset: int = 0, limit: int = -1 @@ -482,8 +483,8 @@ def get_document_chunks( table_name = self.collection.table.name query = text( f""" - SELECT fragment_id, extraction_id, document_id, user_id, collection_ids, text, metadata, COUNT(*) OVER() AS total - FROM vecs."{table_name}" + SELECT extraction_id, document_id, user_id, collection_ids, text, metadata, COUNT(*) OVER() AS total + FROM {self.project_name}."{table_name}" WHERE document_id = :document_id ORDER BY CAST(metadata->>'chunk_order' AS INTEGER) {limit_clause} OFFSET :offset @@ -501,16 +502,15 @@ def get_document_chunks( total = 0 if results: - total = results[0][7] + total = results[0][6] chunks = [ { - "fragment_id": result[0], - "extraction_id": result[1], - "document_id": result[2], - "user_id": result[3], - "collection_ids": result[4], - "text": result[5], - "metadata": result[6], + "extraction_id": result[0], + "document_id": result[1], + "user_id": result[2], + "collection_ids": result[3], + "text": result[4], + "metadata": result[5], } for result in results ] diff --git a/py/core/providers/file/postgres.py b/py/core/providers/file/postgres.py index dcc110428..e8347dcc1 100644 --- a/py/core/providers/file/postgres.py +++ b/py/core/providers/file/postgres.py @@ -45,7 +45,7 @@ async def initialize(self): await self.create_table() def _get_table_name(self, base_name: str) -> str: - return f"{base_name}" + return self.db_provider._get_table_name(base_name) async def create_table(self): query = f""" diff --git a/py/core/providers/ingestion/__init__.py b/py/core/providers/ingestion/__init__.py new file mode 100644 index 000000000..4a25d30da --- /dev/null +++ b/py/core/providers/ingestion/__init__.py @@ -0,0 +1,13 @@ +# type: ignore +from .r2r.base import R2RIngestionConfig, R2RIngestionProvider +from .unstructured.base import ( + UnstructuredIngestionConfig, + UnstructuredIngestionProvider, +) + +__all__ = [ + "R2RIngestionConfig", + "R2RIngestionProvider", + "UnstructuredIngestionProvider", + "UnstructuredIngestionConfig", +] diff --git a/py/core/providers/ingestion/r2r/base.py b/py/core/providers/ingestion/r2r/base.py new file mode 100644 index 000000000..0f2034ba9 --- /dev/null +++ b/py/core/providers/ingestion/r2r/base.py @@ -0,0 +1,204 @@ +# type: ignore +import logging +import time +from typing import Any, AsyncGenerator, Optional, Union + +from core import parsers +from core.base import ( + AsyncParser, + ChunkingStrategy, + Document, + DocumentExtraction, + DocumentType, + IngestionConfig, + IngestionProvider, + R2RDocumentProcessingError, + RecursiveCharacterTextSplitter, + TextSplitter, + generate_id_from_label, +) + +logger = logging.getLogger(__name__) + + +class R2RIngestionConfig(IngestionConfig): + chunk_size: int = 1024 + chunk_overlap: int = 512 + chunking_strategy: ChunkingStrategy = ChunkingStrategy.RECURSIVE + extra_fields: dict[str, Any] = {} + separator: Optional[str] = None + + +class R2RIngestionProvider(IngestionProvider): + AVAILABLE_PARSERS = { + DocumentType.CSV: [parsers.CSVParser, parsers.CSVParserAdvanced], + DocumentType.DOCX: [parsers.DOCXParser], + DocumentType.HTML: [parsers.HTMLParser], + DocumentType.HTM: [parsers.HTMLParser], + DocumentType.JSON: [parsers.JSONParser], + DocumentType.MD: 
[parsers.MDParser], + DocumentType.PDF: [parsers.PDFParser, parsers.PDFParserUnstructured], + DocumentType.PPTX: [parsers.PPTParser], + DocumentType.TXT: [parsers.TextParser], + DocumentType.XLSX: [parsers.XLSXParser, parsers.XLSXParserAdvanced], + DocumentType.GIF: [parsers.ImageParser], + DocumentType.JPEG: [parsers.ImageParser], + DocumentType.JPG: [parsers.ImageParser], + DocumentType.PNG: [parsers.ImageParser], + DocumentType.SVG: [parsers.ImageParser], + DocumentType.MP3: [parsers.AudioParser], + } + + IMAGE_TYPES = { + DocumentType.GIF, + DocumentType.JPG, + DocumentType.JPEG, + DocumentType.PNG, + DocumentType.SVG, + } + + def __init__(self, config: R2RIngestionConfig): + super().__init__(config) + self.config: R2RIngestionConfig = config # for type hinting + self.parsers: dict[DocumentType, AsyncParser] = {} + self.text_splitter = self._build_text_splitter() + self._initialize_parsers() + + logger.info( + f"R2RIngestionProvider initialized with config: {self.config}" + ) + + def _initialize_parsers(self): + for doc_type, parser_infos in self.AVAILABLE_PARSERS.items(): + for parser_info in parser_infos: + if ( + doc_type not in self.config.excluded_parsers + and doc_type not in self.parsers + ): + # will choose the first parser in the list + self.parsers[doc_type] = parser_info() + + def _build_text_splitter( + self, ingestion_config_override: Optional[dict] = None + ) -> TextSplitter: + logger.info( + f"Initializing text splitter with method: {self.config.chunking_strategy}" + ) # Debug log + + if not ingestion_config_override: + ingestion_config_override = {} + + chunking_strategy = ( + ingestion_config_override.get("chunking_strategy", None) + or self.config.chunking_strategy + ) + + chunk_size = ( + ingestion_config_override.get("chunk_size", None) + or self.config.chunk_size + ) + chunk_overlap = ( + ingestion_config_override.get("chunk_overlap", None) + or self.config.chunk_overlap + ) + + if chunking_strategy == ChunkingStrategy.RECURSIVE: + return RecursiveCharacterTextSplitter( + chunk_size=chunk_size, + chunk_overlap=chunk_overlap, + ) + elif chunking_strategy == ChunkingStrategy.CHARACTER: + from core.base.utils.splitter.text import CharacterTextSplitter + + separator = ( + ingestion_config_override.get("separator", None) + or self.config.separator + or CharacterTextSplitter.DEFAULT_SEPARATOR + ) + + return CharacterTextSplitter( + chunk_size=chunk_size, + chunk_overlap=chunk_overlap, + separator=separator, + keep_separator=False, + strip_whitespace=True, + ) + elif chunking_strategy == ChunkingStrategy.BASIC: + raise NotImplementedError( + "Basic chunking method not implemented. Please use Recursive." 
+ ) + elif chunking_strategy == ChunkingStrategy.BY_TITLE: + raise NotImplementedError("By title method not implemented") + else: + raise ValueError(f"Unsupported method type: {chunking_strategy}") + + def validate_config(self) -> bool: + return self.config.chunk_size > 0 and self.config.chunk_overlap >= 0 + + def chunk( + self, + parsed_document: Union[str, DocumentExtraction], + ingestion_config_override: dict, + ) -> AsyncGenerator[Any, None]: + + text_spliiter = self.text_splitter + if ingestion_config_override: + text_spliiter = self._build_text_splitter( + ingestion_config_override + ) + if isinstance(parsed_document, DocumentExtraction): + parsed_document = parsed_document.data + + if isinstance(parsed_document, str): + chunks = text_spliiter.create_documents([parsed_document]) + else: + # Assuming parsed_document is already a list of text chunks + chunks = parsed_document + + for chunk in chunks: + yield ( + chunk.page_content if hasattr(chunk, "page_content") else chunk + ) + + async def parse( # type: ignore + self, + file_content: bytes, + document: Document, + ingestion_config_override: dict, + ) -> AsyncGenerator[ + Union[DocumentExtraction, R2RDocumentProcessingError], None + ]: + if document.type not in self.parsers: + yield R2RDocumentProcessingError( + document_id=document.id, + error_message=f"Parser for {document.type} not found in `R2RIngestionProvider`.", + ) + else: + t0 = time.time() + + contents = "" + async for text in self.parsers[document.type].ingest(file_content): + contents += text + "\n" + + iteration = 0 + chunks = self.chunk(contents, ingestion_config_override) + for chunk in chunks: + extraction = DocumentExtraction( + id=generate_id_from_label(f"{document.id}-{iteration}"), + document_id=document.id, + user_id=document.user_id, + collection_ids=document.collection_ids, + data=chunk, + metadata={**document.metadata, "chunk_order": iteration}, + ) + iteration += 1 + yield extraction + + logger.debug( + f"Parsed document with id={document.id}, title={document.metadata.get('title', None)}, " + f"user_id={document.metadata.get('user_id', None)}, metadata={document.metadata} " + f"into {iteration} extractions in t={time.time() - t0:.2f} seconds." 
+ ) + + def get_parser_for_document_type(self, doc_type: DocumentType) -> Any: + return self.parsers.get(doc_type) diff --git a/py/core/providers/parsing/unstructured_parsing.py b/py/core/providers/ingestion/unstructured/base.py similarity index 71% rename from py/core/providers/parsing/unstructured_parsing.py rename to py/core/providers/ingestion/unstructured/base.py index 3b5393e30..24f437fb1 100644 --- a/py/core/providers/parsing/unstructured_parsing.py +++ b/py/core/providers/ingestion/unstructured/base.py @@ -5,7 +5,7 @@ import time from copy import copy from io import BytesIO -from typing import Any, AsyncGenerator +from typing import Any, AsyncGenerator, Optional import httpx from unstructured_client import UnstructuredClient @@ -14,14 +14,14 @@ from core import parsers from core.base import ( AsyncParser, + ChunkingStrategy, Document, DocumentExtraction, DocumentType, - ParsingConfig, - ParsingProvider, generate_id_from_label, ) from core.base.abstractions import R2RSerializable +from core.base.providers.ingestion import IngestionConfig, IngestionProvider logger = logging.getLogger(__name__) @@ -31,27 +31,59 @@ class FallbackElement(R2RSerializable): metadata: dict[str, Any] -class UnstructuredParsingProvider(ParsingProvider): - +class UnstructuredIngestionConfig(IngestionConfig): + combine_under_n_chars: int = 128 + max_characters: int = 500 + new_after_n_chars: int = 1500 + + coordinates: Optional[bool] = None + encoding: Optional[str] = None # utf-8 + extract_image_block_types: Optional[list[str]] = None + gz_uncompressed_content_type: Optional[str] = None + hi_res_model_name: Optional[str] = None + include_orig_elements: Optional[bool] = None + include_page_breaks: Optional[bool] = None + + languages: Optional[list[str]] = None + multipage_sections: Optional[bool] = None + ocr_languages: Optional[list[str]] = None + # output_format: Optional[str] = "application/json" + overlap: Optional[int] = None + overlap_all: Optional[bool] = None + pdf_infer_table_structure: Optional[bool] = None + + similarity_threshold: Optional[float] = None + skip_infer_table_types: Optional[list[str]] = None + split_pdf_concurrency_level: Optional[int] = None + split_pdf_page: Optional[bool] = None + starting_page_number: Optional[int] = None + strategy: Optional[str] = None + chunking_strategy: Optional[ChunkingStrategy] = None + unique_element_ids: Optional[bool] = None + xml_keep_tags: Optional[bool] = None + + def to_ingestion_request(self): + import json + + x = json.loads(self.json()) + x.pop("extra_fields", None) + x.pop("provider", None) + x.pop("excluded_parsers", None) + + x = {k: v for k, v in x.items() if v is not None} + return x + + +class UnstructuredIngestionProvider(IngestionProvider): R2R_FALLBACK_PARSERS = { - # Commented filetypes go to unstructured, uncommented fallback to R2R parsers (LLM based) - # DocumentType.CSV: [parsers.CSVParser, parsers.CSVParserAdvanced], - # DocumentType.DOCX: [parsers.DOCXParser], - # DocumentType.HTML: [parsers.HTMLParser], - # DocumentType.HTM: [parsers.HTMLParser], - # DocumentType.JSON: [parsers.JSONParser], - # DocumentType.MD: [parsers.MDParser], - # DocumentType.PDF: [parsers.PDFParser, parsers.PDFParserUnstructured], - # DocumentType.PPTX: [parsers.PPTParser], - # DocumentType.TXT: [parsers.TextParser], - # DocumentType.XLSX: [parsers.XLSXParser, parsers.XLSXParserAdvanced], DocumentType.GIF: [parsers.ImageParser], DocumentType.JPEG: [parsers.ImageParser], DocumentType.JPG: [parsers.ImageParser], DocumentType.PNG: [parsers.ImageParser], 
DocumentType.SVG: [parsers.ImageParser], DocumentType.MP3: [parsers.AudioParser], - # DocumentType.MP4: [parsers.MovieParser], + DocumentType.JSON: [parsers.JSONParser], + DocumentType.HTML: [parsers.HTMLParser], } IMAGE_TYPES = { @@ -62,16 +94,10 @@ class UnstructuredParsingProvider(ParsingProvider): DocumentType.SVG, } - def __init__(self, use_api: bool, config: ParsingConfig): + def __init__(self, config: UnstructuredIngestionConfig): super().__init__(config) - self.config: ParsingConfig = config - if config.excluded_parsers: - logger.warning( - "Excluded parsers are not supported by the unstructured parsing provider." - ) - - self.use_api = use_api - if self.use_api: + self.config: UnstructuredIngestionConfig = config + if config.provider == "unstructured_api": try: self.unstructured_api_auth = os.environ["UNSTRUCTURED_API_KEY"] except KeyError as e: @@ -92,7 +118,6 @@ def __init__(self, use_api: bool, config: ParsingConfig): self.operations = operations else: - try: self.local_unstructured_url = os.environ[ "UNSTRUCTURED_LOCAL_URL" @@ -118,15 +143,9 @@ def _initialize_parsers(self): # will choose the first parser in the list self.parsers[doc_type] = parser_info() - # Apply overrides if specified - for parser_override in self.config.override_parsers: - if parser_name := getattr(parsers, parser_override.parser): - self.parsers[parser_override.document_type] = parser_name() - async def parse_fallback( self, file_content: bytes, document: Document, chunk_size: int ) -> AsyncGenerator[FallbackElement, None]: - texts = self.parsers[document.type].ingest( # type: ignore file_content, chunk_size=chunk_size ) @@ -139,10 +158,21 @@ async def parse_fallback( ) chunk_id += 1 - async def parse( # type: ignore - self, file_content: bytes, document: Document + async def parse( + self, + file_content: bytes, + document: Document, + ingestion_config_override: dict, ) -> AsyncGenerator[DocumentExtraction, None]: + ingestion_config = { + **self.config.to_ingestion_request(), + **(ingestion_config_override or {}), + } + # cleanup extra fields + ingestion_config.pop("provider", None) + ingestion_config.pop("excluded_parsers", None) + t0 = time.time() if document.type in self.R2R_FALLBACK_PARSERS.keys(): logger.info( @@ -152,9 +182,7 @@ async def parse( # type: ignore async for element in self.parse_fallback( file_content, document, - chunk_size=self.config.chunking_config.extra_fields.get( - "combine_under_n_chars", 128 - ), + chunk_size=self.config.combine_under_n_chars, ): elements.append(element) else: @@ -165,7 +193,7 @@ async def parse( # type: ignore file_content = BytesIO(file_content) # type: ignore # TODO - Include check on excluded parsers here. 
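For reference, the override handling in parse() above reduces to a dictionary overlay: the provider-level UnstructuredIngestionConfig is serialized via to_ingestion_request(), any per-request ingestion_config_override is layered on top, and fields that only matter to R2R (provider, excluded_parsers, extra_fields) are stripped before the partition call. A small self-contained sketch of that merge, with illustrative values:

from typing import Any, Optional


def build_partition_kwargs(
    base_config: dict[str, Any],
    override: Optional[dict[str, Any]] = None,
) -> dict[str, Any]:
    """Overlay per-request overrides onto provider defaults and drop R2R-only fields."""
    merged = {**base_config, **(override or {})}
    for key in ("provider", "excluded_parsers", "extra_fields"):
        merged.pop(key, None)
    # Mirror to_ingestion_request(): only explicitly-set values are forwarded.
    return {k: v for k, v in merged.items() if v is not None}


# Illustrative values only:
defaults = {"provider": "unstructured_local", "max_characters": 500, "strategy": None}
print(build_partition_kwargs(defaults, {"max_characters": 1000, "strategy": "hi_res"}))
# {'max_characters': 1000, 'strategy': 'hi_res'}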
- if self.use_api: + if self.config.provider == "unstructured_api": logger.info(f"Using API to parse document {document.id}") files = self.shared.Files( content=file_content.read(), # type: ignore @@ -175,7 +203,7 @@ async def parse( # type: ignore req = self.operations.PartitionRequest( self.shared.PartitionParameters( files=files, - **self.config.chunking_config.extra_fields, + **ingestion_config, ) ) elements = self.client.general.partition(req) # type: ignore @@ -198,11 +226,17 @@ async def parse( # type: ignore f"{self.local_unstructured_url}/partition", json={ "file_content": encoded_content, # Use encoded string - "chunking_config": self.config.chunking_config.extra_fields, + "ingestion_config": ingestion_config, + "filename": document.metadata.get("title", None), }, - timeout=300, # Adjust timeout as needed + timeout=3600, # Adjust timeout as needed ) + if response.status_code != 200: + logger.error(f"Error partitioning file: {response.text}") + raise ValueError( + f"Error partitioning file: {response.text}" + ) elements = response.json().get("elements", []) iteration = 0 # if there are no chunks @@ -232,7 +266,7 @@ async def parse( # type: ignore # indicate that the document was chunked using unstructured # nullifies the need for chunking in the pipeline metadata["partitioned_by_unstructured"] = True - + metadata["chunk_order"] = iteration # creating the text extraction yield DocumentExtraction( id=generate_id_from_label(f"{document.id}-{iteration}"), diff --git a/py/core/providers/kg/__init__.py b/py/core/providers/kg/__init__.py index 1c9ad1894..0b6f692fe 100644 --- a/py/core/providers/kg/__init__.py +++ b/py/core/providers/kg/__init__.py @@ -1,3 +1,3 @@ -from .neo4j import Neo4jKGProvider +from .postgres import PostgresKGProvider -__all__ = ["Neo4jKGProvider"] +__all__ = ["PostgresKGProvider"] diff --git a/py/core/providers/kg/neo4j/__init__.py b/py/core/providers/kg/neo4j/__init__.py deleted file mode 100644 index bb1d985f3..000000000 --- a/py/core/providers/kg/neo4j/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -from .provider import Neo4jKGProvider # type: ignore - -__all__ = ["Neo4jKGProvider"] diff --git a/py/core/providers/kg/neo4j/graph_queries.py b/py/core/providers/kg/neo4j/graph_queries.py deleted file mode 100644 index 84daa5aaf..000000000 --- a/py/core/providers/kg/neo4j/graph_queries.py +++ /dev/null @@ -1,149 +0,0 @@ -# type: ignore -UNIQUE_CONSTRAINTS = [] - -GET_CHUNKS_QUERY = """ -MATCH (c:__Chunk__) -RETURN c -""" - -PUT_CHUNKS_QUERY = """ -MERGE (c:__Chunk__ {id:value.id}) -SET c += value {.type, .data, .metadata, .document_id, .extraction_id} -MERGE (d:__Document__ {id:value.document_id}) -MERGE (n)-[:PART_OF_DOCUMENT]->(d) -""" - -# searching by entity_name -GET_ENTITIES_QUERY = """ -MATCH (e:__Entity__) -WHERE size($entity_ids) = 0 OR e.name IN $entity_ids -RETURN e -""" - -PUT_ENTITIES_QUERY = """ -WITH value, apoc.text.capitalize(value.category) AS upperCamelCategory -MERGE (e:__Entity__ {name: value.name}) -ON CREATE SET e:__Entity__ -ON MATCH SET e:__Entity__ -WITH e, upperCamelCategory, value -CALL apoc.create.addLabels(e, [upperCamelCategory]) YIELD node -SET node.description = CASE - WHEN node.description IS NULL THEN value.description - ELSE node.description + '\n\n' + value.description -END, -node.rank = CASE - WHEN node.rank IS NULL THEN value.rank - ELSE CASE WHEN value.rank > node.rank THEN value.rank ELSE node.rank END -END, -node.attributes = CASE - WHEN node.attributes IS NULL THEN value.attributes - ELSE node.attributes + '\n\n' + 
value.attributes -END -WITH node as e, value -UNWIND value.text_unit_ids AS text_unit -MATCH (c:__Chunk__ {id:text_unit}) -MERGE (e)-[:APPEARS_IN_CHUNK]->(c) -WITH e, value -UNWIND value.document_ids AS document_id -MATCH (d:__Document__ {id:document_id}) -MERGE (e)-[:APPEARS_IN_DOCUMENT]->(d) -WITH e, value -UNWIND value.community_ids AS community_id -MATCH (comm:__Community__ {community:community_id}) -MERGE (e)-[:BELONGS_TO_COMMUNITY]->(comm) -""" - -# use this after PUT_ENTITIES_QUERY when you have embeddings. -PUT_ENTITIES_EMBEDDINGS_QUERY = """ -MATCH (e:__Entity__ {name: value.name}) -SET e += value {.description} -WITH e, value -CALL db.create.setNodeVectorProperty(e, "description_embedding", value.description_embedding) -""" - -## get triples by subject and object -GET_TRIPLES_QUERY = """ - MATCH (e1)-[rel]->(e2) - RETURN e1, rel, e2 -""" - -GET_TRIPLES_BY_SUBJECT_AND_OBJECT_QUERY = """ -UNWIND $triples AS triple - MATCH (e1:__Entity__)-[rel]->(e2:__Entity__) - WHERE e1.name = triple.subject - AND e2.name = triple.object - AND type(rel) = triple.predicate - RETURN e1, rel, e2 -""" - -PUT_TRIPLES_QUERY = """ -WITH value, apoc.text.capitalize(value.predicate) AS upperCamelPredicate -MATCH (source:__Entity__ {name: value.subject}) -MATCH (target:__Entity__ {name: value.object}) -WITH source, target, value, upperCamelPredicate -CALL apoc.merge.relationship(source, upperCamelPredicate, {}, {}, target) YIELD rel -SET rel.weight = CASE - WHEN rel.weight IS NULL THEN value.weight - ELSE CASE WHEN value.weight > rel.weight THEN value.weight ELSE rel.weight END -END, -rel.description = CASE - WHEN rel.description IS NULL THEN value.description - ELSE rel.description + '\n\n' + value.description -END, -rel.attributes = CASE - WHEN rel.attributes IS NULL THEN value.attributes - ELSE rel.attributes + '\n\n' + value.attributes -END, -rel.text_unit_ids = CASE - WHEN rel.text_unit_ids IS NULL THEN value.text_unit_ids - ELSE rel.text_unit_ids + value.text_unit_ids -END, -rel.document_ids = CASE - WHEN rel.document_ids IS NULL THEN value.document_ids - ELSE rel.document_ids + value.document_ids -END -WITH rel, value -RETURN count(*) as createdRels -""" - -GET_COMMUNITIES_QUERY = """ -MATCH (c:__Community__) -WHERE $level IS NULL OR c.level = $level -RETURN c -""" - -PUT_COMMUNITIES_QUERY = """ -MERGE (c:__Community__ {community:value.id}) -SET c += value {.level, .rank, .summary} -WITH c, value -CALL db.create.setNodeVectorProperty(c, "summary_embedding", value.summary_embedding) -RETURN count(*) as createdCommunities -""" - -GET_COMMUNITIES_REPORT_QUERY = """ -MATCH (c:__Community__) -RETURN c -""" - -PUT_COMMUNITIES_REPORT_QUERY = """ -MERGE (c:__Community__ {community:value.community}) -SET c += value {.level, .title, .rank, .rank_explanation, .full_content, .summary} -WITH c, value -UNWIND range(0, size(value.findings)-1) AS finding_idx -WITH c, value, finding_idx, value.findings[finding_idx] as finding -MERGE (c)-[:HAS_FINDING]->(f:Finding {id:finding_idx}) -SET f += finding -""" - -GET_COVARIATES_QUERY = """ -MATCH (c:__Covariate__ {id:value.id}) -RETURN c -""" - -PUT_COVARIATES_QUERY = """ -MERGE (c:__Covariate__ {id:value.id}) -SET c += apoc.map.clean(value, ["text_unit_id", "document_ids", "n_tokens"], [NULL, ""]) -WITH c, value -MATCH (ch:__Chunk__ {id: value.text_unit_id}) -MERGE (ch)-[:HAS_COVARIATE]->(c) -""" diff --git a/py/core/providers/kg/neo4j/provider.py b/py/core/providers/kg/neo4j/provider.py deleted file mode 100644 index 90bf8518f..000000000 --- 
a/py/core/providers/kg/neo4j/provider.py +++ /dev/null @@ -1,592 +0,0 @@ -# type: ignore -import json -import logging -import os -from datetime import date, datetime -from decimal import Decimal -from typing import Any, AsyncGenerator, Dict, List, Optional, Tuple -from uuid import UUID - -from core.base import KGConfig, KGProvider, R2RException -from core.base.abstractions import ( - Community, - DocumentFragment, - Entity, - KGExtraction, - RelationshipType, - Triple, -) - -from .graph_queries import ( - GET_CHUNKS_QUERY, - GET_COMMUNITIES_QUERY, - GET_ENTITIES_QUERY, - GET_TRIPLES_BY_SUBJECT_AND_OBJECT_QUERY, - GET_TRIPLES_QUERY, - PUT_CHUNKS_QUERY, - PUT_COMMUNITIES_QUERY, - PUT_ENTITIES_EMBEDDINGS_QUERY, - PUT_ENTITIES_QUERY, - PUT_TRIPLES_QUERY, - UNIQUE_CONSTRAINTS, -) - -logger = logging.getLogger(__name__) - - -class Neo4jKGProvider(KGProvider): - - def __init__(self, config: KGConfig, *args: Any, **kwargs: Any) -> None: - - try: - import neo4j - except ImportError: - raise ImportError("Please install neo4j: pip install neo4j") - - username = config.user or os.getenv("NEO4J_USER") - password = config.password or os.getenv("NEO4J_PASSWORD") - url = config.url or os.getenv("NEO4J_URL") - database = config.database or os.getenv("NEO4J_DATABASE", "neo4j") - - if not username or not password or not url: - raise ValueError( - "Neo4j configuration values are missing. Please set NEO4J_USER, NEO4J_PASSWORD, and NEO4J_URL environment variables." - ) - - self._driver = neo4j.GraphDatabase.driver( - url, auth=(username, password), **kwargs - ) - self._async_driver = neo4j.AsyncGraphDatabase.driver( - url, - auth=(username, password), - **kwargs, - ) - self._database = database - self.structured_schema = {} - self.config = config - - self.create_constraints() - super().__init__(config, *args, **kwargs) - - @property - def client(self): - return self._driver - - def create_constraints(self): - for statement in UNIQUE_CONSTRAINTS: - self._driver.execute_query(statement) - - def structured_query(self, query: str, param_map: Dict[str, Any] = {}): - return self._driver.execute_query(query, parameters_=param_map) - - def convert_to_neo4j_compatible(self, value): - if isinstance(value, (str, int, float, bool)): - return value - elif isinstance(value, (datetime, date)): - return value.isoformat() - elif isinstance(value, UUID): - return str(value) - elif isinstance(value, Decimal): - return float(value) - elif isinstance(value, list): - return value - elif isinstance(value, dict): - return json.dumps(value) - else: - return str(value) - - def convert_model_list_to_neo4j_compatible(self, model_list): - return [ - { - k: self.convert_to_neo4j_compatible(v) - for k, v in item.dict().items() - } - for item in model_list - ] - - def get_entity_map( - self, entity_names: list[str] | None = None - ) -> dict[str, list[Any]]: - entities = self.get(entity_names) - triples = self.get_triples(entity_names) - entity_map = {} - for entity in entities: - if entity.name not in entity_map: - entity_map[entity.name] = {"entities": [], "triples": []} - entity_map[entity.name]["entities"].append(entity) - - for triple in triples: - if triple.subject in entity_map: - entity_map[triple.subject]["triples"].append(triple) - if triple.object in entity_map: - entity_map[triple.object]["triples"].append(triple) - return entity_map - - def batched_import(self, statement, df, batch_size=1000): - """ - Import a dataframe into Neo4j using a batched approach. 
- Parameters: statement is the Cypher query to execute, df is the dataframe to import, and batch_size is the number of rows to import in each batch. - """ - total = len(df) - results = [] - for start in range(0, total, batch_size): - batch = df[start : min(start + batch_size, total)] - batch = self.convert_model_list_to_neo4j_compatible(batch) - result = self._driver.execute_query( - "UNWIND $rows AS value " + statement, - rows=batch, - database_=self._database, - ) - results.append(result) - return results - - def get_chunks( - self, chunk_ids: List[str] = None - ) -> List[DocumentFragment]: - """ - Get chunks from the graph. - """ - return self.structured_query(GET_CHUNKS_QUERY, chunk_ids) - - def upsert_chunks(self, chunks: List[DocumentFragment]): - """ - Upsert chunks into the graph. - """ - return self.batched_import(PUT_CHUNKS_QUERY, chunks) - - def upsert_entities( - self, entities: List[Entity], with_embeddings: bool = False - ): - """ - Upsert entities into the graph. - """ - if with_embeddings: - return self.batched_import(PUT_ENTITIES_EMBEDDINGS_QUERY, entities) - else: - return self.batched_import(PUT_ENTITIES_QUERY, entities) - - def upsert_triples(self, triples: List[Triple]): - """ - Upsert relations into the graph. - """ - return self.batched_import(PUT_TRIPLES_QUERY, triples) - - def upsert_communities(self, communities: List[Community]): - """ - Upsert communities into the graph. - """ - return self.batched_import(PUT_COMMUNITIES_QUERY, communities) - - def get_entities(self, entity_ids: List[str] = []) -> List[Entity]: - """ - Get entities from the graph. - """ - neo4j_records = self.structured_query( - GET_ENTITIES_QUERY, {"entity_ids": entity_ids} - ) - entities = [ - Entity( - category=", ".join(list(record["e"]._labels)[1:]), - **record["e"]._properties, - ) - for record in neo4j_records.records - ] - return entities - - def upsert_nodes_and_relationships( - self, kg_extractions: list[KGExtraction] - ) -> Tuple[int, int]: - - all_entities = [] - all_relationships = [] - for extraction in kg_extractions: - all_entities.extend(list(extraction.entities.values())) - all_relationships.extend(extraction.triples) - - nodes_upserted = self.upsert_entities(all_entities) - relationships_upserted = self.upsert_triples(all_relationships) - - return (len(nodes_upserted), len(relationships_upserted)) - - def get(self, entity_name: str = None) -> Entity: - """ - Get entities from the graph. - """ - if entity_name is None: - return self.get_entities() - else: - return self.get_entities(entity_ids=[entity_name]) - - def get_triples(self, triple_ids: list[str] | None = None) -> list[Triple]: - """ - Get triples from the graph. 
- """ - - if triple_ids is None: - neo4j_records = self.structured_query(GET_TRIPLES_QUERY) - else: - triple_ids = [triple_id.split("->") for triple_id in triple_ids] - triple_ids = [ - { - "subject": triple_id[0], - "predicate": triple_id[1], - "object": triple_id[2], - } - for triple_id in triple_ids - ] - neo4j_records = self.structured_query( - GET_TRIPLES_BY_SUBJECT_AND_OBJECT_QUERY, - {"triples": triple_ids}, - ) - - triples = [ - Triple( - subject=record["e1"]._properties["name"], - predicate=record["rel"].type, - object=record["e2"]._properties["name"], - **record["rel"]._properties, - ) - for record in neo4j_records.records - ] - return triples - - def get_community_entities_and_triples( - self, level: int, community_id: int, include_embeddings: bool = False - ) -> Tuple[List[Entity], List[Triple]]: - """ - Get the entities and triples that belong to a community. - - Input: - - level: The level of the hierarchy. - - community_id: The ID of the community to get the entities and triples for. - - include_embeddings: Whether to include the embeddings in the output. - - Output: - - A tuple of entities and triples that belong to the community. - - """ - - # get the entities and triples from the graph - query = """MATCH (a:__Entity__) - [r] -> (b:__Entity__) - WHERE a.communityIds[$level] = $community_id - OR b.communityIds[$level] = $community_id - RETURN ID(a) AS source_id, a.name AS source, id(b) AS target_id, b.name AS target, a.description AS source_description, - b.description AS target_description, labels(a) AS source_labels, labels(b) AS target_labels, - r.description AS relationship_description, r.name AS relationship_name, r.weight AS relationship_weight, ID(r) AS relationship_id - """ - - neo4j_records = self.structured_query( - query, - { - "community_id": int(community_id), - "level": int(level), - }, - ) - - entities = [ - Entity( - id=record["source_id"], - name=record["source"], - description=record["source_description"], - category=", ".join(record["source_labels"]), - ) - for record in neo4j_records.records - ] - - triples = [ - Triple( - id=record["relationship_id"], - subject=record["source"], - predicate=record["relationship_name"], - object=record["target"], - description=record["relationship_description"], - weight=record["relationship_weight"], - ) - for record in neo4j_records.records - ] - - logger.info( - f"{len(entities)} entities and {len(triples)} triples were retrieved for community {community_id} at level {level}" - ) - - return entities, triples - - def update_extraction_prompt( - self, - prompt_provider: Any, - entity_types: list[Any], - relations: list[RelationshipType], - ) -> None: - pass - - def update_kg_search_prompt( - self, - prompt_provider: Any, - entity_types: list[Any], - relations: list[RelationshipType], - ) -> None: - pass - - def get_communities(self, level: str = None) -> List[Community]: - """ - Get communities from the graph. 
- """ - neo4j_records = self.structured_query( - GET_COMMUNITIES_QUERY, {"level": level} - ) - - communities = [ - Community(**record["c"]._properties, id=record["c"]["community"]) - for record in neo4j_records.records - ] - return communities - - def delete_all_nodes(self): - self._driver.execute_query("MATCH (a)-[r]->(b) DELETE a, r, b") - self._driver.execute_query("MATCH (a) DELETE a") - - def delete( - self, - entity_names: Optional[List[str]] = None, - relation_names: Optional[List[str]] = None, - properties: Optional[dict] = None, - ids: Optional[List[str]] = None, - ) -> None: - pass - - def get_rel_map( - self, - graph_nodes: Any, - depth: int = 2, - limit: int = 30, - ignore_rels: Optional[List[str]] = None, - ) -> List[Triple]: - pass - - def create_vector_index( - self, node_type: str, node_property: str, dimension: int - ) -> None: - - query = f""" - CREATE VECTOR INDEX `{node_type}_{node_property}` IF NOT EXISTS - - FOR (n:{node_type}) ON n.{node_property} - OPTIONS {{indexConfig: {{`vector.similarity_function`: 'cosine', `vector.dimensions`:{dimension}}}}}""" - - self.structured_query(query) - - def get_schema(self, refresh: bool = False) -> str: - return super().get_schema(refresh) - - def retrieve_cache(self, cache_type: str, cache_id: str) -> bool: - return False - - async def vector_query( - self, query, **kwargs: Any - ) -> AsyncGenerator[dict[str, Any], None]: - - query_embedding = kwargs.get("query_embedding", None) - search_type = kwargs.get("search_type", "__Entity__") - embedding_type = kwargs.get("embedding_type", "description_embedding") - property_names = kwargs.get("property_names", ["name", "description"]) - limit = kwargs.get("limit", 10) - - property_names_arr = [ - f"e.{property_name} as {property_name}" - for property_name in property_names - ] - property_names_str = ", ".join(property_names_arr) - - if search_type == "__Relationship__": - query = f""" - MATCH () - [e] -> () - WHERE e.{embedding_type} IS NOT NULL AND size(e.{embedding_type}) = $dimension - WITH e, vector.similarity.cosine(e.{embedding_type}, $embedding) AS score - ORDER BY score DESC LIMIT toInteger($limit) - RETURN {property_names_str}, score - """ - - query_params = { - "embedding": query_embedding, - "dimension": len(query_embedding), - "limit": limit, - } - - else: - query = f""" - MATCH (e:{search_type}) - WHERE e.{embedding_type} IS NOT NULL AND size(e.{embedding_type}) = $dimension - WITH e, vector.similarity.cosine(e.{embedding_type}, $embedding) AS score - ORDER BY score DESC LIMIT toInteger($limit) - RETURN {property_names_str}, score - """ - query_params = { - "embedding": query_embedding, - "dimension": len(query_embedding), - "limit": limit, - "search_type": search_type, - } - - neo4j_results = self.structured_query(query, query_params) - - # get the descriptions from the neo4j results - # descriptions = [record['e']._properties[property_name] for record in neo4j_results.records for property_name in property_names] - # return descriptions, scores - if search_type == "__Entity__" and len(neo4j_results.records) == 0: - raise R2RException( - "No search results found. 
Please make sure you have run the KG enrichment step before running the search: r2r create-graph and r2r enrich-graph", - 400, - ) - - logger.info( - f"Neo4j results: Returning {len(neo4j_results.records)} records for query of type {search_type}" - ) - - for record in neo4j_results.records: - yield { - property_name: record[property_name] - for property_name in property_names - } - - def perform_graph_clustering( - self, leiden_params: dict - ) -> Tuple[int, int, set[tuple[int, Any]]]: - """ - Perform graph clustering on the graph. - - Input: - - leiden_params: a dictionary that contains the parameters for the graph clustering. - - Output: - - Total number of communities - - Total number of hierarchies - """ - # step 1: drop the graph, if it exists and project the graph again. - # in this step the vertices that have no edges are not included in the projection. - - GRAPH_EXISTS_QUERY = """ - CALL gds.graph.exists('kg_graph') YIELD exists - WITH exists - RETURN CASE WHEN exists THEN true ELSE false END as graphExists; - - """ - - result = self.structured_query(GRAPH_EXISTS_QUERY) - graph_exists = result.records[0]["graphExists"] - - GRAPH_PROJECTION_QUERY = """ - MATCH (s:__Entity__)-[r]->(t:__Entity__) - RETURN gds.graph.project( - 'kg_graph', - s, - t, - """ - - if graph_exists: - - logger.info(f"Graph exists, dropping it") - GRAPH_DROP_QUERY = ( - "CALL gds.graph.drop('kg_graph') YIELD graphName;" - ) - result = self.structured_query(GRAPH_DROP_QUERY) - - GRAPH_PROJECTION_QUERY += """ - { - sourceNodeProperties: s { }, - targetNodeProperties: t { }, - relationshipProperties: r { .weight } - }, - { - relationshipWeightProperty: 'weight', - undirectedRelationshipTypes: ['*'] - } - ) - """ - else: - GRAPH_PROJECTION_QUERY += """ - { - sourceNodeProperties: s {}, - targetNodeProperties: t {}, - relationshipProperties: r { .weight } - }, - { - relationshipWeightProperty: 'weight', - undirectedRelationshipTypes: ['*'] - } - )""" - - result = self.structured_query(GRAPH_PROJECTION_QUERY) - - # step 2: run the hierarchical leiden algorithm on the graph. 
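A minimal standalone sketch of the Leiden write step assembled below, assuming a Neo4j 5.x driver (`driver.execute_query`) and the GDS plugin with an in-memory projection already named `kg_graph`; parameter names and defaults mirror the `leiden_params` handling in this provider:

```python
# Hedged sketch: drive gds.leiden.write the way the removed provider does.
# Assumes neo4j>=5 and the GDS plugin; "kg_graph" must already be projected.
import neo4j


def run_leiden(driver: neo4j.Driver, leiden_params: dict) -> tuple[int, list]:
    write_property = leiden_params.get("write_property", "communityIds")
    query = f"""
    CALL gds.leiden.write('kg_graph', {{
        writeProperty: '{write_property}',
        randomSeed: {leiden_params.get("random_seed", 42)},
        includeIntermediateCommunities: {str(leiden_params.get("include_intermediate_communities", True)).lower()},
        maxLevels: {leiden_params.get("max_levels", 10)},
        gamma: {leiden_params.get("gamma", 1.0)},
        theta: {leiden_params.get("theta", 0.01)},
        tolerance: {leiden_params.get("tolerance", 0.0001)},
        minCommunitySize: {leiden_params.get("min_community_size", 1)}
    }})
    YIELD communityCount, modularities
    """
    # execute_query returns an EagerResult: (records, summary, keys).
    records, _, _ = driver.execute_query(query)
    return records[0]["communityCount"], records[0]["modularities"]
```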
- # seed_property = leiden_params.get("seed_property", "communityIds") - write_property = leiden_params.get("write_property", "communityIds") - random_seed = leiden_params.get("random_seed", 42) - include_intermediate_communities = leiden_params.get( - "include_intermediate_communities", True - ) - max_levels = leiden_params.get("max_levels", 10) - gamma = leiden_params.get("gamma", 1.0) - theta = leiden_params.get("theta", 0.01) - tolerance = leiden_params.get("tolerance", 0.0001) - min_community_size = leiden_params.get("min_community_size", 1) - # don't use the seed property for now - seed_property_config = ( - "" # f"seedProperty: '{seed_property}'" if graph_exists else "" - ) - - GRAPH_CLUSTERING_QUERY = f""" - CALL gds.leiden.write('kg_graph', {{ - {seed_property_config} - writeProperty: '{write_property}', - randomSeed: {random_seed}, - includeIntermediateCommunities: {include_intermediate_communities}, - maxLevels: {max_levels}, - gamma: {gamma}, - theta: {theta}, - tolerance: {tolerance}, - minCommunitySize: {min_community_size} - }}) - YIELD communityCount, modularities; - """ - - result = self.structured_query(GRAPH_CLUSTERING_QUERY).records[0] - - community_count: int = result["communityCount"] - modularities = result["modularities"] - - logger.info( - f"Performed graph clustering with {community_count} communities and modularities {modularities}" - ) - - COMMUNITY_QUERY = f""" - MATCH (n) - WHERE n.communityIds IS NOT NULL - RETURN DISTINCT - CASE - WHEN n.communityIds IS NOT NULL - THEN toIntegerList(n.communityIds) - ELSE [] - END AS communityIds - """ - - result = self.structured_query(COMMUNITY_QUERY) - - intermediate_communities = [ - record["communityIds"] for record in result.records - ] - - intermediate_communities_set = set() - for community_list in intermediate_communities: - for level, community_id in enumerate(community_list): - intermediate_communities_set.add((level, community_id)) - intermediate_communities_set = list(intermediate_communities_set) - - logger.info( - f"Intermediate communities: {intermediate_communities_set}" - ) - - return ( - community_count, - len(modularities), - intermediate_communities_set, - ) diff --git a/py/core/providers/kg/postgres.py b/py/core/providers/kg/postgres.py new file mode 100644 index 000000000..4b6ce29b4 --- /dev/null +++ b/py/core/providers/kg/postgres.py @@ -0,0 +1,674 @@ +import json +import logging +from typing import Any, Optional, Tuple +from uuid import UUID + +import asyncpg + +from core import KGExtraction +from core.base import ( + Community, + DatabaseProvider, + EmbeddingProvider, + Entity, + KGConfig, + KGProvider, + Triple, +) + +logger = logging.getLogger(__name__) + + +class PostgresKGProvider(KGProvider): + + def __init__( + self, + config: KGConfig, + db_provider: DatabaseProvider, + embedding_provider: EmbeddingProvider, + *args: Any, + **kwargs: Any, + ) -> None: + super().__init__(config, *args, **kwargs) + + self.db_provider = db_provider.relational + self.embedding_provider = embedding_provider + try: + import networkx as nx + + self.nx = nx + except ImportError as exc: + raise ImportError( + "NetworkX is not installed. Please install it to use this module." 
+ ) from exc + + async def initialize(self): + logger.info( + f"Initializing PostgresKGProvider for project {self.db_provider.project_name}" + ) + await self.create_tables(project_name=self.db_provider.project_name) + + async def execute_query( + self, query: str, params: Optional[list[tuple[Any]]] = None + ) -> Any: + return await self.db_provider.execute_query(query, params) + + async def execute_many( + self, + query: str, + params: Optional[list[tuple[Any]]] = None, + batch_size: int = 1000, + ) -> Any: + return await self.db_provider.execute_many(query, params, batch_size) + + async def fetch_query( + self, query: str, params: Optional[list[tuple[Any]]] = None + ) -> Any: + return await self.db_provider.fetch_query(query, params) + + def _get_table_name(self, base_name: str) -> str: + return self.db_provider._get_table_name(base_name) + + async def create_tables(self, project_name: str): + # raw entities table + # create schema + + query = f""" + + CREATE TABLE IF NOT EXISTS {self._get_table_name("entity_raw")} ( + id SERIAL PRIMARY KEY, + category TEXT NOT NULL, + name TEXT NOT NULL, + description TEXT NOT NULL, + extraction_ids UUID[] NOT NULL, + document_id UUID NOT NULL, + attributes JSONB + ); + """ + await self.execute_query(query) + + # raw triples table, also the final table. this will have embeddings. + query = f""" + CREATE TABLE IF NOT EXISTS {self._get_table_name("triple_raw")} ( + id SERIAL PRIMARY KEY, + subject TEXT NOT NULL, + predicate TEXT NOT NULL, + object TEXT NOT NULL, + weight FLOAT NOT NULL, + description TEXT NOT NULL, + embedding vector({self.embedding_provider.config.base_dimension}), + extraction_ids UUID[] NOT NULL, + document_id UUID NOT NULL, + attributes JSONB NOT NULL + ); + """ + await self.execute_query(query) + + # entity description table, unique by document_id, category, name + query = f""" + CREATE TABLE IF NOT EXISTS {self._get_table_name("entity_description")} ( + id SERIAL PRIMARY KEY, + document_id UUID NOT NULL, + category TEXT NOT NULL, + name TEXT NOT NULL, + description TEXT NOT NULL, + description_embedding vector(1536), + extraction_ids UUID[] NOT NULL, + attributes JSONB NOT NULL, + UNIQUE (document_id, category, name) + );""" + + await self.execute_query(query) + + # triples table 2 # Relationship summaries by document ID + query = f""" + CREATE TABLE IF NOT EXISTS {self._get_table_name("triple_description")} ( + id SERIAL PRIMARY KEY, + document_ids UUID[] NOT NULL, + subject TEXT NOT NULL, + predicate TEXT NOT NULL, + object TEXT NOT NULL, + weight FLOAT NOT NULL, + description TEXT NOT NULL, + extraction_ids UUID[] NOT NULL, + attributes JSONB NOT NULL, + UNIQUE (document_ids, subject, predicate, object) + );""" + + await self.execute_query(query) + + # embeddings tables + query = f""" + CREATE TABLE IF NOT EXISTS {self._get_table_name("entity_embedding")} ( + id SERIAL PRIMARY KEY, + name TEXT NOT NULL, + description TEXT NOT NULL, + description_embedding vector({self.embedding_provider.config.base_dimension}) NOT NULL, + UNIQUE (name) + ); + """ + + await self.execute_query(query) + + # triples embeddings table + query = f""" + CREATE TABLE IF NOT EXISTS {self._get_table_name("triple_embedding")} ( + id SERIAL PRIMARY KEY, + subject TEXT NOT NULL, + predicate TEXT NOT NULL, + object TEXT NOT NULL, + description_embedding vector({self.embedding_provider.config.base_dimension}) NOT NULL, + UNIQUE (subject, predicate, object) + ); + """ + + await self.execute_query(query) + + # communities table, result of the Leiden algorithm + 
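The `vector(...)` columns created throughout `create_tables` presuppose the pgvector extension in the target database. A small sketch, assuming the `asyncpg` and `pgvector` Python packages, of preparing a connection before this DDL runs:

```python
# Sketch (assumption: pgvector is installable in the target Postgres and the
# pgvector Python package is present): enable the extension and register the
# vector type so asyncpg can bind/read columns such as description_embedding.
import asyncpg
from pgvector.asyncpg import register_vector


async def prepare_connection(dsn: str) -> asyncpg.Connection:
    conn = await asyncpg.connect(dsn)
    await conn.execute("CREATE EXTENSION IF NOT EXISTS vector")
    await register_vector(conn)
    return conn
```

Note that the provider itself passes embeddings as strings (for example `str(query_embedding)` in `vector_query`), so type registration only matters if vectors are bound directly.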
query = f""" + CREATE TABLE IF NOT EXISTS {self._get_table_name("community")} ( + id SERIAL PRIMARY KEY, + node TEXT NOT NULL, + cluster INT NOT NULL, + parent_cluster INT, + level INT NOT NULL, + is_final_cluster BOOLEAN NOT NULL, + triple_ids INT[] NOT NULL + );""" + + await self.execute_query(query) + + # communities_report table + query = f""" + CREATE TABLE IF NOT EXISTS {self._get_table_name("community_report")} ( + id SERIAL PRIMARY KEY, + community_number INT NOT NULL, + collection_id UUID NOT NULL, + level INT NOT NULL, + name TEXT NOT NULL, + summary TEXT NOT NULL, + findings TEXT[] NOT NULL, + rating FLOAT NOT NULL, + rating_explanation TEXT NOT NULL, + embedding vector({self.embedding_provider.config.base_dimension}) NOT NULL, + attributes JSONB, + UNIQUE (community_number, level, collection_id) + );""" + + await self.execute_query(query) + + async def _add_objects( + self, objects: list[Any], table_name: str + ) -> asyncpg.Record: + """ + Upsert objects into the specified table. + """ + # Get non-null attributes from the first object + non_null_attrs = { + k: v for k, v in objects[0].__dict__.items() if v is not None + } + columns = ", ".join(non_null_attrs.keys()) + + placeholders = ", ".join(f"${i+1}" for i in range(len(non_null_attrs))) + + QUERY = f""" + INSERT INTO {self._get_table_name(table_name)} ({columns}) + VALUES ({placeholders}) + """ + + # Filter out null values for each object + params = [ + tuple( + json.dumps(v) if isinstance(v, dict) else v + for v in obj.__dict__.values() + if v is not None + ) + for obj in objects + ] + return await self.execute_many(QUERY, params) + + async def add_entities( + self, + entities: list[Entity], + table_name: str, + ) -> asyncpg.Record: + """ + Upsert entities into the entities_raw table. These are raw entities extracted from the document. + + Args: + entities: list[Entity]: list of entities to upsert + collection_name: str: name of the collection + + Returns: + result: asyncpg.Record: result of the upsert operation + """ + return await self._add_objects(entities, table_name) + + async def add_triples( + self, + triples: list[Triple], + table_name: str, + ) -> asyncpg.Record: + """ + Upsert triples into the triple_raw table. These are raw triples extracted from the document. + + Args: + triples: list[Triple]: list of triples to upsert + table_name: str: name of the table to upsert into + + Returns: + result: asyncpg.Record: result of the upsert operation + """ + return await self._add_objects(triples, table_name) + + async def add_kg_extractions( + self, + kg_extractions: list[KGExtraction], + table_suffix: str = "_raw", + ) -> Tuple[int, int]: + """ + Upsert entities and triples into the database. These are raw entities and triples extracted from the document fragments. 
+ + Args: + kg_extractions: list[KGExtraction]: list of KG extractions to upsert + table_suffix: str: suffix to add to the table names + + Returns: + total_entities: int: total number of entities upserted + total_relationships: int: total number of relationships upserted + """ + + total_entities, total_relationships = 0, 0 + + for extraction in kg_extractions: + + total_entities, total_relationships = ( + total_entities + len(extraction.entities), + total_relationships + len(extraction.triples), + ) + + if extraction.entities: + if not extraction.entities[0].extraction_ids: + for i in range(len(extraction.entities)): + extraction.entities[i].extraction_ids = ( + extraction.extraction_ids + ) + extraction.entities[i].document_id = ( + extraction.document_id + ) + + await self.add_entities( + extraction.entities, + table_name="entity" + table_suffix, + ) + + if extraction.triples: + if not extraction.triples[0].extraction_ids: + for i in range(len(extraction.triples)): + extraction.triples[i].extraction_ids = ( + extraction.extraction_ids + ) + extraction.triples[i].document_id = extraction.document_id + + await self.add_triples( + extraction.triples, + table_name="triple" + table_suffix, + ) + + return (total_entities, total_relationships) + + async def get_entity_map( + self, offset: int, limit: int, document_id: str + ) -> dict[str, Any]: + + QUERY1 = f""" + WITH entities_list AS ( + + SELECT DISTINCT name + FROM {self._get_table_name("entity_raw")} + WHERE document_id = $1 + ORDER BY name ASC + LIMIT {limit} OFFSET {offset} + ) + SELECT DISTINCT e.name, e.description, e.category + FROM {self._get_table_name("entity_raw")} e + JOIN entities_list el ON e.name = el.name + ORDER BY e.name;""" + + entities_list = await self.fetch_query(QUERY1, [document_id]) + entities_list = [ + { + "name": entity["name"], + "description": entity["description"], + "category": entity["category"], + } + for entity in entities_list + ] + + QUERY2 = f""" + WITH entities_list AS ( + + SELECT DISTINCT name + FROM {self._get_table_name("entity_raw")} + WHERE document_id = $1 + ORDER BY name ASC + LIMIT {limit} OFFSET {offset} + ) + + SELECT DISTINCT t.subject, t.predicate, t.object, t.weight, t.description + FROM {self._get_table_name("triple_raw")} t + JOIN entities_list el ON t.subject = el.name + ORDER BY t.subject, t.predicate, t.object; + """ + + triples_list = await self.fetch_query(QUERY2, [document_id]) + triples_list = [ + { + "subject": triple["subject"], + "predicate": triple["predicate"], + "object": triple["object"], + "weight": triple["weight"], + "description": triple["description"], + } + for triple in triples_list + ] + + entity_map = {} + for entity in entities_list: + if entity["name"] not in entity_map: + entity_map[entity["name"]] = {"entities": [], "triples": []} + entity_map[entity["name"]]["entities"].append(entity) + + for triple in triples_list: + if triple["subject"] in entity_map: + entity_map[triple["subject"]]["triples"].append(triple) + if triple["object"] in entity_map: + entity_map[triple["object"]]["triples"].append(triple) + + return entity_map + + async def upsert_embeddings( + self, + data: list[dict[str, Any]], + table_name: str, + ) -> None: + QUERY = f""" + INSERT INTO {self._get_table_name(table_name)} (name, description, description_embedding) + VALUES ($1, $2, $3) + ON CONFLICT (name) DO UPDATE SET + description = EXCLUDED.description, + description_embedding = EXCLUDED.description_embedding + """ + return await self.execute_many(QUERY, data) + + async def 
upsert_entities(self, entities: list[Entity]) -> None: + QUERY = """ + INSERT INTO $1.$2 (category, name, description, description_embedding, extraction_ids, document_id, attributes) + VALUES ($1, $2, $3, $4, $5, $6, $7) + """ + + table_name = self._get_table_name("entities") + query = QUERY.format(table_name) + await self.execute_query(query, entities) + + async def upsert_relationships(self, relationships: list[Triple]) -> None: + QUERY = """ + INSERT INTO $1.$2 (source, target, relationship) + VALUES ($1, $2, $3) + """ + + table_name = self._get_table_name("triples") + query = QUERY.format(table_name) + await self.execute_query(query, relationships) + + async def vector_query(self, query: str, **kwargs: Any) -> Any: + + query_embedding = kwargs.get("query_embedding", None) + search_type = kwargs.get("search_type", "__Entity__") + embedding_type = kwargs.get("embedding_type", "description_embedding") + property_names = kwargs.get("property_names", ["name", "description"]) + limit = kwargs.get("limit", 10) + + table_name = "" + if search_type == "__Entity__": + table_name = "entity_embedding" + elif search_type == "__Relationship__": + table_name = "triple_raw" + elif search_type == "__Community__": + table_name = "community_report" + else: + raise ValueError(f"Invalid search type: {search_type}") + + property_names_str = ", ".join(property_names) + QUERY = f""" + SELECT {property_names_str} FROM {self._get_table_name(table_name)} ORDER BY {embedding_type} <=> $1 LIMIT $2; + """ + + results = await self.fetch_query(QUERY, (str(query_embedding), limit)) + + for result in results: + yield { + property_name: result[property_name] + for property_name in property_names + } + + async def get_all_triples(self, collection_id: UUID) -> list[Triple]: + + # getting all documents for a collection + QUERY = f""" + select distinct document_id from {self._get_table_name("document_info")} where $1 = ANY(collection_ids) + """ + document_ids = await self.fetch_query(QUERY, [collection_id]) + document_ids = [doc_id["document_id"] for doc_id in document_ids] + + QUERY = f""" + SELECT id, subject, predicate, weight, object FROM {self._get_table_name("triple_raw")} WHERE document_id = ANY($1) + """ + triples = await self.fetch_query(QUERY, [document_ids]) + return triples + + async def add_communities( + self, communities: list[tuple[int, Any]] + ) -> None: + QUERY = f""" + INSERT INTO {self._get_table_name("community")} (node, cluster, parent_cluster, level, is_final_cluster, triple_ids) + VALUES ($1, $2, $3, $4, $5, $6) + """ + await self.execute_many(QUERY, communities) + + async def add_community_report(self, community: Community) -> None: + + community.embedding = str(community.embedding) + + non_null_attrs = { + k: v for k, v in community.__dict__.items() if v is not None + } + columns = ", ".join(non_null_attrs.keys()) + placeholders = ", ".join(f"${i+1}" for i in range(len(non_null_attrs))) + + conflict_columns = ", ".join( + [ + f"{k} = EXCLUDED.{k}" + for k in non_null_attrs.keys() + ] + ) + + QUERY = f""" + INSERT INTO {self._get_table_name("community_report")} ({columns}) + VALUES ({placeholders}) + ON CONFLICT (community_number, level, collection_id) DO UPDATE SET + {conflict_columns} + """ + + await self.execute_many(QUERY, [tuple(non_null_attrs.values())]) + + async def perform_graph_clustering( + self, + collection_id: UUID, + leiden_params: dict, # TODO - Add typing for leiden_params + ) -> Tuple[int, int, set[tuple[int, Any]]]: + # TODO: implementing the clustering algorithm but now we will 
get communities at a document level and then we will get communities at a higher level. + # we will use the Leiden algorithm for this. + # but for now let's skip it and make other stuff work. + # we will need multiple tables for this to work. + """ + Leiden clustering algorithm to cluster the knowledge graph triples into communities. + + Available parameters and defaults: + max_cluster_size: int = 1000, + starting_communities: Optional[Dict[str, int]] = None, + extra_forced_iterations: int = 0, + resolution: Union[int, float] = 1.0, + randomness: Union[int, float] = 0.001, + use_modularity: bool = True, + random_seed: Optional[int] = None, + weight_attribute: str = "weight", + is_weighted: Optional[bool] = None, + weight_default: Union[int, float] = 1.0, + check_directed: bool = True, + """ + settings = {} + triples = await self.get_all_triples(collection_id) + + logger.info(f"Clustering with settings: {str(settings)}") + + G = self.nx.Graph() + for triple in triples: + G.add_edge( + triple["subject"], + triple["object"], + weight=triple["weight"], + id=triple["id"], + ) + + hierarchical_communities = await self._compute_leiden_communities( + G, leiden_params + ) + + def triple_ids(node: int) -> list[int]: + return [ + triple["id"] + for triple in triples + if triple["subject"] == node or triple["object"] == node + ] + + # upsert the communities into the database. + inputs = [ + ( + item.node, + item.cluster, + item.parent_cluster, + item.level, + item.is_final_cluster, + triple_ids(item.node), + ) + for item in hierarchical_communities + ] + + await self.add_communities(inputs) + + num_communities = len( + set([item.cluster for item in hierarchical_communities]) + ) + + return num_communities + + async def _compute_leiden_communities( + self, + graph: Any, + leiden_params: dict, # TODO - make serve-side and run-time configuration paradigm + ) -> dict[int, dict[str, int]]: + """Compute Leiden communities.""" + try: + from graspologic.partition import hierarchical_leiden + + if not leiden_params.get("random_seed"): + leiden_params["random_seed"] = 7272 + + community_mapping = hierarchical_leiden(graph, **leiden_params) + + return community_mapping + + except ImportError as e: + raise ImportError("Please install the graspologic package.") from e + + async def get_community_details(self, community_number: int): + + QUERY = f""" + SELECT level FROM {self._get_table_name("community")} WHERE cluster = $1 + LIMIT 1 + """ + level = (await self.fetch_query(QUERY, [community_number]))[0]["level"] + + QUERY = f""" + WITH node_triple_ids AS ( + + SELECT node, triple_ids + FROM {self._get_table_name("community")} + WHERE cluster = $1 + ) + SELECT DISTINCT + e.id AS id, + e.name AS name, + e.description AS description + FROM node_triple_ids nti + JOIN {self._get_table_name("entity_embedding")} e ON e.name = nti.node; + """ + entities = await self.fetch_query(QUERY, [community_number]) + + QUERY = f""" + WITH node_triple_ids AS ( + + SELECT node, triple_ids + FROM {self._get_table_name("community")} + WHERE cluster = $1 + ) + SELECT DISTINCT + t.id, t.subject, t.predicate, t.object, t.weight, t.description + FROM node_triple_ids nti + JOIN {self._get_table_name("triple_raw")} t ON t.id = ANY(nti.triple_ids); + """ + triples = await self.fetch_query(QUERY, [community_number]) + + return level, entities, triples + + async def create_vector_index(self): + # need to implement this. Just call vector db provider's create_vector_index method. + # this needs to be run periodically for every collection. 
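Because `create_vector_index` below is left as a stub, one plausible shape for it is sketched here, assuming pgvector >= 0.5 (HNSW support) and the `entity_embedding` table defined earlier; this is an illustration, not the project's implementation:

```python
# Hedged sketch of what the stubbed create_vector_index might issue.
# Cosine ops match the <=> ordering used by vector_query above.
async def create_entity_embedding_index(provider) -> None:
    table = provider._get_table_name("entity_embedding")
    query = f"""
    CREATE INDEX IF NOT EXISTS idx_entity_embedding_hnsw
    ON {table} USING hnsw (description_embedding vector_cosine_ops);
    """
    await provider.execute_query(query)
```

On older pgvector releases, an IVFFlat index (`USING ivfflat ... WITH (lists = ...)`) would be the equivalent alternative.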
+ raise NotImplementedError + + async def delete_triples(self, triple_ids: list[int]): + # need to implement this. + raise NotImplementedError + + async def get_schema(self): + # somehow get the rds from the postgres db. + raise NotImplementedError + + async def get_entities( + self, + entity_ids: list[str] | None = None, + with_description: bool = False, + ): + raise NotImplementedError + + async def get_triples(self, triple_ids: list[str] | None = None): + raise NotImplementedError + + async def structured_query(self): + raise NotImplementedError + + async def update_extraction_prompt(self): + raise NotImplementedError + + async def update_kg_search_prompt(self): + raise NotImplementedError + + async def upsert_triples(self): + raise NotImplementedError + + async def get_entity_count(self, document_id: str) -> int: + QUERY = f""" + SELECT COUNT(*) FROM {self._get_table_name("entity_raw")} WHERE document_id = $1 + """ + return (await self.fetch_query(QUERY, [document_id]))[0]["count"] diff --git a/py/core/providers/orchestration/__init__.py b/py/core/providers/orchestration/__init__.py index a78ba97ec..b41d79b00 100644 --- a/py/core/providers/orchestration/__init__.py +++ b/py/core/providers/orchestration/__init__.py @@ -1,3 +1,4 @@ from .hatchet import HatchetOrchestrationProvider +from .simple import SimpleOrchestrationProvider -__all__ = ["HatchetOrchestrationProvider"] +__all__ = ["HatchetOrchestrationProvider", "SimpleOrchestrationProvider"] diff --git a/py/core/providers/orchestration/hatchet.py b/py/core/providers/orchestration/hatchet.py index 13c89b349..41599c39f 100644 --- a/py/core/providers/orchestration/hatchet.py +++ b/py/core/providers/orchestration/hatchet.py @@ -1,25 +1,33 @@ import asyncio +import logging from typing import Any, Callable, Optional -from hatchet_sdk import Hatchet +from core.base import OrchestrationConfig, OrchestrationProvider, Workflow -from core.base import OrchestrationConfig, OrchestrationProvider +logger = logging.getLogger(__name__) class HatchetOrchestrationProvider(OrchestrationProvider): def __init__(self, config: OrchestrationConfig): super().__init__(config) + try: + from hatchet_sdk import Hatchet + except ImportError: + raise ImportError( + "Hatchet SDK not installed. Please install it using `pip install hatchet-sdk`." + ) self.orchestrator = Hatchet() self.config: OrchestrationConfig = config # for type hinting - self.worker + self.messages = {} - def register_workflow(self, workflow: Any) -> None: - if self.worker: - self.worker.register_workflow(workflow) - else: - raise ValueError( - "Worker not initialized. Call get_worker() first." 
- ) + def workflow(self, *args, **kwargs) -> Callable: + return self.orchestrator.workflow(*args, **kwargs) + + def step(self, *args, **kwargs) -> Callable: + return self.orchestrator.step(*args, **kwargs) + + def failure(self, *args, **kwargs) -> Callable: + return self.orchestrator.on_failure_step(*args, **kwargs) def get_worker(self, name: str, max_threads: Optional[int] = None) -> Any: if not max_threads: @@ -27,12 +35,6 @@ def get_worker(self, name: str, max_threads: Optional[int] = None) -> Any: self.worker = self.orchestrator.worker(name, max_threads) return self.worker - def workflow(self, *args, **kwargs) -> Callable: - return self.orchestrator.workflow(*args, **kwargs) - - def step(self, *args, **kwargs) -> Callable: - return self.orchestrator.step(*args, **kwargs) - async def start_worker(self): if not self.worker: raise ValueError( @@ -40,3 +42,53 @@ async def start_worker(self): ) asyncio.create_task(self.worker.async_start()) + + async def run_workflow( + self, + workflow_name: str, + parameters: dict, + options: dict, + *args, + **kwargs, + ) -> Any: + task_id = self.orchestrator.admin.run_workflow( + workflow_name, + parameters, + options=options, + *args, + **kwargs, + ) + return { + "task_id": str(task_id), + "message": self.messages.get( + workflow_name, "Workflow queued successfully." + ), # Return message based on workflow name + } + + def register_workflows( + self, workflow: Workflow, service: Any, messages: dict + ) -> None: + self.messages.update(messages) + + logger.info( + f"Registering workflows for {workflow} with messages {messages}." + ) + if workflow == Workflow.INGESTION: + from core.main.orchestration.hatchet.ingestion_workflow import ( + hatchet_ingestion_factory, + ) + + workflows = hatchet_ingestion_factory(self, service) + if self.worker: + for workflow in workflows.values(): + self.worker.register_workflow(workflow) + + elif workflow == Workflow.KG: + from core.main.orchestration.hatchet.kg_workflow import ( + hatchet_kg_factory, + ) + + workflows = hatchet_kg_factory(self, service) + if self.worker: + for workflow in workflows.values(): + self.worker.register_workflow(workflow) diff --git a/py/core/providers/orchestration/simple.py b/py/core/providers/orchestration/simple.py new file mode 100644 index 000000000..0331b952e --- /dev/null +++ b/py/core/providers/orchestration/simple.py @@ -0,0 +1,55 @@ +from typing import Any + +from core.base import OrchestrationConfig, OrchestrationProvider, Workflow + + +class SimpleOrchestrationProvider(OrchestrationProvider): + def __init__(self, config: OrchestrationConfig): + super().__init__(config) + self.config = config + self.messages: dict[str, str] = {} + + async def start_worker(self): + pass + + def get_worker(self, name: str, max_threads: int) -> Any: + pass + + def step(self, *args, **kwargs) -> Any: + pass + + def workflow(self, *args, **kwargs) -> Any: + pass + + def failure(self, *args, **kwargs) -> Any: + pass + + def register_workflows( + self, workflow: Workflow, service: Any, messages: dict + ) -> None: + for key, msg in messages.items(): + self.messages[key] = msg + + if workflow == Workflow.INGESTION: + from core.main.orchestration import simple_ingestion_factory + + self.ingestion_workflows = simple_ingestion_factory(service) + + elif workflow == Workflow.KG: + from core.main.orchestration.simple.kg_workflow import ( + simple_kg_factory, + ) + + self.kg_workflows = simple_kg_factory(service) + + async def run_workflow( + self, workflow_name: str, input: dict, options: dict + ) -> dict[str, 
str]: + if workflow_name in self.ingestion_workflows: + await self.ingestion_workflows[workflow_name](input.get("request")) + return {"message": self.messages[workflow_name]} + elif workflow_name in self.kg_workflows: + await self.kg_workflows[workflow_name](input.get("request")) + return {"message": self.messages[workflow_name]} + else: + raise ValueError(f"Workflow '{workflow_name}' not found.") diff --git a/py/core/providers/parsing/__init__.py b/py/core/providers/parsing/__init__.py deleted file mode 100644 index 1cc3cb0f4..000000000 --- a/py/core/providers/parsing/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -from .r2r_parsing import R2RParsingProvider # type: ignore -from .unstructured_parsing import UnstructuredParsingProvider - -__all__ = ["R2RParsingProvider", "UnstructuredParsingProvider"] diff --git a/py/core/providers/parsing/r2r_parsing.py b/py/core/providers/parsing/r2r_parsing.py deleted file mode 100644 index 9994a6274..000000000 --- a/py/core/providers/parsing/r2r_parsing.py +++ /dev/null @@ -1,104 +0,0 @@ -# type: ignore -import logging -import time -from typing import Any, AsyncGenerator, Union - -from core import parsers -from core.base import ( - AsyncParser, - Document, - DocumentExtraction, - DocumentType, - ParsingConfig, - ParsingProvider, - R2RDocumentProcessingError, - generate_id_from_label, -) - -logger = logging.getLogger(__name__) - - -class R2RParsingProvider(ParsingProvider): - AVAILABLE_PARSERS = { - DocumentType.CSV: [parsers.CSVParser, parsers.CSVParserAdvanced], - DocumentType.DOCX: [parsers.DOCXParser], - DocumentType.HTML: [parsers.HTMLParser], - DocumentType.HTM: [parsers.HTMLParser], - DocumentType.JSON: [parsers.JSONParser], - DocumentType.MD: [parsers.MDParser], - DocumentType.PDF: [parsers.PDFParser, parsers.PDFParserUnstructured], - DocumentType.PPTX: [parsers.PPTParser], - DocumentType.TXT: [parsers.TextParser], - DocumentType.XLSX: [parsers.XLSXParser, parsers.XLSXParserAdvanced], - DocumentType.GIF: [parsers.ImageParser], - DocumentType.JPEG: [parsers.ImageParser], - DocumentType.JPG: [parsers.ImageParser], - DocumentType.PNG: [parsers.ImageParser], - DocumentType.SVG: [parsers.ImageParser], - DocumentType.MP3: [parsers.AudioParser], - DocumentType.MP4: [parsers.MovieParser], - } - - IMAGE_TYPES = { - DocumentType.GIF, - DocumentType.JPG, - DocumentType.JPEG, - DocumentType.PNG, - DocumentType.SVG, - } - - def __init__(self, config: ParsingConfig): - super().__init__(config) - self.parsers: dict[DocumentType, AsyncParser] = {} - self._initialize_parsers() - - def _initialize_parsers(self): - for doc_type, parser_infos in self.AVAILABLE_PARSERS.items(): - for parser_info in parser_infos: - if ( - doc_type not in self.config.excluded_parsers - and doc_type not in self.parsers - ): - # will choose the first parser in the list - self.parsers[doc_type] = parser_info() - - # Apply overrides if specified - for parser_override in self.config.override_parsers: - if parser_name := getattr(parsers, parser_override.parser): - self.parsers[parser_override.document_type] = parser_name() - - async def parse( # type: ignore - self, file_content: bytes, document: Document - ) -> AsyncGenerator[ - Union[DocumentExtraction, R2RDocumentProcessingError], None - ]: - if document.type not in self.parsers: - yield R2RDocumentProcessingError( - document_id=document.id, - error_message=f"Parser for {document.type} not found in `R2RParsingProvider`.", - ) - else: - parser = self.parsers[document.type] - texts = await parser.ingest(file_content) - t0 = time.time() - - 
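For context on the deleted `R2RParsingProvider.parse` generator shown here: callers were expected to iterate it asynchronously and branch on whether an item is a `DocumentExtraction` or an `R2RDocumentProcessingError`. A hedged usage sketch (the helper name is illustrative, not from the codebase):

```python
# Sketch of consuming the (now removed) parse() async generator; the concrete
# Document / DocumentExtraction / error types come from core.base.
async def collect_extractions(provider, raw_bytes: bytes, document):
    extractions, errors = [], []
    async for item in provider.parse(raw_bytes, document):
        # parse() yields either extractions or per-document processing errors.
        if hasattr(item, "error_message"):
            errors.append(item)
        else:
            extractions.append(item)
    return extractions, errors
```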
iteration = 0 - async for text in texts: - yield DocumentExtraction( - id=generate_id_from_label(f"{document.id}-{iteration}"), - document_id=document.id, - user_id=document.user_id, - collection_ids=document.collection_ids, - data=text, - metadata=document.metadata, - ) - iteration += 1 - - logger.debug( - f"Parsed document with id={document.id}, title={document.metadata.get('title', None)}, " - f"user_id={document.metadata.get('user_id', None)}, metadata={document.metadata} " - f"into {iteration} extractions in t={time.time() - t0:.2f} seconds." - ) - - def get_parser_for_document_type(self, doc_type: DocumentType) -> Any: - return self.parsers.get(doc_type) diff --git a/py/core/providers/prompts/defaults/graphrag_community_reports_prompt.yaml b/py/core/providers/prompts/defaults/graphrag_community_reports_prompt.yaml index 87e8dbc69..d2a6c2fd5 100644 --- a/py/core/providers/prompts/defaults/graphrag_community_reports_prompt.yaml +++ b/py/core/providers/prompts/defaults/graphrag_community_reports_prompt.yaml @@ -9,7 +9,7 @@ graphrag_community_reports_prompt: The report should include the following sections: - - TITLE: The community's name that represents its key startups - the title should be short but specific. When possible, include representative startup names in the title. + - NAME: The community's name that represents its key startups - the name should be short but specific. When possible, include representative startup names in the name. - SUMMARY: An executive summary of the community's overall structure, how its startups are related to each other, and significant information associated with its startups. - IMPACT SEVERITY RATING: A float score between 0-10 that represents the severity of IMPACT posed by startups within the community. IMPACT is the scored importance of the community. - RATING EXPLANATION: Provide a single sentence explanation of the IMPACT severity rating. @@ -18,7 +18,7 @@ graphrag_community_reports_prompt: Return output as a well-formed JSON-formatted string with the following format: ```json {{ - "title": , + "name": , "summary": , "rating": , "rating_explanation": , @@ -70,15 +70,15 @@ graphrag_community_reports_prompt: Output: {{ - "title": "OpenAI, Stripe, and Airbnb Community", - "summary": "The community comprises key startups like OpenAI, Stripe, and Airbnb, which are interconnected through strategic partnerships and investments. These relationships highlight a robust network focused on advancing AI technologies, payment infrastructure, and online marketplaces.", + "name": "OpenAI, Stripe, and Airbnb", + "summary": "The comprises key startups like OpenAI, Stripe, and Airbnb, which are interconnected through strategic partnerships and investments. These relationships highlight a robust network focused on advancing AI technologies, payment infrastructure, and online marketplaces.", "rating": 7.5, "rating_explanation": "The impact severity rating is high due to the significant influence these startups have on technology, finance, and the global economy.", "findings": [ - "OpenAI stands out as a leader in artificial intelligence research and deployment within the Y Combinator community. Its partnerships with companies like Stripe and Airbnb demonstrate its integral role in integrating AI solutions across various industries. OpenAI's influence is further amplified by its involvement in key projects that drive innovation and efficiency. 
[Data: Entities (101), Relationships (201, 203, 204, +more)]", + "OpenAI stands out as a leader in artificial intelligence research and deployment within YCombinator. Its partnerships with companies like Stripe and Airbnb demonstrate its integral role in integrating AI solutions across various industries. OpenAI's influence is further amplified by its involvement in key projects that drive innovation and efficiency. [Data: Entities (101), Relationships (201, 203, 204, +more)]", "Stripe serves as a critical financial infrastructure provider, facilitating payment processing for startups like Airbnb and partnering with OpenAI to enhance payment solutions. Its strategic investments and collaborations underscore its importance in the Y Combinator ecosystem, enabling seamless financial transactions and supporting startup growth. [Data: Entities (102), Relationships (201, 202, 204, 205, +more)]", "Airbnb leverages OpenAI's artificial intelligence tools to enhance its customer service capabilities, showcasing the practical application of AI in improving user experience. This integration highlights Airbnb's commitment to innovation and efficiency, positioning it as a forward-thinking leader within the community. [Data: Entities (103), Relationships (203, 205, +more)]", - "Stripe's investment in OpenAI's latest funding round illustrates the strategic financial moves that drive growth and innovation within the Y Combinator community. Such investments not only strengthen partnerships but also foster an environment of collaboration and shared success among startups. [Data: Relationships (204)]", + "Stripe's investment in OpenAI's latest funding round illustrates the strategic financial moves that drive growth and innovation. Such investments not only strengthen partnerships but also foster an environment of collaboration and shared success among startups. [Data: Relationships (204)]", "The collaboration between Airbnb and Stripe to expand global payment options demonstrates a commitment to scalability and accessibility in the Y Combinator ecosystem. This initiative is pivotal in enabling startups to reach a broader international market, thereby increasing their impact and revenue potential. [Data: Relationships (205)]" ] }} diff --git a/py/core/providers/prompts/defaults/graphrag_triplet_extraction_zero_shot.yaml b/py/core/providers/prompts/defaults/graphrag_triples_extraction_few_shot.yaml similarity index 99% rename from py/core/providers/prompts/defaults/graphrag_triplet_extraction_zero_shot.yaml rename to py/core/providers/prompts/defaults/graphrag_triples_extraction_few_shot.yaml index e4675268a..6bfb1bb26 100644 --- a/py/core/providers/prompts/defaults/graphrag_triplet_extraction_zero_shot.yaml +++ b/py/core/providers/prompts/defaults/graphrag_triples_extraction_few_shot.yaml @@ -1,4 +1,4 @@ -graphrag_triplet_extraction_zero_shot: +graphrag_triples_extraction_few_shot: template: > -Goal- Given a text document, identify all entities and their entity types from the text and all relationships among the identified entities. diff --git a/py/core/providers/prompts/defaults/kg_search.yaml b/py/core/providers/prompts/defaults/kg_search.yaml deleted file mode 100644 index 57ae40bbf..000000000 --- a/py/core/providers/prompts/defaults/kg_search.yaml +++ /dev/null @@ -1,86 +0,0 @@ -kg_search: - template: | - **System Message:** You are an AI agent capable of generating Cypher queries to interact with a Neo4j knowledge graph. 
The knowledge graph contains information about organizations, people, locations, and their relationships, such as founders of companies, locations of companies, and products associated with companies. - - **Instructions:** When a user asks a question, you will generate a Cypher query to retrieve the relevant information from the Neo4j knowledge graph. Later, you will be given a schema which specifies the available relationships to help you construct the query. First, review the examples provided to understand the expected format of the queries. - - ### Example(s) - User Questions and Cypher Queries for an Academic Knowledge Graph - - **User Question:** "List all courses available in the computer science department." - - **Generated Cypher Query:** - ```cypher - MATCH (c:COURSE)-[:OFFERED_BY]->(d:DEPARTMENT) - WHERE d.name CONTAINS 'Computer Science' - RETURN c.id AS Course, d.name AS Department - ORDER BY c.id; - ``` - - **User Question:** "Retrieve all courses taught by professors who have published research on natural language processing." - - **Generated Cypher Query:** - ```cypher - MATCH (pr:PERSON)-[:PUBLISHED]->(p:PAPER) - MATCH (p)-[:TOPIC]->(t:TOPIC) - WHERE t.name CONTAINS 'Natural Language Processing' - MATCH (c:COURSE)-[:TAUGHT_BY]->(pr) - RETURN DISTINCT c.id AS Course, pr.name AS Professor, t.name AS Topic - ORDER BY c.id; - ``` - - ### Example(s) - User Questions and Cypher Queries for an Historical Events and Figures - - **User Question:** "List all battles that occurred in the 19th century and the generals who participated in them." - - **Generated Cypher Query:** - ```cypher - MATCH (b:EVENT)-[:HAPPENED_AT]->(d:DATE) - WHERE d.year >= 1800 AND d.year < 1900 AND b.type CONTAINS 'Battle' - MATCH (g:PERSON)-[:PARTICIPATED_IN]->(b) - RETURN b.name AS Battle, d.year AS Year, g.name AS General - ORDER BY d.year, b.name, g.name; - ``` - - **User Question:** "Find all treaties signed in Paris and the countries involved." - - **Generated Cypher Query:** - ```cypher - MATCH (t:EVENT)-[:HAPPENED_AT]->(l:LOCATION) - WHERE l.name CONTAINS 'Paris' AND t.type CONTAINS 'Treaty' - MATCH (c:ORGANIZATION)-[:SIGNED]->(t) - RETURN t.name AS Treaty, l.name AS Location, c.name AS Country - ORDER BY t.name, c.name; - ``` - - Now, you will be provided with a schema for the entities and relationships in the Neo4j knowledge graph. Use this schema to construct Cypher queries based on user questions. 
- - - **Entities:** - - `ORGANIZATION` (e.g.: `COMPANY`, `SCHOOL`, `NON-PROFIT`, `OTHER`) - - `COMPANY` - - `LOCATION` (e.g.: `CITY`, `STATE`, `COUNTRY`, `OTHER`) - - `DATE` (e.g.: `YEAR`, `MONTH`, `DAY`, `BATCH`, `OTHER`) - - `QUANTITY` - - `EVENT` (e.g.: `INCORPORATION`, `FUNDING_ROUND`, `ACQUISITION`, `LAUNCH`, `OTHER`) - - - **Relationships:** - - `FOUNDED_BY` - - `HEADQUARTERED_IN` - - `OPERATES_IN` - - `RAISED` - - `ACQUIRED_BY` - - `HAS_EMPLOYEE_COUNT` - - `GENERATED_REVENUE` - - `LISTED_ON` - - `INCORPORATED` - - `HAS_DIVISION` - - `ANNOUNCED` - - `HAS_QUANTITY` - - Use the referenced examples and schema to help you construct an appropriate Cypher query based on the following question: - - **User Question:** {input} - - **Generated Cypher Query:** - - input_types: - input: str diff --git a/py/core/providers/prompts/defaults/kg_search_with_spec.yaml b/py/core/providers/prompts/defaults/kg_search_with_spec.yaml deleted file mode 100644 index 8500dade0..000000000 --- a/py/core/providers/prompts/defaults/kg_search_with_spec.yaml +++ /dev/null @@ -1,137 +0,0 @@ -kg_search_with_spec: - template: > - **System Message:** - - - You are an AI agent capable of generating Cypher queries to interact with a Neo4j knowledge graph. The knowledge graph contains information about organizations, people, locations, and their relationships, such as founders of companies, locations of companies, and products associated with companies. - - - **Instructions:** - - - When a user asks a question, you will generate a Cypher query to retrieve the relevant information from the Neo4j knowledge graph. Later, you will be given a schema which specifies the available relationships to help you construct the query. First, review the examples provided to understand the expected format of the queries. - - - ### Example(s) - User Questions and Cypher Queries for an Academic Knowledge Graph - - - **User Question:** - - "List all courses available in the computer science department." - - - **Generated Cypher Query:** - - ```cypher - - MATCH (c:COURSE)-[:OFFERED_BY]->(d:DEPARTMENT) - - WHERE d.name CONTAINS 'Computer Science' - - RETURN c.id AS Course, d.name AS Department - - ORDER BY c.id; - - ``` - - - **User Question:** - - "Retrieve all courses taught by professors who have published research on natural language processing." - - - **Generated Cypher Query:** - - ```cypher - - MATCH (pr:PERSON)-[:PUBLISHED]->(p:PAPER) - - MATCH (p)-[:TOPIC]->(t:TOPIC) - - WHERE t.name CONTAINS 'Natural Language Processing' - - MATCH (c:COURSE)-[:TAUGHT_BY]->(pr) - - RETURN DISTINCT c.id AS Course, pr.name AS Professor, t.name AS Topic - - ORDER BY c.id; - - ``` - - - - ### Example(s) - User Questions and Cypher Queries for an Historical Events and Figures - - - **User Question:** - - "List all battles that occurred in the 19th century and the generals who participated in them." - - - **Generated Cypher Query:** - - ```cypher - - MATCH (b:EVENT)-[:HAPPENED_AT]->(d:DATE) - - WHERE d.year >= 1800 AND d.year < 1900 AND b.type CONTAINS 'Battle' - - MATCH (g:PERSON)-[:PARTICIPATED_IN]->(b) - - RETURN b.name AS Battle, d.year AS Year, g.name AS General - - ORDER BY d.year, b.name, g.name; - - ``` - - - **User Question:** - - "Find all treaties signed in Paris and the countries involved." 
- - - - **Generated Cypher Query:** - - ```cypher - - MATCH (t:EVENT)-[:HAPPENED_AT]->(l:LOCATION) - - WHERE l.name CONTAINS 'Paris' AND t.type CONTAINS 'Treaty' - - MATCH (c:ORGANIZATION)-[:SIGNED]->(t) - - RETURN t.name AS Treaty, l.name AS Location, c.name AS Country - - ORDER BY t.name, c.name; - - ``` - - - - Now, you will be provided with a schema for the entities and relationships in the Neo4j knowledge graph. Use this schema to construct Cypher queries based on user questions. - - - - **Entities:** - - {entity_types} - - - - **Relationships:** - - {relations} - - - Use the referenced examples and schema to help you construct an appropriate Cypher query based on the following question: - - - **User Question:** - - {input} - - - **Generated Cypher Query:** - input_types: - entity_types: str - relations: str - input: str diff --git a/py/core/providers/prompts/r2r_prompts.py b/py/core/providers/prompts/r2r_prompts.py index 0df76cfb8..170356493 100644 --- a/py/core/providers/prompts/r2r_prompts.py +++ b/py/core/providers/prompts/r2r_prompts.py @@ -53,7 +53,7 @@ async def initialize(self): raise def _get_table_name(self, base_name: str) -> str: - return base_name + return self.db_provider._get_table_name(base_name) async def create_table(self): query = f""" @@ -100,9 +100,9 @@ async def fetch_query( # FIXME: We really should be taking advantage of Pydantic models here # so that we don't have to json.dumps/loads all the time async def _load_prompts_from_database(self): - query = """ + query = f""" SELECT prompt_id, name, template, input_types, created_at, updated_at - FROM prompts + FROM {self._get_table_name("prompts")} """ try: results = await self.fetch_query(query) diff --git a/py/core/telemetry/telemetry_decorator.py b/py/core/telemetry/telemetry_decorator.py index ca68e0ae7..85bbc2682 100644 --- a/py/core/telemetry/telemetry_decorator.py +++ b/py/core/telemetry/telemetry_decorator.py @@ -5,6 +5,7 @@ from concurrent.futures import ThreadPoolExecutor from functools import wraps from pathlib import Path +from typing import Optional import toml @@ -70,12 +71,19 @@ def get_project_metadata(): # Create a thread pool with a fixed number of workers -telemetry_thread_pool = ThreadPoolExecutor(max_workers=2) +telemetry_thread_pool: Optional[ThreadPoolExecutor] = None + +if os.getenv("TELEMETRY_ENABLED", "true").lower() in ("true", "1"): + telemetry_thread_pool = ThreadPoolExecutor(max_workers=2) def telemetry_event(event_name): def decorator(func): def log_telemetry(event_type, user_id, metadata, error_message=None): + + if telemetry_thread_pool is None: + return + try: if event_type == "feature": telemetry_client.capture( @@ -99,6 +107,9 @@ def log_telemetry(event_type, user_id, metadata, error_message=None): @wraps(func) async def async_wrapper(*args, **kwargs): + if telemetry_thread_pool is None: + return await func(*args, **kwargs) + metadata = get_project_metadata() user_id = product_telemetry_client.user_id diff --git a/py/poetry.lock b/py/poetry.lock index 2ee565144..488c2c690 100644 --- a/py/poetry.lock +++ b/py/poetry.lock @@ -1,114 +1,114 @@ -# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.8.2 and should not be changed by hand. 
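Stepping back to the `telemetry_decorator` change above: the opt-out it introduces comes down to creating the worker pool only when `TELEMETRY_ENABLED` is truthy, so disabled telemetry costs nothing on the request path. A condensed sketch of that pattern:

```python
# Sketch of the opt-out gate introduced in telemetry_decorator.py: no thread
# pool (and therefore no capture work) exists when telemetry is disabled.
import os
from concurrent.futures import ThreadPoolExecutor
from typing import Optional

telemetry_thread_pool: Optional[ThreadPoolExecutor] = None
if os.getenv("TELEMETRY_ENABLED", "true").lower() in ("true", "1"):
    telemetry_thread_pool = ThreadPoolExecutor(max_workers=2)


def maybe_submit(fn, *args):
    # Silently drop the event when telemetry is off; otherwise run it
    # off the hot path on the shared pool.
    if telemetry_thread_pool is None:
        return None
    return telemetry_thread_pool.submit(fn, *args)
```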
[[package]] name = "aiohappyeyeballs" -version = "2.4.0" +version = "2.4.3" description = "Happy Eyeballs for asyncio" optional = true python-versions = ">=3.8" files = [ - {file = "aiohappyeyeballs-2.4.0-py3-none-any.whl", hash = "sha256:7ce92076e249169a13c2f49320d1967425eaf1f407522d707d59cac7628d62bd"}, - {file = "aiohappyeyeballs-2.4.0.tar.gz", hash = "sha256:55a1714f084e63d49639800f95716da97a1f173d46a16dfcfda0016abb93b6b2"}, + {file = "aiohappyeyeballs-2.4.3-py3-none-any.whl", hash = "sha256:8a7a83727b2756f394ab2895ea0765a0a8c475e3c71e98d43d76f22b4b435572"}, + {file = "aiohappyeyeballs-2.4.3.tar.gz", hash = "sha256:75cf88a15106a5002a8eb1dab212525c00d1f4c0fa96e551c9fbe6f09a621586"}, ] [[package]] name = "aiohttp" -version = "3.10.6" +version = "3.10.8" description = "Async http client/server framework (asyncio)" optional = true python-versions = ">=3.8" files = [ - {file = "aiohttp-3.10.6-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:682836fc672972cc3101cc9e30d49c5f7e8f1d010478d46119fe725a4545acfd"}, - {file = "aiohttp-3.10.6-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:289fa8a20018d0d5aa9e4b35d899bd51bcb80f0d5f365d9a23e30dac3b79159b"}, - {file = "aiohttp-3.10.6-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:8617c96a20dd57e7e9d398ff9d04f3d11c4d28b1767273a5b1a018ada5a654d3"}, - {file = "aiohttp-3.10.6-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bdbeff1b062751c2a2a55b171f7050fb7073633c699299d042e962aacdbe1a07"}, - {file = "aiohttp-3.10.6-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7ea35d849cdd4a9268f910bff4497baebbc1aa3f2f625fd8ccd9ac99c860c621"}, - {file = "aiohttp-3.10.6-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:473961b3252f3b949bb84873d6e268fb6d8aa0ccc6eb7404fa58c76a326bb8e1"}, - {file = "aiohttp-3.10.6-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3d2665c5df629eb2f981dab244c01bfa6cdc185f4ffa026639286c4d56fafb54"}, - {file = "aiohttp-3.10.6-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:25d92f794f1332f656e3765841fc2b7ad5c26c3f3d01e8949eeb3495691cf9f4"}, - {file = "aiohttp-3.10.6-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:9bd6b2033993d5ae80883bb29b83fb2b432270bbe067c2f53cc73bb57c46065f"}, - {file = "aiohttp-3.10.6-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:d7f408c43f5e75ea1edc152fb375e8f46ef916f545fb66d4aebcbcfad05e2796"}, - {file = "aiohttp-3.10.6-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:cf8b8560aa965f87bf9c13bf9fed7025993a155ca0ce8422da74bf46d18c2f5f"}, - {file = "aiohttp-3.10.6-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:14477c4e52e2f17437b99893fd220ffe7d7ee41df5ebf931a92b8ca82e6fd094"}, - {file = "aiohttp-3.10.6-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:fb138fbf9f53928e779650f5ed26d0ea1ed8b2cab67f0ea5d63afa09fdc07593"}, - {file = "aiohttp-3.10.6-cp310-cp310-win32.whl", hash = "sha256:9843d683b8756971797be171ead21511d2215a2d6e3c899c6e3107fbbe826791"}, - {file = "aiohttp-3.10.6-cp310-cp310-win_amd64.whl", hash = "sha256:f8b8e49fe02f744d38352daca1dbef462c3874900bd8166516f6ea8e82b5aacf"}, - {file = "aiohttp-3.10.6-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:f52e54fd776ad0da1006708762213b079b154644db54bcfc62f06eaa5b896402"}, - {file = "aiohttp-3.10.6-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:995ab1a238fd0d19dc65f2d222e5eb064e409665c6426a3e51d5101c1979ee84"}, - {file = 
"aiohttp-3.10.6-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:0749c4d5a08a802dd66ecdf59b2df4d76b900004017468a7bb736c3b5a3dd902"}, - {file = "aiohttp-3.10.6-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e05b39158f2af0e2438cc2075cfc271f4ace0c3cc4a81ec95b27a0432e161951"}, - {file = "aiohttp-3.10.6-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a9f196c970db2dcde4f24317e06615363349dc357cf4d7a3b0716c20ac6d7bcd"}, - {file = "aiohttp-3.10.6-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:47647c8af04a70e07a2462931b0eba63146a13affa697afb4ecbab9d03a480ce"}, - {file = "aiohttp-3.10.6-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:669c0efe7e99f6d94d63274c06344bd0e9c8daf184ce5602a29bc39e00a18720"}, - {file = "aiohttp-3.10.6-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c9721cdd83a994225352ca84cd537760d41a9da3c0eacb3ff534747ab8fba6d0"}, - {file = "aiohttp-3.10.6-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:0b82c8ebed66ce182893e7c0b6b60ba2ace45b1df104feb52380edae266a4850"}, - {file = "aiohttp-3.10.6-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:b169f8e755e541b72e714b89a831b315bbe70db44e33fead28516c9e13d5f931"}, - {file = "aiohttp-3.10.6-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:0be3115753baf8b4153e64f9aa7bf6c0c64af57979aa900c31f496301b374570"}, - {file = "aiohttp-3.10.6-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:e1f80cd17d81a404b6e70ef22bfe1870bafc511728397634ad5f5efc8698df56"}, - {file = "aiohttp-3.10.6-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:6419728b08fb6380c66a470d2319cafcec554c81780e2114b7e150329b9a9a7f"}, - {file = "aiohttp-3.10.6-cp311-cp311-win32.whl", hash = "sha256:bd294dcdc1afdc510bb51d35444003f14e327572877d016d576ac3b9a5888a27"}, - {file = "aiohttp-3.10.6-cp311-cp311-win_amd64.whl", hash = "sha256:bf861da9a43d282d6dd9dcd64c23a0fccf2c5aa5cd7c32024513c8c79fb69de3"}, - {file = "aiohttp-3.10.6-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:2708baccdc62f4b1251e59c2aac725936a900081f079b88843dabcab0feeeb27"}, - {file = "aiohttp-3.10.6-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:7475da7a5e2ccf1a1c86c8fee241e277f4874c96564d06f726d8df8e77683ef7"}, - {file = "aiohttp-3.10.6-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:02108326574ff60267b7b35b17ac5c0bbd0008ccb942ce4c48b657bb90f0b8aa"}, - {file = "aiohttp-3.10.6-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:029a019627b37fa9eac5c75cc54a6bb722c4ebbf5a54d8c8c0fb4dd8facf2702"}, - {file = "aiohttp-3.10.6-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8a637d387db6fdad95e293fab5433b775fd104ae6348d2388beaaa60d08b38c4"}, - {file = "aiohttp-3.10.6-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:dc1a16f3fc1944c61290d33c88dc3f09ba62d159b284c38c5331868425aca426"}, - {file = "aiohttp-3.10.6-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:81b292f37969f9cc54f4643f0be7dacabf3612b3b4a65413661cf6c350226787"}, - {file = "aiohttp-3.10.6-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0754690a3a26e819173a34093798c155bafb21c3c640bff13be1afa1e9d421f9"}, - {file = "aiohttp-3.10.6-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:164ecd32e65467d86843dbb121a6666c3deb23b460e3f8aefdcaacae79eb718a"}, - {file = 
"aiohttp-3.10.6-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:438c5863feb761f7ca3270d48c292c334814459f61cc12bab5ba5b702d7c9e56"}, - {file = "aiohttp-3.10.6-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:ba18573bb1de1063d222f41de64a0d3741223982dcea863b3f74646faf618ec7"}, - {file = "aiohttp-3.10.6-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:c82a94ddec996413a905f622f3da02c4359952aab8d817c01cf9915419525e95"}, - {file = "aiohttp-3.10.6-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:92351aa5363fc3c1f872ca763f86730ced32b01607f0c9662b1fa711087968d0"}, - {file = "aiohttp-3.10.6-cp312-cp312-win32.whl", hash = "sha256:3e15e33bfc73fa97c228f72e05e8795e163a693fd5323549f49367c76a6e5883"}, - {file = "aiohttp-3.10.6-cp312-cp312-win_amd64.whl", hash = "sha256:fe517113fe4d35d9072b826c3e147d63c5f808ca8167d450b4f96c520c8a1d8d"}, - {file = "aiohttp-3.10.6-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:482f74057ea13d387a7549d7a7ecb60e45146d15f3e58a2d93a0ad2d5a8457cd"}, - {file = "aiohttp-3.10.6-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:03fa40d1450ee5196e843315ddf74a51afc7e83d489dbfc380eecefea74158b1"}, - {file = "aiohttp-3.10.6-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:1e52e59ed5f4cc3a3acfe2a610f8891f216f486de54d95d6600a2c9ba1581f4d"}, - {file = "aiohttp-3.10.6-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d2b3935a22c9e41a8000d90588bed96cf395ef572dbb409be44c6219c61d900d"}, - {file = "aiohttp-3.10.6-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4bef1480ee50f75abcfcb4b11c12de1005968ca9d0172aec4a5057ba9f2b644f"}, - {file = "aiohttp-3.10.6-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:671745ea7db19693ce867359d503772177f0b20fa8f6ee1e74e00449f4c4151d"}, - {file = "aiohttp-3.10.6-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6b50b367308ca8c12e0b50cba5773bc9abe64c428d3fd2bbf5cd25aab37c77bf"}, - {file = "aiohttp-3.10.6-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6a504d7cdb431a777d05a124fd0b21efb94498efa743103ea01b1e3136d2e4fb"}, - {file = "aiohttp-3.10.6-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:66bc81361131763660b969132a22edce2c4d184978ba39614e8f8f95db5c95f8"}, - {file = "aiohttp-3.10.6-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:27cf19a38506e2e9f12fc17e55f118f04897b0a78537055d93a9de4bf3022e3d"}, - {file = "aiohttp-3.10.6-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:3468b39f977a11271517c6925b226720e148311039a380cc9117b1e2258a721f"}, - {file = "aiohttp-3.10.6-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:9d26da22a793dfd424be1050712a70c0afd96345245c29aced1e35dbace03413"}, - {file = "aiohttp-3.10.6-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:844d48ff9173d0b941abed8b2ea6a412f82b56d9ab1edb918c74000c15839362"}, - {file = "aiohttp-3.10.6-cp313-cp313-win32.whl", hash = "sha256:2dd56e3c43660ed3bea67fd4c5025f1ac1f9ecf6f0b991a6e5efe2e678c490c5"}, - {file = "aiohttp-3.10.6-cp313-cp313-win_amd64.whl", hash = "sha256:c91781d969fbced1993537f45efe1213bd6fccb4b37bfae2a026e20d6fbed206"}, - {file = "aiohttp-3.10.6-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:4407a80bca3e694f2d2a523058e20e1f9f98a416619e04f6dc09dc910352ac8b"}, - {file = "aiohttp-3.10.6-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:1cb045ec5961f51af3e2c08cd6fe523f07cc6e345033adee711c49b7b91bb954"}, - {file = "aiohttp-3.10.6-cp38-cp38-macosx_11_0_arm64.whl", hash = 
"sha256:4fabdcdc781a36b8fd7b2ca9dea8172f29a99e11d00ca0f83ffeb50958da84a1"}, - {file = "aiohttp-3.10.6-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:79a9f42efcc2681790595ab3d03c0e52d01edc23a0973ea09f0dc8d295e12b8e"}, - {file = "aiohttp-3.10.6-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:cca776a440795db437d82c07455761c85bbcf3956221c3c23b8c93176c278ce7"}, - {file = "aiohttp-3.10.6-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5582de171f0898139cf51dd9fcdc79b848e28d9abd68e837f0803fc9f30807b1"}, - {file = "aiohttp-3.10.6-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:370e2d47575c53c817ee42a18acc34aad8da4dbdaac0a6c836d58878955f1477"}, - {file = "aiohttp-3.10.6-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:444d1704e2af6b30766debed9be8a795958029e552fe77551355badb1944012c"}, - {file = "aiohttp-3.10.6-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:40271a2a375812967401c9ca8077de9368e09a43a964f4dce0ff603301ec9358"}, - {file = "aiohttp-3.10.6-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:f3af26f86863fad12e25395805bb0babbd49d512806af91ec9708a272b696248"}, - {file = "aiohttp-3.10.6-cp38-cp38-musllinux_1_2_ppc64le.whl", hash = "sha256:4752df44df48fd42b80f51d6a97553b482cda1274d9dc5df214a3a1aa5d8f018"}, - {file = "aiohttp-3.10.6-cp38-cp38-musllinux_1_2_s390x.whl", hash = "sha256:2cd5290ab66cfca2f90045db2cc6434c1f4f9fbf97c9f1c316e785033782e7d2"}, - {file = "aiohttp-3.10.6-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:3427031064b0d5c95647e6369c4aa3c556402f324a3e18107cb09517abe5f962"}, - {file = "aiohttp-3.10.6-cp38-cp38-win32.whl", hash = "sha256:614fc21e86adc28e4165a6391f851a6da6e9cbd7bb232d0df7718b453a89ee98"}, - {file = "aiohttp-3.10.6-cp38-cp38-win_amd64.whl", hash = "sha256:58c5d7318a136a3874c78717dd6de57519bc64f6363c5827c2b1cb775bea71dd"}, - {file = "aiohttp-3.10.6-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:5db26bbca8e7968c4c977a0c640e0b9ce7224e1f4dcafa57870dc6ee28e27de6"}, - {file = "aiohttp-3.10.6-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:3fb4216e3ec0dbc01db5ba802f02ed78ad8f07121be54eb9e918448cc3f61b7c"}, - {file = "aiohttp-3.10.6-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:a976ef488f26e224079deb3d424f29144c6d5ba4ded313198169a8af8f47fb82"}, - {file = "aiohttp-3.10.6-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6a86610174de8a85a920e956e2d4f9945e7da89f29a00e95ac62a4a414c4ef4e"}, - {file = "aiohttp-3.10.6-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:217791c6a399cc4f2e6577bb44344cba1f5714a2aebf6a0bea04cfa956658284"}, - {file = "aiohttp-3.10.6-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ba3662d41abe2eab0eeec7ee56f33ef4e0b34858f38abf24377687f9e1fb00a5"}, - {file = "aiohttp-3.10.6-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d4dfa5ad4bce9ca30a76117fbaa1c1decf41ebb6c18a4e098df44298941566f9"}, - {file = "aiohttp-3.10.6-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e0009258e97502936d3bd5bf2ced15769629097d0abb81e6495fba1047824fe0"}, - {file = "aiohttp-3.10.6-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:0a75d5c9fb4f06c41d029ae70ad943c3a844c40c0a769d12be4b99b04f473d3d"}, - {file = "aiohttp-3.10.6-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:8198b7c002aae2b40b2d16bfe724b9a90bcbc9b78b2566fc96131ef4e382574d"}, - {file = 
"aiohttp-3.10.6-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:4611db8c907f90fe86be112efdc2398cd7b4c8eeded5a4f0314b70fdea8feab0"}, - {file = "aiohttp-3.10.6-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:ff99ae06eef85c7a565854826114ced72765832ee16c7e3e766c5e4c5b98d20e"}, - {file = "aiohttp-3.10.6-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:7641920bdcc7cd2d3ddfb8bb9133a6c9536b09dbd49490b79e125180b2d25b93"}, - {file = "aiohttp-3.10.6-cp39-cp39-win32.whl", hash = "sha256:e2e7d5591ea868d5ec82b90bbeb366a198715672841d46281b623e23079593db"}, - {file = "aiohttp-3.10.6-cp39-cp39-win_amd64.whl", hash = "sha256:b504c08c45623bf5c7ca41be380156d925f00199b3970efd758aef4a77645feb"}, - {file = "aiohttp-3.10.6.tar.gz", hash = "sha256:d2578ef941be0c2ba58f6f421a703527d08427237ed45ecb091fed6f83305336"}, + {file = "aiohttp-3.10.8-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:a1ba7bc139592339ddeb62c06486d0fa0f4ca61216e14137a40d626c81faf10c"}, + {file = "aiohttp-3.10.8-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:85e4d7bd05d18e4b348441e7584c681eff646e3bf38f68b2626807f3add21aa2"}, + {file = "aiohttp-3.10.8-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:69de056022e7abf69cb9fec795515973cc3eeaff51e3ea8d72a77aa933a91c52"}, + {file = "aiohttp-3.10.8-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ee3587506898d4a404b33bd19689286ccf226c3d44d7a73670c8498cd688e42c"}, + {file = "aiohttp-3.10.8-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:fe285a697c851734285369614443451462ce78aac2b77db23567507484b1dc6f"}, + {file = "aiohttp-3.10.8-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:10c7932337285a6bfa3a5fe1fd4da90b66ebfd9d0cbd1544402e1202eb9a8c3e"}, + {file = "aiohttp-3.10.8-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cd9716ef0224fe0d0336997eb242f40619f9f8c5c57e66b525a1ebf9f1d8cebe"}, + {file = "aiohttp-3.10.8-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ceacea31f8a55cdba02bc72c93eb2e1b77160e91f8abd605969c168502fd71eb"}, + {file = "aiohttp-3.10.8-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:9721554bfa9e15f6e462da304374c2f1baede3cb06008c36c47fa37ea32f1dc4"}, + {file = "aiohttp-3.10.8-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:22cdeb684d8552490dd2697a5138c4ecb46f844892df437aaf94f7eea99af879"}, + {file = "aiohttp-3.10.8-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:e56bb7e31c4bc79956b866163170bc89fd619e0581ce813330d4ea46921a4881"}, + {file = "aiohttp-3.10.8-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:3a95d2686bc4794d66bd8de654e41b5339fab542b2bca9238aa63ed5f4f2ce82"}, + {file = "aiohttp-3.10.8-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:d82404a0e7b10e0d7f022cf44031b78af8a4f99bd01561ac68f7c24772fed021"}, + {file = "aiohttp-3.10.8-cp310-cp310-win32.whl", hash = "sha256:4e10b04542d27e21538e670156e88766543692a0a883f243ba8fad9ddea82e53"}, + {file = "aiohttp-3.10.8-cp310-cp310-win_amd64.whl", hash = "sha256:680dbcff5adc7f696ccf8bf671d38366a1f620b5616a1d333d0cb33956065395"}, + {file = "aiohttp-3.10.8-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:33a68011a38020ed4ff41ae0dbf4a96a202562ecf2024bdd8f65385f1d07f6ef"}, + {file = "aiohttp-3.10.8-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6c7efa6616a95e3bd73b8a69691012d2ef1f95f9ea0189e42f338fae080c2fc6"}, + {file = "aiohttp-3.10.8-cp311-cp311-macosx_11_0_arm64.whl", hash = 
"sha256:ddb9b9764cfb4459acf01c02d2a59d3e5066b06a846a364fd1749aa168efa2be"}, + {file = "aiohttp-3.10.8-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3c7f270f4ca92760f98a42c45a58674fff488e23b144ec80b1cc6fa2effed377"}, + {file = "aiohttp-3.10.8-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:6984dda9d79064361ab58d03f6c1e793ea845c6cfa89ffe1a7b9bb400dfd56bd"}, + {file = "aiohttp-3.10.8-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3f6d47e392c27206701565c8df4cac6ebed28fdf6dcaea5b1eea7a4631d8e6db"}, + {file = "aiohttp-3.10.8-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a72f89aea712c619b2ca32c6f4335c77125ede27530ad9705f4f349357833695"}, + {file = "aiohttp-3.10.8-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c36074b26f3263879ba8e4dbd33db2b79874a3392f403a70b772701363148b9f"}, + {file = "aiohttp-3.10.8-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:e32148b4a745e70a255a1d44b5664de1f2e24fcefb98a75b60c83b9e260ddb5b"}, + {file = "aiohttp-3.10.8-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:5aa1a073514cf59c81ad49a4ed9b5d72b2433638cd53160fd2f3a9cfa94718db"}, + {file = "aiohttp-3.10.8-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:d3a79200a9d5e621c4623081ddb25380b713c8cf5233cd11c1aabad990bb9381"}, + {file = "aiohttp-3.10.8-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:e45fdfcb2d5bcad83373e4808825b7512953146d147488114575780640665027"}, + {file = "aiohttp-3.10.8-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:f78e2a78432c537ae876a93013b7bc0027ba5b93ad7b3463624c4b6906489332"}, + {file = "aiohttp-3.10.8-cp311-cp311-win32.whl", hash = "sha256:f8179855a4e4f3b931cb1764ec87673d3fbdcca2af496c8d30567d7b034a13db"}, + {file = "aiohttp-3.10.8-cp311-cp311-win_amd64.whl", hash = "sha256:ef9b484604af05ca745b6108ca1aaa22ae1919037ae4f93aaf9a37ba42e0b835"}, + {file = "aiohttp-3.10.8-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:ab2d6523575fc98896c80f49ac99e849c0b0e69cc80bf864eed6af2ae728a52b"}, + {file = "aiohttp-3.10.8-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:f5d5d5401744dda50b943d8764508d0e60cc2d3305ac1e6420935861a9d544bc"}, + {file = "aiohttp-3.10.8-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:de23085cf90911600ace512e909114385026b16324fa203cc74c81f21fd3276a"}, + {file = "aiohttp-3.10.8-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4618f0d2bf523043866a9ff8458900d8eb0a6d4018f251dae98e5f1fb699f3a8"}, + {file = "aiohttp-3.10.8-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:21c1925541ca84f7b5e0df361c0a813a7d6a56d3b0030ebd4b220b8d232015f9"}, + {file = "aiohttp-3.10.8-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:497a7d20caea8855c5429db3cdb829385467217d7feb86952a6107e033e031b9"}, + {file = "aiohttp-3.10.8-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c887019dbcb4af58a091a45ccf376fffe800b5531b45c1efccda4bedf87747ea"}, + {file = "aiohttp-3.10.8-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:40d2d719c3c36a7a65ed26400e2b45b2d9ed7edf498f4df38b2ae130f25a0d01"}, + {file = "aiohttp-3.10.8-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:57359785f27394a8bcab0da6dcd46706d087dfebf59a8d0ad2e64a4bc2f6f94f"}, + {file = "aiohttp-3.10.8-cp312-cp312-musllinux_1_2_i686.whl", hash = 
"sha256:a961ee6f2cdd1a2be4735333ab284691180d40bad48f97bb598841bfcbfb94ec"}, + {file = "aiohttp-3.10.8-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:fe3d79d6af839ffa46fdc5d2cf34295390894471e9875050eafa584cb781508d"}, + {file = "aiohttp-3.10.8-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:9a281cba03bdaa341c70b7551b2256a88d45eead149f48b75a96d41128c240b3"}, + {file = "aiohttp-3.10.8-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:c6769d71bfb1ed60321363a9bc05e94dcf05e38295ef41d46ac08919e5b00d19"}, + {file = "aiohttp-3.10.8-cp312-cp312-win32.whl", hash = "sha256:a3081246bab4d419697ee45e555cef5cd1def7ac193dff6f50be761d2e44f194"}, + {file = "aiohttp-3.10.8-cp312-cp312-win_amd64.whl", hash = "sha256:ab1546fc8e00676febc81c548a876c7bde32f881b8334b77f84719ab2c7d28dc"}, + {file = "aiohttp-3.10.8-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:b1a012677b8e0a39e181e218de47d6741c5922202e3b0b65e412e2ce47c39337"}, + {file = "aiohttp-3.10.8-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:2df786c96c57cd6b87156ba4c5f166af7b88f3fc05f9d592252fdc83d8615a3c"}, + {file = "aiohttp-3.10.8-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:8885ca09d3a9317219c0831276bfe26984b17b2c37b7bf70dd478d17092a4772"}, + {file = "aiohttp-3.10.8-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4dbf252ac19860e0ab56cd480d2805498f47c5a2d04f5995d8d8a6effd04b48c"}, + {file = "aiohttp-3.10.8-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3b2036479b6b94afaaca7d07b8a68dc0e67b0caf5f6293bb6a5a1825f5923000"}, + {file = "aiohttp-3.10.8-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:365783e1b7c40b59ed4ce2b5a7491bae48f41cd2c30d52647a5b1ee8604c68ad"}, + {file = "aiohttp-3.10.8-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:270e653b5a4b557476a1ed40e6b6ce82f331aab669620d7c95c658ef976c9c5e"}, + {file = "aiohttp-3.10.8-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8960fabc20bfe4fafb941067cda8e23c8c17c98c121aa31c7bf0cdab11b07842"}, + {file = "aiohttp-3.10.8-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:f21e8f2abed9a44afc3d15bba22e0dfc71e5fa859bea916e42354c16102b036f"}, + {file = "aiohttp-3.10.8-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:fecd55e7418fabd297fd836e65cbd6371aa4035a264998a091bbf13f94d9c44d"}, + {file = "aiohttp-3.10.8-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:badb51d851358cd7535b647bb67af4854b64f3c85f0d089c737f75504d5910ec"}, + {file = "aiohttp-3.10.8-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:e860985f30f3a015979e63e7ba1a391526cdac1b22b7b332579df7867848e255"}, + {file = "aiohttp-3.10.8-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:71462f8eeca477cbc0c9700a9464e3f75f59068aed5e9d4a521a103692da72dc"}, + {file = "aiohttp-3.10.8-cp313-cp313-win32.whl", hash = "sha256:177126e971782769b34933e94fddd1089cef0fe6b82fee8a885e539f5b0f0c6a"}, + {file = "aiohttp-3.10.8-cp313-cp313-win_amd64.whl", hash = "sha256:98a4eb60e27033dee9593814ca320ee8c199489fbc6b2699d0f710584db7feb7"}, + {file = "aiohttp-3.10.8-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:ffef3d763e4c8fc97e740da5b4d0f080b78630a3914f4e772a122bbfa608c1db"}, + {file = "aiohttp-3.10.8-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:597128cb7bc5f068181b49a732961f46cb89f85686206289d6ccb5e27cb5fbe2"}, + {file = "aiohttp-3.10.8-cp38-cp38-macosx_11_0_arm64.whl", hash = 
"sha256:f23a6c1d09de5de89a33c9e9b229106cb70dcfdd55e81a3a3580eaadaa32bc92"}, + {file = "aiohttp-3.10.8-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:da57af0c54a302b7c655fa1ccd5b1817a53739afa39924ef1816e7b7c8a07ccb"}, + {file = "aiohttp-3.10.8-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1e7a6af57091056a79a35104d6ec29d98ec7f1fb7270ad9c6fff871b678d1ff8"}, + {file = "aiohttp-3.10.8-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:32710d6b3b6c09c60c794d84ca887a3a2890131c0b02b3cefdcc6709a2260a7c"}, + {file = "aiohttp-3.10.8-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4b91f4f62ad39a8a42d511d66269b46cb2fb7dea9564c21ab6c56a642d28bff5"}, + {file = "aiohttp-3.10.8-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:471a8c47344b9cc309558b3fcc469bd2c12b49322b4b31eb386c4a2b2d44e44a"}, + {file = "aiohttp-3.10.8-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:fc0e7f91705445d79beafba9bb3057dd50830e40fe5417017a76a214af54e122"}, + {file = "aiohttp-3.10.8-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:85431c9131a9a0f65260dc7a65c800ca5eae78c4c9931618f18c8e0933a0e0c1"}, + {file = "aiohttp-3.10.8-cp38-cp38-musllinux_1_2_ppc64le.whl", hash = "sha256:b91557ee0893da52794b25660d4f57bb519bcad8b7df301acd3898f7197c5d81"}, + {file = "aiohttp-3.10.8-cp38-cp38-musllinux_1_2_s390x.whl", hash = "sha256:4954e6b06dd0be97e1a5751fc606be1f9edbdc553c5d9b57d72406a8fbd17f9d"}, + {file = "aiohttp-3.10.8-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:a087c84b4992160ffef7afd98ef24177c8bd4ad61c53607145a8377457385100"}, + {file = "aiohttp-3.10.8-cp38-cp38-win32.whl", hash = "sha256:e1f0f7b27171b2956a27bd8f899751d0866ddabdd05cbddf3520f945130a908c"}, + {file = "aiohttp-3.10.8-cp38-cp38-win_amd64.whl", hash = "sha256:c4916070e12ae140110aa598031876c1bf8676a36a750716ea0aa5bd694aa2e7"}, + {file = "aiohttp-3.10.8-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:5284997e3d88d0dfb874c43e51ae8f4a6f4ca5b90dcf22995035187253d430db"}, + {file = "aiohttp-3.10.8-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9443d9ebc5167ce1fbb552faf2d666fb22ef5716a8750be67efd140a7733738c"}, + {file = "aiohttp-3.10.8-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:b667e2a03407d79a76c618dc30cedebd48f082d85880d0c9c4ec2faa3e10f43e"}, + {file = "aiohttp-3.10.8-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:98fae99d5c2146f254b7806001498e6f9ffb0e330de55a35e72feb7cb2fa399b"}, + {file = "aiohttp-3.10.8-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8296edd99d0dd9d0eb8b9e25b3b3506eef55c1854e9cc230f0b3f885f680410b"}, + {file = "aiohttp-3.10.8-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1ce46dfb49cfbf9e92818be4b761d4042230b1f0e05ffec0aad15b3eb162b905"}, + {file = "aiohttp-3.10.8-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7c38cfd355fd86c39b2d54651bd6ed7d63d4fe3b5553f364bae3306e2445f847"}, + {file = "aiohttp-3.10.8-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:713dff3f87ceec3bde4f3f484861464e722cf7533f9fa6b824ec82bb5a9010a7"}, + {file = "aiohttp-3.10.8-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:21a72f4a9c69a8567a0aca12042f12bba25d3139fd5dd8eeb9931f4d9e8599cd"}, + {file = "aiohttp-3.10.8-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:6d1ad868624f6cea77341ef2877ad4e71f7116834a6cd7ec36ec5c32f94ee6ae"}, + {file = 
"aiohttp-3.10.8-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:a78ba86d5a08207d1d1ad10b97aed6ea48b374b3f6831d02d0b06545ac0f181e"}, + {file = "aiohttp-3.10.8-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:aff048793d05e1ce05b62e49dccf81fe52719a13f4861530706619506224992b"}, + {file = "aiohttp-3.10.8-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:d088ca05381fd409793571d8e34eca06daf41c8c50a05aeed358d2d340c7af81"}, + {file = "aiohttp-3.10.8-cp39-cp39-win32.whl", hash = "sha256:ee97c4e54f457c366e1f76fbbf3e8effee9de57dae671084a161c00f481106ce"}, + {file = "aiohttp-3.10.8-cp39-cp39-win_amd64.whl", hash = "sha256:d95ae4420669c871667aad92ba8cce6251d61d79c1a38504621094143f94a8b4"}, + {file = "aiohttp-3.10.8.tar.gz", hash = "sha256:21f8225f7dc187018e8433c9326be01477fb2810721e048b33ac49091b19fb4a"}, ] [package.dependencies] @@ -216,6 +216,20 @@ doc = ["Sphinx (>=7.4,<8.0)", "packaging", "sphinx-autodoc-typehints (>=1.2.0)", test = ["anyio[trio]", "coverage[toml] (>=7)", "exceptiongroup (>=1.2.0)", "hypothesis (>=4.0)", "psutil (>=5.9)", "pytest (>=7.0)", "pytest-mock (>=3.6.1)", "trustme", "uvloop (>=0.21.0b1)"] trio = ["trio (>=0.26.1)"] +[[package]] +name = "anytree" +version = "2.12.1" +description = "Powerful and Lightweight Python Tree Data Structure with various plugins" +optional = true +python-versions = ">=3.7.2,<4" +files = [ + {file = "anytree-2.12.1-py3-none-any.whl", hash = "sha256:5ea9e61caf96db1e5b3d0a914378d2cd83c269dfce1fb8242ce96589fa3382f0"}, + {file = "anytree-2.12.1.tar.gz", hash = "sha256:244def434ccf31b668ed282954e5d315b4e066c4940b94aff4a7962d85947830"}, +] + +[package.dependencies] +six = "*" + [[package]] name = "apscheduler" version = "3.10.4" @@ -346,6 +360,23 @@ docs = ["cogapp", "furo", "myst-parser", "sphinx", "sphinx-notfound-page", "sphi tests = ["cloudpickle", "hypothesis", "mypy (>=1.11.1)", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "pytest-xdist[psutil]"] tests-mypy = ["mypy (>=1.11.1)", "pytest-mypy-plugins"] +[[package]] +name = "autograd" +version = "1.7.0" +description = "Efficiently computes derivatives of NumPy code." +optional = true +python-versions = ">=3.8" +files = [ + {file = "autograd-1.7.0-py3-none-any.whl", hash = "sha256:49680300f842f3a8722b060ac0d3ed7aca071d1ad4d3d38c9fdadafdcc73c30b"}, + {file = "autograd-1.7.0.tar.gz", hash = "sha256:de743fd368d6df523cd37305dcd171861a9752a144493677d2c9f5a56983ff2f"}, +] + +[package.dependencies] +numpy = "*" + +[package.extras] +scipy = ["scipy"] + [[package]] name = "backoff" version = "2.2.1" @@ -397,11 +428,29 @@ files = [ tests = ["pytest (>=3.2.1,!=3.3.0)"] typecheck = ["mypy"] +[[package]] +name = "beartype" +version = "0.18.5" +description = "Unbearably fast runtime type checking in pure Python." 
+optional = true +python-versions = ">=3.8.0" +files = [ + {file = "beartype-0.18.5-py3-none-any.whl", hash = "sha256:5301a14f2a9a5540fe47ec6d34d758e9cd8331d36c4760fc7a5499ab86310089"}, + {file = "beartype-0.18.5.tar.gz", hash = "sha256:264ddc2f1da9ec94ff639141fbe33d22e12a9f75aa863b83b7046ffff1381927"}, +] + +[package.extras] +all = ["typing-extensions (>=3.10.0.0)"] +dev = ["autoapi (>=0.9.0)", "coverage (>=5.5)", "equinox", "mypy (>=0.800)", "numpy", "pandera", "pydata-sphinx-theme (<=0.7.2)", "pytest (>=4.0.0)", "sphinx", "sphinx (>=4.2.0,<6.0.0)", "sphinxext-opengraph (>=0.7.5)", "tox (>=3.20.1)", "typing-extensions (>=3.10.0.0)"] +doc-rtd = ["autoapi (>=0.9.0)", "pydata-sphinx-theme (<=0.7.2)", "sphinx (>=4.2.0,<6.0.0)", "sphinxext-opengraph (>=0.7.5)"] +test-tox = ["equinox", "mypy (>=0.800)", "numpy", "pandera", "pytest (>=4.0.0)", "sphinx", "typing-extensions (>=3.10.0.0)"] +test-tox-coverage = ["coverage (>=5.5)"] + [[package]] name = "beautifulsoup4" version = "4.12.3" description = "Screen-scraping library" -optional = true +optional = false python-versions = ">=3.6.0" files = [ {file = "beautifulsoup4-4.12.3-py3-none-any.whl", hash = "sha256:b80878c9f40111313e55da8ba20bdba06d8fa3969fc68304167741bbf9e082ed"}, @@ -466,17 +515,17 @@ uvloop = ["uvloop (>=0.15.2)"] [[package]] name = "boto3" -version = "1.35.27" +version = "1.35.31" description = "The AWS SDK for Python" optional = true python-versions = ">=3.8" files = [ - {file = "boto3-1.35.27-py3-none-any.whl", hash = "sha256:3da139ca038032e92086e26d23833b557f0c257520162bfd3d6f580bf8032c86"}, - {file = "boto3-1.35.27.tar.gz", hash = "sha256:10d0fe15670b83a3f26572ab20d9152a064cee4c54b5ea9a1eeb1f0c3b807a7b"}, + {file = "boto3-1.35.31-py3-none-any.whl", hash = "sha256:2e9af74d10d8af7610a8d8468d2914961f116912a024fce17351825260385a52"}, + {file = "boto3-1.35.31.tar.gz", hash = "sha256:8c593af260c4ea3eb6f079c09908f94494ca2222aa4e40a7ff490fab1cee8b39"}, ] [package.dependencies] -botocore = ">=1.35.27,<1.36.0" +botocore = ">=1.35.31,<1.36.0" jmespath = ">=0.7.1,<2.0.0" s3transfer = ">=0.10.0,<0.11.0" @@ -485,13 +534,13 @@ crt = ["botocore[crt] (>=1.21.0,<2.0a0)"] [[package]] name = "botocore" -version = "1.35.27" +version = "1.35.31" description = "Low-level, data-driven core of boto 3." 
optional = true python-versions = ">=3.8" files = [ - {file = "botocore-1.35.27-py3-none-any.whl", hash = "sha256:c299c70b5330a8634e032883ce8a72c2c6d9fdbc985d8191199cb86b92e7cbbd"}, - {file = "botocore-1.35.27.tar.gz", hash = "sha256:f68875c26cd57a9d22c0f7a981ecb1636d7ce4d0e35797e04765b53e7bfed3e7"}, + {file = "botocore-1.35.31-py3-none-any.whl", hash = "sha256:4cee814875bc78656aef4011d3d6b2231e96f53ea3661ee428201afb579d5c31"}, + {file = "botocore-1.35.31.tar.gz", hash = "sha256:f7bfa910cf2cbcc8c2307c1cf7b93495d614c2d699883417893e0a337fe4eb63"}, ] [package.dependencies] @@ -502,6 +551,20 @@ urllib3 = {version = ">=1.25.4,<2.2.0 || >2.2.0,<3", markers = "python_version > [package.extras] crt = ["awscrt (==0.21.5)"] +[[package]] +name = "bs4" +version = "0.0.2" +description = "Dummy package for Beautiful Soup (beautifulsoup4)" +optional = false +python-versions = "*" +files = [ + {file = "bs4-0.0.2-py2.py3-none-any.whl", hash = "sha256:abf8742c0805ef7f662dce4b51cca104cffe52b835238afc169142ab9b3fbccc"}, + {file = "bs4-0.0.2.tar.gz", hash = "sha256:a48685c58f50fe127722417bae83fe6badf500d54b55f7e39ffe43b798653925"}, +] + +[package.dependencies] +beautifulsoup4 = "*" + [[package]] name = "certifi" version = "2024.8.30" @@ -517,7 +580,7 @@ files = [ name = "cffi" version = "1.17.1" description = "Foreign Function Interface for Python calling C code." -optional = true +optional = false python-versions = ">=3.8" files = [ {file = "cffi-1.17.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:df8b1c11f177bc2313ec4b2d46baec87a5f3e71fc8b45dab2ee7cae86d9aba14"}, @@ -742,6 +805,90 @@ files = [ {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"}, ] +[[package]] +name = "contourpy" +version = "1.3.0" +description = "Python library for calculating contours of 2D quadrilateral grids" +optional = true +python-versions = ">=3.9" +files = [ + {file = "contourpy-1.3.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:880ea32e5c774634f9fcd46504bf9f080a41ad855f4fef54f5380f5133d343c7"}, + {file = "contourpy-1.3.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:76c905ef940a4474a6289c71d53122a4f77766eef23c03cd57016ce19d0f7b42"}, + {file = "contourpy-1.3.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:92f8557cbb07415a4d6fa191f20fd9d2d9eb9c0b61d1b2f52a8926e43c6e9af7"}, + {file = "contourpy-1.3.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:36f965570cff02b874773c49bfe85562b47030805d7d8360748f3eca570f4cab"}, + {file = "contourpy-1.3.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:cacd81e2d4b6f89c9f8a5b69b86490152ff39afc58a95af002a398273e5ce589"}, + {file = "contourpy-1.3.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:69375194457ad0fad3a839b9e29aa0b0ed53bb54db1bfb6c3ae43d111c31ce41"}, + {file = "contourpy-1.3.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:7a52040312b1a858b5e31ef28c2e865376a386c60c0e248370bbea2d3f3b760d"}, + {file = "contourpy-1.3.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:3faeb2998e4fcb256542e8a926d08da08977f7f5e62cf733f3c211c2a5586223"}, + {file = "contourpy-1.3.0-cp310-cp310-win32.whl", hash = "sha256:36e0cff201bcb17a0a8ecc7f454fe078437fa6bda730e695a92f2d9932bd507f"}, + {file = "contourpy-1.3.0-cp310-cp310-win_amd64.whl", hash = "sha256:87ddffef1dbe5e669b5c2440b643d3fdd8622a348fe1983fad7a0f0ccb1cd67b"}, + {file = "contourpy-1.3.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = 
"sha256:0fa4c02abe6c446ba70d96ece336e621efa4aecae43eaa9b030ae5fb92b309ad"}, + {file = "contourpy-1.3.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:834e0cfe17ba12f79963861e0f908556b2cedd52e1f75e6578801febcc6a9f49"}, + {file = "contourpy-1.3.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dbc4c3217eee163fa3984fd1567632b48d6dfd29216da3ded3d7b844a8014a66"}, + {file = "contourpy-1.3.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4865cd1d419e0c7a7bf6de1777b185eebdc51470800a9f42b9e9decf17762081"}, + {file = "contourpy-1.3.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:303c252947ab4b14c08afeb52375b26781ccd6a5ccd81abcdfc1fafd14cf93c1"}, + {file = "contourpy-1.3.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:637f674226be46f6ba372fd29d9523dd977a291f66ab2a74fbeb5530bb3f445d"}, + {file = "contourpy-1.3.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:76a896b2f195b57db25d6b44e7e03f221d32fe318d03ede41f8b4d9ba1bff53c"}, + {file = "contourpy-1.3.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:e1fd23e9d01591bab45546c089ae89d926917a66dceb3abcf01f6105d927e2cb"}, + {file = "contourpy-1.3.0-cp311-cp311-win32.whl", hash = "sha256:d402880b84df3bec6eab53cd0cf802cae6a2ef9537e70cf75e91618a3801c20c"}, + {file = "contourpy-1.3.0-cp311-cp311-win_amd64.whl", hash = "sha256:6cb6cc968059db9c62cb35fbf70248f40994dfcd7aa10444bbf8b3faeb7c2d67"}, + {file = "contourpy-1.3.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:570ef7cf892f0afbe5b2ee410c507ce12e15a5fa91017a0009f79f7d93a1268f"}, + {file = "contourpy-1.3.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:da84c537cb8b97d153e9fb208c221c45605f73147bd4cadd23bdae915042aad6"}, + {file = "contourpy-1.3.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0be4d8425bfa755e0fd76ee1e019636ccc7c29f77a7c86b4328a9eb6a26d0639"}, + {file = "contourpy-1.3.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9c0da700bf58f6e0b65312d0a5e695179a71d0163957fa381bb3c1f72972537c"}, + {file = "contourpy-1.3.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:eb8b141bb00fa977d9122636b16aa67d37fd40a3d8b52dd837e536d64b9a4d06"}, + {file = "contourpy-1.3.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3634b5385c6716c258d0419c46d05c8aa7dc8cb70326c9a4fb66b69ad2b52e09"}, + {file = "contourpy-1.3.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:0dce35502151b6bd35027ac39ba6e5a44be13a68f55735c3612c568cac3805fd"}, + {file = "contourpy-1.3.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:aea348f053c645100612b333adc5983d87be69acdc6d77d3169c090d3b01dc35"}, + {file = "contourpy-1.3.0-cp312-cp312-win32.whl", hash = "sha256:90f73a5116ad1ba7174341ef3ea5c3150ddf20b024b98fb0c3b29034752c8aeb"}, + {file = "contourpy-1.3.0-cp312-cp312-win_amd64.whl", hash = "sha256:b11b39aea6be6764f84360fce6c82211a9db32a7c7de8fa6dd5397cf1d079c3b"}, + {file = "contourpy-1.3.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:3e1c7fa44aaae40a2247e2e8e0627f4bea3dd257014764aa644f319a5f8600e3"}, + {file = "contourpy-1.3.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:364174c2a76057feef647c802652f00953b575723062560498dc7930fc9b1cb7"}, + {file = "contourpy-1.3.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:32b238b3b3b649e09ce9aaf51f0c261d38644bdfa35cbaf7b263457850957a84"}, + {file = 
"contourpy-1.3.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d51fca85f9f7ad0b65b4b9fe800406d0d77017d7270d31ec3fb1cc07358fdea0"}, + {file = "contourpy-1.3.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:732896af21716b29ab3e988d4ce14bc5133733b85956316fb0c56355f398099b"}, + {file = "contourpy-1.3.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d73f659398a0904e125280836ae6f88ba9b178b2fed6884f3b1f95b989d2c8da"}, + {file = "contourpy-1.3.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:c6c7c2408b7048082932cf4e641fa3b8ca848259212f51c8c59c45aa7ac18f14"}, + {file = "contourpy-1.3.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:f317576606de89da6b7e0861cf6061f6146ead3528acabff9236458a6ba467f8"}, + {file = "contourpy-1.3.0-cp313-cp313-win32.whl", hash = "sha256:31cd3a85dbdf1fc002280c65caa7e2b5f65e4a973fcdf70dd2fdcb9868069294"}, + {file = "contourpy-1.3.0-cp313-cp313-win_amd64.whl", hash = "sha256:4553c421929ec95fb07b3aaca0fae668b2eb5a5203d1217ca7c34c063c53d087"}, + {file = "contourpy-1.3.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:345af746d7766821d05d72cb8f3845dfd08dd137101a2cb9b24de277d716def8"}, + {file = "contourpy-1.3.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:3bb3808858a9dc68f6f03d319acd5f1b8a337e6cdda197f02f4b8ff67ad2057b"}, + {file = "contourpy-1.3.0-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:420d39daa61aab1221567b42eecb01112908b2cab7f1b4106a52caaec8d36973"}, + {file = "contourpy-1.3.0-cp313-cp313t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4d63ee447261e963af02642ffcb864e5a2ee4cbfd78080657a9880b8b1868e18"}, + {file = "contourpy-1.3.0-cp313-cp313t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:167d6c890815e1dac9536dca00828b445d5d0df4d6a8c6adb4a7ec3166812fa8"}, + {file = "contourpy-1.3.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:710a26b3dc80c0e4febf04555de66f5fd17e9cf7170a7b08000601a10570bda6"}, + {file = "contourpy-1.3.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:75ee7cb1a14c617f34a51d11fa7524173e56551646828353c4af859c56b766e2"}, + {file = "contourpy-1.3.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:33c92cdae89ec5135d036e7218e69b0bb2851206077251f04a6c4e0e21f03927"}, + {file = "contourpy-1.3.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:a11077e395f67ffc2c44ec2418cfebed032cd6da3022a94fc227b6faf8e2acb8"}, + {file = "contourpy-1.3.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:e8134301d7e204c88ed7ab50028ba06c683000040ede1d617298611f9dc6240c"}, + {file = "contourpy-1.3.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e12968fdfd5bb45ffdf6192a590bd8ddd3ba9e58360b29683c6bb71a7b41edca"}, + {file = "contourpy-1.3.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:fd2a0fc506eccaaa7595b7e1418951f213cf8255be2600f1ea1b61e46a60c55f"}, + {file = "contourpy-1.3.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4cfb5c62ce023dfc410d6059c936dcf96442ba40814aefbfa575425a3a7f19dc"}, + {file = "contourpy-1.3.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:68a32389b06b82c2fdd68276148d7b9275b5f5cf13e5417e4252f6d1a34f72a2"}, + {file = "contourpy-1.3.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:94e848a6b83da10898cbf1311a815f770acc9b6a3f2d646f330d57eb4e87592e"}, + {file = "contourpy-1.3.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = 
"sha256:d78ab28a03c854a873787a0a42254a0ccb3cb133c672f645c9f9c8f3ae9d0800"}, + {file = "contourpy-1.3.0-cp39-cp39-win32.whl", hash = "sha256:81cb5ed4952aae6014bc9d0421dec7c5835c9c8c31cdf51910b708f548cf58e5"}, + {file = "contourpy-1.3.0-cp39-cp39-win_amd64.whl", hash = "sha256:14e262f67bd7e6eb6880bc564dcda30b15e351a594657e55b7eec94b6ef72843"}, + {file = "contourpy-1.3.0-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:fe41b41505a5a33aeaed2a613dccaeaa74e0e3ead6dd6fd3a118fb471644fd6c"}, + {file = "contourpy-1.3.0-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:eca7e17a65f72a5133bdbec9ecf22401c62bcf4821361ef7811faee695799779"}, + {file = "contourpy-1.3.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:1ec4dc6bf570f5b22ed0d7efba0dfa9c5b9e0431aeea7581aa217542d9e809a4"}, + {file = "contourpy-1.3.0-pp39-pypy39_pp73-macosx_10_15_x86_64.whl", hash = "sha256:00ccd0dbaad6d804ab259820fa7cb0b8036bda0686ef844d24125d8287178ce0"}, + {file = "contourpy-1.3.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8ca947601224119117f7c19c9cdf6b3ab54c5726ef1d906aa4a69dfb6dd58102"}, + {file = "contourpy-1.3.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:c6ec93afeb848a0845a18989da3beca3eec2c0f852322efe21af1931147d12cb"}, + {file = "contourpy-1.3.0.tar.gz", hash = "sha256:7ffa0db17717a8ffb127efd0c95a4362d996b892c2904db72428d5b52e1938a4"}, +] + +[package.dependencies] +numpy = ">=1.23" + +[package.extras] +bokeh = ["bokeh", "selenium"] +docs = ["furo", "sphinx (>=7.2)", "sphinx-copybutton"] +mypy = ["contourpy[bokeh,docs]", "docutils-stubs", "mypy (==1.11.1)", "types-Pillow"] +test = ["Pillow", "contourpy[test-no-images]", "matplotlib"] +test-no-images = ["pytest", "pytest-cov", "pytest-rerunfailures", "pytest-xdist", "wurlitzer"] + [[package]] name = "coverage" version = "7.6.1" @@ -833,7 +980,7 @@ toml = ["tomli"] name = "cryptography" version = "43.0.1" description = "cryptography is a package which provides cryptographic recipes and primitives to Python developers." -optional = true +optional = false python-versions = ">=3.7" files = [ {file = "cryptography-43.0.1-cp37-abi3-macosx_10_9_universal2.whl", hash = "sha256:8385d98f6a3bf8bb2d65a73e17ed87a3ba84f6991c155691c51112075f9ffc5d"}, @@ -878,11 +1025,26 @@ ssh = ["bcrypt (>=3.1.5)"] test = ["certifi", "cryptography-vectors (==43.0.1)", "pretend", "pytest (>=6.2.0)", "pytest-benchmark", "pytest-cov", "pytest-xdist"] test-randomorder = ["pytest-randomly"] +[[package]] +name = "cycler" +version = "0.12.1" +description = "Composable style cycles" +optional = true +python-versions = ">=3.8" +files = [ + {file = "cycler-0.12.1-py3-none-any.whl", hash = "sha256:85cef7cff222d8644161529808465972e51340599459b8ac3ccbac5a854e0d30"}, + {file = "cycler-0.12.1.tar.gz", hash = "sha256:88bb128f02ba341da8ef447245a9e138fae777f6a23943da4540077d3601eb1c"}, +] + +[package.extras] +docs = ["ipython", "matplotlib", "numpydoc", "sphinx"] +tests = ["pytest", "pytest-cov", "pytest-xdist"] + [[package]] name = "dataclasses-json" version = "0.6.7" description = "Easily serialize dataclasses to and from JSON." -optional = true +optional = false python-versions = "<4.0,>=3.7" files = [ {file = "dataclasses_json-0.6.7-py3-none-any.whl", hash = "sha256:0dbf33f26c8d5305befd61b39d2b3414e8a407bedc2834dea9b8d642666fb40a"}, @@ -897,7 +1059,7 @@ typing-inspect = ">=0.4.0,<1" name = "deepdiff" version = "7.0.1" description = "Deep Difference and Search of any Python object/data. 
Recreate objects by adding adding deltas to each other." -optional = true +optional = false python-versions = ">=3.8" files = [ {file = "deepdiff-7.0.1-py3-none-any.whl", hash = "sha256:447760081918216aa4fd4ca78a4b6a848b81307b2ea94c810255334b759e1dc3"}, @@ -1106,6 +1268,77 @@ typing_extensions = ">=4" [package.extras] dev = ["black", "mypy", "pre-commit", "pylint", "pytest", "pytest-benchmark", "pytest-cov"] +[[package]] +name = "fonttools" +version = "4.54.1" +description = "Tools to manipulate font files" +optional = true +python-versions = ">=3.8" +files = [ + {file = "fonttools-4.54.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:7ed7ee041ff7b34cc62f07545e55e1468808691dddfd315d51dd82a6b37ddef2"}, + {file = "fonttools-4.54.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:41bb0b250c8132b2fcac148e2e9198e62ff06f3cc472065dff839327945c5882"}, + {file = "fonttools-4.54.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7965af9b67dd546e52afcf2e38641b5be956d68c425bef2158e95af11d229f10"}, + {file = "fonttools-4.54.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:278913a168f90d53378c20c23b80f4e599dca62fbffae4cc620c8eed476b723e"}, + {file = "fonttools-4.54.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:0e88e3018ac809b9662615072dcd6b84dca4c2d991c6d66e1970a112503bba7e"}, + {file = "fonttools-4.54.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:4aa4817f0031206e637d1e685251ac61be64d1adef111060df84fdcbc6ab6c44"}, + {file = "fonttools-4.54.1-cp310-cp310-win32.whl", hash = "sha256:7e3b7d44e18c085fd8c16dcc6f1ad6c61b71ff463636fcb13df7b1b818bd0c02"}, + {file = "fonttools-4.54.1-cp310-cp310-win_amd64.whl", hash = "sha256:dd9cc95b8d6e27d01e1e1f1fae8559ef3c02c76317da650a19047f249acd519d"}, + {file = "fonttools-4.54.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:5419771b64248484299fa77689d4f3aeed643ea6630b2ea750eeab219588ba20"}, + {file = "fonttools-4.54.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:301540e89cf4ce89d462eb23a89464fef50915255ece765d10eee8b2bf9d75b2"}, + {file = "fonttools-4.54.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:76ae5091547e74e7efecc3cbf8e75200bc92daaeb88e5433c5e3e95ea8ce5aa7"}, + {file = "fonttools-4.54.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:82834962b3d7c5ca98cb56001c33cf20eb110ecf442725dc5fdf36d16ed1ab07"}, + {file = "fonttools-4.54.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:d26732ae002cc3d2ecab04897bb02ae3f11f06dd7575d1df46acd2f7c012a8d8"}, + {file = "fonttools-4.54.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:58974b4987b2a71ee08ade1e7f47f410c367cdfc5a94fabd599c88165f56213a"}, + {file = "fonttools-4.54.1-cp311-cp311-win32.whl", hash = "sha256:ab774fa225238986218a463f3fe151e04d8c25d7de09df7f0f5fce27b1243dbc"}, + {file = "fonttools-4.54.1-cp311-cp311-win_amd64.whl", hash = "sha256:07e005dc454eee1cc60105d6a29593459a06321c21897f769a281ff2d08939f6"}, + {file = "fonttools-4.54.1-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:54471032f7cb5fca694b5f1a0aaeba4af6e10ae989df408e0216f7fd6cdc405d"}, + {file = "fonttools-4.54.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:8fa92cb248e573daab8d032919623cc309c005086d743afb014c836636166f08"}, + {file = "fonttools-4.54.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0a911591200114969befa7f2cb74ac148bce5a91df5645443371aba6d222e263"}, + {file = 
"fonttools-4.54.1-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:93d458c8a6a354dc8b48fc78d66d2a8a90b941f7fec30e94c7ad9982b1fa6bab"}, + {file = "fonttools-4.54.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:5eb2474a7c5be8a5331146758debb2669bf5635c021aee00fd7c353558fc659d"}, + {file = "fonttools-4.54.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:c9c563351ddc230725c4bdf7d9e1e92cbe6ae8553942bd1fb2b2ff0884e8b714"}, + {file = "fonttools-4.54.1-cp312-cp312-win32.whl", hash = "sha256:fdb062893fd6d47b527d39346e0c5578b7957dcea6d6a3b6794569370013d9ac"}, + {file = "fonttools-4.54.1-cp312-cp312-win_amd64.whl", hash = "sha256:e4564cf40cebcb53f3dc825e85910bf54835e8a8b6880d59e5159f0f325e637e"}, + {file = "fonttools-4.54.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:6e37561751b017cf5c40fce0d90fd9e8274716de327ec4ffb0df957160be3bff"}, + {file = "fonttools-4.54.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:357cacb988a18aace66e5e55fe1247f2ee706e01debc4b1a20d77400354cddeb"}, + {file = "fonttools-4.54.1-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f8e953cc0bddc2beaf3a3c3b5dd9ab7554677da72dfaf46951e193c9653e515a"}, + {file = "fonttools-4.54.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:58d29b9a294573d8319f16f2f79e42428ba9b6480442fa1836e4eb89c4d9d61c"}, + {file = "fonttools-4.54.1-cp313-cp313-win32.whl", hash = "sha256:9ef1b167e22709b46bf8168368b7b5d3efeaaa746c6d39661c1b4405b6352e58"}, + {file = "fonttools-4.54.1-cp313-cp313-win_amd64.whl", hash = "sha256:262705b1663f18c04250bd1242b0515d3bbae177bee7752be67c979b7d47f43d"}, + {file = "fonttools-4.54.1-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:ed2f80ca07025551636c555dec2b755dd005e2ea8fbeb99fc5cdff319b70b23b"}, + {file = "fonttools-4.54.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:9dc080e5a1c3b2656caff2ac2633d009b3a9ff7b5e93d0452f40cd76d3da3b3c"}, + {file = "fonttools-4.54.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1d152d1be65652fc65e695e5619e0aa0982295a95a9b29b52b85775243c06556"}, + {file = "fonttools-4.54.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8583e563df41fdecef31b793b4dd3af8a9caa03397be648945ad32717a92885b"}, + {file = "fonttools-4.54.1-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:0d1d353ef198c422515a3e974a1e8d5b304cd54a4c2eebcae708e37cd9eeffb1"}, + {file = "fonttools-4.54.1-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:fda582236fee135d4daeca056c8c88ec5f6f6d88a004a79b84a02547c8f57386"}, + {file = "fonttools-4.54.1-cp38-cp38-win32.whl", hash = "sha256:e7d82b9e56716ed32574ee106cabca80992e6bbdcf25a88d97d21f73a0aae664"}, + {file = "fonttools-4.54.1-cp38-cp38-win_amd64.whl", hash = "sha256:ada215fd079e23e060157aab12eba0d66704316547f334eee9ff26f8c0d7b8ab"}, + {file = "fonttools-4.54.1-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:f5b8a096e649768c2f4233f947cf9737f8dbf8728b90e2771e2497c6e3d21d13"}, + {file = "fonttools-4.54.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:4e10d2e0a12e18f4e2dd031e1bf7c3d7017be5c8dbe524d07706179f355c5dac"}, + {file = "fonttools-4.54.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:31c32d7d4b0958600eac75eaf524b7b7cb68d3a8c196635252b7a2c30d80e986"}, + {file = "fonttools-4.54.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c39287f5c8f4a0c5a55daf9eaf9ccd223ea59eed3f6d467133cc727d7b943a55"}, + 
{file = "fonttools-4.54.1-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:a7a310c6e0471602fe3bf8efaf193d396ea561486aeaa7adc1f132e02d30c4b9"}, + {file = "fonttools-4.54.1-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:d3b659d1029946f4ff9b6183984578041b520ce0f8fb7078bb37ec7445806b33"}, + {file = "fonttools-4.54.1-cp39-cp39-win32.whl", hash = "sha256:e96bc94c8cda58f577277d4a71f51c8e2129b8b36fd05adece6320dd3d57de8a"}, + {file = "fonttools-4.54.1-cp39-cp39-win_amd64.whl", hash = "sha256:e8a4b261c1ef91e7188a30571be6ad98d1c6d9fa2427244c545e2fa0a2494dd7"}, + {file = "fonttools-4.54.1-py3-none-any.whl", hash = "sha256:37cddd62d83dc4f72f7c3f3c2bcf2697e89a30efb152079896544a93907733bd"}, + {file = "fonttools-4.54.1.tar.gz", hash = "sha256:957f669d4922f92c171ba01bef7f29410668db09f6c02111e22b2bce446f3285"}, +] + +[package.extras] +all = ["brotli (>=1.0.1)", "brotlicffi (>=0.8.0)", "fs (>=2.2.0,<3)", "lxml (>=4.0)", "lz4 (>=1.7.4.2)", "matplotlib", "munkres", "pycairo", "scipy", "skia-pathops (>=0.5.0)", "sympy", "uharfbuzz (>=0.23.0)", "unicodedata2 (>=15.1.0)", "xattr", "zopfli (>=0.1.4)"] +graphite = ["lz4 (>=1.7.4.2)"] +interpolatable = ["munkres", "pycairo", "scipy"] +lxml = ["lxml (>=4.0)"] +pathops = ["skia-pathops (>=0.5.0)"] +plot = ["matplotlib"] +repacker = ["uharfbuzz (>=0.23.0)"] +symfont = ["sympy"] +type1 = ["xattr"] +ufo = ["fs (>=2.2.0,<3)"] +unicode = ["unicodedata2 (>=15.1.0)"] +woff = ["brotli (>=1.0.1)", "brotlicffi (>=0.8.0)", "zopfli (>=0.1.4)"] + [[package]] name = "frozenlist" version = "1.4.1" @@ -1242,21 +1475,109 @@ files = [ {file = "future-1.0.0.tar.gz", hash = "sha256:bd2968309307861edae1458a4f8a4f3598c03be43b97521076aebf5d94c07b05"}, ] +[[package]] +name = "gensim" +version = "4.3.3" +description = "Python framework for fast Vector Space Modelling" +optional = true +python-versions = ">=3.8" +files = [ + {file = "gensim-4.3.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:4e72840adfbea35c5804fd559bc0cb6bc9f439926220a37d852b7ce76eb325c1"}, + {file = "gensim-4.3.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:4019263c9d9afae7c669f880c17e09461e77a71afce04ed4d79cf71a4cad2848"}, + {file = "gensim-4.3.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dea62d3e2ada547687bde6cbba37efa50b534db77e9d44fd5802676bb072c9d9"}, + {file = "gensim-4.3.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6fac93ef5e44982defef9d3c1e4cd00245506b8a29cec19ec5e00f0221b8144c"}, + {file = "gensim-4.3.3-cp310-cp310-win_amd64.whl", hash = "sha256:7c3409f755fb8d62da99cea65e7a40a99d21f8fd86443a3aaf2d90eb68995021"}, + {file = "gensim-4.3.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:99e7b70352aecc6c1674dde82b75f453e7a5d1cc71ac1cfbc460bf1fe20501b7"}, + {file = "gensim-4.3.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:32a4cac3f3c38af2069eab9524609fc92ebaeb2692b7280cfda365a3517a280a"}, + {file = "gensim-4.3.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c071b4329ed1be02446eb7ef637b94c68cf0080c15c57fbcde667fce2e49c3fe"}, + {file = "gensim-4.3.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d662bf96e3d741b6ab61a54be842a7cbf5e45193008b2f4225c758cafd7f9cdc"}, + {file = "gensim-4.3.3-cp311-cp311-win_amd64.whl", hash = "sha256:a54bd53a0e6f991abb837f126663353657270e75be53287e8a568ada0b35b1b0"}, + {file = "gensim-4.3.3-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:9a65ed1a8c1fc83890b4eb2a45ae2b32e82a0209c970c8c74694d0374c2415cb"}, + {file = 
"gensim-4.3.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:4db485e08a0287e0fd6a029d89b90913d1df38f1dcd34cd2ab758873ba9255f3"}, + {file = "gensim-4.3.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7198987116373ab99f034b292a04ac841531d12b56345851c98b40a3fcd93a85"}, + {file = "gensim-4.3.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6237a50de4da7a037b19b2b6c430b6537243dcdedebf94afeb089e951953e601"}, + {file = "gensim-4.3.3-cp312-cp312-win_amd64.whl", hash = "sha256:c910c2d5a71f532273166a3a82762959973f0513b221a495fa5a2a07652ee66d"}, + {file = "gensim-4.3.3-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:1d7efa5e35d3f0ec02e6e8343b623c2c863be99e8c26866cf0bebd24fb10198c"}, + {file = "gensim-4.3.3-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:2e8eaf5ef576f4d45e98cf87e7edda9afb469dff954a923402dc1ffc35195901"}, + {file = "gensim-4.3.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9161e52a6ec2a0580df66e9fac4ff7fc43efdc40674fbd4dd9e914796cc68bc3"}, + {file = "gensim-4.3.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a200d6ac522cdf91e6048e1a368318c6b1b6e0c79009dfd408345ea2b9d3c096"}, + {file = "gensim-4.3.3-cp38-cp38-win_amd64.whl", hash = "sha256:065547124a93948926b88cb854e1c09750e9a4c7be92f55858159aa8a23359c3"}, + {file = "gensim-4.3.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:688a13b9bba839fedc7f3da6806d5701a756ed940839702ba6d7f494e917baef"}, + {file = "gensim-4.3.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:c560d28133cca58078221d60fce346f98f2c5e93d2ad42942f32c0d60903f65b"}, + {file = "gensim-4.3.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:832311f0c420c0841c98b9e6cc4d83ea362add6db917bf2d646de4bed48a29f7"}, + {file = "gensim-4.3.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1049f5bc2a84b21a1cb9976741826c0ebf25cfdff4a888361db4b4a697d99f0d"}, + {file = "gensim-4.3.3-cp39-cp39-win_amd64.whl", hash = "sha256:e99b236b6638a30d7f878e2e21a94dab2f6d4b4fd3c242f44dca1341940cb0cb"}, + {file = "gensim-4.3.3.tar.gz", hash = "sha256:84852076a6a3d88d7dac5be245e24c21c3b819b565e14c1b61fa3e5ee76dcf57"}, +] + +[package.dependencies] +numpy = ">=1.18.5,<2.0" +scipy = ">=1.7.0,<1.14.0" +smart-open = ">=1.8.1" + +[package.extras] +distributed = ["Pyro4 (>=4.27)"] +docs = ["POT", "Pyro4", "Pyro4 (>=4.27)", "annoy", "matplotlib", "memory-profiler", "nltk", "pandas", "pytest", "pytest-cov", "scikit-learn", "sphinx (==5.1.1)", "sphinx-gallery (==0.11.1)", "sphinxcontrib-napoleon (==0.7)", "sphinxcontrib.programoutput (==0.17)", "statsmodels", "testfixtures", "visdom (>=0.1.8,!=0.1.8.7)"] +test = ["POT", "pytest", "pytest-cov", "testfixtures", "visdom (>=0.1.8,!=0.1.8.7)"] +test-win = ["POT", "pytest", "pytest-cov", "testfixtures"] + [[package]] name = "gotrue" -version = "2.8.1" +version = "2.9.1" description = "Python Client Library for Supabase Auth" optional = true python-versions = "<4.0,>=3.8" files = [ - {file = "gotrue-2.8.1-py3-none-any.whl", hash = "sha256:97dff077d71cca629f046c35ba34fae132b69c55fe271651766ddcf6d8132468"}, - {file = "gotrue-2.8.1.tar.gz", hash = "sha256:644d0096c4c390f7e36d9cb05271a7091c01e7dc6d506eb117b8fe8fc48eb8d9"}, + {file = "gotrue-2.9.1-py3-none-any.whl", hash = "sha256:6ffccd0d971d8e2883a650640fa94e53483c022119d687d1496cd79040736127"}, + {file = "gotrue-2.9.1.tar.gz", hash = "sha256:bc076c2030e5b5a189937fc1985b311844105ad1c78685b353f05253d0ebffed"}, ] [package.dependencies] httpx = 
{version = ">=0.24,<0.28", extras = ["http2"]} pydantic = ">=1.10,<3" +[[package]] +name = "graspologic" +version = "3.4.1" +description = "A set of Python modules for graph statistics" +optional = true +python-versions = "<3.13,>=3.9" +files = [ + {file = "graspologic-3.4.1-py3-none-any.whl", hash = "sha256:c6563e087eda599bad1de831d4b7321c0daa7a82f4e85a7d7737ff67e07cdda2"}, + {file = "graspologic-3.4.1.tar.gz", hash = "sha256:7561f0b852a2bccd351bff77e8db07d9892f9dfa35a420fdec01690e4fdc8075"}, +] + +[package.dependencies] +anytree = ">=2.12.1,<3.0.0" +beartype = ">=0.18.5,<0.19.0" +gensim = ">=4.3.2,<5.0.0" +graspologic-native = ">=1.2.1,<2.0.0" +hyppo = ">=0.4.0,<0.5.0" +joblib = ">=1.4.2,<2.0.0" +matplotlib = ">=3.8.4,<4.0.0" +networkx = ">=3,<4" +numpy = ">=1.26.4,<2.0.0" +POT = ">=0.9,<0.10" +scikit-learn = ">=1.4.2,<2.0.0" +scipy = "1.12.0" +seaborn = ">=0.13.2,<0.14.0" +statsmodels = ">=0.14.2,<0.15.0" +typing-extensions = ">=4.4.0,<5.0.0" +umap-learn = ">=0.5.6,<0.6.0" + +[[package]] +name = "graspologic-native" +version = "1.2.1" +description = "Python native companion module to the graspologic library" +optional = true +python-versions = ">=3.6, <3.13" +files = [ + {file = "graspologic_native-1.2.1-cp36-abi3-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:eccb2fa475b604375e34b4ae1d5497a428c34ed65f27888495239f8e120acea1"}, + {file = "graspologic_native-1.2.1-cp36-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a44cfdee11718c01c0f6c544750b3ae64e28cc03432a620fe0295704bd0d618d"}, + {file = "graspologic_native-1.2.1-cp36-abi3-win_amd64.whl", hash = "sha256:56b5e66ba003fd38efc0919ce90fa22d379456e177dca65e26626498d2b9b96b"}, + {file = "graspologic_native-1.2.1.tar.gz", hash = "sha256:72b7586028a91e9fef9af0ef314d368f0240c18dca99e6e6c546334359a8610a"}, +] + [[package]] name = "greenlet" version = "3.1.1" @@ -1345,61 +1666,70 @@ test = ["objgraph", "psutil"] [[package]] name = "grpcio" -version = "1.66.1" +version = "1.66.2" description = "HTTP/2-based RPC framework" optional = true python-versions = ">=3.8" files = [ - {file = "grpcio-1.66.1-cp310-cp310-linux_armv7l.whl", hash = "sha256:4877ba180591acdf127afe21ec1c7ff8a5ecf0fe2600f0d3c50e8c4a1cbc6492"}, - {file = "grpcio-1.66.1-cp310-cp310-macosx_12_0_universal2.whl", hash = "sha256:3750c5a00bd644c75f4507f77a804d0189d97a107eb1481945a0cf3af3e7a5ac"}, - {file = "grpcio-1.66.1-cp310-cp310-manylinux_2_17_aarch64.whl", hash = "sha256:a013c5fbb12bfb5f927444b477a26f1080755a931d5d362e6a9a720ca7dbae60"}, - {file = "grpcio-1.66.1-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b1b24c23d51a1e8790b25514157d43f0a4dce1ac12b3f0b8e9f66a5e2c4c132f"}, - {file = "grpcio-1.66.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b7ffb8ea674d68de4cac6f57d2498fef477cef582f1fa849e9f844863af50083"}, - {file = "grpcio-1.66.1-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:307b1d538140f19ccbd3aed7a93d8f71103c5d525f3c96f8616111614b14bf2a"}, - {file = "grpcio-1.66.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:1c17ebcec157cfb8dd445890a03e20caf6209a5bd4ac5b040ae9dbc59eef091d"}, - {file = "grpcio-1.66.1-cp310-cp310-win32.whl", hash = "sha256:ef82d361ed5849d34cf09105d00b94b6728d289d6b9235513cb2fcc79f7c432c"}, - {file = "grpcio-1.66.1-cp310-cp310-win_amd64.whl", hash = "sha256:292a846b92cdcd40ecca46e694997dd6b9be6c4c01a94a0dfb3fcb75d20da858"}, - {file = "grpcio-1.66.1-cp311-cp311-linux_armv7l.whl", hash = 
"sha256:c30aeceeaff11cd5ddbc348f37c58bcb96da8d5aa93fed78ab329de5f37a0d7a"}, - {file = "grpcio-1.66.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:8a1e224ce6f740dbb6b24c58f885422deebd7eb724aff0671a847f8951857c26"}, - {file = "grpcio-1.66.1-cp311-cp311-manylinux_2_17_aarch64.whl", hash = "sha256:a66fe4dc35d2330c185cfbb42959f57ad36f257e0cc4557d11d9f0a3f14311df"}, - {file = "grpcio-1.66.1-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e3ba04659e4fce609de2658fe4dbf7d6ed21987a94460f5f92df7579fd5d0e22"}, - {file = "grpcio-1.66.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4573608e23f7e091acfbe3e84ac2045680b69751d8d67685ffa193a4429fedb1"}, - {file = "grpcio-1.66.1-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:7e06aa1f764ec8265b19d8f00140b8c4b6ca179a6dc67aa9413867c47e1fb04e"}, - {file = "grpcio-1.66.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:3885f037eb11f1cacc41f207b705f38a44b69478086f40608959bf5ad85826dd"}, - {file = "grpcio-1.66.1-cp311-cp311-win32.whl", hash = "sha256:97ae7edd3f3f91480e48ede5d3e7d431ad6005bfdbd65c1b56913799ec79e791"}, - {file = "grpcio-1.66.1-cp311-cp311-win_amd64.whl", hash = "sha256:cfd349de4158d797db2bd82d2020554a121674e98fbe6b15328456b3bf2495bb"}, - {file = "grpcio-1.66.1-cp312-cp312-linux_armv7l.whl", hash = "sha256:a92c4f58c01c77205df6ff999faa008540475c39b835277fb8883b11cada127a"}, - {file = "grpcio-1.66.1-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:fdb14bad0835914f325349ed34a51940bc2ad965142eb3090081593c6e347be9"}, - {file = "grpcio-1.66.1-cp312-cp312-manylinux_2_17_aarch64.whl", hash = "sha256:f03a5884c56256e08fd9e262e11b5cfacf1af96e2ce78dc095d2c41ccae2c80d"}, - {file = "grpcio-1.66.1-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2ca2559692d8e7e245d456877a85ee41525f3ed425aa97eb7a70fc9a79df91a0"}, - {file = "grpcio-1.66.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:84ca1be089fb4446490dd1135828bd42a7c7f8421e74fa581611f7afdf7ab761"}, - {file = "grpcio-1.66.1-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:d639c939ad7c440c7b2819a28d559179a4508783f7e5b991166f8d7a34b52815"}, - {file = "grpcio-1.66.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:b9feb4e5ec8dc2d15709f4d5fc367794d69277f5d680baf1910fc9915c633524"}, - {file = "grpcio-1.66.1-cp312-cp312-win32.whl", hash = "sha256:7101db1bd4cd9b880294dec41a93fcdce465bdbb602cd8dc5bd2d6362b618759"}, - {file = "grpcio-1.66.1-cp312-cp312-win_amd64.whl", hash = "sha256:b0aa03d240b5539648d996cc60438f128c7f46050989e35b25f5c18286c86734"}, - {file = "grpcio-1.66.1-cp38-cp38-linux_armv7l.whl", hash = "sha256:ecfe735e7a59e5a98208447293ff8580e9db1e890e232b8b292dc8bd15afc0d2"}, - {file = "grpcio-1.66.1-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:4825a3aa5648010842e1c9d35a082187746aa0cdbf1b7a2a930595a94fb10fce"}, - {file = "grpcio-1.66.1-cp38-cp38-manylinux_2_17_aarch64.whl", hash = "sha256:f517fd7259fe823ef3bd21e508b653d5492e706e9f0ef82c16ce3347a8a5620c"}, - {file = "grpcio-1.66.1-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f1fe60d0772831d96d263b53d83fb9a3d050a94b0e94b6d004a5ad111faa5b5b"}, - {file = "grpcio-1.66.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:31a049daa428f928f21090403e5d18ea02670e3d5d172581670be006100db9ef"}, - {file = "grpcio-1.66.1-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:6f914386e52cbdeb5d2a7ce3bf1fdfacbe9d818dd81b6099a05b741aaf3848bb"}, - {file = 
"grpcio-1.66.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:bff2096bdba686019fb32d2dde45b95981f0d1490e054400f70fc9a8af34b49d"}, - {file = "grpcio-1.66.1-cp38-cp38-win32.whl", hash = "sha256:aa8ba945c96e73de29d25331b26f3e416e0c0f621e984a3ebdb2d0d0b596a3b3"}, - {file = "grpcio-1.66.1-cp38-cp38-win_amd64.whl", hash = "sha256:161d5c535c2bdf61b95080e7f0f017a1dfcb812bf54093e71e5562b16225b4ce"}, - {file = "grpcio-1.66.1-cp39-cp39-linux_armv7l.whl", hash = "sha256:d0cd7050397b3609ea51727b1811e663ffda8bda39c6a5bb69525ef12414b503"}, - {file = "grpcio-1.66.1-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:0e6c9b42ded5d02b6b1fea3a25f036a2236eeb75d0579bfd43c0018c88bf0a3e"}, - {file = "grpcio-1.66.1-cp39-cp39-manylinux_2_17_aarch64.whl", hash = "sha256:c9f80f9fad93a8cf71c7f161778ba47fd730d13a343a46258065c4deb4b550c0"}, - {file = "grpcio-1.66.1-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5dd67ed9da78e5121efc5c510f0122a972216808d6de70953a740560c572eb44"}, - {file = "grpcio-1.66.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:48b0d92d45ce3be2084b92fb5bae2f64c208fea8ceed7fccf6a7b524d3c4942e"}, - {file = "grpcio-1.66.1-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:4d813316d1a752be6f5c4360c49f55b06d4fe212d7df03253dfdae90c8a402bb"}, - {file = "grpcio-1.66.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:9c9bebc6627873ec27a70fc800f6083a13c70b23a5564788754b9ee52c5aef6c"}, - {file = "grpcio-1.66.1-cp39-cp39-win32.whl", hash = "sha256:30a1c2cf9390c894c90bbc70147f2372130ad189cffef161f0432d0157973f45"}, - {file = "grpcio-1.66.1-cp39-cp39-win_amd64.whl", hash = "sha256:17663598aadbedc3cacd7bbde432f541c8e07d2496564e22b214b22c7523dac8"}, - {file = "grpcio-1.66.1.tar.gz", hash = "sha256:35334f9c9745add3e357e3372756fd32d925bd52c41da97f4dfdafbde0bf0ee2"}, + {file = "grpcio-1.66.2-cp310-cp310-linux_armv7l.whl", hash = "sha256:fe96281713168a3270878255983d2cb1a97e034325c8c2c25169a69289d3ecfa"}, + {file = "grpcio-1.66.2-cp310-cp310-macosx_12_0_universal2.whl", hash = "sha256:73fc8f8b9b5c4a03e802b3cd0c18b2b06b410d3c1dcbef989fdeb943bd44aff7"}, + {file = "grpcio-1.66.2-cp310-cp310-manylinux_2_17_aarch64.whl", hash = "sha256:03b0b307ba26fae695e067b94cbb014e27390f8bc5ac7a3a39b7723fed085604"}, + {file = "grpcio-1.66.2-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7d69ce1f324dc2d71e40c9261d3fdbe7d4c9d60f332069ff9b2a4d8a257c7b2b"}, + {file = "grpcio-1.66.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:05bc2ceadc2529ab0b227b1310d249d95d9001cd106aa4d31e8871ad3c428d73"}, + {file = "grpcio-1.66.2-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:8ac475e8da31484efa25abb774674d837b343afb78bb3bcdef10f81a93e3d6bf"}, + {file = "grpcio-1.66.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:0be4e0490c28da5377283861bed2941d1d20ec017ca397a5df4394d1c31a9b50"}, + {file = "grpcio-1.66.2-cp310-cp310-win32.whl", hash = "sha256:4e504572433f4e72b12394977679161d495c4c9581ba34a88d843eaf0f2fbd39"}, + {file = "grpcio-1.66.2-cp310-cp310-win_amd64.whl", hash = "sha256:2018b053aa15782db2541ca01a7edb56a0bf18c77efed975392583725974b249"}, + {file = "grpcio-1.66.2-cp311-cp311-linux_armv7l.whl", hash = "sha256:2335c58560a9e92ac58ff2bc5649952f9b37d0735608242973c7a8b94a6437d8"}, + {file = "grpcio-1.66.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:45a3d462826f4868b442a6b8fdbe8b87b45eb4f5b5308168c156b21eca43f61c"}, + {file = "grpcio-1.66.2-cp311-cp311-manylinux_2_17_aarch64.whl", hash = 
"sha256:a9539f01cb04950fd4b5ab458e64a15f84c2acc273670072abe49a3f29bbad54"}, + {file = "grpcio-1.66.2-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ce89f5876662f146d4c1f695dda29d4433a5d01c8681fbd2539afff535da14d4"}, + {file = "grpcio-1.66.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d25a14af966438cddf498b2e338f88d1c9706f3493b1d73b93f695c99c5f0e2a"}, + {file = "grpcio-1.66.2-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:6001e575b8bbd89eee11960bb640b6da6ae110cf08113a075f1e2051cc596cae"}, + {file = "grpcio-1.66.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:4ea1d062c9230278793820146c95d038dc0f468cbdd172eec3363e42ff1c7d01"}, + {file = "grpcio-1.66.2-cp311-cp311-win32.whl", hash = "sha256:38b68498ff579a3b1ee8f93a05eb48dc2595795f2f62716e797dc24774c1aaa8"}, + {file = "grpcio-1.66.2-cp311-cp311-win_amd64.whl", hash = "sha256:6851de821249340bdb100df5eacfecfc4e6075fa85c6df7ee0eb213170ec8e5d"}, + {file = "grpcio-1.66.2-cp312-cp312-linux_armv7l.whl", hash = "sha256:802d84fd3d50614170649853d121baaaa305de7b65b3e01759247e768d691ddf"}, + {file = "grpcio-1.66.2-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:80fd702ba7e432994df208f27514280b4b5c6843e12a48759c9255679ad38db8"}, + {file = "grpcio-1.66.2-cp312-cp312-manylinux_2_17_aarch64.whl", hash = "sha256:12fda97ffae55e6526825daf25ad0fa37483685952b5d0f910d6405c87e3adb6"}, + {file = "grpcio-1.66.2-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:950da58d7d80abd0ea68757769c9db0a95b31163e53e5bb60438d263f4bed7b7"}, + {file = "grpcio-1.66.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e636ce23273683b00410f1971d209bf3689238cf5538d960adc3cdfe80dd0dbd"}, + {file = "grpcio-1.66.2-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:a917d26e0fe980b0ac7bfcc1a3c4ad6a9a4612c911d33efb55ed7833c749b0ee"}, + {file = "grpcio-1.66.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:49f0ca7ae850f59f828a723a9064cadbed90f1ece179d375966546499b8a2c9c"}, + {file = "grpcio-1.66.2-cp312-cp312-win32.whl", hash = "sha256:31fd163105464797a72d901a06472860845ac157389e10f12631025b3e4d0453"}, + {file = "grpcio-1.66.2-cp312-cp312-win_amd64.whl", hash = "sha256:ff1f7882e56c40b0d33c4922c15dfa30612f05fb785074a012f7cda74d1c3679"}, + {file = "grpcio-1.66.2-cp313-cp313-linux_armv7l.whl", hash = "sha256:3b00efc473b20d8bf83e0e1ae661b98951ca56111feb9b9611df8efc4fe5d55d"}, + {file = "grpcio-1.66.2-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:1caa38fb22a8578ab8393da99d4b8641e3a80abc8fd52646f1ecc92bcb8dee34"}, + {file = "grpcio-1.66.2-cp313-cp313-manylinux_2_17_aarch64.whl", hash = "sha256:c408f5ef75cfffa113cacd8b0c0e3611cbfd47701ca3cdc090594109b9fcbaed"}, + {file = "grpcio-1.66.2-cp313-cp313-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c806852deaedee9ce8280fe98955c9103f62912a5b2d5ee7e3eaa284a6d8d8e7"}, + {file = "grpcio-1.66.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f145cc21836c332c67baa6fc81099d1d27e266401565bf481948010d6ea32d46"}, + {file = "grpcio-1.66.2-cp313-cp313-musllinux_1_1_i686.whl", hash = "sha256:73e3b425c1e155730273f73e419de3074aa5c5e936771ee0e4af0814631fb30a"}, + {file = "grpcio-1.66.2-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:9c509a4f78114cbc5f0740eb3d7a74985fd2eff022971bc9bc31f8bc93e66a3b"}, + {file = "grpcio-1.66.2-cp313-cp313-win32.whl", hash = "sha256:20657d6b8cfed7db5e11b62ff7dfe2e12064ea78e93f1434d61888834bc86d75"}, + {file = "grpcio-1.66.2-cp313-cp313-win_amd64.whl", hash 
= "sha256:fb70487c95786e345af5e854ffec8cb8cc781bcc5df7930c4fbb7feaa72e1cdf"}, + {file = "grpcio-1.66.2-cp38-cp38-linux_armv7l.whl", hash = "sha256:a18e20d8321c6400185b4263e27982488cb5cdd62da69147087a76a24ef4e7e3"}, + {file = "grpcio-1.66.2-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:02697eb4a5cbe5a9639f57323b4c37bcb3ab2d48cec5da3dc2f13334d72790dd"}, + {file = "grpcio-1.66.2-cp38-cp38-manylinux_2_17_aarch64.whl", hash = "sha256:99a641995a6bc4287a6315989ee591ff58507aa1cbe4c2e70d88411c4dcc0839"}, + {file = "grpcio-1.66.2-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3ed71e81782966ffead60268bbda31ea3f725ebf8aa73634d5dda44f2cf3fb9c"}, + {file = "grpcio-1.66.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bbd27c24a4cc5e195a7f56cfd9312e366d5d61b86e36d46bbe538457ea6eb8dd"}, + {file = "grpcio-1.66.2-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:d9a9724a156c8ec6a379869b23ba3323b7ea3600851c91489b871e375f710bc8"}, + {file = "grpcio-1.66.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:d8d4732cc5052e92cea2f78b233c2e2a52998ac40cd651f40e398893ad0d06ec"}, + {file = "grpcio-1.66.2-cp38-cp38-win32.whl", hash = "sha256:7b2c86457145ce14c38e5bf6bdc19ef88e66c5fee2c3d83285c5aef026ba93b3"}, + {file = "grpcio-1.66.2-cp38-cp38-win_amd64.whl", hash = "sha256:e88264caad6d8d00e7913996030bac8ad5f26b7411495848cc218bd3a9040b6c"}, + {file = "grpcio-1.66.2-cp39-cp39-linux_armv7l.whl", hash = "sha256:c400ba5675b67025c8a9f48aa846f12a39cf0c44df5cd060e23fda5b30e9359d"}, + {file = "grpcio-1.66.2-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:66a0cd8ba6512b401d7ed46bb03f4ee455839957f28b8d61e7708056a806ba6a"}, + {file = "grpcio-1.66.2-cp39-cp39-manylinux_2_17_aarch64.whl", hash = "sha256:06de8ec0bd71be123eec15b0e0d457474931c2c407869b6c349bd9bed4adbac3"}, + {file = "grpcio-1.66.2-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:fb57870449dfcfac428afbb5a877829fcb0d6db9d9baa1148705739e9083880e"}, + {file = "grpcio-1.66.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b672abf90a964bfde2d0ecbce30f2329a47498ba75ce6f4da35a2f4532b7acbc"}, + {file = "grpcio-1.66.2-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:ad2efdbe90c73b0434cbe64ed372e12414ad03c06262279b104a029d1889d13e"}, + {file = "grpcio-1.66.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:9c3a99c519f4638e700e9e3f83952e27e2ea10873eecd7935823dab0c1c9250e"}, + {file = "grpcio-1.66.2-cp39-cp39-win32.whl", hash = "sha256:78fa51ebc2d9242c0fc5db0feecc57a9943303b46664ad89921f5079e2e4ada7"}, + {file = "grpcio-1.66.2-cp39-cp39-win_amd64.whl", hash = "sha256:728bdf36a186e7f51da73be7f8d09457a03061be848718d0edf000e709418987"}, + {file = "grpcio-1.66.2.tar.gz", hash = "sha256:563588c587b75c34b928bc428548e5b00ea38c46972181a4d8b75ba7e3f24231"}, ] [package.extras] -protobuf = ["grpcio-tools (>=1.66.1)"] +protobuf = ["grpcio-tools (>=1.66.2)"] [[package]] name = "grpcio-tools" @@ -1549,13 +1879,13 @@ files = [ [[package]] name = "httpcore" -version = "1.0.5" +version = "1.0.6" description = "A minimal low-level HTTP client." 
optional = false python-versions = ">=3.8" files = [ - {file = "httpcore-1.0.5-py3-none-any.whl", hash = "sha256:421f18bac248b25d310f3cacd198d55b8e6125c107797b609ff9b7a6ba7991b5"}, - {file = "httpcore-1.0.5.tar.gz", hash = "sha256:34a38e2f9291467ee3b44e89dd52615370e152954ba21721378a87b2960f7a61"}, + {file = "httpcore-1.0.6-py3-none-any.whl", hash = "sha256:27b59625743b85577a8c0e10e55b50b5368a4f2cfe8cc7bcfa9cf00829c2682f"}, + {file = "httpcore-1.0.6.tar.gz", hash = "sha256:73f6dbd6eb8c21bbf7ef8efad555481853f5f6acdeaff1edb0694289269ee17f"}, ] [package.dependencies] @@ -1566,7 +1896,7 @@ h11 = ">=0.13,<0.15" asyncio = ["anyio (>=4.0,<5.0)"] http2 = ["h2 (>=3,<5)"] socks = ["socksio (==1.*)"] -trio = ["trio (>=0.22.0,<0.26.0)"] +trio = ["trio (>=0.22.0,<1.0)"] [[package]] name = "httpx" @@ -1639,6 +1969,23 @@ files = [ {file = "hyperframe-6.0.1.tar.gz", hash = "sha256:ae510046231dc8e9ecb1a6586f63d2347bf4c8905914aa84ba585ae85f28a914"}, ] +[[package]] +name = "hyppo" +version = "0.4.0" +description = "A comprehensive independence testing package" +optional = true +python-versions = "*" +files = [ + {file = "hyppo-0.4.0-py3-none-any.whl", hash = "sha256:4e75565b8deb601485cd7bc1b5c3f44e6ddf329136fc81e65d011f9b4e95132f"}, +] + +[package.dependencies] +autograd = ">=1.3" +numba = ">=0.46" +numpy = ">=1.17" +scikit-learn = ">=0.19.1" +scipy = ">=1.4.0" + [[package]] name = "identify" version = "2.6.1" @@ -1816,11 +2163,22 @@ files = [ {file = "jmespath-1.0.1.tar.gz", hash = "sha256:90261b206d6defd58fdd5e85f478bf633a2901798906be2ad389150c5c60edbe"}, ] +[[package]] +name = "joblib" +version = "1.4.2" +description = "Lightweight pipelining with Python functions" +optional = true +python-versions = ">=3.8" +files = [ + {file = "joblib-1.4.2-py3-none-any.whl", hash = "sha256:06d478d5674cbc267e7496a410ee875abd68e4340feff4490bcb7afb88060ae6"}, + {file = "joblib-1.4.2.tar.gz", hash = "sha256:2382c5816b2636fbd20a09e0f4e9dad4736765fdfb7dca582943b9c1366b3f0e"}, +] + [[package]] name = "jsonpath-python" version = "1.0.6" description = "A more powerful JSONPath implementation in modern python" -optional = true +optional = false python-versions = ">=3.6" files = [ {file = "jsonpath-python-1.0.6.tar.gz", hash = "sha256:dd5be4a72d8a2995c3f583cf82bf3cd1a9544cfdabf2d22595b67aff07349666"}, @@ -1862,15 +2220,138 @@ files = [ [package.dependencies] referencing = ">=0.31.0" +[[package]] +name = "kiwisolver" +version = "1.4.7" +description = "A fast implementation of the Cassowary constraint solver" +optional = true +python-versions = ">=3.8" +files = [ + {file = "kiwisolver-1.4.7-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:8a9c83f75223d5e48b0bc9cb1bf2776cf01563e00ade8775ffe13b0b6e1af3a6"}, + {file = "kiwisolver-1.4.7-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:58370b1ffbd35407444d57057b57da5d6549d2d854fa30249771775c63b5fe17"}, + {file = "kiwisolver-1.4.7-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:aa0abdf853e09aff551db11fce173e2177d00786c688203f52c87ad7fcd91ef9"}, + {file = "kiwisolver-1.4.7-cp310-cp310-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:8d53103597a252fb3ab8b5845af04c7a26d5e7ea8122303dd7a021176a87e8b9"}, + {file = "kiwisolver-1.4.7-cp310-cp310-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:88f17c5ffa8e9462fb79f62746428dd57b46eb931698e42e990ad63103f35e6c"}, + {file = "kiwisolver-1.4.7-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:88a9ca9c710d598fd75ee5de59d5bda2684d9db36a9f50b6125eaea3969c2599"}, + {file = 
"kiwisolver-1.4.7-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f4d742cb7af1c28303a51b7a27aaee540e71bb8e24f68c736f6f2ffc82f2bf05"}, + {file = "kiwisolver-1.4.7-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e28c7fea2196bf4c2f8d46a0415c77a1c480cc0724722f23d7410ffe9842c407"}, + {file = "kiwisolver-1.4.7-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:e968b84db54f9d42046cf154e02911e39c0435c9801681e3fc9ce8a3c4130278"}, + {file = "kiwisolver-1.4.7-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:0c18ec74c0472de033e1bebb2911c3c310eef5649133dd0bedf2a169a1b269e5"}, + {file = "kiwisolver-1.4.7-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:8f0ea6da6d393d8b2e187e6a5e3fb81f5862010a40c3945e2c6d12ae45cfb2ad"}, + {file = "kiwisolver-1.4.7-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:f106407dda69ae456dd1227966bf445b157ccc80ba0dff3802bb63f30b74e895"}, + {file = "kiwisolver-1.4.7-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:84ec80df401cfee1457063732d90022f93951944b5b58975d34ab56bb150dfb3"}, + {file = "kiwisolver-1.4.7-cp310-cp310-win32.whl", hash = "sha256:71bb308552200fb2c195e35ef05de12f0c878c07fc91c270eb3d6e41698c3bcc"}, + {file = "kiwisolver-1.4.7-cp310-cp310-win_amd64.whl", hash = "sha256:44756f9fd339de0fb6ee4f8c1696cfd19b2422e0d70b4cefc1cc7f1f64045a8c"}, + {file = "kiwisolver-1.4.7-cp310-cp310-win_arm64.whl", hash = "sha256:78a42513018c41c2ffd262eb676442315cbfe3c44eed82385c2ed043bc63210a"}, + {file = "kiwisolver-1.4.7-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:d2b0e12a42fb4e72d509fc994713d099cbb15ebf1103545e8a45f14da2dfca54"}, + {file = "kiwisolver-1.4.7-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:2a8781ac3edc42ea4b90bc23e7d37b665d89423818e26eb6df90698aa2287c95"}, + {file = "kiwisolver-1.4.7-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:46707a10836894b559e04b0fd143e343945c97fd170d69a2d26d640b4e297935"}, + {file = "kiwisolver-1.4.7-cp311-cp311-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ef97b8df011141c9b0f6caf23b29379f87dd13183c978a30a3c546d2c47314cb"}, + {file = "kiwisolver-1.4.7-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3ab58c12a2cd0fc769089e6d38466c46d7f76aced0a1f54c77652446733d2d02"}, + {file = "kiwisolver-1.4.7-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:803b8e1459341c1bb56d1c5c010406d5edec8a0713a0945851290a7930679b51"}, + {file = "kiwisolver-1.4.7-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f9a9e8a507420fe35992ee9ecb302dab68550dedc0da9e2880dd88071c5fb052"}, + {file = "kiwisolver-1.4.7-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:18077b53dc3bb490e330669a99920c5e6a496889ae8c63b58fbc57c3d7f33a18"}, + {file = "kiwisolver-1.4.7-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:6af936f79086a89b3680a280c47ea90b4df7047b5bdf3aa5c524bbedddb9e545"}, + {file = "kiwisolver-1.4.7-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:3abc5b19d24af4b77d1598a585b8a719beb8569a71568b66f4ebe1fb0449460b"}, + {file = "kiwisolver-1.4.7-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:933d4de052939d90afbe6e9d5273ae05fb836cc86c15b686edd4b3560cc0ee36"}, + {file = "kiwisolver-1.4.7-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:65e720d2ab2b53f1f72fb5da5fb477455905ce2c88aaa671ff0a447c2c80e8e3"}, + {file = "kiwisolver-1.4.7-cp311-cp311-musllinux_1_2_x86_64.whl", hash = 
"sha256:3bf1ed55088f214ba6427484c59553123fdd9b218a42bbc8c6496d6754b1e523"}, + {file = "kiwisolver-1.4.7-cp311-cp311-win32.whl", hash = "sha256:4c00336b9dd5ad96d0a558fd18a8b6f711b7449acce4c157e7343ba92dd0cf3d"}, + {file = "kiwisolver-1.4.7-cp311-cp311-win_amd64.whl", hash = "sha256:929e294c1ac1e9f615c62a4e4313ca1823ba37326c164ec720a803287c4c499b"}, + {file = "kiwisolver-1.4.7-cp311-cp311-win_arm64.whl", hash = "sha256:e33e8fbd440c917106b237ef1a2f1449dfbb9b6f6e1ce17c94cd6a1e0d438376"}, + {file = "kiwisolver-1.4.7-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:5360cc32706dab3931f738d3079652d20982511f7c0ac5711483e6eab08efff2"}, + {file = "kiwisolver-1.4.7-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:942216596dc64ddb25adb215c3c783215b23626f8d84e8eff8d6d45c3f29f75a"}, + {file = "kiwisolver-1.4.7-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:48b571ecd8bae15702e4f22d3ff6a0f13e54d3d00cd25216d5e7f658242065ee"}, + {file = "kiwisolver-1.4.7-cp312-cp312-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ad42ba922c67c5f219097b28fae965e10045ddf145d2928bfac2eb2e17673640"}, + {file = "kiwisolver-1.4.7-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:612a10bdae23404a72941a0fc8fa2660c6ea1217c4ce0dbcab8a8f6543ea9e7f"}, + {file = "kiwisolver-1.4.7-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9e838bba3a3bac0fe06d849d29772eb1afb9745a59710762e4ba3f4cb8424483"}, + {file = "kiwisolver-1.4.7-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:22f499f6157236c19f4bbbd472fa55b063db77a16cd74d49afe28992dff8c258"}, + {file = "kiwisolver-1.4.7-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:693902d433cf585133699972b6d7c42a8b9f8f826ebcaf0132ff55200afc599e"}, + {file = "kiwisolver-1.4.7-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:4e77f2126c3e0b0d055f44513ed349038ac180371ed9b52fe96a32aa071a5107"}, + {file = "kiwisolver-1.4.7-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:657a05857bda581c3656bfc3b20e353c232e9193eb167766ad2dc58b56504948"}, + {file = "kiwisolver-1.4.7-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:4bfa75a048c056a411f9705856abfc872558e33c055d80af6a380e3658766038"}, + {file = "kiwisolver-1.4.7-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:34ea1de54beef1c104422d210c47c7d2a4999bdecf42c7b5718fbe59a4cac383"}, + {file = "kiwisolver-1.4.7-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:90da3b5f694b85231cf93586dad5e90e2d71b9428f9aad96952c99055582f520"}, + {file = "kiwisolver-1.4.7-cp312-cp312-win32.whl", hash = "sha256:18e0cca3e008e17fe9b164b55735a325140a5a35faad8de92dd80265cd5eb80b"}, + {file = "kiwisolver-1.4.7-cp312-cp312-win_amd64.whl", hash = "sha256:58cb20602b18f86f83a5c87d3ee1c766a79c0d452f8def86d925e6c60fbf7bfb"}, + {file = "kiwisolver-1.4.7-cp312-cp312-win_arm64.whl", hash = "sha256:f5a8b53bdc0b3961f8b6125e198617c40aeed638b387913bf1ce78afb1b0be2a"}, + {file = "kiwisolver-1.4.7-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:2e6039dcbe79a8e0f044f1c39db1986a1b8071051efba3ee4d74f5b365f5226e"}, + {file = "kiwisolver-1.4.7-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:a1ecf0ac1c518487d9d23b1cd7139a6a65bc460cd101ab01f1be82ecf09794b6"}, + {file = "kiwisolver-1.4.7-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:7ab9ccab2b5bd5702ab0803676a580fffa2aa178c2badc5557a84cc943fcf750"}, + {file = 
"kiwisolver-1.4.7-cp313-cp313-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f816dd2277f8d63d79f9c8473a79fe54047bc0467754962840782c575522224d"}, + {file = "kiwisolver-1.4.7-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cf8bcc23ceb5a1b624572a1623b9f79d2c3b337c8c455405ef231933a10da379"}, + {file = "kiwisolver-1.4.7-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:dea0bf229319828467d7fca8c7c189780aa9ff679c94539eed7532ebe33ed37c"}, + {file = "kiwisolver-1.4.7-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7c06a4c7cf15ec739ce0e5971b26c93638730090add60e183530d70848ebdd34"}, + {file = "kiwisolver-1.4.7-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:913983ad2deb14e66d83c28b632fd35ba2b825031f2fa4ca29675e665dfecbe1"}, + {file = "kiwisolver-1.4.7-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:5337ec7809bcd0f424c6b705ecf97941c46279cf5ed92311782c7c9c2026f07f"}, + {file = "kiwisolver-1.4.7-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:4c26ed10c4f6fa6ddb329a5120ba3b6db349ca192ae211e882970bfc9d91420b"}, + {file = "kiwisolver-1.4.7-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:c619b101e6de2222c1fcb0531e1b17bbffbe54294bfba43ea0d411d428618c27"}, + {file = "kiwisolver-1.4.7-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:073a36c8273647592ea332e816e75ef8da5c303236ec0167196793eb1e34657a"}, + {file = "kiwisolver-1.4.7-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:3ce6b2b0231bda412463e152fc18335ba32faf4e8c23a754ad50ffa70e4091ee"}, + {file = "kiwisolver-1.4.7-cp313-cp313-win32.whl", hash = "sha256:f4c9aee212bc89d4e13f58be11a56cc8036cabad119259d12ace14b34476fd07"}, + {file = "kiwisolver-1.4.7-cp313-cp313-win_amd64.whl", hash = "sha256:8a3ec5aa8e38fc4c8af308917ce12c536f1c88452ce554027e55b22cbbfbff76"}, + {file = "kiwisolver-1.4.7-cp313-cp313-win_arm64.whl", hash = "sha256:76c8094ac20ec259471ac53e774623eb62e6e1f56cd8690c67ce6ce4fcb05650"}, + {file = "kiwisolver-1.4.7-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:5d5abf8f8ec1f4e22882273c423e16cae834c36856cac348cfbfa68e01c40f3a"}, + {file = "kiwisolver-1.4.7-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:aeb3531b196ef6f11776c21674dba836aeea9d5bd1cf630f869e3d90b16cfade"}, + {file = "kiwisolver-1.4.7-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:b7d755065e4e866a8086c9bdada157133ff466476a2ad7861828e17b6026e22c"}, + {file = "kiwisolver-1.4.7-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:08471d4d86cbaec61f86b217dd938a83d85e03785f51121e791a6e6689a3be95"}, + {file = "kiwisolver-1.4.7-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7bbfcb7165ce3d54a3dfbe731e470f65739c4c1f85bb1018ee912bae139e263b"}, + {file = "kiwisolver-1.4.7-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5d34eb8494bea691a1a450141ebb5385e4b69d38bb8403b5146ad279f4b30fa3"}, + {file = "kiwisolver-1.4.7-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:9242795d174daa40105c1d86aba618e8eab7bf96ba8c3ee614da8302a9f95503"}, + {file = "kiwisolver-1.4.7-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:a0f64a48bb81af7450e641e3fe0b0394d7381e342805479178b3d335d60ca7cf"}, + {file = "kiwisolver-1.4.7-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:8e045731a5416357638d1700927529e2b8ab304811671f665b225f8bf8d8f933"}, + {file = "kiwisolver-1.4.7-cp38-cp38-musllinux_1_2_i686.whl", hash = 
"sha256:4322872d5772cae7369f8351da1edf255a604ea7087fe295411397d0cfd9655e"}, + {file = "kiwisolver-1.4.7-cp38-cp38-musllinux_1_2_ppc64le.whl", hash = "sha256:e1631290ee9271dffe3062d2634c3ecac02c83890ada077d225e081aca8aab89"}, + {file = "kiwisolver-1.4.7-cp38-cp38-musllinux_1_2_s390x.whl", hash = "sha256:edcfc407e4eb17e037bca59be0e85a2031a2ac87e4fed26d3e9df88b4165f92d"}, + {file = "kiwisolver-1.4.7-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:4d05d81ecb47d11e7f8932bd8b61b720bf0b41199358f3f5e36d38e28f0532c5"}, + {file = "kiwisolver-1.4.7-cp38-cp38-win32.whl", hash = "sha256:b38ac83d5f04b15e515fd86f312479d950d05ce2368d5413d46c088dda7de90a"}, + {file = "kiwisolver-1.4.7-cp38-cp38-win_amd64.whl", hash = "sha256:d83db7cde68459fc803052a55ace60bea2bae361fc3b7a6d5da07e11954e4b09"}, + {file = "kiwisolver-1.4.7-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:3f9362ecfca44c863569d3d3c033dbe8ba452ff8eed6f6b5806382741a1334bd"}, + {file = "kiwisolver-1.4.7-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:e8df2eb9b2bac43ef8b082e06f750350fbbaf2887534a5be97f6cf07b19d9583"}, + {file = "kiwisolver-1.4.7-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:f32d6edbc638cde7652bd690c3e728b25332acbadd7cad670cc4a02558d9c417"}, + {file = "kiwisolver-1.4.7-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:e2e6c39bd7b9372b0be21456caab138e8e69cc0fc1190a9dfa92bd45a1e6e904"}, + {file = "kiwisolver-1.4.7-cp39-cp39-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:dda56c24d869b1193fcc763f1284b9126550eaf84b88bbc7256e15028f19188a"}, + {file = "kiwisolver-1.4.7-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:79849239c39b5e1fd906556c474d9b0439ea6792b637511f3fe3a41158d89ca8"}, + {file = "kiwisolver-1.4.7-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5e3bc157fed2a4c02ec468de4ecd12a6e22818d4f09cde2c31ee3226ffbefab2"}, + {file = "kiwisolver-1.4.7-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3da53da805b71e41053dc670f9a820d1157aae77b6b944e08024d17bcd51ef88"}, + {file = "kiwisolver-1.4.7-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:8705f17dfeb43139a692298cb6637ee2e59c0194538153e83e9ee0c75c2eddde"}, + {file = "kiwisolver-1.4.7-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:82a5c2f4b87c26bb1a0ef3d16b5c4753434633b83d365cc0ddf2770c93829e3c"}, + {file = "kiwisolver-1.4.7-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:ce8be0466f4c0d585cdb6c1e2ed07232221df101a4c6f28821d2aa754ca2d9e2"}, + {file = "kiwisolver-1.4.7-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:409afdfe1e2e90e6ee7fc896f3df9a7fec8e793e58bfa0d052c8a82f99c37abb"}, + {file = "kiwisolver-1.4.7-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:5b9c3f4ee0b9a439d2415012bd1b1cc2df59e4d6a9939f4d669241d30b414327"}, + {file = "kiwisolver-1.4.7-cp39-cp39-win32.whl", hash = "sha256:a79ae34384df2b615eefca647a2873842ac3b596418032bef9a7283675962644"}, + {file = "kiwisolver-1.4.7-cp39-cp39-win_amd64.whl", hash = "sha256:cf0438b42121a66a3a667de17e779330fc0f20b0d97d59d2f2121e182b0505e4"}, + {file = "kiwisolver-1.4.7-cp39-cp39-win_arm64.whl", hash = "sha256:764202cc7e70f767dab49e8df52c7455e8de0df5d858fa801a11aa0d882ccf3f"}, + {file = "kiwisolver-1.4.7-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:94252291e3fe68001b1dd747b4c0b3be12582839b95ad4d1b641924d68fd4643"}, + {file = "kiwisolver-1.4.7-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:5b7dfa3b546da08a9f622bb6becdb14b3e24aaa30adba66749d38f3cc7ea9706"}, + {file = 
"kiwisolver-1.4.7-pp310-pypy310_pp73-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:bd3de6481f4ed8b734da5df134cd5a6a64fe32124fe83dde1e5b5f29fe30b1e6"}, + {file = "kiwisolver-1.4.7-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a91b5f9f1205845d488c928e8570dcb62b893372f63b8b6e98b863ebd2368ff2"}, + {file = "kiwisolver-1.4.7-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:40fa14dbd66b8b8f470d5fc79c089a66185619d31645f9b0773b88b19f7223c4"}, + {file = "kiwisolver-1.4.7-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:eb542fe7933aa09d8d8f9d9097ef37532a7df6497819d16efe4359890a2f417a"}, + {file = "kiwisolver-1.4.7-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:bfa1acfa0c54932d5607e19a2c24646fb4c1ae2694437789129cf099789a3b00"}, + {file = "kiwisolver-1.4.7-pp38-pypy38_pp73-macosx_11_0_arm64.whl", hash = "sha256:eee3ea935c3d227d49b4eb85660ff631556841f6e567f0f7bda972df6c2c9935"}, + {file = "kiwisolver-1.4.7-pp38-pypy38_pp73-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:f3160309af4396e0ed04db259c3ccbfdc3621b5559b5453075e5de555e1f3a1b"}, + {file = "kiwisolver-1.4.7-pp38-pypy38_pp73-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:a17f6a29cf8935e587cc8a4dbfc8368c55edc645283db0ce9801016f83526c2d"}, + {file = "kiwisolver-1.4.7-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:10849fb2c1ecbfae45a693c070e0320a91b35dd4bcf58172c023b994283a124d"}, + {file = "kiwisolver-1.4.7-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:ac542bf38a8a4be2dc6b15248d36315ccc65f0743f7b1a76688ffb6b5129a5c2"}, + {file = "kiwisolver-1.4.7-pp39-pypy39_pp73-macosx_10_15_x86_64.whl", hash = "sha256:8b01aac285f91ca889c800042c35ad3b239e704b150cfd3382adfc9dcc780e39"}, + {file = "kiwisolver-1.4.7-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:48be928f59a1f5c8207154f935334d374e79f2b5d212826307d072595ad76a2e"}, + {file = "kiwisolver-1.4.7-pp39-pypy39_pp73-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f37cfe618a117e50d8c240555331160d73d0411422b59b5ee217843d7b693608"}, + {file = "kiwisolver-1.4.7-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:599b5c873c63a1f6ed7eead644a8a380cfbdf5db91dcb6f85707aaab213b1674"}, + {file = "kiwisolver-1.4.7-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:801fa7802e5cfabe3ab0c81a34c323a319b097dfb5004be950482d882f3d7225"}, + {file = "kiwisolver-1.4.7-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:0c6c43471bc764fad4bc99c5c2d6d16a676b1abf844ca7c8702bdae92df01ee0"}, + {file = "kiwisolver-1.4.7.tar.gz", hash = "sha256:9893ff81bd7107f7b685d3017cc6583daadb4fc26e4a888350df530e41980a60"}, +] + [[package]] name = "litellm" -version = "1.48.1" +version = "1.48.7" description = "Library to easily interface with LLM API providers" optional = true python-versions = "!=2.7.*,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,!=3.7.*,>=3.8" files = [ - {file = "litellm-1.48.1-py3-none-any.whl", hash = "sha256:a3a03b31ba8b8eafda2b7f58c5cf3a1dfff091d2158df26a5ec08fc48a8df06e"}, - {file = "litellm-1.48.1.tar.gz", hash = "sha256:6bf2282a517835e137ff1de854f1be335416f2a671e20a1cfef51d009bf01493"}, + {file = "litellm-1.48.7-py3-none-any.whl", hash = "sha256:4971a9e681188635c2ee6dc44fe35bb2774586e9018682adcccdbb516b839c64"}, + {file = "litellm-1.48.7.tar.gz", hash = 
"sha256:ff1fef7049e9afa09598f98d1e510a6d5f252ec65c0526b8bfaf13eadfcf65e5"}, ] [package.dependencies] @@ -1890,6 +2371,36 @@ tokenizers = "*" extra-proxy = ["azure-identity (>=1.15.0,<2.0.0)", "azure-keyvault-secrets (>=4.8.0,<5.0.0)", "google-cloud-kms (>=2.21.3,<3.0.0)", "prisma (==0.11.0)", "resend (>=0.8.0,<0.9.0)"] proxy = ["PyJWT (>=2.8.0,<3.0.0)", "apscheduler (>=3.10.4,<4.0.0)", "backoff", "cryptography (>=42.0.5,<43.0.0)", "fastapi (>=0.111.0,<0.112.0)", "fastapi-sso (>=0.10.0,<0.11.0)", "gunicorn (>=22.0.0,<23.0.0)", "orjson (>=3.9.7,<4.0.0)", "pynacl (>=1.5.0,<2.0.0)", "python-multipart (>=0.0.9,<0.0.10)", "pyyaml (>=6.0.1,<7.0.0)", "rq", "uvicorn (>=0.22.0,<0.23.0)"] +[[package]] +name = "llvmlite" +version = "0.43.0" +description = "lightweight wrapper around basic LLVM functionality" +optional = true +python-versions = ">=3.9" +files = [ + {file = "llvmlite-0.43.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:a289af9a1687c6cf463478f0fa8e8aa3b6fb813317b0d70bf1ed0759eab6f761"}, + {file = "llvmlite-0.43.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:6d4fd101f571a31acb1559ae1af30f30b1dc4b3186669f92ad780e17c81e91bc"}, + {file = "llvmlite-0.43.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7d434ec7e2ce3cc8f452d1cd9a28591745de022f931d67be688a737320dfcead"}, + {file = "llvmlite-0.43.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6912a87782acdff6eb8bf01675ed01d60ca1f2551f8176a300a886f09e836a6a"}, + {file = "llvmlite-0.43.0-cp310-cp310-win_amd64.whl", hash = "sha256:14f0e4bf2fd2d9a75a3534111e8ebeb08eda2f33e9bdd6dfa13282afacdde0ed"}, + {file = "llvmlite-0.43.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:3e8d0618cb9bfe40ac38a9633f2493d4d4e9fcc2f438d39a4e854f39cc0f5f98"}, + {file = "llvmlite-0.43.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:e0a9a1a39d4bf3517f2af9d23d479b4175ead205c592ceeb8b89af48a327ea57"}, + {file = "llvmlite-0.43.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c1da416ab53e4f7f3bc8d4eeba36d801cc1894b9fbfbf2022b29b6bad34a7df2"}, + {file = "llvmlite-0.43.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:977525a1e5f4059316b183fb4fd34fa858c9eade31f165427a3977c95e3ee749"}, + {file = "llvmlite-0.43.0-cp311-cp311-win_amd64.whl", hash = "sha256:d5bd550001d26450bd90777736c69d68c487d17bf371438f975229b2b8241a91"}, + {file = "llvmlite-0.43.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:f99b600aa7f65235a5a05d0b9a9f31150c390f31261f2a0ba678e26823ec38f7"}, + {file = "llvmlite-0.43.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:35d80d61d0cda2d767f72de99450766250560399edc309da16937b93d3b676e7"}, + {file = "llvmlite-0.43.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:eccce86bba940bae0d8d48ed925f21dbb813519169246e2ab292b5092aba121f"}, + {file = "llvmlite-0.43.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:df6509e1507ca0760787a199d19439cc887bfd82226f5af746d6977bd9f66844"}, + {file = "llvmlite-0.43.0-cp312-cp312-win_amd64.whl", hash = "sha256:7a2872ee80dcf6b5dbdc838763d26554c2a18aa833d31a2635bff16aafefb9c9"}, + {file = "llvmlite-0.43.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9cd2a7376f7b3367019b664c21f0c61766219faa3b03731113ead75107f3b66c"}, + {file = "llvmlite-0.43.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:18e9953c748b105668487b7c81a3e97b046d8abf95c4ddc0cd3c94f4e4651ae8"}, + {file = 
"llvmlite-0.43.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:74937acd22dc11b33946b67dca7680e6d103d6e90eeaaaf932603bec6fe7b03a"}, + {file = "llvmlite-0.43.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc9efc739cc6ed760f795806f67889923f7274276f0eb45092a1473e40d9b867"}, + {file = "llvmlite-0.43.0-cp39-cp39-win_amd64.whl", hash = "sha256:47e147cdda9037f94b399bf03bfd8a6b6b1f2f90be94a454e3386f006455a9b4"}, + {file = "llvmlite-0.43.0.tar.gz", hash = "sha256:ae2b5b5c3ef67354824fb75517c8db5fbe93bc02cd9671f3c62271626bc041d5"}, +] + [[package]] name = "loguru" version = "0.7.2" @@ -2150,7 +2661,7 @@ files = [ name = "marshmallow" version = "3.22.0" description = "A lightweight library for converting complex datatypes to and from native Python datatypes." -optional = true +optional = false python-versions = ">=3.8" files = [ {file = "marshmallow-3.22.0-py3-none-any.whl", hash = "sha256:71a2dce49ef901c3f97ed296ae5051135fd3febd2bf43afe0ae9a82143a494d9"}, @@ -2165,6 +2676,69 @@ dev = ["marshmallow[tests]", "pre-commit (>=3.5,<4.0)", "tox"] docs = ["alabaster (==1.0.0)", "autodocsumm (==0.2.13)", "sphinx (==8.0.2)", "sphinx-issues (==4.1.0)", "sphinx-version-warning (==1.1.2)"] tests = ["pytest", "pytz", "simplejson"] +[[package]] +name = "matplotlib" +version = "3.9.2" +description = "Python plotting package" +optional = true +python-versions = ">=3.9" +files = [ + {file = "matplotlib-3.9.2-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:9d78bbc0cbc891ad55b4f39a48c22182e9bdaea7fc0e5dbd364f49f729ca1bbb"}, + {file = "matplotlib-3.9.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c375cc72229614632c87355366bdf2570c2dac01ac66b8ad048d2dabadf2d0d4"}, + {file = "matplotlib-3.9.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1d94ff717eb2bd0b58fe66380bd8b14ac35f48a98e7c6765117fe67fb7684e64"}, + {file = "matplotlib-3.9.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ab68d50c06938ef28681073327795c5db99bb4666214d2d5f880ed11aeaded66"}, + {file = "matplotlib-3.9.2-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:65aacf95b62272d568044531e41de26285d54aec8cb859031f511f84bd8b495a"}, + {file = "matplotlib-3.9.2-cp310-cp310-win_amd64.whl", hash = "sha256:3fd595f34aa8a55b7fc8bf9ebea8aa665a84c82d275190a61118d33fbc82ccae"}, + {file = "matplotlib-3.9.2-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:d8dd059447824eec055e829258ab092b56bb0579fc3164fa09c64f3acd478772"}, + {file = "matplotlib-3.9.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:c797dac8bb9c7a3fd3382b16fe8f215b4cf0f22adccea36f1545a6d7be310b41"}, + {file = "matplotlib-3.9.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d719465db13267bcef19ea8954a971db03b9f48b4647e3860e4bc8e6ed86610f"}, + {file = "matplotlib-3.9.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8912ef7c2362f7193b5819d17dae8629b34a95c58603d781329712ada83f9447"}, + {file = "matplotlib-3.9.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:7741f26a58a240f43bee74965c4882b6c93df3e7eb3de160126d8c8f53a6ae6e"}, + {file = "matplotlib-3.9.2-cp311-cp311-win_amd64.whl", hash = "sha256:ae82a14dab96fbfad7965403c643cafe6515e386de723e498cf3eeb1e0b70cc7"}, + {file = "matplotlib-3.9.2-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:ac43031375a65c3196bee99f6001e7fa5bdfb00ddf43379d3c0609bdca042df9"}, + {file = "matplotlib-3.9.2-cp312-cp312-macosx_11_0_arm64.whl", hash = 
"sha256:be0fc24a5e4531ae4d8e858a1a548c1fe33b176bb13eff7f9d0d38ce5112a27d"}, + {file = "matplotlib-3.9.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bf81de2926c2db243c9b2cbc3917619a0fc85796c6ba4e58f541df814bbf83c7"}, + {file = "matplotlib-3.9.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f6ee45bc4245533111ced13f1f2cace1e7f89d1c793390392a80c139d6cf0e6c"}, + {file = "matplotlib-3.9.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:306c8dfc73239f0e72ac50e5a9cf19cc4e8e331dd0c54f5e69ca8758550f1e1e"}, + {file = "matplotlib-3.9.2-cp312-cp312-win_amd64.whl", hash = "sha256:5413401594cfaff0052f9d8b1aafc6d305b4bd7c4331dccd18f561ff7e1d3bd3"}, + {file = "matplotlib-3.9.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:18128cc08f0d3cfff10b76baa2f296fc28c4607368a8402de61bb3f2eb33c7d9"}, + {file = "matplotlib-3.9.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:4876d7d40219e8ae8bb70f9263bcbe5714415acfdf781086601211335e24f8aa"}, + {file = "matplotlib-3.9.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6d9f07a80deab4bb0b82858a9e9ad53d1382fd122be8cde11080f4e7dfedb38b"}, + {file = "matplotlib-3.9.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f7c0410f181a531ec4e93bbc27692f2c71a15c2da16766f5ba9761e7ae518413"}, + {file = "matplotlib-3.9.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:909645cce2dc28b735674ce0931a4ac94e12f5b13f6bb0b5a5e65e7cea2c192b"}, + {file = "matplotlib-3.9.2-cp313-cp313-win_amd64.whl", hash = "sha256:f32c7410c7f246838a77d6d1eff0c0f87f3cb0e7c4247aebea71a6d5a68cab49"}, + {file = "matplotlib-3.9.2-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:37e51dd1c2db16ede9cfd7b5cabdfc818b2c6397c83f8b10e0e797501c963a03"}, + {file = "matplotlib-3.9.2-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:b82c5045cebcecd8496a4d694d43f9cc84aeeb49fe2133e036b207abe73f4d30"}, + {file = "matplotlib-3.9.2-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f053c40f94bc51bc03832a41b4f153d83f2062d88c72b5e79997072594e97e51"}, + {file = "matplotlib-3.9.2-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dbe196377a8248972f5cede786d4c5508ed5f5ca4a1e09b44bda889958b33f8c"}, + {file = "matplotlib-3.9.2-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:5816b1e1fe8c192cbc013f8f3e3368ac56fbecf02fb41b8f8559303f24c5015e"}, + {file = "matplotlib-3.9.2-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:cef2a73d06601437be399908cf13aee74e86932a5ccc6ccdf173408ebc5f6bb2"}, + {file = "matplotlib-3.9.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:e0830e188029c14e891fadd99702fd90d317df294c3298aad682739c5533721a"}, + {file = "matplotlib-3.9.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:03ba9c1299c920964e8d3857ba27173b4dbb51ca4bab47ffc2c2ba0eb5e2cbc5"}, + {file = "matplotlib-3.9.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1cd93b91ab47a3616b4d3c42b52f8363b88ca021e340804c6ab2536344fad9ca"}, + {file = "matplotlib-3.9.2-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:6d1ce5ed2aefcdce11904fc5bbea7d9c21fff3d5f543841edf3dea84451a09ea"}, + {file = "matplotlib-3.9.2-cp39-cp39-win_amd64.whl", hash = "sha256:b2696efdc08648536efd4e1601b5fd491fd47f4db97a5fbfd175549a7365c1b2"}, + {file = "matplotlib-3.9.2-pp39-pypy39_pp73-macosx_10_15_x86_64.whl", hash = "sha256:d52a3b618cb1cbb769ce2ee1dcdb333c3ab6e823944e9a2d36e37253815f9556"}, + {file = 
"matplotlib-3.9.2-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:039082812cacd6c6bec8e17a9c1e6baca230d4116d522e81e1f63a74d01d2e21"}, + {file = "matplotlib-3.9.2-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6758baae2ed64f2331d4fd19be38b7b4eae3ecec210049a26b6a4f3ae1c85dcc"}, + {file = "matplotlib-3.9.2-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:050598c2b29e0b9832cde72bcf97627bf00262adbc4a54e2b856426bb2ef0697"}, + {file = "matplotlib-3.9.2.tar.gz", hash = "sha256:96ab43906269ca64a6366934106fa01534454a69e471b7bf3d79083981aaab92"}, +] + +[package.dependencies] +contourpy = ">=1.0.1" +cycler = ">=0.10" +fonttools = ">=4.22.0" +kiwisolver = ">=1.3.1" +numpy = ">=1.23" +packaging = ">=20.0" +pillow = ">=8" +pyparsing = ">=2.3.1" +python-dateutil = ">=2.7" + +[package.extras] +dev = ["meson-python (>=0.13.1)", "numpy (>=1.25)", "pybind11 (>=2.6)", "setuptools (>=64)", "setuptools_scm (>=7)"] + [[package]] name = "mccabe" version = "0.7.0" @@ -2349,25 +2923,6 @@ files = [ {file = "mypy_extensions-1.0.0.tar.gz", hash = "sha256:75dbf8955dc00442a438fc4d0666508a9a97b6bd41aa2f0ffe9d2f2725af0782"}, ] -[[package]] -name = "neo4j" -version = "5.24.0" -description = "Neo4j Bolt driver for Python" -optional = true -python-versions = ">=3.7" -files = [ - {file = "neo4j-5.24.0-py3-none-any.whl", hash = "sha256:5b4705cfe8130020f33e75e31ad3fcfe67ee958e07d0c3c4936e9c8245a1ea58"}, - {file = "neo4j-5.24.0.tar.gz", hash = "sha256:499ca35135847528f4ee70314bd49c8b08b031e4dfd588bb06c1c2fb35d729e2"}, -] - -[package.dependencies] -pytz = "*" - -[package.extras] -numpy = ["numpy (>=1.7.0,<2.0.0)"] -pandas = ["numpy (>=1.7.0,<2.0.0)", "pandas (>=1.1.0,<3.0.0)"] -pyarrow = ["pyarrow (>=1.0.0)"] - [[package]] name = "nest-asyncio" version = "1.6.0" @@ -2379,6 +2934,24 @@ files = [ {file = "nest_asyncio-1.6.0.tar.gz", hash = "sha256:6f172d5449aca15afd6c646851f4e31e02c598d553a667e38cafa997cfec55fe"}, ] +[[package]] +name = "networkx" +version = "3.3" +description = "Python package for creating and manipulating graphs and networks" +optional = true +python-versions = ">=3.10" +files = [ + {file = "networkx-3.3-py3-none-any.whl", hash = "sha256:28575580c6ebdaf4505b22c6256a2b9de86b316dc63ba9e93abde3d78dfdbcf2"}, + {file = "networkx-3.3.tar.gz", hash = "sha256:0c127d8b2f4865f59ae9cb8aafcd60b5c70f3241ebd66f7defad7c4ab90126c9"}, +] + +[package.extras] +default = ["matplotlib (>=3.6)", "numpy (>=1.23)", "pandas (>=1.4)", "scipy (>=1.9,!=1.11.0,!=1.11.1)"] +developer = ["changelist (==0.5)", "mypy (>=1.1)", "pre-commit (>=3.2)", "rtoml"] +doc = ["myst-nb (>=1.0)", "numpydoc (>=1.7)", "pillow (>=9.4)", "pydata-sphinx-theme (>=0.14)", "sphinx (>=7)", "sphinx-gallery (>=0.14)", "texext (>=0.6.7)"] +extra = ["lxml (>=4.6)", "pydot (>=2.0)", "pygraphviz (>=1.12)", "sympy (>=1.10)"] +test = ["pytest (>=7.2)", "pytest-cov (>=4.0)"] + [[package]] name = "nodeenv" version = "1.9.1" @@ -2390,6 +2963,40 @@ files = [ {file = "nodeenv-1.9.1.tar.gz", hash = "sha256:6ec12890a2dab7946721edbfbcd91f3319c6ccc9aec47be7c7e6b7011ee6645f"}, ] +[[package]] +name = "numba" +version = "0.60.0" +description = "compiling Python code using LLVM" +optional = true +python-versions = ">=3.9" +files = [ + {file = "numba-0.60.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:5d761de835cd38fb400d2c26bb103a2726f548dc30368853121d66201672e651"}, + {file = "numba-0.60.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:159e618ef213fba758837f9837fb402bbe65326e60ba0633dbe6c7f274d42c1b"}, + {file = 
"numba-0.60.0-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:1527dc578b95c7c4ff248792ec33d097ba6bef9eda466c948b68dfc995c25781"}, + {file = "numba-0.60.0-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:fe0b28abb8d70f8160798f4de9d486143200f34458d34c4a214114e445d7124e"}, + {file = "numba-0.60.0-cp310-cp310-win_amd64.whl", hash = "sha256:19407ced081d7e2e4b8d8c36aa57b7452e0283871c296e12d798852bc7d7f198"}, + {file = "numba-0.60.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:a17b70fc9e380ee29c42717e8cc0bfaa5556c416d94f9aa96ba13acb41bdece8"}, + {file = "numba-0.60.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:3fb02b344a2a80efa6f677aa5c40cd5dd452e1b35f8d1c2af0dfd9ada9978e4b"}, + {file = "numba-0.60.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:5f4fde652ea604ea3c86508a3fb31556a6157b2c76c8b51b1d45eb40c8598703"}, + {file = "numba-0.60.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:4142d7ac0210cc86432b818338a2bc368dc773a2f5cf1e32ff7c5b378bd63ee8"}, + {file = "numba-0.60.0-cp311-cp311-win_amd64.whl", hash = "sha256:cac02c041e9b5bc8cf8f2034ff6f0dbafccd1ae9590dc146b3a02a45e53af4e2"}, + {file = "numba-0.60.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:d7da4098db31182fc5ffe4bc42c6f24cd7d1cb8a14b59fd755bfee32e34b8404"}, + {file = "numba-0.60.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:38d6ea4c1f56417076ecf8fc327c831ae793282e0ff51080c5094cb726507b1c"}, + {file = "numba-0.60.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:62908d29fb6a3229c242e981ca27e32a6e606cc253fc9e8faeb0e48760de241e"}, + {file = "numba-0.60.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:0ebaa91538e996f708f1ab30ef4d3ddc344b64b5227b67a57aa74f401bb68b9d"}, + {file = "numba-0.60.0-cp312-cp312-win_amd64.whl", hash = "sha256:f75262e8fe7fa96db1dca93d53a194a38c46da28b112b8a4aca168f0df860347"}, + {file = "numba-0.60.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:01ef4cd7d83abe087d644eaa3d95831b777aa21d441a23703d649e06b8e06b74"}, + {file = "numba-0.60.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:819a3dfd4630d95fd574036f99e47212a1af41cbcb019bf8afac63ff56834449"}, + {file = "numba-0.60.0-cp39-cp39-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:0b983bd6ad82fe868493012487f34eae8bf7dd94654951404114f23c3466d34b"}, + {file = "numba-0.60.0-cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:c151748cd269ddeab66334bd754817ffc0cabd9433acb0f551697e5151917d25"}, + {file = "numba-0.60.0-cp39-cp39-win_amd64.whl", hash = "sha256:3031547a015710140e8c87226b4cfe927cac199835e5bf7d4fe5cb64e814e3ab"}, + {file = "numba-0.60.0.tar.gz", hash = "sha256:5df6158e5584eece5fc83294b949fd30b9f1125df7708862205217e068aabf16"}, +] + +[package.dependencies] +llvmlite = "==0.43.*" +numpy = ">=1.22,<2.1" + [[package]] name = "numpy" version = "1.26.4" @@ -2451,13 +3058,13 @@ httpx = ">=0.27.0,<0.28.0" [[package]] name = "openai" -version = "1.48.0" +version = "1.51.0" description = "The official Python library for the openai API" optional = false python-versions = ">=3.7.1" files = [ - {file = "openai-1.48.0-py3-none-any.whl", hash = "sha256:7c4af223f0bf615ce4a12453729952c9a8b04ffe8c78aa77981b12fd970149cf"}, - {file = "openai-1.48.0.tar.gz", hash = "sha256:1d3b69ea62c287c4885a6f3ce840768564cd5f52c60ac5f890fef80d43cc4799"}, + {file = "openai-1.51.0-py3-none-any.whl", hash = 
"sha256:d9affafb7e51e5a27dce78589d4964ce4d6f6d560307265933a94b2e3f3c5d2c"}, + {file = "openai-1.51.0.tar.gz", hash = "sha256:8dc4f9d75ccdd5466fc8c99a952186eddceb9fd6ba694044773f3736a847149d"}, ] [package.dependencies] @@ -2491,7 +3098,7 @@ et-xmlfile = "*" name = "ordered-set" version = "4.1.0" description = "An OrderedSet is a custom MutableSet that remembers its order, so that every" -optional = true +optional = false python-versions = ">=3.7" files = [ {file = "ordered-set-4.1.0.tar.gz", hash = "sha256:694a8e44c87657c59292ede72891eb91d34131f6531463aab3009191c77364a8"}, @@ -2512,6 +3119,92 @@ files = [ {file = "packaging-24.1.tar.gz", hash = "sha256:026ed72c8ed3fcce5bf8950572258698927fd1dbda10a5e981cdf0ac37f4f002"}, ] +[[package]] +name = "pandas" +version = "2.2.3" +description = "Powerful data structures for data analysis, time series, and statistics" +optional = true +python-versions = ">=3.9" +files = [ + {file = "pandas-2.2.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:1948ddde24197a0f7add2bdc4ca83bf2b1ef84a1bc8ccffd95eda17fd836ecb5"}, + {file = "pandas-2.2.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:381175499d3802cde0eabbaf6324cce0c4f5d52ca6f8c377c29ad442f50f6348"}, + {file = "pandas-2.2.3-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d9c45366def9a3dd85a6454c0e7908f2b3b8e9c138f5dc38fed7ce720d8453ed"}, + {file = "pandas-2.2.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:86976a1c5b25ae3f8ccae3a5306e443569ee3c3faf444dfd0f41cda24667ad57"}, + {file = "pandas-2.2.3-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:b8661b0238a69d7aafe156b7fa86c44b881387509653fdf857bebc5e4008ad42"}, + {file = "pandas-2.2.3-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:37e0aced3e8f539eccf2e099f65cdb9c8aa85109b0be6e93e2baff94264bdc6f"}, + {file = "pandas-2.2.3-cp310-cp310-win_amd64.whl", hash = "sha256:56534ce0746a58afaf7942ba4863e0ef81c9c50d3f0ae93e9497d6a41a057645"}, + {file = "pandas-2.2.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:66108071e1b935240e74525006034333f98bcdb87ea116de573a6a0dccb6c039"}, + {file = "pandas-2.2.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:7c2875855b0ff77b2a64a0365e24455d9990730d6431b9e0ee18ad8acee13dbd"}, + {file = "pandas-2.2.3-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:cd8d0c3be0515c12fed0bdbae072551c8b54b7192c7b1fda0ba56059a0179698"}, + {file = "pandas-2.2.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c124333816c3a9b03fbeef3a9f230ba9a737e9e5bb4060aa2107a86cc0a497fc"}, + {file = "pandas-2.2.3-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:63cc132e40a2e084cf01adf0775b15ac515ba905d7dcca47e9a251819c575ef3"}, + {file = "pandas-2.2.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:29401dbfa9ad77319367d36940cd8a0b3a11aba16063e39632d98b0e931ddf32"}, + {file = "pandas-2.2.3-cp311-cp311-win_amd64.whl", hash = "sha256:3fc6873a41186404dad67245896a6e440baacc92f5b716ccd1bc9ed2995ab2c5"}, + {file = "pandas-2.2.3-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:b1d432e8d08679a40e2a6d8b2f9770a5c21793a6f9f47fdd52c5ce1948a5a8a9"}, + {file = "pandas-2.2.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:a5a1595fe639f5988ba6a8e5bc9649af3baf26df3998a0abe56c02609392e0a4"}, + {file = "pandas-2.2.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:5de54125a92bb4d1c051c0659e6fcb75256bf799a732a87184e5ea503965bce3"}, + {file = 
"pandas-2.2.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fffb8ae78d8af97f849404f21411c95062db1496aeb3e56f146f0355c9989319"}, + {file = "pandas-2.2.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:6dfcb5ee8d4d50c06a51c2fffa6cff6272098ad6540aed1a76d15fb9318194d8"}, + {file = "pandas-2.2.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:062309c1b9ea12a50e8ce661145c6aab431b1e99530d3cd60640e255778bd43a"}, + {file = "pandas-2.2.3-cp312-cp312-win_amd64.whl", hash = "sha256:59ef3764d0fe818125a5097d2ae867ca3fa64df032331b7e0917cf5d7bf66b13"}, + {file = "pandas-2.2.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:f00d1345d84d8c86a63e476bb4955e46458b304b9575dcf71102b5c705320015"}, + {file = "pandas-2.2.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:3508d914817e153ad359d7e069d752cdd736a247c322d932eb89e6bc84217f28"}, + {file = "pandas-2.2.3-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:22a9d949bfc9a502d320aa04e5d02feab689d61da4e7764b62c30b991c42c5f0"}, + {file = "pandas-2.2.3-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f3a255b2c19987fbbe62a9dfd6cff7ff2aa9ccab3fc75218fd4b7530f01efa24"}, + {file = "pandas-2.2.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:800250ecdadb6d9c78eae4990da62743b857b470883fa27f652db8bdde7f6659"}, + {file = "pandas-2.2.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:6374c452ff3ec675a8f46fd9ab25c4ad0ba590b71cf0656f8b6daa5202bca3fb"}, + {file = "pandas-2.2.3-cp313-cp313-win_amd64.whl", hash = "sha256:61c5ad4043f791b61dd4752191d9f07f0ae412515d59ba8f005832a532f8736d"}, + {file = "pandas-2.2.3-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:3b71f27954685ee685317063bf13c7709a7ba74fc996b84fc6821c59b0f06468"}, + {file = "pandas-2.2.3-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:38cf8125c40dae9d5acc10fa66af8ea6fdf760b2714ee482ca691fc66e6fcb18"}, + {file = "pandas-2.2.3-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:ba96630bc17c875161df3818780af30e43be9b166ce51c9a18c1feae342906c2"}, + {file = "pandas-2.2.3-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1db71525a1538b30142094edb9adc10be3f3e176748cd7acc2240c2f2e5aa3a4"}, + {file = "pandas-2.2.3-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:15c0e1e02e93116177d29ff83e8b1619c93ddc9c49083f237d4312337a61165d"}, + {file = "pandas-2.2.3-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:ad5b65698ab28ed8d7f18790a0dc58005c7629f227be9ecc1072aa74c0c1d43a"}, + {file = "pandas-2.2.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:bc6b93f9b966093cb0fd62ff1a7e4c09e6d546ad7c1de191767baffc57628f39"}, + {file = "pandas-2.2.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:5dbca4c1acd72e8eeef4753eeca07de9b1db4f398669d5994086f788a5d7cc30"}, + {file = "pandas-2.2.3-cp39-cp39-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:8cd6d7cc958a3910f934ea8dbdf17b2364827bb4dafc38ce6eef6bb3d65ff09c"}, + {file = "pandas-2.2.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:99df71520d25fade9db7c1076ac94eb994f4d2673ef2aa2e86ee039b6746d20c"}, + {file = "pandas-2.2.3-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:31d0ced62d4ea3e231a9f228366919a5ea0b07440d9d4dac345376fd8e1477ea"}, + {file = "pandas-2.2.3-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:7eee9e7cea6adf3e3d24e304ac6b8300646e2a5d1cd3a3c2abed9101b0846761"}, + {file = "pandas-2.2.3-cp39-cp39-win_amd64.whl", hash = 
"sha256:4850ba03528b6dd51d6c5d273c46f183f39a9baf3f0143e566b89450965b105e"}, + {file = "pandas-2.2.3.tar.gz", hash = "sha256:4f18ba62b61d7e192368b84517265a99b4d7ee8912f8708660fb4a366cc82667"}, +] + +[package.dependencies] +numpy = [ + {version = ">=1.22.4", markers = "python_version < \"3.11\""}, + {version = ">=1.23.2", markers = "python_version == \"3.11\""}, + {version = ">=1.26.0", markers = "python_version >= \"3.12\""}, +] +python-dateutil = ">=2.8.2" +pytz = ">=2020.1" +tzdata = ">=2022.7" + +[package.extras] +all = ["PyQt5 (>=5.15.9)", "SQLAlchemy (>=2.0.0)", "adbc-driver-postgresql (>=0.8.0)", "adbc-driver-sqlite (>=0.8.0)", "beautifulsoup4 (>=4.11.2)", "bottleneck (>=1.3.6)", "dataframe-api-compat (>=0.1.7)", "fastparquet (>=2022.12.0)", "fsspec (>=2022.11.0)", "gcsfs (>=2022.11.0)", "html5lib (>=1.1)", "hypothesis (>=6.46.1)", "jinja2 (>=3.1.2)", "lxml (>=4.9.2)", "matplotlib (>=3.6.3)", "numba (>=0.56.4)", "numexpr (>=2.8.4)", "odfpy (>=1.4.1)", "openpyxl (>=3.1.0)", "pandas-gbq (>=0.19.0)", "psycopg2 (>=2.9.6)", "pyarrow (>=10.0.1)", "pymysql (>=1.0.2)", "pyreadstat (>=1.2.0)", "pytest (>=7.3.2)", "pytest-xdist (>=2.2.0)", "python-calamine (>=0.1.7)", "pyxlsb (>=1.0.10)", "qtpy (>=2.3.0)", "s3fs (>=2022.11.0)", "scipy (>=1.10.0)", "tables (>=3.8.0)", "tabulate (>=0.9.0)", "xarray (>=2022.12.0)", "xlrd (>=2.0.1)", "xlsxwriter (>=3.0.5)", "zstandard (>=0.19.0)"] +aws = ["s3fs (>=2022.11.0)"] +clipboard = ["PyQt5 (>=5.15.9)", "qtpy (>=2.3.0)"] +compression = ["zstandard (>=0.19.0)"] +computation = ["scipy (>=1.10.0)", "xarray (>=2022.12.0)"] +consortium-standard = ["dataframe-api-compat (>=0.1.7)"] +excel = ["odfpy (>=1.4.1)", "openpyxl (>=3.1.0)", "python-calamine (>=0.1.7)", "pyxlsb (>=1.0.10)", "xlrd (>=2.0.1)", "xlsxwriter (>=3.0.5)"] +feather = ["pyarrow (>=10.0.1)"] +fss = ["fsspec (>=2022.11.0)"] +gcp = ["gcsfs (>=2022.11.0)", "pandas-gbq (>=0.19.0)"] +hdf5 = ["tables (>=3.8.0)"] +html = ["beautifulsoup4 (>=4.11.2)", "html5lib (>=1.1)", "lxml (>=4.9.2)"] +mysql = ["SQLAlchemy (>=2.0.0)", "pymysql (>=1.0.2)"] +output-formatting = ["jinja2 (>=3.1.2)", "tabulate (>=0.9.0)"] +parquet = ["pyarrow (>=10.0.1)"] +performance = ["bottleneck (>=1.3.6)", "numba (>=0.56.4)", "numexpr (>=2.8.4)"] +plot = ["matplotlib (>=3.6.3)"] +postgresql = ["SQLAlchemy (>=2.0.0)", "adbc-driver-postgresql (>=0.8.0)", "psycopg2 (>=2.9.6)"] +pyarrow = ["pyarrow (>=10.0.1)"] +spss = ["pyreadstat (>=1.2.0)"] +sql-other = ["SQLAlchemy (>=2.0.0)", "adbc-driver-postgresql (>=0.8.0)", "adbc-driver-sqlite (>=0.8.0)"] +test = ["hypothesis (>=6.46.1)", "pytest (>=7.3.2)", "pytest-xdist (>=2.2.0)"] +xml = ["lxml (>=4.9.2)"] + [[package]] name = "passlib" version = "1.7.4" @@ -2540,6 +3233,24 @@ files = [ {file = "pathspec-0.12.1.tar.gz", hash = "sha256:a482d51503a1ab33b1c67a6c3813a26953dbdc71c31dacaef9a838c4e29f5712"}, ] +[[package]] +name = "patsy" +version = "0.5.6" +description = "A Python package for describing statistical models and for building design matrices." 
+optional = true +python-versions = "*" +files = [ + {file = "patsy-0.5.6-py2.py3-none-any.whl", hash = "sha256:19056886fd8fa71863fa32f0eb090267f21fb74be00f19f5c70b2e9d76c883c6"}, + {file = "patsy-0.5.6.tar.gz", hash = "sha256:95c6d47a7222535f84bff7f63d7303f2e297747a598db89cf5c67f0c0c7d2cdb"}, +] + +[package.dependencies] +numpy = ">=1.4" +six = "*" + +[package.extras] +test = ["pytest", "pytest-cov", "scipy"] + [[package]] name = "pgvector" version = "0.1.8" @@ -2683,13 +3394,13 @@ testing = ["pytest", "pytest-benchmark"] [[package]] name = "postgrest" -version = "0.16.11" +version = "0.17.0" description = "PostgREST client for Python. This library provides an ORM interface to PostgREST." optional = true python-versions = "<4.0,>=3.8" files = [ - {file = "postgrest-0.16.11-py3-none-any.whl", hash = "sha256:22fb6b817ace1f68aa648fd4ce0f56d2786c9260fa4ed2cb9046191231a682b8"}, - {file = "postgrest-0.16.11.tar.gz", hash = "sha256:10af51b4c39e288ad7df2db92d6a61fb3c4683131b40561f473e3de116e83fa5"}, + {file = "postgrest-0.17.0-py3-none-any.whl", hash = "sha256:df2530e903955ffddbd21d92a99abc8d09d6efb357ce33438fca68d4b46b5d95"}, + {file = "postgrest-0.17.0.tar.gz", hash = "sha256:5ee05d8d6796b9d716585d2ad589db57ef832af6c2592a3e39dcef8993929cff"}, ] [package.dependencies] @@ -2721,6 +3432,70 @@ dev = ["black", "flake8", "flake8-print", "isort", "pre-commit"] sentry = ["django", "sentry-sdk"] test = ["coverage", "django", "flake8", "freezegun (==0.3.15)", "mock (>=2.0.0)", "pylint", "pytest", "pytest-timeout"] +[[package]] +name = "pot" +version = "0.9.4" +description = "Python Optimal Transport Library" +optional = true +python-versions = ">=3.7" +files = [ + {file = "POT-0.9.4-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:8418ab9c24f549290fdc452caebb58ded05b986a024063fe3354cfd2e704b378"}, + {file = "POT-0.9.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:096cd3b454f87ff9c8f48d8e221bc26509d8f9355ce99d9fefe83560f82278b5"}, + {file = "POT-0.9.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:6e67d420a479ed66f4549c785e157bb3dce2c5489bf81a44ac922a6e9471fe69"}, + {file = "POT-0.9.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:107bc7225906a3fa3aafdb441e1d24c55eaf1ee3badd1c93ab6199865f689221"}, + {file = "POT-0.9.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dfee044f744b1722912654c8b905bc289ce160524db5ca0e853f1aa442ffea55"}, + {file = "POT-0.9.4-cp310-cp310-win32.whl", hash = "sha256:421c3efb8da2f1ce9605f9f2068047ea629b95de87baa15b8786b3e664de9dbd"}, + {file = "POT-0.9.4-cp310-cp310-win_amd64.whl", hash = "sha256:6e76194a62f29ddadc975e18cf7f07d22060735bd3fb9a023781e0e126a05822"}, + {file = "POT-0.9.4-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:148040b89063790ab784458d5d200ba4a7f36c54fdb62ea0842f8d5d4c5c6ccb"}, + {file = "POT-0.9.4-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:1109fc77989834a1467be731ff957d90c2b558e772cff6c06fb90f7cbe58b014"}, + {file = "POT-0.9.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:9f8218cd419e8457b37fe2b8060b5bf9bd07d4671d5f5fa4d5ac98c58b5be8c0"}, + {file = "POT-0.9.4-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1ea0055f18e26917ff326f39dd5e5fd43bcc9eccaab4b09a4f8d7785c8921250"}, + {file = "POT-0.9.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f03b4af3f56234787d2a34e0637dac9c1e1de4cb3f7386ca4f91331f0c4af187"}, + {file = "POT-0.9.4-cp311-cp311-win32.whl", hash = 
"sha256:a69f6d554df1de47dd312fc065d9171bdbedf48c90c8409889485945ffaaeacf"}, + {file = "POT-0.9.4-cp311-cp311-win_amd64.whl", hash = "sha256:8791c8f09a852901e03e2dc1c6aec4f374b58b3ee905a90349713587aa16e26a"}, + {file = "POT-0.9.4-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:1a7a55b3fd528e6808f99de0165dcacf185eb86ae3aff4d358b850479b76a8ba"}, + {file = "POT-0.9.4-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:a9bbd2507605be6eae4f0f0d6f6f0ff91ce3eeb5b7c8eeb350e4eb76bcc6940a"}, + {file = "POT-0.9.4-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:5594ab0e78329307ce4cd293f2de409513876695d60fb4c1013b5dd46069f256"}, + {file = "POT-0.9.4-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c0ca658105d129b752c8d20751ff2cb965d1bdcaecec319ae489b135c58d9da9"}, + {file = "POT-0.9.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e6732f1acd94959b8fa13a4fa250ad49c1e6894ece488a81f4427dbf13df4776"}, + {file = "POT-0.9.4-cp312-cp312-win32.whl", hash = "sha256:bf7f5253fee6ba7df5dd854b739540f701153cabab80dd25332dfac93d84bec1"}, + {file = "POT-0.9.4-cp312-cp312-win_amd64.whl", hash = "sha256:597ff64b06a157871feb84e6e82b3f5dfbfff57161c14660ab2ddbcc93c940e6"}, + {file = "POT-0.9.4-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:385b41606943fbc73f1ab96fd994117d79c4ad782c91bbb7ba74c0359e9de887"}, + {file = "POT-0.9.4-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b3f697e084243b9fe0a975331e471fd09610343c6aa28172232958e39100ede6"}, + {file = "POT-0.9.4-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b967fb9cafc6ad30a6d51b21d6307b384491a106d6dc75f37bee52a3f63575c3"}, + {file = "POT-0.9.4-cp37-cp37m-win32.whl", hash = "sha256:35926c2f4d2ee49309dce340f7f6646fe451ca1e0d11b2d017a851d482bf4468"}, + {file = "POT-0.9.4-cp37-cp37m-win_amd64.whl", hash = "sha256:f29fa1fcd591d1940e2279dc1422ff46c0c273f6be4ecbcaa819d91dd196573e"}, + {file = "POT-0.9.4-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:63f8b50f448e32a4ae274dd1e68e22b1a2bc291c53c5c6ec5afadfb930b6a809"}, + {file = "POT-0.9.4-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:cabd13a728d2db40b3989ad57810769dfba8972b686ae7f4881dbd315252e5d9"}, + {file = "POT-0.9.4-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:5984157d5a819bd6c55db3b0d8fe631ff398c243e80a9e9e933cbd1ee7c7588c"}, + {file = "POT-0.9.4-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4b8da4e3268eeee40dff96364f0a9f0326979d565d970ec74a1688b8ad338022"}, + {file = "POT-0.9.4-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ede957083299e4904627621f4d2c8a6b56af108fef9b486330f65519a395f10a"}, + {file = "POT-0.9.4-cp38-cp38-win32.whl", hash = "sha256:79716079d7970c6c0bf909f986c65d7103135e36b364ba5fa5caed97d7aa6464"}, + {file = "POT-0.9.4-cp38-cp38-win_amd64.whl", hash = "sha256:3246493745bcf2b353312183b9ab547466cae711936f991a6754b62f55ff1dec"}, + {file = "POT-0.9.4-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:223c4ca199b679e4c2b8a79fb49d31f2c7fab2975c2c37d1e68a0a7fbe2bc55d"}, + {file = "POT-0.9.4-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:c30d77994d02984ad32665f5975e272e8c02e8d5288c4edfbec08617c5c38f91"}, + {file = "POT-0.9.4-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:5b0fe5be45112c12cc0f6ab61fb85ed9161ca540b24a4037e5d53ab86f390a49"}, + {file = "POT-0.9.4-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ab68bdfeae54719d202e923f18ec29869c09b105e42f94568221fc92996f0f4d"}, + {file = 
"POT-0.9.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2847015e3bb65171eb70eca786f8cebe806960af40625ebc17c858b6314a9e0b"}, + {file = "POT-0.9.4-cp39-cp39-win32.whl", hash = "sha256:2e35d68c6e676108d6eeb7e6b119c4c19dca364992bca57f3f513660bfb1810c"}, + {file = "POT-0.9.4-cp39-cp39-win_amd64.whl", hash = "sha256:e7d029f024ed58f8d10b3e4d517df551bb9758ac12d0503be793f44258f2dffc"}, + {file = "pot-0.9.4.tar.gz", hash = "sha256:4cf8b46bf4992c37529161c32dd5e3334e0c089399142f08ed6d455b57015edd"}, +] + +[package.dependencies] +numpy = ">=1.16" +scipy = ">=1.6" + +[package.extras] +all = ["autograd", "cvxopt", "jax", "jaxlib", "matplotlib", "pymanopt", "scikit-learn", "tensorflow", "torch", "torch-geometric"] +backend-jax = ["jax", "jaxlib"] +backend-tf = ["tensorflow"] +backend-torch = ["torch"] +cvxopt = ["cvxopt"] +dr = ["autograd", "pymanopt", "scikit-learn"] +gnn = ["torch", "torch-geometric"] +plot = ["matplotlib"] + [[package]] name = "pre-commit" version = "2.21.0" @@ -2884,7 +3659,7 @@ files = [ name = "pycparser" version = "2.22" description = "C parser in Python" -optional = true +optional = false python-versions = ">=3.8" files = [ {file = "pycparser-2.22-py3-none-any.whl", hash = "sha256:c3702b6d3dd8c7abc1afa565d7e63d53a1d0bd86cdc24edd75470f4de499cfcc"}, @@ -3041,11 +3816,43 @@ dev = ["coverage[toml] (==5.0.4)", "cryptography (>=3.4.0)", "pre-commit", "pyte docs = ["sphinx", "sphinx-rtd-theme", "zope.interface"] tests = ["coverage[toml] (==5.0.4)", "pytest (>=6.0.0,<7.0.0)"] +[[package]] +name = "pynndescent" +version = "0.5.13" +description = "Nearest Neighbor Descent" +optional = true +python-versions = "*" +files = [ + {file = "pynndescent-0.5.13-py3-none-any.whl", hash = "sha256:69aabb8f394bc631b6ac475a1c7f3994c54adf3f51cd63b2730fefba5771b949"}, + {file = "pynndescent-0.5.13.tar.gz", hash = "sha256:d74254c0ee0a1eeec84597d5fe89fedcf778593eeabe32c2f97412934a9800fb"}, +] + +[package.dependencies] +joblib = ">=0.11" +llvmlite = ">=0.30" +numba = ">=0.51.2" +scikit-learn = ">=0.18" +scipy = ">=1.0" + +[[package]] +name = "pyparsing" +version = "3.1.4" +description = "pyparsing module - Classes and methods to define and execute parsing grammars" +optional = true +python-versions = ">=3.6.8" +files = [ + {file = "pyparsing-3.1.4-py3-none-any.whl", hash = "sha256:a6a7ee4235a3f944aa1fa2249307708f893fe5717dc603503c6c7969c070fb7c"}, + {file = "pyparsing-3.1.4.tar.gz", hash = "sha256:f86ec8d1a83f11977c9a6ea7598e8c27fc5cddfa5b07ea2241edbbde1d7bc032"}, +] + +[package.extras] +diagrams = ["jinja2", "railroad-diagrams"] + [[package]] name = "pypdf" version = "4.3.1" description = "A pure-python PDF library capable of splitting, merging, cropping, and transforming PDF files" -optional = true +optional = false python-versions = ">=3.6" files = [ {file = "pypdf-4.3.1-py3-none-any.whl", hash = "sha256:64b31da97eda0771ef22edb1bfecd5deee4b72c3d1736b7df2689805076d6418"}, @@ -3300,38 +4107,20 @@ files = [ [[package]] name = "realtime" -version = "2.0.2" +version = "2.0.5" description = "" optional = true python-versions = "<4.0,>=3.9" files = [ - {file = "realtime-2.0.2-py3-none-any.whl", hash = "sha256:2634c915bc38807f2013f21e8bcc4d2f79870dfd81460ddb9393883d0489928a"}, - {file = "realtime-2.0.2.tar.gz", hash = "sha256:519da9325b3b8102139d51785013d592f6b2403d81fa21d838a0b0234723ed7d"}, + {file = "realtime-2.0.5-py3-none-any.whl", hash = "sha256:f9ec2d762794709e37a8e2745c8dfd86eac4870678808f09676c8f2b7bfa6bbc"}, + {file = "realtime-2.0.5.tar.gz", hash = 
"sha256:133828fbc2cc2325fb015fe071c6da9fb488819cac96d85ed297045c715b35f5"}, ] [package.dependencies] -aiohttp = ">=3.10.2,<4.0.0" +aiohttp = ">=3.10.6,<4.0.0" python-dateutil = ">=2.8.1,<3.0.0" typing-extensions = ">=4.12.2,<5.0.0" -websockets = ">=11,<13" - -[[package]] -name = "redis" -version = "5.0.8" -description = "Python client for Redis database and key-value store" -optional = true -python-versions = ">=3.7" -files = [ - {file = "redis-5.0.8-py3-none-any.whl", hash = "sha256:56134ee08ea909106090934adc36f65c9bcbbaecea5b21ba704ba6fb561f8eb4"}, - {file = "redis-5.0.8.tar.gz", hash = "sha256:0c5b10d387568dfe0698c6fad6615750c24170e548ca2deac10c649d463e9870"}, -] - -[package.dependencies] -async-timeout = {version = ">=4.0.3", markers = "python_full_version < \"3.11.3\""} - -[package.extras] -hiredis = ["hiredis (>1.0.0)"] -ocsp = ["cryptography (>=36.0.1)", "pyopenssl (==20.0.1)", "requests (>=2.26.0)"] +websockets = ">=11,<14" [[package]] name = "referencing" @@ -3476,7 +4265,7 @@ use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"] name = "requests-toolbelt" version = "1.0.0" description = "A utility belt for advanced users of python-requests" -optional = true +optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" files = [ {file = "requests-toolbelt-1.0.0.tar.gz", hash = "sha256:7681a0a3d047012b5bdc0ee37d7f8f07ebe76ab08caeccfc3921ce23c88d5bc6"}, @@ -3615,6 +4404,114 @@ botocore = ">=1.33.2,<2.0a.0" [package.extras] crt = ["botocore[crt] (>=1.33.2,<2.0a.0)"] +[[package]] +name = "scikit-learn" +version = "1.5.2" +description = "A set of python modules for machine learning and data mining" +optional = true +python-versions = ">=3.9" +files = [ + {file = "scikit_learn-1.5.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:299406827fb9a4f862626d0fe6c122f5f87f8910b86fe5daa4c32dcd742139b6"}, + {file = "scikit_learn-1.5.2-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:2d4cad1119c77930b235579ad0dc25e65c917e756fe80cab96aa3b9428bd3fb0"}, + {file = "scikit_learn-1.5.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8c412ccc2ad9bf3755915e3908e677b367ebc8d010acbb3f182814524f2e5540"}, + {file = "scikit_learn-1.5.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3a686885a4b3818d9e62904d91b57fa757fc2bed3e465c8b177be652f4dd37c8"}, + {file = "scikit_learn-1.5.2-cp310-cp310-win_amd64.whl", hash = "sha256:c15b1ca23d7c5f33cc2cb0a0d6aaacf893792271cddff0edbd6a40e8319bc113"}, + {file = "scikit_learn-1.5.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:03b6158efa3faaf1feea3faa884c840ebd61b6484167c711548fce208ea09445"}, + {file = "scikit_learn-1.5.2-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:1ff45e26928d3b4eb767a8f14a9a6efbf1cbff7c05d1fb0f95f211a89fd4f5de"}, + {file = "scikit_learn-1.5.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f763897fe92d0e903aa4847b0aec0e68cadfff77e8a0687cabd946c89d17e675"}, + {file = "scikit_learn-1.5.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f8b0ccd4a902836493e026c03256e8b206656f91fbcc4fde28c57a5b752561f1"}, + {file = "scikit_learn-1.5.2-cp311-cp311-win_amd64.whl", hash = "sha256:6c16d84a0d45e4894832b3c4d0bf73050939e21b99b01b6fd59cbb0cf39163b6"}, + {file = "scikit_learn-1.5.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:f932a02c3f4956dfb981391ab24bda1dbd90fe3d628e4b42caef3e041c67707a"}, + {file = "scikit_learn-1.5.2-cp312-cp312-macosx_12_0_arm64.whl", hash = 
"sha256:3b923d119d65b7bd555c73be5423bf06c0105678ce7e1f558cb4b40b0a5502b1"}, + {file = "scikit_learn-1.5.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f60021ec1574e56632be2a36b946f8143bf4e5e6af4a06d85281adc22938e0dd"}, + {file = "scikit_learn-1.5.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:394397841449853c2290a32050382edaec3da89e35b3e03d6cc966aebc6a8ae6"}, + {file = "scikit_learn-1.5.2-cp312-cp312-win_amd64.whl", hash = "sha256:57cc1786cfd6bd118220a92ede80270132aa353647684efa385a74244a41e3b1"}, + {file = "scikit_learn-1.5.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:757c7d514ddb00ae249832fe87100d9c73c6ea91423802872d9e74970a0e40b9"}, + {file = "scikit_learn-1.5.2-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:52788f48b5d8bca5c0736c175fa6bdaab2ef00a8f536cda698db61bd89c551c1"}, + {file = "scikit_learn-1.5.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:643964678f4b5fbdc95cbf8aec638acc7aa70f5f79ee2cdad1eec3df4ba6ead8"}, + {file = "scikit_learn-1.5.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ca64b3089a6d9b9363cd3546f8978229dcbb737aceb2c12144ee3f70f95684b7"}, + {file = "scikit_learn-1.5.2-cp39-cp39-win_amd64.whl", hash = "sha256:3bed4909ba187aca80580fe2ef370d9180dcf18e621a27c4cf2ef10d279a7efe"}, + {file = "scikit_learn-1.5.2.tar.gz", hash = "sha256:b4237ed7b3fdd0a4882792e68ef2545d5baa50aca3bb45aa7df468138ad8f94d"}, +] + +[package.dependencies] +joblib = ">=1.2.0" +numpy = ">=1.19.5" +scipy = ">=1.6.0" +threadpoolctl = ">=3.1.0" + +[package.extras] +benchmark = ["matplotlib (>=3.3.4)", "memory_profiler (>=0.57.0)", "pandas (>=1.1.5)"] +build = ["cython (>=3.0.10)", "meson-python (>=0.16.0)", "numpy (>=1.19.5)", "scipy (>=1.6.0)"] +docs = ["Pillow (>=7.1.2)", "matplotlib (>=3.3.4)", "memory_profiler (>=0.57.0)", "numpydoc (>=1.2.0)", "pandas (>=1.1.5)", "plotly (>=5.14.0)", "polars (>=0.20.30)", "pooch (>=1.6.0)", "pydata-sphinx-theme (>=0.15.3)", "scikit-image (>=0.17.2)", "seaborn (>=0.9.0)", "sphinx (>=7.3.7)", "sphinx-copybutton (>=0.5.2)", "sphinx-design (>=0.5.0)", "sphinx-design (>=0.6.0)", "sphinx-gallery (>=0.16.0)", "sphinx-prompt (>=1.4.0)", "sphinx-remove-toctrees (>=1.0.0.post1)", "sphinxcontrib-sass (>=0.3.4)", "sphinxext-opengraph (>=0.9.1)"] +examples = ["matplotlib (>=3.3.4)", "pandas (>=1.1.5)", "plotly (>=5.14.0)", "pooch (>=1.6.0)", "scikit-image (>=0.17.2)", "seaborn (>=0.9.0)"] +install = ["joblib (>=1.2.0)", "numpy (>=1.19.5)", "scipy (>=1.6.0)", "threadpoolctl (>=3.1.0)"] +maintenance = ["conda-lock (==2.5.6)"] +tests = ["black (>=24.3.0)", "matplotlib (>=3.3.4)", "mypy (>=1.9)", "numpydoc (>=1.2.0)", "pandas (>=1.1.5)", "polars (>=0.20.30)", "pooch (>=1.6.0)", "pyamg (>=4.0.0)", "pyarrow (>=12.0.0)", "pytest (>=7.1.2)", "pytest-cov (>=2.9.0)", "ruff (>=0.2.1)", "scikit-image (>=0.17.2)"] + +[[package]] +name = "scipy" +version = "1.12.0" +description = "Fundamental algorithms for scientific computing in Python" +optional = true +python-versions = ">=3.9" +files = [ + {file = "scipy-1.12.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:78e4402e140879387187f7f25d91cc592b3501a2e51dfb320f48dfb73565f10b"}, + {file = "scipy-1.12.0-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:f5f00ebaf8de24d14b8449981a2842d404152774c1a1d880c901bf454cb8e2a1"}, + {file = "scipy-1.12.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e53958531a7c695ff66c2e7bb7b79560ffdc562e2051644c5576c39ff8efb563"}, + {file = 
"scipy-1.12.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5e32847e08da8d895ce09d108a494d9eb78974cf6de23063f93306a3e419960c"}, + {file = "scipy-1.12.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:4c1020cad92772bf44b8e4cdabc1df5d87376cb219742549ef69fc9fd86282dd"}, + {file = "scipy-1.12.0-cp310-cp310-win_amd64.whl", hash = "sha256:75ea2a144096b5e39402e2ff53a36fecfd3b960d786b7efd3c180e29c39e53f2"}, + {file = "scipy-1.12.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:408c68423f9de16cb9e602528be4ce0d6312b05001f3de61fe9ec8b1263cad08"}, + {file = "scipy-1.12.0-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:5adfad5dbf0163397beb4aca679187d24aec085343755fcdbdeb32b3679f254c"}, + {file = "scipy-1.12.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c3003652496f6e7c387b1cf63f4bb720951cfa18907e998ea551e6de51a04467"}, + {file = "scipy-1.12.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8b8066bce124ee5531d12a74b617d9ac0ea59245246410e19bca549656d9a40a"}, + {file = "scipy-1.12.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:8bee4993817e204d761dba10dbab0774ba5a8612e57e81319ea04d84945375ba"}, + {file = "scipy-1.12.0-cp311-cp311-win_amd64.whl", hash = "sha256:a24024d45ce9a675c1fb8494e8e5244efea1c7a09c60beb1eeb80373d0fecc70"}, + {file = "scipy-1.12.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:e7e76cc48638228212c747ada851ef355c2bb5e7f939e10952bc504c11f4e372"}, + {file = "scipy-1.12.0-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:f7ce148dffcd64ade37b2df9315541f9adad6efcaa86866ee7dd5db0c8f041c3"}, + {file = "scipy-1.12.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9c39f92041f490422924dfdb782527a4abddf4707616e07b021de33467f917bc"}, + {file = "scipy-1.12.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a7ebda398f86e56178c2fa94cad15bf457a218a54a35c2a7b4490b9f9cb2676c"}, + {file = "scipy-1.12.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:95e5c750d55cf518c398a8240571b0e0782c2d5a703250872f36eaf737751338"}, + {file = "scipy-1.12.0-cp312-cp312-win_amd64.whl", hash = "sha256:e646d8571804a304e1da01040d21577685ce8e2db08ac58e543eaca063453e1c"}, + {file = "scipy-1.12.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:913d6e7956c3a671de3b05ccb66b11bc293f56bfdef040583a7221d9e22a2e35"}, + {file = "scipy-1.12.0-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:bba1b0c7256ad75401c73e4b3cf09d1f176e9bd4248f0d3112170fb2ec4db067"}, + {file = "scipy-1.12.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:730badef9b827b368f351eacae2e82da414e13cf8bd5051b4bdfd720271a5371"}, + {file = "scipy-1.12.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6546dc2c11a9df6926afcbdd8a3edec28566e4e785b915e849348c6dd9f3f490"}, + {file = "scipy-1.12.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:196ebad3a4882081f62a5bf4aeb7326aa34b110e533aab23e4374fcccb0890dc"}, + {file = "scipy-1.12.0-cp39-cp39-win_amd64.whl", hash = "sha256:b360f1b6b2f742781299514e99ff560d1fe9bd1bff2712894b52abe528d1fd1e"}, + {file = "scipy-1.12.0.tar.gz", hash = "sha256:4bf5abab8a36d20193c698b0f1fc282c1d083c94723902c447e5d2f1780936a3"}, +] + +[package.dependencies] +numpy = ">=1.22.4,<1.29.0" + +[package.extras] +dev = ["click", "cython-lint (>=0.12.2)", "doit (>=0.36.0)", "mypy", "pycodestyle", "pydevtool", "rich-click", "ruff", "types-psutil", "typing_extensions"] +doc = ["jupytext", "matplotlib (>2)", "myst-nb", 
"numpydoc", "pooch", "pydata-sphinx-theme (==0.9.0)", "sphinx (!=4.1.0)", "sphinx-design (>=0.2.0)"] +test = ["asv", "gmpy2", "hypothesis", "mpmath", "pooch", "pytest", "pytest-cov", "pytest-timeout", "pytest-xdist", "scikit-umfpack", "threadpoolctl"] + +[[package]] +name = "seaborn" +version = "0.13.2" +description = "Statistical data visualization" +optional = true +python-versions = ">=3.8" +files = [ + {file = "seaborn-0.13.2-py3-none-any.whl", hash = "sha256:636f8336facf092165e27924f223d3c62ca560b1f2bb5dff7ab7fad265361987"}, + {file = "seaborn-0.13.2.tar.gz", hash = "sha256:93e60a40988f4d65e9f4885df477e2fdaff6b73a9ded434c1ab356dd57eefff7"}, +] + +[package.dependencies] +matplotlib = ">=3.4,<3.6.1 || >3.6.1" +numpy = ">=1.20,<1.24.0 || >1.24.0" +pandas = ">=1.2" + +[package.extras] +dev = ["flake8", "flit", "mypy", "pandas-stubs", "pre-commit", "pytest", "pytest-cov", "pytest-xdist"] +docs = ["ipykernel", "nbconvert", "numpydoc", "pydata_sphinx_theme (==0.10.0rc2)", "pyyaml", "sphinx (<6.0.0)", "sphinx-copybutton", "sphinx-design", "sphinx-issues"] +stats = ["scipy (>=1.7)", "statsmodels (>=0.12)"] + [[package]] name = "setuptools" version = "75.1.0" @@ -3646,6 +4543,31 @@ files = [ {file = "six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"}, ] +[[package]] +name = "smart-open" +version = "7.0.4" +description = "Utils for streaming large files (S3, HDFS, GCS, Azure Blob Storage, gzip, bz2...)" +optional = true +python-versions = "<4.0,>=3.7" +files = [ + {file = "smart_open-7.0.4-py3-none-any.whl", hash = "sha256:4e98489932b3372595cddc075e6033194775165702887216b65eba760dfd8d47"}, + {file = "smart_open-7.0.4.tar.gz", hash = "sha256:62b65852bdd1d1d516839fcb1f6bc50cd0f16e05b4ec44b52f43d38bcb838524"}, +] + +[package.dependencies] +wrapt = "*" + +[package.extras] +all = ["azure-common", "azure-core", "azure-storage-blob", "boto3", "google-cloud-storage (>=2.6.0)", "paramiko", "requests", "zstandard"] +azure = ["azure-common", "azure-core", "azure-storage-blob"] +gcs = ["google-cloud-storage (>=2.6.0)"] +http = ["requests"] +s3 = ["boto3"] +ssh = ["paramiko"] +test = ["azure-common", "azure-core", "azure-storage-blob", "boto3", "google-cloud-storage (>=2.6.0)", "moto[server]", "paramiko", "pytest", "pytest-rerunfailures", "requests", "responses", "zstandard"] +webhdfs = ["requests"] +zst = ["zstandard"] + [[package]] name = "sniffio" version = "1.3.1" @@ -3661,7 +4583,7 @@ files = [ name = "soupsieve" version = "2.6" description = "A modern CSS selector implementation for Beautiful Soup." 
-optional = true +optional = false python-versions = ">=3.8" files = [ {file = "soupsieve-2.6-py3-none-any.whl", hash = "sha256:e72c4ff06e4fb6e4b5a9f0f55fe6e81514581fca1515028625d0f299c602ccc9"}, @@ -3772,15 +4694,66 @@ anyio = ">=3.4.0,<5" [package.extras] full = ["httpx (>=0.22.0)", "itsdangerous", "jinja2", "python-multipart (>=0.0.7)", "pyyaml"] +[[package]] +name = "statsmodels" +version = "0.14.3" +description = "Statistical computations and models for Python" +optional = true +python-versions = ">=3.9" +files = [ + {file = "statsmodels-0.14.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:7372c92f18b8afb06355e067285abb94e8b214afd9f2fda6d3c26f3ea004cbdf"}, + {file = "statsmodels-0.14.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:42459cdaafe217f455e6b95c05d9e089caf02dd53295aebe63bc1e0206f83176"}, + {file = "statsmodels-0.14.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6a72d3d9fe61f70baf18667bc9cf2e68b6bdd8f5cce4f7b21f9e662e19d2ffdf"}, + {file = "statsmodels-0.14.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9050e5817f23a5adcb87822406b5260758795c42c41fa2fa60816023f0a0d8ef"}, + {file = "statsmodels-0.14.3-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:f12d74743936323165dae648f75193ee4a47381a85610be661d34de56c7634e0"}, + {file = "statsmodels-0.14.3-cp310-cp310-win_amd64.whl", hash = "sha256:53212f597747534bed475bbd89f4bc39a3757c20692bb7664021e30fbd967c53"}, + {file = "statsmodels-0.14.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:e49a63757e12269ef02841f05906e91bdb70f5bc358cbaca97f171f4a4de09c4"}, + {file = "statsmodels-0.14.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:de4b989f0fea684f89bdf5ff641f9acb7acddfd712459f28365904a974afaeff"}, + {file = "statsmodels-0.14.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:45a5ae177e92348532bf2522f27feecd0589b88b243709b28e2b068631c9c181"}, + {file = "statsmodels-0.14.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a736ac24da1388e444bb2b0d381a7307b29074b237acef040a793cfdd508e160"}, + {file = "statsmodels-0.14.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:ea8491b6a36fca738403037709e9469412a9d3e8a8e54db482c20e8dd70efa1f"}, + {file = "statsmodels-0.14.3-cp311-cp311-win_amd64.whl", hash = "sha256:efb946ced8243923eb78909834699be55442172cea3dc37158e3e1c5370e4189"}, + {file = "statsmodels-0.14.3-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:9bf3690f71ebacff0c976c1584994174bc1bb72785b5a35645b385a00a5107e0"}, + {file = "statsmodels-0.14.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:197bcb1aeaaa5c7e9ba4ad87c2369f9600c6cd69d6e2db829eb46d3d9fe534c9"}, + {file = "statsmodels-0.14.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:492b8fd867687f9539b1f7f111dafb2464e04f65fa834585c08725b8aa1a3d98"}, + {file = "statsmodels-0.14.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a849e78dcb3ed6416bb9043b9549415f1f8cd00426deb467ff4dfe0acbaaad8e"}, + {file = "statsmodels-0.14.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:e8a82aa8a99a428f39a9ead1b03fbd2339e40908412371abe089239d21467fd5"}, + {file = "statsmodels-0.14.3-cp312-cp312-win_amd64.whl", hash = "sha256:5724e51a370227655679f1a487f429919f03de325d7b5702e919526353d0cb1d"}, + {file = "statsmodels-0.14.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:78f579f8416b91b971fb0f27e18c3dec6946b4471ac2456a98dbfd24c72d180c"}, + {file = 
"statsmodels-0.14.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:eb84759e3c1a7b77cae4e7dfdc2ea09b1f1790446fd8476951680eb79e4a568d"}, + {file = "statsmodels-0.14.3-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e7681296373de33d775b01201c51e340d01afb70c6a5ac9b7c66a9e120564967"}, + {file = "statsmodels-0.14.3-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:988346db6319f0c12e6137df674e10ebf551adb42445e05eea2e1d900898f670"}, + {file = "statsmodels-0.14.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:c69b82b4f2a794199d1100ab4406f761516f71826856fa6bfc474a3189b77785"}, + {file = "statsmodels-0.14.3-cp313-cp313-win_amd64.whl", hash = "sha256:5114e5c0f10ce30616ef4a91dc24e66e1033c242589208e604d80a7931537f12"}, + {file = "statsmodels-0.14.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:280e69721925a936493153dba692b53a2fe4e3f46e5fafd32a453f5d9fa2a344"}, + {file = "statsmodels-0.14.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:97f28958e456aea788d4ffd83d7ade82d2a4a3bd5c7e8eabf791f224cddef2bf"}, + {file = "statsmodels-0.14.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2ef24d6350a15f5d25f7c6cb774fce89dff77e3687181ce4410cafd6a4004f04"}, + {file = "statsmodels-0.14.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1ddbd07b7d05e16d1a2ea6df3d7e2255dfb3e0363b91d859623d9fc3aff32b4a"}, + {file = "statsmodels-0.14.3-cp39-cp39-win_amd64.whl", hash = "sha256:42dfb9084a5520342248441904357bd5d7fcf01ec05c9bdc7dd764a88e15a9c4"}, + {file = "statsmodels-0.14.3.tar.gz", hash = "sha256:ecf3502643fa93aabe5f0bdf238efb59609517c4d60a811632d31fcdce86c2d2"}, +] + +[package.dependencies] +numpy = ">=1.22.3,<3" +packaging = ">=21.3" +pandas = ">=1.4,<2.1.0 || >2.1.0" +patsy = ">=0.5.6" +scipy = ">=1.8,<1.9.2 || >1.9.2" + +[package.extras] +build = ["cython (>=3.0.10)"] +develop = ["colorama", "cython (>=3.0.10)", "cython (>=3.0.10,<4)", "flake8", "isort", "joblib", "matplotlib (>=3)", "pytest (>=7.3.0,<8)", "pytest-cov", "pytest-randomly", "pytest-xdist", "pywinpty", "setuptools-scm[toml] (>=8.0,<9.0)"] +docs = ["ipykernel", "jupyter-client", "matplotlib", "nbconvert", "nbformat", "numpydoc", "pandas-datareader", "sphinx"] + [[package]] name = "storage3" -version = "0.7.7" +version = "0.8.0" description = "Supabase Storage client for Python." optional = true python-versions = "<4.0,>=3.8" files = [ - {file = "storage3-0.7.7-py3-none-any.whl", hash = "sha256:ed80a2546cd0b5c22e2c30ea71096db6c99268daf2958c603488e7d72efb8426"}, - {file = "storage3-0.7.7.tar.gz", hash = "sha256:9fba680cf761d139ad764f43f0e91c245d1ce1af2cc3afe716652f835f48f83e"}, + {file = "storage3-0.8.0-py3-none-any.whl", hash = "sha256:a1aa28a6eb685b8158f2de26589cd216fcd18328c5762d21159a4545e8cf6709"}, + {file = "storage3-0.8.0.tar.gz", hash = "sha256:cdb5af60ff240a8c2ef83cecdb5816042ce4a9162803456bcd1bca075b4c82df"}, ] [package.dependencies] @@ -3806,32 +4779,33 @@ test = ["pylint", "pytest", "pytest-black", "pytest-cov", "pytest-pylint"] [[package]] name = "supabase" -version = "2.7.4" +version = "2.8.1" description = "Supabase client for Python." 
optional = true python-versions = "<4.0,>=3.9" files = [ - {file = "supabase-2.7.4-py3-none-any.whl", hash = "sha256:01815fbc30cac753933d4a44a2529fd13cb7634b56c705c65b12a02c8e75982b"}, - {file = "supabase-2.7.4.tar.gz", hash = "sha256:5a979c7711b3c5ce688514fa0afc015780522569494e1a9a9d25d03b7c3d654b"}, + {file = "supabase-2.8.1-py3-none-any.whl", hash = "sha256:dfa8bef89b54129093521d5bba2136ff765baf67cd76d8ad0aa4984d61a7815c"}, + {file = "supabase-2.8.1.tar.gz", hash = "sha256:711c70e6acd9e2ff48ca0dc0b1bb70c01c25378cc5189ec9f5ed9655b30bc41d"}, ] [package.dependencies] -gotrue = ">=1.3,<3.0" +gotrue = ">=2.7.0,<3.0.0" httpx = ">=0.24,<0.28" -postgrest = ">=0.14,<0.17.0" +postgrest = ">=0.17.0,<0.18.0" realtime = ">=2.0.0,<3.0.0" -storage3 = ">=0.5.3,<0.8.0" -supafunc = ">=0.3.1,<0.6.0" +storage3 = ">=0.8.0,<0.9.0" +supafunc = ">=0.6.0,<0.7.0" +typing-extensions = ">=4.12.2,<5.0.0" [[package]] name = "supafunc" -version = "0.5.1" +version = "0.6.0" description = "Library for Supabase Functions" optional = true python-versions = "<4.0,>=3.8" files = [ - {file = "supafunc-0.5.1-py3-none-any.whl", hash = "sha256:b05e99a2b41270211a3f90ec843c04c5f27a5618f2d2d2eb8e07f41eb962a910"}, - {file = "supafunc-0.5.1.tar.gz", hash = "sha256:1ae9dce6bd935939c561650e86abb676af9665ecf5d4ffc1c7ec3c4932c84334"}, + {file = "supafunc-0.6.0-py3-none-any.whl", hash = "sha256:6e7fbc9992c2722216e1799f314aacf4ac147628741ad7cdd5c7be367e21f619"}, + {file = "supafunc-0.6.0.tar.gz", hash = "sha256:2f7b538d52f268dc5cc7e25743085665242ee1cc8b2b2d867ff0bccfea2d1856"}, ] [package.dependencies] @@ -3866,6 +4840,17 @@ files = [ [package.extras] tests = ["pytest", "pytest-cov"] +[[package]] +name = "threadpoolctl" +version = "3.5.0" +description = "threadpoolctl" +optional = true +python-versions = ">=3.8" +files = [ + {file = "threadpoolctl-3.5.0-py3-none-any.whl", hash = "sha256:56c1e26c150397e58c4926da8eeee87533b1e32bef131bd4bf6a2f45f3185467"}, + {file = "threadpoolctl-3.5.0.tar.gz", hash = "sha256:082433502dd922bf738de0d8bcc4fdcbf0979ff44c42bd40f5af8a282f6fa107"}, +] + [[package]] name = "tiktoken" version = "0.7.0" @@ -4117,7 +5102,7 @@ files = [ name = "typing-inspect" version = "0.9.0" description = "Runtime inspection utilities for typing module." 
-optional = true +optional = false python-versions = "*" files = [ {file = "typing_inspect-0.9.0-py3-none-any.whl", hash = "sha256:9ee6fc59062311ef8547596ab6b955e1b8aa46242d854bfc78f4f6b0eff35f9f"}, @@ -4156,11 +5141,35 @@ tzdata = {version = "*", markers = "platform_system == \"Windows\""} [package.extras] devenv = ["check-manifest", "pytest (>=4.3)", "pytest-cov", "pytest-mock (>=3.3)", "zest.releaser"] +[[package]] +name = "umap-learn" +version = "0.5.6" +description = "Uniform Manifold Approximation and Projection" +optional = true +python-versions = "*" +files = [ + {file = "umap-learn-0.5.6.tar.gz", hash = "sha256:5b3917a862c23ba0fc83bfcd67a7b719dec85b3d9c01fdc7d894cce455df4e03"}, + {file = "umap_learn-0.5.6-py3-none-any.whl", hash = "sha256:881cc0c2ee845b790bf0455aa1664f9f68b838d9d0fe12a1291b85c5a559c913"}, +] + +[package.dependencies] +numba = ">=0.51.2" +numpy = ">=1.17" +pynndescent = ">=0.5" +scikit-learn = ">=0.22" +scipy = ">=1.3.1" +tqdm = "*" + +[package.extras] +parametric-umap = ["tensorflow (>=2.1)"] +plot = ["bokeh", "colorcet", "datashader", "holoviews", "matplotlib", "pandas", "scikit-image", "seaborn"] +tbb = ["tbb (>=2019.0)"] + [[package]] name = "unstructured-client" version = "0.25.9" description = "Python Client SDK for Unstructured API" -optional = true +optional = false python-versions = ">=3.8" files = [ {file = "unstructured-client-0.25.9.tar.gz", hash = "sha256:fcc461623f58fefb0e22508e28bf653a8f6934b9779cb4a90dd68d77a39fb5b2"}, @@ -4252,13 +5261,13 @@ text-embedding = ["sentence-transformers (==2.*)"] [[package]] name = "virtualenv" -version = "20.26.5" +version = "20.26.6" description = "Virtual Python Environment builder" optional = false python-versions = ">=3.7" files = [ - {file = "virtualenv-20.26.5-py3-none-any.whl", hash = "sha256:4f3ac17b81fba3ce3bd6f4ead2749a72da5929c01774948e243db9ba41df4ff6"}, - {file = "virtualenv-20.26.5.tar.gz", hash = "sha256:ce489cac131aa58f4b25e321d6d186171f78e6cb13fafbf32a840cee67733ff4"}, + {file = "virtualenv-20.26.6-py3-none-any.whl", hash = "sha256:7345cc5b25405607a624d8418154577459c3e0277f5466dd79c49d5e492995f2"}, + {file = "virtualenv-20.26.6.tar.gz", hash = "sha256:280aede09a2a5c317e409a00102e7077c6432c5a38f0ef938e643805a7ad2c48"}, ] [package.dependencies] @@ -4272,83 +5281,97 @@ test = ["covdefaults (>=2.3)", "coverage (>=7.2.7)", "coverage-enable-subprocess [[package]] name = "websockets" -version = "12.0" +version = "13.1" description = "An implementation of the WebSocket Protocol (RFC 6455 & 7692)" optional = true python-versions = ">=3.8" files = [ - {file = "websockets-12.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:d554236b2a2006e0ce16315c16eaa0d628dab009c33b63ea03f41c6107958374"}, - {file = "websockets-12.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:2d225bb6886591b1746b17c0573e29804619c8f755b5598d875bb4235ea639be"}, - {file = "websockets-12.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:eb809e816916a3b210bed3c82fb88eaf16e8afcf9c115ebb2bacede1797d2547"}, - {file = "websockets-12.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c588f6abc13f78a67044c6b1273a99e1cf31038ad51815b3b016ce699f0d75c2"}, - {file = "websockets-12.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5aa9348186d79a5f232115ed3fa9020eab66d6c3437d72f9d2c8ac0c6858c558"}, - {file = "websockets-12.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:6350b14a40c95ddd53e775dbdbbbc59b124a5c8ecd6fbb09c2e52029f7a9f480"}, - {file = "websockets-12.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:70ec754cc2a769bcd218ed8d7209055667b30860ffecb8633a834dde27d6307c"}, - {file = "websockets-12.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:6e96f5ed1b83a8ddb07909b45bd94833b0710f738115751cdaa9da1fb0cb66e8"}, - {file = "websockets-12.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:4d87be612cbef86f994178d5186add3d94e9f31cc3cb499a0482b866ec477603"}, - {file = "websockets-12.0-cp310-cp310-win32.whl", hash = "sha256:befe90632d66caaf72e8b2ed4d7f02b348913813c8b0a32fae1cc5fe3730902f"}, - {file = "websockets-12.0-cp310-cp310-win_amd64.whl", hash = "sha256:363f57ca8bc8576195d0540c648aa58ac18cf85b76ad5202b9f976918f4219cf"}, - {file = "websockets-12.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:5d873c7de42dea355d73f170be0f23788cf3fa9f7bed718fd2830eefedce01b4"}, - {file = "websockets-12.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:3f61726cae9f65b872502ff3c1496abc93ffbe31b278455c418492016e2afc8f"}, - {file = "websockets-12.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:ed2fcf7a07334c77fc8a230755c2209223a7cc44fc27597729b8ef5425aa61a3"}, - {file = "websockets-12.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8e332c210b14b57904869ca9f9bf4ca32f5427a03eeb625da9b616c85a3a506c"}, - {file = "websockets-12.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5693ef74233122f8ebab026817b1b37fe25c411ecfca084b29bc7d6efc548f45"}, - {file = "websockets-12.0-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6e9e7db18b4539a29cc5ad8c8b252738a30e2b13f033c2d6e9d0549b45841c04"}, - {file = "websockets-12.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:6e2df67b8014767d0f785baa98393725739287684b9f8d8a1001eb2839031447"}, - {file = "websockets-12.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:bea88d71630c5900690fcb03161ab18f8f244805c59e2e0dc4ffadae0a7ee0ca"}, - {file = "websockets-12.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:dff6cdf35e31d1315790149fee351f9e52978130cef6c87c4b6c9b3baf78bc53"}, - {file = "websockets-12.0-cp311-cp311-win32.whl", hash = "sha256:3e3aa8c468af01d70332a382350ee95f6986db479ce7af14d5e81ec52aa2b402"}, - {file = "websockets-12.0-cp311-cp311-win_amd64.whl", hash = "sha256:25eb766c8ad27da0f79420b2af4b85d29914ba0edf69f547cc4f06ca6f1d403b"}, - {file = "websockets-12.0-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:0e6e2711d5a8e6e482cacb927a49a3d432345dfe7dea8ace7b5790df5932e4df"}, - {file = "websockets-12.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:dbcf72a37f0b3316e993e13ecf32f10c0e1259c28ffd0a85cee26e8549595fbc"}, - {file = "websockets-12.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:12743ab88ab2af1d17dd4acb4645677cb7063ef4db93abffbf164218a5d54c6b"}, - {file = "websockets-12.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7b645f491f3c48d3f8a00d1fce07445fab7347fec54a3e65f0725d730d5b99cb"}, - {file = "websockets-12.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9893d1aa45a7f8b3bc4510f6ccf8db8c3b62120917af15e3de247f0780294b92"}, - {file = "websockets-12.0-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:1f38a7b376117ef7aff996e737583172bdf535932c9ca021746573bce40165ed"}, - {file = "websockets-12.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:f764ba54e33daf20e167915edc443b6f88956f37fb606449b4a5b10ba42235a5"}, - {file = "websockets-12.0-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:1e4b3f8ea6a9cfa8be8484c9221ec0257508e3a1ec43c36acdefb2a9c3b00aa2"}, - {file = "websockets-12.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:9fdf06fd06c32205a07e47328ab49c40fc1407cdec801d698a7c41167ea45113"}, - {file = "websockets-12.0-cp312-cp312-win32.whl", hash = "sha256:baa386875b70cbd81798fa9f71be689c1bf484f65fd6fb08d051a0ee4e79924d"}, - {file = "websockets-12.0-cp312-cp312-win_amd64.whl", hash = "sha256:ae0a5da8f35a5be197f328d4727dbcfafa53d1824fac3d96cdd3a642fe09394f"}, - {file = "websockets-12.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:5f6ffe2c6598f7f7207eef9a1228b6f5c818f9f4d53ee920aacd35cec8110438"}, - {file = "websockets-12.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:9edf3fc590cc2ec20dc9d7a45108b5bbaf21c0d89f9fd3fd1685e223771dc0b2"}, - {file = "websockets-12.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:8572132c7be52632201a35f5e08348137f658e5ffd21f51f94572ca6c05ea81d"}, - {file = "websockets-12.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:604428d1b87edbf02b233e2c207d7d528460fa978f9e391bd8aaf9c8311de137"}, - {file = "websockets-12.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1a9d160fd080c6285e202327aba140fc9a0d910b09e423afff4ae5cbbf1c7205"}, - {file = "websockets-12.0-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:87b4aafed34653e465eb77b7c93ef058516cb5acf3eb21e42f33928616172def"}, - {file = "websockets-12.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:b2ee7288b85959797970114deae81ab41b731f19ebcd3bd499ae9ca0e3f1d2c8"}, - {file = "websockets-12.0-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:7fa3d25e81bfe6a89718e9791128398a50dec6d57faf23770787ff441d851967"}, - {file = "websockets-12.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:a571f035a47212288e3b3519944f6bf4ac7bc7553243e41eac50dd48552b6df7"}, - {file = "websockets-12.0-cp38-cp38-win32.whl", hash = "sha256:3c6cc1360c10c17463aadd29dd3af332d4a1adaa8796f6b0e9f9df1fdb0bad62"}, - {file = "websockets-12.0-cp38-cp38-win_amd64.whl", hash = "sha256:1bf386089178ea69d720f8db6199a0504a406209a0fc23e603b27b300fdd6892"}, - {file = "websockets-12.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:ab3d732ad50a4fbd04a4490ef08acd0517b6ae6b77eb967251f4c263011a990d"}, - {file = "websockets-12.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:a1d9697f3337a89691e3bd8dc56dea45a6f6d975f92e7d5f773bc715c15dde28"}, - {file = "websockets-12.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:1df2fbd2c8a98d38a66f5238484405b8d1d16f929bb7a33ed73e4801222a6f53"}, - {file = "websockets-12.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:23509452b3bc38e3a057382c2e941d5ac2e01e251acce7adc74011d7d8de434c"}, - {file = "websockets-12.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2e5fc14ec6ea568200ea4ef46545073da81900a2b67b3e666f04adf53ad452ec"}, - {file = "websockets-12.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:46e71dbbd12850224243f5d2aeec90f0aaa0f2dde5aeeb8fc8df21e04d99eff9"}, - {file = 
"websockets-12.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:b81f90dcc6c85a9b7f29873beb56c94c85d6f0dac2ea8b60d995bd18bf3e2aae"}, - {file = "websockets-12.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:a02413bc474feda2849c59ed2dfb2cddb4cd3d2f03a2fedec51d6e959d9b608b"}, - {file = "websockets-12.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:bbe6013f9f791944ed31ca08b077e26249309639313fff132bfbf3ba105673b9"}, - {file = "websockets-12.0-cp39-cp39-win32.whl", hash = "sha256:cbe83a6bbdf207ff0541de01e11904827540aa069293696dd528a6640bd6a5f6"}, - {file = "websockets-12.0-cp39-cp39-win_amd64.whl", hash = "sha256:fc4e7fa5414512b481a2483775a8e8be7803a35b30ca805afa4998a84f9fd9e8"}, - {file = "websockets-12.0-pp310-pypy310_pp73-macosx_10_9_x86_64.whl", hash = "sha256:248d8e2446e13c1d4326e0a6a4e9629cb13a11195051a73acf414812700badbd"}, - {file = "websockets-12.0-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f44069528d45a933997a6fef143030d8ca8042f0dfaad753e2906398290e2870"}, - {file = "websockets-12.0-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c4e37d36f0d19f0a4413d3e18c0d03d0c268ada2061868c1e6f5ab1a6d575077"}, - {file = "websockets-12.0-pp310-pypy310_pp73-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3d829f975fc2e527a3ef2f9c8f25e553eb7bc779c6665e8e1d52aa22800bb38b"}, - {file = "websockets-12.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:2c71bd45a777433dd9113847af751aae36e448bc6b8c361a566cb043eda6ec30"}, - {file = "websockets-12.0-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:0bee75f400895aef54157b36ed6d3b308fcab62e5260703add87f44cee9c82a6"}, - {file = "websockets-12.0-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:423fc1ed29f7512fceb727e2d2aecb952c46aa34895e9ed96071821309951123"}, - {file = "websockets-12.0-pp38-pypy38_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:27a5e9964ef509016759f2ef3f2c1e13f403725a5e6a1775555994966a66e931"}, - {file = "websockets-12.0-pp38-pypy38_pp73-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c3181df4583c4d3994d31fb235dc681d2aaad744fbdbf94c4802485ececdecf2"}, - {file = "websockets-12.0-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:b067cb952ce8bf40115f6c19f478dc71c5e719b7fbaa511359795dfd9d1a6468"}, - {file = "websockets-12.0-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:00700340c6c7ab788f176d118775202aadea7602c5cc6be6ae127761c16d6b0b"}, - {file = "websockets-12.0-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e469d01137942849cff40517c97a30a93ae79917752b34029f0ec72df6b46399"}, - {file = "websockets-12.0-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ffefa1374cd508d633646d51a8e9277763a9b78ae71324183693959cf94635a7"}, - {file = "websockets-12.0-pp39-pypy39_pp73-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba0cab91b3956dfa9f512147860783a1829a8d905ee218a9837c18f683239611"}, - {file = "websockets-12.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:2cb388a5bfb56df4d9a406783b7f9dbefb888c09b71629351cc6b036e9259370"}, - {file = "websockets-12.0-py3-none-any.whl", hash = "sha256:dc284bbc8d7c78a6c69e0c7325ab46ee5e40bb4d50e494d8131a07ef47500e9e"}, - {file = 
"websockets-12.0.tar.gz", hash = "sha256:81df9cbcbb6c260de1e007e58c011bfebe2dafc8435107b0537f393dd38c8b1b"}, + {file = "websockets-13.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:f48c749857f8fb598fb890a75f540e3221d0976ed0bf879cf3c7eef34151acee"}, + {file = "websockets-13.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:c7e72ce6bda6fb9409cc1e8164dd41d7c91466fb599eb047cfda72fe758a34a7"}, + {file = "websockets-13.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:f779498eeec470295a2b1a5d97aa1bc9814ecd25e1eb637bd9d1c73a327387f6"}, + {file = "websockets-13.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4676df3fe46956fbb0437d8800cd5f2b6d41143b6e7e842e60554398432cf29b"}, + {file = "websockets-13.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a7affedeb43a70351bb811dadf49493c9cfd1ed94c9c70095fd177e9cc1541fa"}, + {file = "websockets-13.1-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1971e62d2caa443e57588e1d82d15f663b29ff9dfe7446d9964a4b6f12c1e700"}, + {file = "websockets-13.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:5f2e75431f8dc4a47f31565a6e1355fb4f2ecaa99d6b89737527ea917066e26c"}, + {file = "websockets-13.1-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:58cf7e75dbf7e566088b07e36ea2e3e2bd5676e22216e4cad108d4df4a7402a0"}, + {file = "websockets-13.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:c90d6dec6be2c7d03378a574de87af9b1efea77d0c52a8301dd831ece938452f"}, + {file = "websockets-13.1-cp310-cp310-win32.whl", hash = "sha256:730f42125ccb14602f455155084f978bd9e8e57e89b569b4d7f0f0c17a448ffe"}, + {file = "websockets-13.1-cp310-cp310-win_amd64.whl", hash = "sha256:5993260f483d05a9737073be197371940c01b257cc45ae3f1d5d7adb371b266a"}, + {file = "websockets-13.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:61fc0dfcda609cda0fc9fe7977694c0c59cf9d749fbb17f4e9483929e3c48a19"}, + {file = "websockets-13.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:ceec59f59d092c5007e815def4ebb80c2de330e9588e101cf8bd94c143ec78a5"}, + {file = "websockets-13.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:c1dca61c6db1166c48b95198c0b7d9c990b30c756fc2923cc66f68d17dc558fd"}, + {file = "websockets-13.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:308e20f22c2c77f3f39caca508e765f8725020b84aa963474e18c59accbf4c02"}, + {file = "websockets-13.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:62d516c325e6540e8a57b94abefc3459d7dab8ce52ac75c96cad5549e187e3a7"}, + {file = "websockets-13.1-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:87c6e35319b46b99e168eb98472d6c7d8634ee37750d7693656dc766395df096"}, + {file = "websockets-13.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:5f9fee94ebafbc3117c30be1844ed01a3b177bb6e39088bc6b2fa1dc15572084"}, + {file = "websockets-13.1-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:7c1e90228c2f5cdde263253fa5db63e6653f1c00e7ec64108065a0b9713fa1b3"}, + {file = "websockets-13.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:6548f29b0e401eea2b967b2fdc1c7c7b5ebb3eeb470ed23a54cd45ef078a0db9"}, + {file = "websockets-13.1-cp311-cp311-win32.whl", hash = "sha256:c11d4d16e133f6df8916cc5b7e3e96ee4c44c936717d684a94f48f82edb7c92f"}, + {file = "websockets-13.1-cp311-cp311-win_amd64.whl", hash = 
"sha256:d04f13a1d75cb2b8382bdc16ae6fa58c97337253826dfe136195b7f89f661557"}, + {file = "websockets-13.1-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:9d75baf00138f80b48f1eac72ad1535aac0b6461265a0bcad391fc5aba875cfc"}, + {file = "websockets-13.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:9b6f347deb3dcfbfde1c20baa21c2ac0751afaa73e64e5b693bb2b848efeaa49"}, + {file = "websockets-13.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:de58647e3f9c42f13f90ac7e5f58900c80a39019848c5547bc691693098ae1bd"}, + {file = "websockets-13.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a1b54689e38d1279a51d11e3467dd2f3a50f5f2e879012ce8f2d6943f00e83f0"}, + {file = "websockets-13.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:cf1781ef73c073e6b0f90af841aaf98501f975d306bbf6221683dd594ccc52b6"}, + {file = "websockets-13.1-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8d23b88b9388ed85c6faf0e74d8dec4f4d3baf3ecf20a65a47b836d56260d4b9"}, + {file = "websockets-13.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:3c78383585f47ccb0fcf186dcb8a43f5438bd7d8f47d69e0b56f71bf431a0a68"}, + {file = "websockets-13.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:d6d300f8ec35c24025ceb9b9019ae9040c1ab2f01cddc2bcc0b518af31c75c14"}, + {file = "websockets-13.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:a9dcaf8b0cc72a392760bb8755922c03e17a5a54e08cca58e8b74f6902b433cf"}, + {file = "websockets-13.1-cp312-cp312-win32.whl", hash = "sha256:2f85cf4f2a1ba8f602298a853cec8526c2ca42a9a4b947ec236eaedb8f2dc80c"}, + {file = "websockets-13.1-cp312-cp312-win_amd64.whl", hash = "sha256:38377f8b0cdeee97c552d20cf1865695fcd56aba155ad1b4ca8779a5b6ef4ac3"}, + {file = "websockets-13.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:a9ab1e71d3d2e54a0aa646ab6d4eebfaa5f416fe78dfe4da2839525dc5d765c6"}, + {file = "websockets-13.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:b9d7439d7fab4dce00570bb906875734df13d9faa4b48e261c440a5fec6d9708"}, + {file = "websockets-13.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:327b74e915cf13c5931334c61e1a41040e365d380f812513a255aa804b183418"}, + {file = "websockets-13.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:325b1ccdbf5e5725fdcb1b0e9ad4d2545056479d0eee392c291c1bf76206435a"}, + {file = "websockets-13.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:346bee67a65f189e0e33f520f253d5147ab76ae42493804319b5716e46dddf0f"}, + {file = "websockets-13.1-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:91a0fa841646320ec0d3accdff5b757b06e2e5c86ba32af2e0815c96c7a603c5"}, + {file = "websockets-13.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:18503d2c5f3943e93819238bf20df71982d193f73dcecd26c94514f417f6b135"}, + {file = "websockets-13.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:a9cd1af7e18e5221d2878378fbc287a14cd527fdd5939ed56a18df8a31136bb2"}, + {file = "websockets-13.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:70c5be9f416aa72aab7a2a76c90ae0a4fe2755c1816c153c1a2bcc3333ce4ce6"}, + {file = "websockets-13.1-cp313-cp313-win32.whl", hash = "sha256:624459daabeb310d3815b276c1adef475b3e6804abaf2d9d2c061c319f7f187d"}, + {file = "websockets-13.1-cp313-cp313-win_amd64.whl", hash = 
"sha256:c518e84bb59c2baae725accd355c8dc517b4a3ed8db88b4bc93c78dae2974bf2"}, + {file = "websockets-13.1-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:c7934fd0e920e70468e676fe7f1b7261c1efa0d6c037c6722278ca0228ad9d0d"}, + {file = "websockets-13.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:149e622dc48c10ccc3d2760e5f36753db9cacf3ad7bc7bbbfd7d9c819e286f23"}, + {file = "websockets-13.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:a569eb1b05d72f9bce2ebd28a1ce2054311b66677fcd46cf36204ad23acead8c"}, + {file = "websockets-13.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:95df24ca1e1bd93bbca51d94dd049a984609687cb2fb08a7f2c56ac84e9816ea"}, + {file = "websockets-13.1-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d8dbb1bf0c0a4ae8b40bdc9be7f644e2f3fb4e8a9aca7145bfa510d4a374eeb7"}, + {file = "websockets-13.1-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:035233b7531fb92a76beefcbf479504db8c72eb3bff41da55aecce3a0f729e54"}, + {file = "websockets-13.1-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:e4450fc83a3df53dec45922b576e91e94f5578d06436871dce3a6be38e40f5db"}, + {file = "websockets-13.1-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:463e1c6ec853202dd3657f156123d6b4dad0c546ea2e2e38be2b3f7c5b8e7295"}, + {file = "websockets-13.1-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:6d6855bbe70119872c05107e38fbc7f96b1d8cb047d95c2c50869a46c65a8e96"}, + {file = "websockets-13.1-cp38-cp38-win32.whl", hash = "sha256:204e5107f43095012b00f1451374693267adbb832d29966a01ecc4ce1db26faf"}, + {file = "websockets-13.1-cp38-cp38-win_amd64.whl", hash = "sha256:485307243237328c022bc908b90e4457d0daa8b5cf4b3723fd3c4a8012fce4c6"}, + {file = "websockets-13.1-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:9b37c184f8b976f0c0a231a5f3d6efe10807d41ccbe4488df8c74174805eea7d"}, + {file = "websockets-13.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:163e7277e1a0bd9fb3c8842a71661ad19c6aa7bb3d6678dc7f89b17fbcc4aeb7"}, + {file = "websockets-13.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:4b889dbd1342820cc210ba44307cf75ae5f2f96226c0038094455a96e64fb07a"}, + {file = "websockets-13.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:586a356928692c1fed0eca68b4d1c2cbbd1ca2acf2ac7e7ebd3b9052582deefa"}, + {file = "websockets-13.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7bd6abf1e070a6b72bfeb71049d6ad286852e285f146682bf30d0296f5fbadfa"}, + {file = "websockets-13.1-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6d2aad13a200e5934f5a6767492fb07151e1de1d6079c003ab31e1823733ae79"}, + {file = "websockets-13.1-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:df01aea34b6e9e33572c35cd16bae5a47785e7d5c8cb2b54b2acdb9678315a17"}, + {file = "websockets-13.1-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:e54affdeb21026329fb0744ad187cf812f7d3c2aa702a5edb562b325191fcab6"}, + {file = "websockets-13.1-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:9ef8aa8bdbac47f4968a5d66462a2a0935d044bf35c0e5a8af152d58516dbeb5"}, + {file = "websockets-13.1-cp39-cp39-win32.whl", hash = "sha256:deeb929efe52bed518f6eb2ddc00cc496366a14c726005726ad62c2dd9017a3c"}, + {file = "websockets-13.1-cp39-cp39-win_amd64.whl", hash = "sha256:7c65ffa900e7cc958cd088b9a9157a8141c991f8c53d11087e6fb7277a03f81d"}, + {file = 
"websockets-13.1-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:5dd6da9bec02735931fccec99d97c29f47cc61f644264eb995ad6c0c27667238"}, + {file = "websockets-13.1-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:2510c09d8e8df777177ee3d40cd35450dc169a81e747455cc4197e63f7e7bfe5"}, + {file = "websockets-13.1-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f1c3cf67185543730888b20682fb186fc8d0fa6f07ccc3ef4390831ab4b388d9"}, + {file = "websockets-13.1-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:bcc03c8b72267e97b49149e4863d57c2d77f13fae12066622dc78fe322490fe6"}, + {file = "websockets-13.1-pp310-pypy310_pp73-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:004280a140f220c812e65f36944a9ca92d766b6cc4560be652a0a3883a79ed8a"}, + {file = "websockets-13.1-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:e2620453c075abeb0daa949a292e19f56de518988e079c36478bacf9546ced23"}, + {file = "websockets-13.1-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:9156c45750b37337f7b0b00e6248991a047be4aa44554c9886fe6bdd605aab3b"}, + {file = "websockets-13.1-pp38-pypy38_pp73-macosx_11_0_arm64.whl", hash = "sha256:80c421e07973a89fbdd93e6f2003c17d20b69010458d3a8e37fb47874bd67d51"}, + {file = "websockets-13.1-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:82d0ba76371769d6a4e56f7e83bb8e81846d17a6190971e38b5de108bde9b0d7"}, + {file = "websockets-13.1-pp38-pypy38_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e9875a0143f07d74dc5e1ded1c4581f0d9f7ab86c78994e2ed9e95050073c94d"}, + {file = "websockets-13.1-pp38-pypy38_pp73-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a11e38ad8922c7961447f35c7b17bffa15de4d17c70abd07bfbe12d6faa3e027"}, + {file = "websockets-13.1-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:4059f790b6ae8768471cddb65d3c4fe4792b0ab48e154c9f0a04cefaabcd5978"}, + {file = "websockets-13.1-pp39-pypy39_pp73-macosx_10_15_x86_64.whl", hash = "sha256:25c35bf84bf7c7369d247f0b8cfa157f989862c49104c5cf85cb5436a641d93e"}, + {file = "websockets-13.1-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:83f91d8a9bb404b8c2c41a707ac7f7f75b9442a0a876df295de27251a856ad09"}, + {file = "websockets-13.1-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7a43cfdcddd07f4ca2b1afb459824dd3c6d53a51410636a2c7fc97b9a8cf4842"}, + {file = "websockets-13.1-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:48a2ef1381632a2f0cb4efeff34efa97901c9fbc118e01951ad7cfc10601a9bb"}, + {file = "websockets-13.1-pp39-pypy39_pp73-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:459bf774c754c35dbb487360b12c5727adab887f1622b8aed5755880a21c4a20"}, + {file = "websockets-13.1-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:95858ca14a9f6fa8413d29e0a585b31b278388aa775b8a81fa24830123874678"}, + {file = "websockets-13.1-py3-none-any.whl", hash = "sha256:a9a396a6ad26130cdae92ae10c36af09d9bfe6cafe69670fd3b6da9b07b4044f"}, + {file = "websockets-13.1.tar.gz", hash = "sha256:a3b3366087c1bc0a2795111edcadddb8b3b59509d5db5d7ea3fdd69f954a8878"}, ] [[package]] @@ -4457,103 +5480,103 @@ files = [ [[package]] name = "yarl" -version = "1.12.1" +version = "1.13.1" description = "Yet another URL library" optional 
= true python-versions = ">=3.8" files = [ - {file = "yarl-1.12.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:64c5b0f2b937fe40d0967516eee5504b23cb247b8b7ffeba7213a467d9646fdc"}, - {file = "yarl-1.12.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:2e430ac432f969ef21770645743611c1618362309e3ad7cab45acd1ad1a540ff"}, - {file = "yarl-1.12.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:3e26e64f42bce5ddf9002092b2c37b13071c2e6413d5c05f9fa9de58ed2f7749"}, - {file = "yarl-1.12.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0103c52f8dfe5d573c856322149ddcd6d28f51b4d4a3ee5c4b3c1b0a05c3d034"}, - {file = "yarl-1.12.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b63465b53baeaf2122a337d4ab57d6bbdd09fcadceb17a974cfa8a0300ad9c67"}, - {file = "yarl-1.12.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:17d4dc4ff47893a06737b8788ed2ba2f5ac4e8bb40281c8603920f7d011d5bdd"}, - {file = "yarl-1.12.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a8b54949267bd5704324397efe9fbb6aa306466dee067550964e994d309db5f1"}, - {file = "yarl-1.12.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:10b690cd78cbaca2f96a7462f303fdd2b596d3978b49892e4b05a7567c591572"}, - {file = "yarl-1.12.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:c85ab016e96a975afbdb9d49ca90f3bca9920ef27c64300843fe91c3d59d8d20"}, - {file = "yarl-1.12.1-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:c1caa5763d1770216596e0a71b5567f27aac28c95992110212c108ec74589a48"}, - {file = "yarl-1.12.1-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:595bbcdbfc4a9c6989d7489dca8510cba053ff46b16c84ffd95ac8e90711d419"}, - {file = "yarl-1.12.1-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:e64f0421892a207d3780903085c1b04efeb53b16803b23d947de5a7261b71355"}, - {file = "yarl-1.12.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:319c206e83e46ec2421b25b300c8482b6fe8a018baca246be308c736d9dab267"}, - {file = "yarl-1.12.1-cp310-cp310-win32.whl", hash = "sha256:da045bd1147d12bd43fb032296640a7cc17a7f2eaba67495988362e99db24fd2"}, - {file = "yarl-1.12.1-cp310-cp310-win_amd64.whl", hash = "sha256:aebbd47df77190ada603157f0b3670d578c110c31746ecc5875c394fdcc59a99"}, - {file = "yarl-1.12.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:28389a68981676bf74e2e199fe42f35d1aa27a9c98e3a03e6f58d2d3d054afe1"}, - {file = "yarl-1.12.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:f736f54565f8dd7e3ab664fef2bc461d7593a389a7f28d4904af8d55a91bd55f"}, - {file = "yarl-1.12.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:6dee0496d5f1a8f57f0f28a16f81a2033fc057a2cf9cd710742d11828f8c80e2"}, - {file = "yarl-1.12.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f8981a94a27ac520a398302afb74ae2c0be1c3d2d215c75c582186a006c9e7b0"}, - {file = "yarl-1.12.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ff54340fc1129e8e181827e2234af3ff659b4f17d9bbe77f43bc19e6577fadec"}, - {file = "yarl-1.12.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:54c8cee662b5f8c30ad7eedfc26123f845f007798e4ff1001d9528fe959fd23c"}, - {file = "yarl-1.12.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e97a29b37830ba1262d8dfd48ddb5b28ad4d3ebecc5d93a9c7591d98641ec737"}, - {file = "yarl-1.12.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = 
"sha256:6c89894cc6f6ddd993813e79244b36b215c14f65f9e4f1660b1f2ba9e5594b95"}, - {file = "yarl-1.12.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:712ba8722c0699daf186de089ddc4677651eb9875ed7447b2ad50697522cbdd9"}, - {file = "yarl-1.12.1-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:6e9a9f50892153bad5046c2a6df153224aa6f0573a5a8ab44fc54a1e886f6e21"}, - {file = "yarl-1.12.1-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:1d4017e78fb22bc797c089b746230ad78ecd3cdb215bc0bd61cb72b5867da57e"}, - {file = "yarl-1.12.1-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:f494c01b28645c431239863cb17af8b8d15b93b0d697a0320d5dd34cd9d7c2fa"}, - {file = "yarl-1.12.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:de4544b1fb29cf14870c4e2b8a897c0242449f5dcebd3e0366aa0aa3cf58a23a"}, - {file = "yarl-1.12.1-cp311-cp311-win32.whl", hash = "sha256:7564525a4673fde53dee7d4c307a961c0951918f0b8c7f09b2c9e02067cf6504"}, - {file = "yarl-1.12.1-cp311-cp311-win_amd64.whl", hash = "sha256:f23bb1a7a6e8e8b612a164fdd08e683bcc16c76f928d6dbb7bdbee2374fbfee6"}, - {file = "yarl-1.12.1-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:a3e2aff8b822ab0e0bdbed9f50494b3a35629c4b9488ae391659973a37a9f53f"}, - {file = "yarl-1.12.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:22dda2799c8d39041d731e02bf7690f0ef34f1691d9ac9dfcb98dd1e94c8b058"}, - {file = "yarl-1.12.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:18c2a7757561f05439c243f517dbbb174cadfae3a72dee4ae7c693f5b336570f"}, - {file = "yarl-1.12.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:835010cc17d0020e7931d39e487d72c8e01c98e669b6896a8b8c9aa8ca69a949"}, - {file = "yarl-1.12.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e2254fe137c4a360b0a13173a56444f756252c9283ba4d267ca8e9081cd140ea"}, - {file = "yarl-1.12.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f6a071d2c3d39b4104f94fc08ab349e9b19b951ad4b8e3b6d7ea92d6ef7ccaf8"}, - {file = "yarl-1.12.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:73a183042ae0918c82ce2df38c3db2409b0eeae88e3afdfc80fb67471a95b33b"}, - {file = "yarl-1.12.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:326b8a079a9afcac0575971e56dabdf7abb2ea89a893e6949b77adfeb058b50e"}, - {file = "yarl-1.12.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:126309c0f52a2219b3d1048aca00766429a1346596b186d51d9fa5d2070b7b13"}, - {file = "yarl-1.12.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:ba1c779b45a399cc25f511c681016626f69e51e45b9d350d7581998722825af9"}, - {file = "yarl-1.12.1-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:af1107299cef049ad00a93df4809517be432283a0847bcae48343ebe5ea340dc"}, - {file = "yarl-1.12.1-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:20d817c0893191b2ab0ba30b45b77761e8dfec30a029b7c7063055ca71157f84"}, - {file = "yarl-1.12.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:d4f818f6371970d6a5d1e42878389bbfb69dcde631e4bbac5ec1cb11158565ca"}, - {file = "yarl-1.12.1-cp312-cp312-win32.whl", hash = "sha256:0ac33d22b2604b020569a82d5f8a03ba637ba42cc1adf31f616af70baf81710b"}, - {file = "yarl-1.12.1-cp312-cp312-win_amd64.whl", hash = "sha256:fd24996e12e1ba7c397c44be75ca299da14cde34d74bc5508cce233676cc68d0"}, - {file = "yarl-1.12.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:dea360778e0668a7ad25d7727d03364de8a45bfd5d808f81253516b9f2217765"}, - {file = 
"yarl-1.12.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:1f50a37aeeb5179d293465e522fd686080928c4d89e0ff215e1f963405ec4def"}, - {file = "yarl-1.12.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:0274b1b7a9c9c32b7bf250583e673ff99fb9fccb389215841e2652d9982de740"}, - {file = "yarl-1.12.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a4f3ab9eb8ab2d585ece959c48d234f7b39ac0ca1954a34d8b8e58a52064bdb3"}, - {file = "yarl-1.12.1-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8d31dd0245d88cf7239e96e8f2a99f815b06e458a5854150f8e6f0e61618d41b"}, - {file = "yarl-1.12.1-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a96198d5d26f40557d986c1253bfe0e02d18c9d9b93cf389daf1a3c9f7c755fa"}, - {file = "yarl-1.12.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ddae504cfb556fe220efae65e35be63cd11e3c314b202723fc2119ce19f0ca2e"}, - {file = "yarl-1.12.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:bce00f3b1f7f644faae89677ca68645ed5365f1c7f874fdd5ebf730a69640d38"}, - {file = "yarl-1.12.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:eee5ff934b0c9f4537ff9596169d56cab1890918004791a7a06b879b3ba2a7ef"}, - {file = "yarl-1.12.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:4ea99e64b2ad2635e0f0597b63f5ea6c374791ff2fa81cdd4bad8ed9f047f56f"}, - {file = "yarl-1.12.1-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:5c667b383529520b8dd6bd496fc318678320cb2a6062fdfe6d3618da6b8790f6"}, - {file = "yarl-1.12.1-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:d920401941cb898ef089422e889759dd403309eb370d0e54f1bdf6ca07fef603"}, - {file = "yarl-1.12.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:501a1576716032cc6d48c7c47bcdc42d682273415a8f2908e7e72cb4625801f3"}, - {file = "yarl-1.12.1-cp313-cp313-win32.whl", hash = "sha256:24416bb5e221e29ddf8aac5b97e94e635ca2c5be44a1617ad6fe32556df44294"}, - {file = "yarl-1.12.1-cp313-cp313-win_amd64.whl", hash = "sha256:71af3766bb46738d12cc288d9b8de7ef6f79c31fd62757e2b8a505fe3680b27f"}, - {file = "yarl-1.12.1-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:c924deab8105f86980983eced740433fb7554a7f66db73991affa4eda99d5402"}, - {file = "yarl-1.12.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:5fb475a4cdde582c9528bb412b98f899680492daaba318231e96f1a0a1bb0d53"}, - {file = "yarl-1.12.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:36ee0115b9edca904153a66bb74a9ff1ce38caff015de94eadfb9ba8e6ecd317"}, - {file = "yarl-1.12.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2631c9d7386bd2d4ce24ecc6ebf9ae90b3efd713d588d90504eaa77fec4dba01"}, - {file = "yarl-1.12.1-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2376d8cf506dffd0e5f2391025ae8675b09711016656590cb03b55894161fcfa"}, - {file = "yarl-1.12.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:24197ba3114cc85ddd4091e19b2ddc62650f2e4a899e51b074dfd52d56cf8c72"}, - {file = "yarl-1.12.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bfdf419bf5d3644f94cd7052954fc233522f5a1b371fc0b00219ebd9c14d5798"}, - {file = "yarl-1.12.1-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8112f640a4f7e7bf59f7cabf0d47a29b8977528c521d73a64d5cc9e99e48a174"}, - {file = "yarl-1.12.1-cp38-cp38-musllinux_1_2_aarch64.whl", hash = 
"sha256:607d12f0901f6419a8adceb139847c42c83864b85371f58270e42753f9780fa6"}, - {file = "yarl-1.12.1-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:664380c7ed524a280b6a2d5d9126389c3e96cd6e88986cdb42ca72baa27421d6"}, - {file = "yarl-1.12.1-cp38-cp38-musllinux_1_2_ppc64le.whl", hash = "sha256:0d0a5e87bc48d76dfcfc16295201e9812d5f33d55b4a0b7cad1025b92bf8b91b"}, - {file = "yarl-1.12.1-cp38-cp38-musllinux_1_2_s390x.whl", hash = "sha256:eff6bac402719c14e17efe845d6b98593c56c843aca6def72080fbede755fd1f"}, - {file = "yarl-1.12.1-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:22839d1d1eab9e4b427828a88a22beb86f67c14d8ff81175505f1cc8493f3500"}, - {file = "yarl-1.12.1-cp38-cp38-win32.whl", hash = "sha256:717f185086bb9d817d4537dd18d5df5d657598cd00e6fc22e4d54d84de266c1d"}, - {file = "yarl-1.12.1-cp38-cp38-win_amd64.whl", hash = "sha256:71978ba778948760cff528235c951ea0ef7a4f9c84ac5a49975f8540f76c3f73"}, - {file = "yarl-1.12.1-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:30ffc046ebddccb3c4cac72c1a3e1bc343492336f3ca86d24672e90ccc5e788a"}, - {file = "yarl-1.12.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:f10954b233d4df5cc3137ffa5ced97f8894152df817e5d149bf05a0ef2ab8134"}, - {file = "yarl-1.12.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:2e912b282466444023610e4498e3795c10e7cfd641744524876239fcf01d538d"}, - {file = "yarl-1.12.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6af871f70cfd5b528bd322c65793b5fd5659858cdfaa35fbe563fb99b667ed1f"}, - {file = "yarl-1.12.1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c3e4e1f7b08d1ec6b685ccd3e2d762219c550164fbf524498532e39f9413436e"}, - {file = "yarl-1.12.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9a7ee79183f0b17dcede8b6723e7da2ded529cf159a878214be9a5d3098f5b1e"}, - {file = "yarl-1.12.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:96c8ff1e1dd680e38af0887927cab407a4e51d84a5f02ae3d6eb87233036c763"}, - {file = "yarl-1.12.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7e9905fc2dc1319e4c39837b906a024cf71b1261cc66b0cd89678f779c0c61f5"}, - {file = "yarl-1.12.1-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:01549468858b87d36f967c97d02e6e54106f444aeb947ed76f8f71f85ed07cec"}, - {file = "yarl-1.12.1-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:96b34830bd6825ca0220bf005ea99ac83eb9ce51301ddb882dcf613ae6cd95fb"}, - {file = "yarl-1.12.1-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:2aee7594d2c2221c717a8e394bbed4740029df4c0211ceb0f04815686e99c795"}, - {file = "yarl-1.12.1-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:15871130439ad10abb25a4631120d60391aa762b85fcab971411e556247210a0"}, - {file = "yarl-1.12.1-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:838dde2cb570cfbb4cab8a876a0974e8b90973ea40b3ac27a79b8a74c8a2db15"}, - {file = "yarl-1.12.1-cp39-cp39-win32.whl", hash = "sha256:eacbcf30efaca7dc5cb264228ffecdb95fdb1e715b1ec937c0ce6b734161e0c8"}, - {file = "yarl-1.12.1-cp39-cp39-win_amd64.whl", hash = "sha256:76a59d1b63de859398bc7764c860a769499511463c1232155061fe0147f13e01"}, - {file = "yarl-1.12.1-py3-none-any.whl", hash = "sha256:dc3192a81ecd5ff954cecd690327badd5a84d00b877e1573f7c9097ce13e5bfb"}, - {file = "yarl-1.12.1.tar.gz", hash = "sha256:5b860055199aec8d6fe4dcee3c5196ce506ca198a50aab0059ffd26e8e815828"}, + {file = "yarl-1.13.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:82e692fb325013a18a5b73a4fed5a1edaa7c58144dc67ad9ef3d604eccd451ad"}, + 
{file = "yarl-1.13.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:df4e82e68f43a07735ae70a2d84c0353e58e20add20ec0af611f32cd5ba43fb4"}, + {file = "yarl-1.13.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:ec9dd328016d8d25702a24ee274932aebf6be9787ed1c28d021945d264235b3c"}, + {file = "yarl-1.13.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5820bd4178e6a639b3ef1db8b18500a82ceab6d8b89309e121a6859f56585b05"}, + {file = "yarl-1.13.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:86c438ce920e089c8c2388c7dcc8ab30dfe13c09b8af3d306bcabb46a053d6f7"}, + {file = "yarl-1.13.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3de86547c820e4f4da4606d1c8ab5765dd633189791f15247706a2eeabc783ae"}, + {file = "yarl-1.13.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8ca53632007c69ddcdefe1e8cbc3920dd88825e618153795b57e6ebcc92e752a"}, + {file = "yarl-1.13.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d4ee1d240b84e2f213565f0ec08caef27a0e657d4c42859809155cf3a29d1735"}, + {file = "yarl-1.13.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:c49f3e379177f4477f929097f7ed4b0622a586b0aa40c07ac8c0f8e40659a1ac"}, + {file = "yarl-1.13.1-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:5c5e32fef09ce101fe14acd0f498232b5710effe13abac14cd95de9c274e689e"}, + {file = "yarl-1.13.1-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:ab9524e45ee809a083338a749af3b53cc7efec458c3ad084361c1dbf7aaf82a2"}, + {file = "yarl-1.13.1-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:b1481c048fe787f65e34cb06f7d6824376d5d99f1231eae4778bbe5c3831076d"}, + {file = "yarl-1.13.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:31497aefd68036d8e31bfbacef915826ca2e741dbb97a8d6c7eac66deda3b606"}, + {file = "yarl-1.13.1-cp310-cp310-win32.whl", hash = "sha256:1fa56f34b2236f5192cb5fceba7bbb09620e5337e0b6dfe2ea0ddbd19dd5b154"}, + {file = "yarl-1.13.1-cp310-cp310-win_amd64.whl", hash = "sha256:1bbb418f46c7f7355084833051701b2301092e4611d9e392360c3ba2e3e69f88"}, + {file = "yarl-1.13.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:216a6785f296169ed52cd7dcdc2612f82c20f8c9634bf7446327f50398732a51"}, + {file = "yarl-1.13.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:40c6e73c03a6befb85b72da213638b8aaa80fe4136ec8691560cf98b11b8ae6e"}, + {file = "yarl-1.13.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:2430cf996113abe5aee387d39ee19529327205cda975d2b82c0e7e96e5fdabdc"}, + {file = "yarl-1.13.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9fb4134cc6e005b99fa29dbc86f1ea0a298440ab6b07c6b3ee09232a3b48f495"}, + {file = "yarl-1.13.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:309c104ecf67626c033845b860d31594a41343766a46fa58c3309c538a1e22b2"}, + {file = "yarl-1.13.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f90575e9fe3aae2c1e686393a9689c724cd00045275407f71771ae5d690ccf38"}, + {file = "yarl-1.13.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9d2e1626be8712333a9f71270366f4a132f476ffbe83b689dd6dc0d114796c74"}, + {file = "yarl-1.13.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5b66c87da3c6da8f8e8b648878903ca54589038a0b1e08dde2c86d9cd92d4ac9"}, + {file = "yarl-1.13.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = 
"sha256:cf1ad338620249f8dd6d4b6a91a69d1f265387df3697ad5dc996305cf6c26fb2"}, + {file = "yarl-1.13.1-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:9915300fe5a0aa663c01363db37e4ae8e7c15996ebe2c6cce995e7033ff6457f"}, + {file = "yarl-1.13.1-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:703b0f584fcf157ef87816a3c0ff868e8c9f3c370009a8b23b56255885528f10"}, + {file = "yarl-1.13.1-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:1d8e3ca29f643dd121f264a7c89f329f0fcb2e4461833f02de6e39fef80f89da"}, + {file = "yarl-1.13.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:7055bbade838d68af73aea13f8c86588e4bcc00c2235b4b6d6edb0dbd174e246"}, + {file = "yarl-1.13.1-cp311-cp311-win32.whl", hash = "sha256:a3442c31c11088e462d44a644a454d48110f0588de830921fd201060ff19612a"}, + {file = "yarl-1.13.1-cp311-cp311-win_amd64.whl", hash = "sha256:81bad32c8f8b5897c909bf3468bf601f1b855d12f53b6af0271963ee67fff0d2"}, + {file = "yarl-1.13.1-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:f452cc1436151387d3d50533523291d5f77c6bc7913c116eb985304abdbd9ec9"}, + {file = "yarl-1.13.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:9cec42a20eae8bebf81e9ce23fb0d0c729fc54cf00643eb251ce7c0215ad49fe"}, + {file = "yarl-1.13.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:d959fe96e5c2712c1876d69af0507d98f0b0e8d81bee14cfb3f6737470205419"}, + {file = "yarl-1.13.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b8c837ab90c455f3ea8e68bee143472ee87828bff19ba19776e16ff961425b57"}, + {file = "yarl-1.13.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:94a993f976cdcb2dc1b855d8b89b792893220db8862d1a619efa7451817c836b"}, + {file = "yarl-1.13.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2b2442a415a5f4c55ced0fade7b72123210d579f7d950e0b5527fc598866e62c"}, + {file = "yarl-1.13.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3fdbf0418489525231723cdb6c79e7738b3cbacbaed2b750cb033e4ea208f220"}, + {file = "yarl-1.13.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6b7f6e699304717fdc265a7e1922561b02a93ceffdaefdc877acaf9b9f3080b8"}, + {file = "yarl-1.13.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:bcd5bf4132e6a8d3eb54b8d56885f3d3a38ecd7ecae8426ecf7d9673b270de43"}, + {file = "yarl-1.13.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:2a93a4557f7fc74a38ca5a404abb443a242217b91cd0c4840b1ebedaad8919d4"}, + {file = "yarl-1.13.1-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:22b739f99c7e4787922903f27a892744189482125cc7b95b747f04dd5c83aa9f"}, + {file = "yarl-1.13.1-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:2db874dd1d22d4c2c657807562411ffdfabec38ce4c5ce48b4c654be552759dc"}, + {file = "yarl-1.13.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:4feaaa4742517eaceafcbe74595ed335a494c84634d33961214b278126ec1485"}, + {file = "yarl-1.13.1-cp312-cp312-win32.whl", hash = "sha256:bbf9c2a589be7414ac4a534d54e4517d03f1cbb142c0041191b729c2fa23f320"}, + {file = "yarl-1.13.1-cp312-cp312-win_amd64.whl", hash = "sha256:d07b52c8c450f9366c34aa205754355e933922c79135125541daae6cbf31c799"}, + {file = "yarl-1.13.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:95c6737f28069153c399d875317f226bbdea939fd48a6349a3b03da6829fb550"}, + {file = "yarl-1.13.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:cd66152561632ed4b2a9192e7f8e5a1d41e28f58120b4761622e0355f0fe034c"}, + {file = 
"yarl-1.13.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:6a2acde25be0cf9be23a8f6cbd31734536a264723fca860af3ae5e89d771cd71"}, + {file = "yarl-1.13.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9a18595e6a2ee0826bf7dfdee823b6ab55c9b70e8f80f8b77c37e694288f5de1"}, + {file = "yarl-1.13.1-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a31d21089894942f7d9a8df166b495101b7258ff11ae0abec58e32daf8088813"}, + {file = "yarl-1.13.1-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:45f209fb4bbfe8630e3d2e2052535ca5b53d4ce2d2026bed4d0637b0416830da"}, + {file = "yarl-1.13.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8f722f30366474a99745533cc4015b1781ee54b08de73260b2bbe13316079851"}, + {file = "yarl-1.13.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f3bf60444269345d712838bb11cc4eadaf51ff1a364ae39ce87a5ca8ad3bb2c8"}, + {file = "yarl-1.13.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:942c80a832a79c3707cca46bd12ab8aa58fddb34b1626d42b05aa8f0bcefc206"}, + {file = "yarl-1.13.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:44b07e1690f010c3c01d353b5790ec73b2f59b4eae5b0000593199766b3f7a5c"}, + {file = "yarl-1.13.1-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:396e59b8de7e4d59ff5507fb4322d2329865b909f29a7ed7ca37e63ade7f835c"}, + {file = "yarl-1.13.1-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:3bb83a0f12701c0b91112a11148b5217617982e1e466069d0555be9b372f2734"}, + {file = "yarl-1.13.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:c92b89bffc660f1274779cb6fbb290ec1f90d6dfe14492523a0667f10170de26"}, + {file = "yarl-1.13.1-cp313-cp313-win32.whl", hash = "sha256:269c201bbc01d2cbba5b86997a1e0f73ba5e2f471cfa6e226bcaa7fd664b598d"}, + {file = "yarl-1.13.1-cp313-cp313-win_amd64.whl", hash = "sha256:1d0828e17fa701b557c6eaed5edbd9098eb62d8838344486248489ff233998b8"}, + {file = "yarl-1.13.1-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:8be8cdfe20787e6a5fcbd010f8066227e2bb9058331a4eccddec6c0db2bb85b2"}, + {file = "yarl-1.13.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:08d7148ff11cb8e886d86dadbfd2e466a76d5dd38c7ea8ebd9b0e07946e76e4b"}, + {file = "yarl-1.13.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:4afdf84610ca44dcffe8b6c22c68f309aff96be55f5ea2fa31c0c225d6b83e23"}, + {file = "yarl-1.13.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d0d12fe78dcf60efa205e9a63f395b5d343e801cf31e5e1dda0d2c1fb618073d"}, + {file = "yarl-1.13.1-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:298c1eecfd3257aa16c0cb0bdffb54411e3e831351cd69e6b0739be16b1bdaa8"}, + {file = "yarl-1.13.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c14c16831b565707149c742d87a6203eb5597f4329278446d5c0ae7a1a43928e"}, + {file = "yarl-1.13.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5a9bacedbb99685a75ad033fd4de37129449e69808e50e08034034c0bf063f99"}, + {file = "yarl-1.13.1-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:658e8449b84b92a4373f99305de042b6bd0d19bf2080c093881e0516557474a5"}, + {file = "yarl-1.13.1-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:373f16f38721c680316a6a00ae21cc178e3a8ef43c0227f88356a24c5193abd6"}, + {file = "yarl-1.13.1-cp38-cp38-musllinux_1_2_i686.whl", hash = 
"sha256:45d23c4668d4925688e2ea251b53f36a498e9ea860913ce43b52d9605d3d8177"}, + {file = "yarl-1.13.1-cp38-cp38-musllinux_1_2_ppc64le.whl", hash = "sha256:f7917697bcaa3bc3e83db91aa3a0e448bf5cde43c84b7fc1ae2427d2417c0224"}, + {file = "yarl-1.13.1-cp38-cp38-musllinux_1_2_s390x.whl", hash = "sha256:5989a38ba1281e43e4663931a53fbf356f78a0325251fd6af09dd03b1d676a09"}, + {file = "yarl-1.13.1-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:11b3ca8b42a024513adce810385fcabdd682772411d95bbbda3b9ed1a4257644"}, + {file = "yarl-1.13.1-cp38-cp38-win32.whl", hash = "sha256:dcaef817e13eafa547cdfdc5284fe77970b891f731266545aae08d6cce52161e"}, + {file = "yarl-1.13.1-cp38-cp38-win_amd64.whl", hash = "sha256:7addd26594e588503bdef03908fc207206adac5bd90b6d4bc3e3cf33a829f57d"}, + {file = "yarl-1.13.1-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:a0ae6637b173d0c40b9c1462e12a7a2000a71a3258fa88756a34c7d38926911c"}, + {file = "yarl-1.13.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:576365c9f7469e1f6124d67b001639b77113cfd05e85ce0310f5f318fd02fe85"}, + {file = "yarl-1.13.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:78f271722423b2d4851cf1f4fa1a1c4833a128d020062721ba35e1a87154a049"}, + {file = "yarl-1.13.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9d74f3c335cfe9c21ea78988e67f18eb9822f5d31f88b41aec3a1ec5ecd32da5"}, + {file = "yarl-1.13.1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1891d69a6ba16e89473909665cd355d783a8a31bc84720902c5911dbb6373465"}, + {file = "yarl-1.13.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:fb382fd7b4377363cc9f13ba7c819c3c78ed97c36a82f16f3f92f108c787cbbf"}, + {file = "yarl-1.13.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9c8854b9f80693d20cec797d8e48a848c2fb273eb6f2587b57763ccba3f3bd4b"}, + {file = "yarl-1.13.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:bbf2c3f04ff50f16404ce70f822cdc59760e5e2d7965905f0e700270feb2bbfc"}, + {file = "yarl-1.13.1-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:fb9f59f3848edf186a76446eb8bcf4c900fe147cb756fbbd730ef43b2e67c6a7"}, + {file = "yarl-1.13.1-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:ef9b85fa1bc91c4db24407e7c4da93a5822a73dd4513d67b454ca7064e8dc6a3"}, + {file = "yarl-1.13.1-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:098b870c18f1341786f290b4d699504e18f1cd050ed179af8123fd8232513424"}, + {file = "yarl-1.13.1-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:8c723c91c94a3bc8033dd2696a0f53e5d5f8496186013167bddc3fb5d9df46a3"}, + {file = "yarl-1.13.1-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:44a4c40a6f84e4d5955b63462a0e2a988f8982fba245cf885ce3be7618f6aa7d"}, + {file = "yarl-1.13.1-cp39-cp39-win32.whl", hash = "sha256:84bbcdcf393139f0abc9f642bf03f00cac31010f3034faa03224a9ef0bb74323"}, + {file = "yarl-1.13.1-cp39-cp39-win_amd64.whl", hash = "sha256:fc2931ac9ce9c61c9968989ec831d3a5e6fcaaff9474e7cfa8de80b7aff5a093"}, + {file = "yarl-1.13.1-py3-none-any.whl", hash = "sha256:6a5185ad722ab4dd52d5fb1f30dcc73282eb1ed494906a92d1a228d3f89607b0"}, + {file = "yarl-1.13.1.tar.gz", hash = "sha256:ec8cfe2295f3e5e44c51f57272afbd69414ae629ec7c6b27f5a410efc78b70a0"}, ] [package.dependencies] @@ -4580,10 +5603,11 @@ test = ["big-O", "importlib-resources", "jaraco.functools", "jaraco.itertools", type = ["pytest-mypy"] [extras] -core = ["aiosqlite", "apscheduler", "asyncpg", "bcrypt", "beautifulsoup4", "boto3", "deepdiff", "fire", "fsspec", "future", 
"gunicorn", "hatchet-sdk", "litellm", "markdown", "neo4j", "numpy", "ollama", "openpyxl", "passlib", "psutil", "pydantic", "pyjwt", "pypdf", "python-docx", "python-multipart", "python-pptx", "pyyaml", "redis", "sqlalchemy", "supabase", "tokenizers", "unstructured-client", "uvicorn", "vecs"] -core-ingest-movies = [] +core = ["aiosqlite", "apscheduler", "asyncpg", "bcrypt", "boto3", "deepdiff", "fire", "future", "graspologic", "gunicorn", "hatchet-sdk", "litellm", "networkx", "ollama", "passlib", "psutil", "pydantic", "pyjwt", "python-multipart", "pyyaml", "sqlalchemy", "supabase", "tokenizers", "unstructured-client", "uvicorn", "vecs"] +hatchet = ["hatchet-sdk"] +ingestion-bundle = ["beautifulsoup4", "markdown", "numpy", "openpyxl", "pypdf", "python-docx", "python-pptx"] [metadata] lock-version = "2.0" python-versions = ">=3.10,<3.13" -content-hash = "e805b0ac2d8342df8a94971e97c00e75cb6db79a466fac9adf12ea8b0e87dd80" +content-hash = "5c69411b316396adbf4267e0a9f531732d1758e58e8c37c27ca1581f3bcb4c00" \ No newline at end of file diff --git a/py/pyproject.toml b/py/pyproject.toml index a30e5ec37..d073ec83b 100644 --- a/py/pyproject.toml +++ b/py/pyproject.toml @@ -5,12 +5,12 @@ build-backend = "poetry.core.masonry.api" [tool.poetry] name = "r2r" readme = "README.md" -version = "3.1.53" +version = "3.2.00" description = "SciPhi R2R" authors = ["Owen Colegrove "] license = "MIT" -include = ["r2r.toml", "compose.yaml", "compose.neo4j.yaml", "compose.ollama.yaml", "compose.postgres.yaml", "compose.hatchet.yaml", "pyproject.toml"] +include = ["r2r.toml", "compose.yaml", "compose.full.yaml", "pyproject.toml"] packages = [ { include = "r2r", from = "." }, { include = "core", from = "." }, @@ -29,58 +29,93 @@ fastapi = "^0.114.0" httpx = "^0.27.0" nest-asyncio = "^1.6.0" openai = "^1.11.1" +posthog = "^3.5.0" python-dotenv = "^1.0.1" requests = "^2.31.0" toml = "^0.10.2" types-requests = "^2.31.0" -posthog = "^3.5.0" +unstructured-client = "^0.25.5" -# Core dependencies (optional) -future = { version = "^1.0.0", optional = true } -pydantic = { extras = ["email"], version = "^2.8.2", optional = true } -fire = { version = "^0.5.0", optional = true } -gunicorn = { version = "^21.2.0", optional = true } -uvicorn = { version = "^0.27.0.post1", optional = true } +# Shared dependencies (optional) aiosqlite = { version = "^0.20.0", optional = true } -python-multipart = { version = "^0.0.9", optional = true } +apscheduler = { version = "^3.10.4", optional = true } asyncpg = { version = "^0.29.0", optional = true } -redis = { version = "^5.0.4", optional = true } -beautifulsoup4 = { version = "^4.12.3", optional = true } -openpyxl = { version = "^3.1.2", optional = true } -markdown = { version = "^3.6", optional = true } -pypdf = { version = "^4.2.0", optional = true } -python-pptx = { version = "^1.0.1", optional = true } -python-docx = { version = "^1.1.0", optional = true } -vecs = { version = "^0.4.0", optional = true } -litellm = { version = "^1.42.3", optional = true } +bcrypt = { version = "^4.1.3", optional = true } +boto3 = { version = "^1.35.17", optional = true } # for AWS bedrock support +deepdiff = { version = "^7.0.1", optional = true } +fire = { version = "^0.5.0", optional = true } fsspec = { version = "^2024.6.0", optional = true } -sqlalchemy = { version = "^2.0.30", optional = true } +future = { version = "^1.0.0", optional = true } +graspologic = { version = "^3.4.1", optional = true } +gunicorn = { version = "^21.2.0", optional = true } +litellm = { version = "^1.42.3", optional = true } 
+networkx = { version = "^3.3", optional = true } ollama = { version = "^0.3.1", optional = true } -neo4j = { version = "^5.21.0", optional = true } passlib = { version = "^1.7.4", optional = true } -bcrypt = { version = "^4.1.3", optional = true } +psutil = { version = "^6.0.0", optional = true } +python-multipart = { version = "^0.0.9", optional = true } +pydantic = { extras = ["email"], version = "^2.8.2", optional = true } pyjwt = { version = "^2.8.0", optional = true } pyyaml = { version = "^6.0.1", optional = true } -psutil = { version = "^6.0.0", optional = true } -deepdiff = { version = "^7.0.1", optional = true } +sqlalchemy = { version = "^2.0.30", optional = true } supabase = { version = "^2.7.4", optional = true } tokenizers = { version = "0.19", optional = true } -unstructured-client = { version = "^0.25.5", optional = true } +uvicorn = { version = "^0.27.0.post1", optional = true } +vecs = { version = "^0.4.0", optional = true } + +# Hatchet hatchet-sdk = { version = "^0.36.17", optional = true } + +# R2R Ingestion numpy = { version = ">=1.22.4,<1.29.0", optional = true } -apscheduler = { version = "^3.10.4", optional = true } -boto3 = { version = "^1.35.17", optional = true } # for AWS bedrock support +markdown = { version = "^3.6", optional = true } +openpyxl = { version = "^3.1.2", optional = true } +pypdf = { version = "^4.2.0", optional = true } +python-pptx = { version = "^1.0.1", optional = true } +python-docx = { version = "^1.1.0", optional = true } +beautifulsoup4 = { version = "^4.12.3", optional = true } +bs4 = "^0.0.2" + [tool.poetry.extras] core = [ - "future", "pydantic", "fire", "gunicorn", "uvicorn", "aiosqlite", - "asyncpg", "redis", "beautifulsoup4", "openpyxl", "markdown", "pypdf", - "python-pptx", "python-docx", "vecs", "litellm", "python-multipart", - "fsspec", "sqlalchemy", "ollama", "neo4j", "passlib", "bcrypt", - "pyjwt", "pyyaml", "psutil", "deepdiff", "tokenizers", "supabase", - "unstructured-client", "hatchet-sdk", "numpy", "apscheduler", "boto3" + "aiosqlite", + "apscheduler", + "asyncpg", + "bcrypt", + "boto3", + "deepdiff", + "fire", + "future", + "graspologic", + "gunicorn", + "litellm", + "networkx", + "ollama", + "passlib", + "psutil", + "pydantic", + "pyjwt", + "pyyaml", + "sqlalchemy", + "supabase", + "tokenizers", + "unstructured-client", + "uvicorn", + "vecs", + "python-multipart", + "hatchet-sdk", +] +hatchet = ["hatchet-sdk"] +ingestion-bundle = [ + "numpy", + "markdown", + "openpyxl", + "pypdf", + "python-pptx", + "python-docx", + "beautifulsoup4", ] -core-ingest-movies = ["opencv-python", "moviepy"] [tool.poetry.group.dev.dependencies] black = "^24.3.0" @@ -139,4 +174,4 @@ testpaths = [ filterwarnings = [ "ignore::DeprecationWarning", "ignore::pytest.PytestUnraisableExceptionWarning", -] +] \ No newline at end of file diff --git a/py/r2r.toml b/py/r2r.toml index 6b2d857fb..8867a1b32 100644 --- a/py/r2r.toml +++ b/py/r2r.toml @@ -11,14 +11,6 @@ require_email_verification = false default_admin_email = "admin@example.com" default_admin_password = "change_me_immediately" -[chunking] -provider = "unstructured_local" -strategy = "auto" -chunking_strategy = "by_title" -new_after_n_chars = 512 -max_characters = 1_024 -combine_under_n_chars = 128 -overlap = 20 [completion] provider = "litellm" @@ -37,6 +29,8 @@ provider = "bcrypt" [database] provider = "postgres" +default_collection_name = "Default" +default_collection_description = "Your default collection." 
[embedding] provider = "litellm" @@ -50,28 +44,35 @@ concurrent_request_limit = 256 [file] provider = "postgres" +[ingestion] +provider = "r2r" +chunking_strategy = "recursive" +chunk_size = 1_024 +chunk_overlap = 512 +excluded_parsers = ["mp4"] + [kg] -provider = "neo4j" +provider = "postgres" batch_size = 256 [kg.kg_creation_settings] - kg_extraction_prompt = "graphrag_triplet_extraction_zero_shot" + kg_triples_extraction_prompt = "graphrag_triples_extraction_few_shot" entity_types = [] # if empty, all entities are extracted relation_types = [] # if empty, all relations are extracted fragment_merge_count = 4 # number of fragments to merge into a single extraction max_knowledge_triples = 100 - generation_config = { model = "gpt-4o-mini" } # and other params, model used for triplet extraction + max_description_input_length = 1024 + generation_config = { model = "openai/gpt-4o-mini" } # and other params, model used for triplet extraction [kg.kg_enrichment_settings] community_reports_prompt = "graphrag_community_reports_prompt" - max_description_input_length = 65536 - generation_config = { model = "gpt-4o-mini" } # and other params, model used for node description and graph clustering - leiden_params = { max_levels = 10 } # more params here: https://neo4j.com/docs/graph-data-science/current/algorithms/leiden/ + generation_config = { model = "openai/gpt-4o-mini" } # and other params, model used for node description and graph clustering + leiden_params = {} [kg.kg_search_settings] map_system_prompt = "graphrag_map_system_prompt" reduce_system_prompt = "graphrag_reduce_system_prompt" - generation_config = { model = "gpt-4o-mini" } + generation_config = { model = "openai/gpt-4o-mini" } [logging] provider = "local" @@ -79,11 +80,7 @@ log_table = "logs" log_info_table = "log_info" [orchestration] -provider = "hatchet" - -[parsing] -provider = "unstructured_local" -excluded_parsers = ["mp4"] +provider = "simple" [prompt] provider = "r2r" diff --git a/py/r2r/__init__.py b/py/r2r/__init__.py index ab98a96ed..2ea7b41ec 100644 --- a/py/r2r/__init__.py +++ b/py/r2r/__init__.py @@ -1,3 +1,4 @@ +import logging from pathlib import Path import toml @@ -5,6 +6,8 @@ from sdk import * from shared import * +logger = logging.getLogger(__name__) + pyproject_path = Path(__file__).parent.parent / "pyproject.toml" pyproject_data = toml.load(pyproject_path) __version__ = pyproject_data["tool"]["poetry"]["version"] @@ -22,9 +25,10 @@ from core import * __all__ += core.__all__ -except ImportError: - # Core dependencies not installed - pass +except ImportError as e: + logger.error( + f"ImportError: `{e}`, likely due to core dependencies not being installed." 
+ ) # Add a function to get the version diff --git a/py/sdk/client.py b/py/sdk/client.py index 7f8093b09..70cff022a 100644 --- a/py/sdk/client.py +++ b/py/sdk/client.py @@ -9,9 +9,9 @@ from .auth import AuthMethods from .ingestion import IngestionMethods +from .kg import KGMethods from .management import ManagementMethods from .models import R2RException -from .restructure import RestructureMethods from .retrieval import RetrievalMethods from .server import ServerMethods @@ -91,7 +91,7 @@ def __init__( self._auth = AuthMethods self._ingestion = IngestionMethods self._management = ManagementMethods - self._restructure = RestructureMethods + self._kg = KGMethods self._retrieval = RetrievalMethods self._server = ServerMethods @@ -101,7 +101,7 @@ def __init__( self._auth, self._ingestion, self._management, - self._restructure, + self._kg, self._retrieval, self._server, ]: diff --git a/py/sdk/ingestion.py b/py/sdk/ingestion.py index ff53be783..26ab9f59d 100644 --- a/py/sdk/ingestion.py +++ b/py/sdk/ingestion.py @@ -4,18 +4,15 @@ from typing import Optional, Union from uuid import UUID -from .models import ChunkingConfig - class IngestionMethods: - @staticmethod async def ingest_files( client, file_paths: list[str], document_ids: Optional[list[Union[str, UUID]]] = None, metadatas: Optional[list[dict]] = None, - chunking_config: Optional[Union[dict, ChunkingConfig]] = None, + ingestion_config: Optional[dict] = None, ) -> dict: """ Ingest files into your R2R deployment @@ -24,7 +21,7 @@ async def ingest_files( file_paths (List[str]): List of file paths to ingest. document_ids (Optional[List[str]]): List of document IDs. metadatas (Optional[List[dict]]): List of metadata dictionaries for each file. - chunking_config (Optional[Union[dict, ChunkingConfig]]): Custom chunking configuration. + ingestion_config (Optional[Union[dict]]): Custom chunking configuration. Returns: dict: Ingestion results containing processed, failed, and skipped documents. @@ -37,11 +34,6 @@ async def ingest_files( raise ValueError( "Number of metadatas must match number of document IDs." ) - # if chunking_config is not None and not isinstance( - # chunking_config, ChunkingConfig - # ): - # # check if the provided dict maps to a ChunkingConfig - # ChunkingConfig(**chunking_config) all_file_paths: list[str] = [] for path in file_paths: @@ -74,45 +66,20 @@ async def ingest_files( if metadatas: data["metadatas"] = json.dumps(metadatas) - if chunking_config: - data["chunking_config"] = json.dumps( - chunking_config.model_dump() # type: ignore - if isinstance(chunking_config, ChunkingConfig) - else chunking_config - ) + if ingestion_config: + data["ingestion_config"] = json.dumps(ingestion_config) - print("data = ", data) return await client._make_request( "POST", "ingest_files", data=data, files=files_tuples ) - @staticmethod - async def retry_ingest_files( - client, - document_ids: list[Union[str, UUID]], - ) -> dict: - """ - Retry ingestion for failed documents. - - Args: - document_ids (List[Union[str, UUID]]): List of document IDs to retry. - - Returns: - dict: Retry results containing processed, failed, and skipped documents. 
- """ - return await client._make_request( - "POST", - "retry_ingest_files", - data={"document_ids": [str(doc_id) for doc_id in document_ids]}, - ) - @staticmethod async def update_files( client, file_paths: list[str], document_ids: Optional[list[Union[str, UUID]]] = None, metadatas: Optional[list[dict]] = None, - chunking_config: Optional[Union[dict, ChunkingConfig]] = None, + ingestion_config: Optional[dict] = None, ) -> dict: """ Update existing files in your R2R deployment. @@ -121,7 +88,7 @@ async def update_files( file_paths (List[str]): List of file paths to update. document_ids (List[str]): List of document IDs to update. metadatas (Optional[List[dict]]): List of updated metadata dictionaries for each file. - chunking_config (Optional[Union[dict, ChunkingConfig]]): Custom chunking configuration. + ingestion_config (Optional[Union[dict]]): Custom chunking configuration. Returns: dict: Update results containing processed, failed, and skipped documents. @@ -155,12 +122,8 @@ async def update_files( ) if metadatas: data["metadatas"] = json.dumps(metadatas) - if chunking_config: - data["chunking_config"] = ( - chunking_config.model_dump() # type: ignore - if isinstance(chunking_config, ChunkingConfig) - else chunking_config - ) + if ingestion_config: + data["ingestion_config"] = json.dumps(ingestion_config) return await client._make_request( "POST", "update_files", data=data, files=files diff --git a/py/sdk/kg.py b/py/sdk/kg.py new file mode 100644 index 000000000..33a0f70e9 --- /dev/null +++ b/py/sdk/kg.py @@ -0,0 +1,62 @@ +import json +from typing import Optional, Union + +from .models import ( + KGCreationResponse, + KGCreationSettings, + KGEnrichmentResponse, + KGEnrichmentSettings, +) + + +class KGMethods: + + @staticmethod + async def create_graph( + client, + collection_id: str, + kg_creation_settings: Optional[Union[dict, KGCreationSettings]] = None, + ) -> KGCreationResponse: + """ + Create a graph from the given settings. + """ + if isinstance(kg_creation_settings, KGCreationSettings): + kg_creation_settings = kg_creation_settings.model_dump() + elif kg_creation_settings is None or kg_creation_settings == "{}": + kg_creation_settings = {} + + data = { + "collection_id": collection_id, + "kg_creation_settings": json.dumps(kg_creation_settings), + } + + return await client._make_request("POST", "create_graph", json=data) + + @staticmethod + async def enrich_graph( + client, + collection_id: str, + kg_enrichment_settings: Optional[ + Union[KGEnrichmentSettings, dict] + ] = None, + ) -> KGEnrichmentResponse: + """ + Perform graph enrichment over the entire graph. + + Args: + collection_id (str): The ID of the collection to enrich. + kg_enrichment_settings (Optional[Union[dict, KGEnrichmentSettings]]): Settings for the graph enrichment process. + Returns: + KGEnrichmentResponse: Results of the graph enrichment process. 
+ """ + if isinstance(kg_enrichment_settings, KGEnrichmentSettings): + kg_enrichment_settings = kg_enrichment_settings.model_dump() + elif kg_enrichment_settings is None or kg_enrichment_settings == "{}": + kg_enrichment_settings = {} + + data = { + "collection_id": collection_id, + "kg_enrichment_settings": json.dumps(kg_enrichment_settings), + } + + return await client._make_request("POST", "enrich_graph", json=data) diff --git a/py/sdk/management.py b/py/sdk/management.py index f1ea1d999..3dddf64ac 100644 --- a/py/sdk/management.py +++ b/py/sdk/management.py @@ -293,30 +293,6 @@ async def document_chunks( "GET", f"document_chunks/{document_id}", params=params ) - @staticmethod - async def inspect_knowledge_graph( - client, - offset: Optional[int] = None, - limit: Optional[int] = None, - ) -> dict: - """ - Inspect the knowledge graph associated with your R2R deployment. - - Args: - limit (Optional[int]): The maximum number of nodes to return. Defaults to 100. - - Returns: - dict: The knowledge graph inspection results. - """ - params = {} - if offset is not None: - params["offset"] = offset - if limit is not None: - params["limit"] = limit - return await client._make_request( - "GET", "inspect_knowledge_graph", params=params - ) - @staticmethod async def collections_overview( client, diff --git a/py/sdk/models.py b/py/sdk/models.py index ce39eaf67..c17c3ee93 100644 --- a/py/sdk/models.py +++ b/py/sdk/models.py @@ -1,7 +1,4 @@ -from enum import Enum -from typing import Optional - -from shared.abstractions import ( # ChunkingConfig, +from shared.abstractions import ( GenerationConfig, HybridSearchSettings, KGCommunityResult, @@ -30,27 +27,29 @@ UserResponse, ) - -class Strategy(str, Enum): - # Unstructured methods - BY_TITLE = "by_title" - BASIC = "basic" - # R2R methods - RECURSIVE = "recursive" - CHARACTER = "character" - - -# TODO - Remove this class -class ChunkingConfig(R2RSerializable): - provider: str = "unstructured_local" # or unstructured_api - combine_under_n_chars: Optional[int] = 128 - max_characters: Optional[int] = 500 - coordinates: bool = False - encoding: Optional[str] = "utf-8" - extract_image_block_types: Optional[list[str]] = None - gz_uncompressed_content_type: Optional[str] = None - hi_res_model_name: Optional[str] = None - include_orig_elements: Optional[bool] = True - include_page_breaks: bool = False - languages: Optional[list[str]] = None - multipage_sections: bool = True +__all__ = [ + "GenerationConfig", + "HybridSearchSettings", + "KGCommunityResult", + "KGCreationSettings", + "KGEnrichmentSettings", + "KGEntityResult", + "KGGlobalResult", + "KGRelationshipResult", + "KGSearchMethod", + "KGSearchResult", + "KGSearchResultType", + "KGSearchSettings", + "Message", + "MessageType", + "R2RException", + "R2RSerializable", + "Token", + "VectorSearchResult", + "VectorSearchSettings", + "KGCreationResponse", + "KGEnrichmentResponse", + "RAGResponse", + "SearchResponse", + "UserResponse", +] diff --git a/py/sdk/restructure.py b/py/sdk/restructure.py deleted file mode 100644 index 40209c344..000000000 --- a/py/sdk/restructure.py +++ /dev/null @@ -1,72 +0,0 @@ -import json -from typing import Optional, Union - -from .models import ( - KGCreationResponse, - KGCreationSettings, - KGEnrichmentResponse, - KGEnrichmentSettings, -) - - -class RestructureMethods: - - @staticmethod - async def create_graph( - client, - document_ids: Optional[list[str]] = None, - kg_creation_settings: Optional[Union[dict, KGCreationSettings]] = None, - ) -> KGCreationResponse: - """ - Create a 
graph from the given settings. - """ - - data = {} - - if document_ids: - data["document_ids"] = document_ids - if kg_creation_settings: - if isinstance(kg_creation_settings, KGCreationSettings): - kg_creation_settings = kg_creation_settings.dict() - data["kg_creation_settings"] = kg_creation_settings # type: ignore - - response = await client._make_request( - "POST", "create_graph", json=data - ) - return response - - @staticmethod - async def enrich_graph( - client, - skip_clustering: bool = False, - force_enrichment: bool = False, - kg_enrichment_settings: Optional[ - Union[dict, KGEnrichmentSettings] - ] = None, - ) -> KGEnrichmentResponse: - """ - Perform graph enrichment over the entire graph. - - Args: - skip_clustering (bool): Whether to skip leiden clustering on the graph or not. - force_enrichment (bool): Force Enrichment step even if graph creation is still in progress for some documents. - kg_enrichment_settings (KGEnrichmentSettings): Settings for the graph enrichment process. - Returns: - KGEnrichmentResponse: Results of the graph enrichment process. - """ - - data = {} - - if skip_clustering: - data["skip_clustering"] = skip_clustering - if force_enrichment: - data["force_enrichment"] = force_enrichment - if kg_enrichment_settings: - if isinstance(kg_enrichment_settings, KGEnrichmentSettings): - kg_enrichment_settings = kg_enrichment_settings.dict() - data["kg_enrichment_settings"] = kg_enrichment_settings # type: ignore - - response = await client._make_request( - "POST", "enrich_graph", json=data - ) - return response diff --git a/py/shared/abstractions/__init__.py b/py/shared/abstractions/__init__.py index b9cb2683e..7bfb9a711 100644 --- a/py/shared/abstractions/__init__.py +++ b/py/shared/abstractions/__init__.py @@ -4,11 +4,11 @@ DataType, Document, DocumentExtraction, - DocumentFragment, DocumentInfo, DocumentType, IngestionStatus, - RestructureStatus, + KGCreationStatus, + KGEnrichmentStatus, ) from .embedding import EmbeddingPurpose, default_embedding_prefixes from .exception import R2RDocumentProcessingError, R2RException @@ -21,6 +21,7 @@ RelationshipType, Triple, ) +from .kg import KGCreationSettings, KGEnrichmentSettings from .llm import ( GenerationConfig, LLMChatCompletion, @@ -29,7 +30,6 @@ RAGCompletion, ) from .prompt import Prompt -from .restructure import KGCreationSettings, KGEnrichmentSettings from .search import ( AggregateSearchResult, HybridSearchSettings, @@ -59,10 +59,10 @@ "DataType", "Document", "DocumentExtraction", - "DocumentFragment", "DocumentInfo", "IngestionStatus", - "RestructureStatus", + "KGCreationStatus", + "KGEnrichmentStatus", "DocumentType", # Embedding abstractions "EmbeddingPurpose", diff --git a/py/shared/abstractions/document.py b/py/shared/abstractions/document.py index d7e0cd68f..87714cb6c 100644 --- a/py/shared/abstractions/document.py +++ b/py/shared/abstractions/document.py @@ -116,20 +116,26 @@ class IngestionStatus(str, Enum): EMBEDDING = "embedding" STORING = "storing" - FAILURE = "failure" + FAILED = "failed" SUCCESS = "success" -class RestructureStatus(str, Enum): - """Status of document processing.""" +class KGCreationStatus(str, Enum): + """Status of KG Creation per document.""" + + PENDING = "pending" + PROCESSING = "processing" + SUCCESS = "success" + FAILED = "failed" + + +class KGEnrichmentStatus(str, Enum): + """Status of KG Enrichment per collection.""" PENDING = "pending" PROCESSING = "processing" - ENRICHING = "enriching" - FAILURE = "failure" SUCCESS = "success" - ENRICHED = "enriched" - ENRICHMENT_FAILURE = 
"enrichment_failure" + FAILED = "failed" class DocumentInfo(R2RSerializable): @@ -144,7 +150,7 @@ class DocumentInfo(R2RSerializable): version: str size_in_bytes: int ingestion_status: IngestionStatus = IngestionStatus.PENDING - restructuring_status: RestructureStatus = RestructureStatus.PENDING + kg_extraction_status: KGCreationStatus = KGCreationStatus.PENDING created_at: Optional[datetime] = None updated_at: Optional[datetime] = None ingestion_attempt_number: Optional[int] = None @@ -163,7 +169,7 @@ def convert_to_db_entry(self): "version": self.version, "size_in_bytes": self.size_in_bytes, "ingestion_status": self.ingestion_status.value, - "restructuring_status": self.restructuring_status.value, + "kg_extraction_status": self.kg_extraction_status.value, "created_at": self.created_at or now, "updated_at": self.updated_at or now, "ingestion_attempt_number": self.ingestion_attempt_number or 0, @@ -179,15 +185,3 @@ class DocumentExtraction(R2RSerializable): user_id: UUID data: DataType metadata: dict - - -class DocumentFragment(R2RSerializable): - """A fragment extracted from a document.""" - - id: UUID - extraction_id: UUID - document_id: UUID - user_id: UUID - collection_ids: list[UUID] - data: DataType - metadata: dict diff --git a/py/shared/abstractions/graph.py b/py/shared/abstractions/graph.py index dbf795e27..6af5e20c5 100644 --- a/py/shared/abstractions/graph.py +++ b/py/shared/abstractions/graph.py @@ -1,7 +1,6 @@ import json import logging import uuid -from collections.abc import Callable from dataclasses import dataclass from typing import Any, Optional, Union @@ -46,17 +45,17 @@ class RelationshipType(R2RSerializable): class Entity(R2RSerializable): """An entity extracted from a document.""" - id: Optional[int] = None # given by neo4j + id: Optional[int] = None category: str name: str description: Optional[str] = None description_embedding: Optional[list[float]] = None name_embedding: Optional[list[float]] = None graph_embedding: Optional[list[float]] = None - community_ids: Optional[list[str]] = None - text_unit_ids: Optional[list[str]] = None - document_ids: Optional[list[str]] = None - rank: Optional[int] = 1 + community_numbers: Optional[list[str]] = None + extraction_ids: Optional[list[str]] = None + document_id: Optional[str] = None + rank: Optional[int] = None attributes: Optional[Union[dict[str, Any], str]] = None def __str__(self): @@ -79,7 +78,7 @@ def __init__(self, **kwargs): class Triple(BaseModel): """A relationship between two entities. This is a generic relationship, and can be used to represent any type of relationship between any two entities.""" - id: Optional[int] = None # given by neo4j + id: Optional[int] = None subject: str | None = None """The source entity name.""" @@ -96,14 +95,14 @@ class Triple(BaseModel): description: str | None = None """A description of the relationship (optional).""" - predicate_embedding: list[float] = [] + predicate_embedding: list[float] | None = None """The semantic embedding for the relationship description (optional).""" - text_unit_ids: list[str] = [] + extraction_ids: list[str] = [] """List of text unit IDs in which the relationship appears (optional).""" - document_ids: list[str] = [] - """List of document IDs in which the relationship appears (optional).""" + document_id: str | None = None + """Document ID in which the relationship appears (optional).""" attributes: dict[str, Any] | str = {} """Additional attributes associated with the relationship (optional). 
To be included in the search prompt""" @@ -127,8 +126,8 @@ def from_dict( predicate_key: str = "predicate", description_key: str = "description", weight_key: str = "weight", - text_unit_ids_key: str = "text_unit_ids", - document_ids_key: str = "document_ids", + extraction_ids_key: str = "extraction_ids", + document_id_key: str = "document_id", attributes_key: str = "attributes", ) -> "Triple": """Create a new relationship from the dict data.""" @@ -141,8 +140,8 @@ def from_dict( predicate=d.get(predicate_key), description=d.get(description_key), weight=d.get(weight_key, 1.0), - text_unit_ids=d.get(text_unit_ids_key), - document_ids=d.get(document_ids_key), + extraction_ids=d.get(extraction_ids_key), + document_id=d.get(document_id_key), attributes=d.get(attributes_key, {}), ) @@ -151,10 +150,10 @@ def from_dict( class Community(BaseModel): """A protocol for a community in the system.""" - id: str + id: int | None = None """The ID of the community.""" - level: str = "" + level: int | None = None """Community level.""" entity_ids: list[str] | None = None @@ -216,26 +215,35 @@ def from_dict( @dataclass -class CommunityReport(Named): +class CommunityReport(BaseModel): """Defines an LLM-generated summary report of a community.""" - community_id: str + community_number: int """The ID of the community this report is associated with.""" + level: int + """The level of the community this report is associated with.""" + + collection_id: uuid.UUID + """The ID of the collection this report is associated with.""" + + name: str = "" + """Name of the report.""" + summary: str = "" """Summary of the report.""" - full_content: str = "" - """Full content of the report.""" + findings: list[str] = [] + """Findings of the report.""" - rank: float | None = 1.0 - """Rank of the report, used for sorting (optional). Higher means more important""" + rating: float | None = None + """Rating of the report.""" - summary_embedding: list[float] | None = None - """The semantic (i.e. text) embedding of the report summary (optional).""" + rating_explanation: str | None = None + """Explanation of the rating.""" - full_content_embedding: list[float] | None = None - """The semantic (i.e. 
text) embedding of the full report content (optional).""" + embedding: list[float] | None = None + """Embedding of summary and findings.""" attributes: dict[str, Any] | None = None """A dictionary of additional attributes associated with the report (optional).""" @@ -251,159 +259,34 @@ def from_dict( d: dict[str, Any], id_key: str = "id", title_key: str = "title", - community_id_key: str = "community_id", + community_number_key: str = "community_number", short_id_key: str = "short_id", summary_key: str = "summary", - full_content_key: str = "full_content", + findings_key: str = "findings", rank_key: str = "rank", summary_embedding_key: str = "summary_embedding", - full_content_embedding_key: str = "full_content_embedding", + embedding_key: str = "embedding", attributes_key: str = "attributes", ) -> "CommunityReport": """Create a new community report from the dict data.""" return CommunityReport( id=d[id_key], title=d[title_key], - community_id=d[community_id_key], + community_number=d[community_number_key], short_id=d.get(short_id_key), summary=d[summary_key], - full_content=d[full_content_key], + findings=d[findings_key], rank=d[rank_key], summary_embedding=d.get(summary_embedding_key), - full_content_embedding=d.get(full_content_embedding_key), - attributes=d.get(attributes_key), - ) - - -@dataclass -class Covariate(Identified): - """ - A protocol for a covariate in the system. - - Covariates are metadata associated with a subject, e.g. entity claims. - Each subject (e.g. entity) may be associated with multiple types of covariates. - """ - - subject_id: str - """The subject id.""" - - subject_type: str = "entity" - """The subject type.""" - - covariate_type: str = "claim" - """The covariate type.""" - - text_unit_ids: list[str] | None = None - """List of text unit IDs in which the covariate info appears (optional).""" - - document_ids: list[str] | None = None - """List of document IDs in which the covariate info appears (optional).""" - - attributes: dict[str, Any] | None = None - - def __init__(self, **kwargs): - super().__init__(**kwargs) - if isinstance(self.attributes, str): - self.attributes = json.loads(self.attributes) - - @classmethod - def from_dict( - cls, - d: dict[str, Any], - id_key: str = "id", - subject_id_key: str = "subject_id", - subject_type_key: str = "subject_type", - covariate_type_key: str = "covariate_type", - short_id_key: str = "short_id", - text_unit_ids_key: str = "text_unit_ids", - document_ids_key: str = "document_ids", - attributes_key: str = "attributes", - ) -> "Covariate": - """Create a new covariate from the dict data.""" - return Covariate( - id=d[id_key], - short_id=d.get(short_id_key), - subject_id=d[subject_id_key], - subject_type=d.get(subject_type_key, "entity"), - covariate_type=d.get(covariate_type_key, "claim"), - text_unit_ids=d.get(text_unit_ids_key), - document_ids=d.get(document_ids_key), + embedding=d.get(embedding_key), attributes=d.get(attributes_key), ) -@dataclass -class TextUnit(Identified): - """A protocol for a TextUnit item in a Document database.""" - - text: str - """The text of the unit.""" - - text_embedding: list[float] | None = None - """The text embedding for the text unit (optional).""" - - entity_ids: list[str] | None = None - """List of entity IDs related to the text unit (optional).""" - - relationship_ids: list[str] | None = None - """List of relationship IDs related to the text unit (optional).""" - - covariate_ids: dict[str, list[str]] | None = None - "Dictionary of different types of covariates related to the text 
unit (optional)." - - n_tokens: int | None = None - """The number of tokens in the text (optional).""" - - document_ids: list[str] | None = None - """List of document IDs in which the text unit appears (optional).""" - - attributes: dict[str, Any] | None = None - """A dictionary of additional attributes associated with the text unit (optional).""" - - def __init__(self, **kwargs): - super().__init__(**kwargs) - if isinstance(self.attributes, str): - self.attributes = json.loads(self.attributes) - if isinstance(self.covariate_ids, str): - self.covariate_ids = json.loads(self.covariate_ids) - - @classmethod - def from_dict( - cls, - d: dict[str, Any], - id_key: str = "id", - short_id_key: str = "short_id", - text_key: str = "text", - text_embedding_key: str = "text_embedding", - entities_key: str = "entity_ids", - relationships_key: str = "relationship_ids", - covariates_key: str = "covariate_ids", - n_tokens_key: str = "n_tokens", - document_ids_key: str = "document_ids", - attributes_key: str = "attributes", - ) -> "TextUnit": - """Create a new text unit from the dict data.""" - return TextUnit( - id=d[id_key], - short_id=d.get(short_id_key), - text=d[text_key], - text_embedding=d.get(text_embedding_key), - entity_ids=d.get(entities_key), - relationship_ids=d.get(relationships_key), - covariate_ids=d.get(covariates_key), - n_tokens=d.get(n_tokens_key), - document_ids=d.get(document_ids_key), - attributes=d.get(attributes_key), - ) - - -TextEmbedder = Callable[[str], list[float]] - - class KGExtraction(R2RSerializable): """An extraction from a document that is part of a knowledge graph.""" - fragment_ids: list[uuid.UUID] + extraction_ids: list[uuid.UUID] document_id: uuid.UUID - entities: dict[str, Entity] + entities: list[Entity] triples: list[Triple] diff --git a/py/shared/abstractions/restructure.py b/py/shared/abstractions/kg.py similarity index 71% rename from py/shared/abstractions/restructure.py rename to py/shared/abstractions/kg.py index 5d420b965..ca6c15559 100644 --- a/py/shared/abstractions/restructure.py +++ b/py/shared/abstractions/kg.py @@ -7,11 +7,16 @@ class KGCreationSettings(R2RSerializable): """Settings for knowledge graph creation.""" - kg_extraction_prompt: str = Field( - default="graphrag_triplet_extraction_zero_shot", + kg_triples_extraction_prompt: str = Field( + default="graphrag_triples_extraction_few_shot", description="The prompt to use for knowledge graph extraction.", ) + force_kg_creation: bool = Field( + default=False, + description="Force run the KG creation step even if the graph is already created.", + ) + entity_types: list[str] = Field( default=[], description="The types of entities to extract.", @@ -22,9 +27,9 @@ class KGCreationSettings(R2RSerializable): description="The types of relations to extract.", ) - fragment_merge_count: int = Field( + extraction_merge_count: int = Field( default=4, - description="The number of fragments to merge into a single KG extraction.", + description="The number of extractions to merge into a single KG extraction.", ) max_knowledge_triples: int = Field( @@ -32,6 +37,11 @@ class KGCreationSettings(R2RSerializable): description="The maximum number of knowledge triples to extract from each chunk.", ) + max_description_input_length: int = Field( + default=65536, + description="The maximum length of the description for a node in the graph.", + ) + generation_config: GenerationConfig = Field( default_factory=GenerationConfig, description="Configuration for text generation during graph enrichment.", @@ -41,16 +51,21 @@ class 
KGCreationSettings(R2RSerializable): class KGEnrichmentSettings(R2RSerializable): """Settings for knowledge graph enrichment.""" + skip_clustering: bool = Field( + default=False, + description="Whether to skip leiden clustering on the graph or not.", + ) + + force_enrichment: bool = Field( + default=False, + description="Force run the enrichment step even if graph creation is still in progress for some documents.", + ) + community_reports_prompt: str = Field( default="graphrag_community_reports_prompt", description="The prompt to use for knowledge graph enrichment.", ) - max_description_input_length: int = Field( - default=65536, - description="The maximum length of the description for a node in the graph.", - ) - max_summary_input_length: int = Field( default=65536, description="The maximum length of the summary for a community.", diff --git a/py/shared/abstractions/search.py b/py/shared/abstractions/search.py index 8199d8394..1a2c5b1cf 100644 --- a/py/shared/abstractions/search.py +++ b/py/shared/abstractions/search.py @@ -13,7 +13,6 @@ class VectorSearchResult(R2RSerializable): """Result of a search operation.""" - fragment_id: UUID extraction_id: UUID document_id: UUID user_id: Optional[UUID] @@ -23,14 +22,13 @@ class VectorSearchResult(R2RSerializable): metadata: dict[str, Any] def __str__(self) -> str: - return f"VectorSearchResult(fragment_id={self.fragment_id}, extraction_id={self.extraction_id}, document_id={self.document_id}, score={self.score})" + return f"VectorSearchResult(id={self.extraction_id}, document_id={self.document_id}, score={self.score})" def __repr__(self) -> str: return self.__str__() def as_dict(self) -> dict: return { - "fragment_id": self.fragment_id, "extraction_id": self.extraction_id, "document_id": self.document_id, "user_id": self.user_id, @@ -42,7 +40,6 @@ def as_dict(self) -> dict: class Config: json_schema_extra = { - "fragment_id": "c68dc72e-fc23-5452-8f49-d7bd46088a96", "extraction_id": "3f3d47f3-8baf-58eb-8bc2-0171fb1c6e09", "document_id": "3e157b3a-8469-51db-90d9-52e7d896b49b", "user_id": "2acb499e-8428-543b-bd85-0d9098718220", @@ -95,13 +92,19 @@ class Config: class KGCommunityResult(R2RSerializable): name: str - description: str + summary: str + rating: float + rating_explanation: str + findings: list[str] metadata: Optional[dict[str, Any]] = None class Config: json_schema_extra = { "name": "Community Name", - "description": "Community Description", + "summary": "Community Summary", + "rating": 9, + "rating_explanation": "Rating Explanation", + "findings": ["Finding 1", "Finding 2"], "metadata": {}, } @@ -125,7 +128,7 @@ class KGSearchResult(R2RSerializable): KGEntityResult, KGRelationshipResult, KGCommunityResult, KGGlobalResult ] result_type: Optional[KGSearchResultType] = None - fragment_ids: Optional[list[UUID]] = None + ids: Optional[list[UUID]] = None document_ids: Optional[list[UUID]] = None metadata: dict[str, Any] = {} @@ -134,7 +137,7 @@ class Config: "method": "local", "content": KGEntityResult.Config.json_schema_extra, "result_type": "entity", - "fragment_ids": ["c68dc72e-fc23-5452-8f49-d7bd46088a96"], + "ids": ["c68dc72e-fc23-5452-8f49-d7bd46088a96"], "document_ids": ["3e157b3a-8469-51db-90d9-52e7d896b49b"], "metadata": {"associated_query": "What is the capital of France?"}, } @@ -284,7 +287,6 @@ def model_dump(self, *args, **kwargs): class KGSearchSettings(R2RSerializable): - graphrag_map_system_prompt: str = Field( default="graphrag_map_system_prompt", description="The system prompt for the graphrag map prompt.", diff --git 
a/py/shared/abstractions/vector.py b/py/shared/abstractions/vector.py index adb61361a..d8b11b1d0 100644 --- a/py/shared/abstractions/vector.py +++ b/py/shared/abstractions/vector.py @@ -40,7 +40,6 @@ def __repr__(self) -> str: class VectorEntry(R2RSerializable): """A vector entry that can be stored directly in supported vector databases.""" - fragment_id: UUID extraction_id: UUID document_id: UUID user_id: UUID @@ -52,7 +51,7 @@ class VectorEntry(R2RSerializable): def __str__(self) -> str: """Return a string representation of the VectorEntry.""" return ( - f"VectorEntry(fragment_id={self.fragment_id}, " + f"VectorEntry(" f"extraction_id={self.extraction_id}, " f"document_id={self.document_id}, " f"user_id={self.user_id}, " diff --git a/py/shared/api/models/__init__.py b/py/shared/api/models/__init__.py index 777ad2ee5..b3d350afd 100644 --- a/py/shared/api/models/__init__.py +++ b/py/shared/api/models/__init__.py @@ -11,6 +11,12 @@ WrappedIngestionResponse, WrappedUpdateResponse, ) +from shared.api.models.kg.responses import ( + KGCreationResponse, + KGEnrichmentResponse, + WrappedKGCreationResponse, + WrappedKGEnrichmentResponse, +) from shared.api.models.management.responses import ( AnalyticsResponse, AppSettingsResponse, @@ -42,12 +48,6 @@ WrappedUserOverviewResponse, WrappedUsersInCollectionResponse, ) -from shared.api.models.restructure.responses import ( - KGCreationResponse, - KGEnrichmentResponse, - WrappedKGCreationResponse, - WrappedKGEnrichmentResponse, -) from shared.api.models.retrieval.responses import ( RAGAgentResponse, RAGResponse, diff --git a/py/shared/api/models/kg/__init__.py b/py/shared/api/models/kg/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/py/shared/api/models/restructure/responses.py b/py/shared/api/models/kg/responses.py similarity index 76% rename from py/shared/api/models/restructure/responses.py rename to py/shared/api/models/kg/responses.py index 0150f4773..e0c3ad890 100644 --- a/py/shared/api/models/restructure/responses.py +++ b/py/shared/api/models/kg/responses.py @@ -8,11 +8,11 @@ class KGCreationResponse(BaseModel): message: str = Field( ..., - description="A message describing the result of the restructure request.", + description="A message describing the result of the KG creation request.", ) task_id: UUID = Field( ..., - description="The task ID of the restructure request.", + description="The task ID of the KG creation request.", ) class Config: @@ -27,11 +27,11 @@ class Config: class KGEnrichmentResponse(BaseModel): message: str = Field( ..., - description="A message describing the result of the restructure request.", + description="A message describing the result of the KG enrichment request.", ) task_id: UUID = Field( ..., - description="The task ID of the restructure request.", + description="The task ID of the KG enrichment request.", ) class Config: diff --git a/py/shared/api/models/management/responses.py b/py/shared/api/models/management/responses.py index 303e4dcad..5652f7005 100644 --- a/py/shared/api/models/management/responses.py +++ b/py/shared/api/models/management/responses.py @@ -91,15 +91,14 @@ class DocumentOverviewResponse(BaseModel): created_at: datetime updated_at: datetime ingestion_status: str - restructuring_status: str + kg_extraction_status: str version: str collection_ids: list[UUID] metadata: dict[str, Any] class DocumentChunkResponse(BaseModel): - fragment_id: UUID - extraction_id: UUID + id: UUID document_id: UUID user_id: UUID collection_ids: list[UUID] diff --git 
a/py/shared/utils/__init__.py b/py/shared/utils/__init__.py index bae809ee0..8fe601043 100644 --- a/py/shared/utils/__init__.py +++ b/py/shared/utils/__init__.py @@ -4,6 +4,7 @@ format_relations, format_search_results_for_llm, format_search_results_for_stream, + generate_default_user_collection_id, generate_id_from_label, generate_run_id, generate_user_document_id, @@ -21,6 +22,7 @@ "format_search_results_for_llm", "generate_run_id", "generate_user_document_id", + "generate_default_user_collection_id", "increment_version", "decrement_version", "run_pipeline", diff --git a/py/shared/utils/base_utils.py b/py/shared/utils/base_utils.py index ca0a9f29c..7cef3b9cb 100644 --- a/py/shared/utils/base_utils.py +++ b/py/shared/utils/base_utils.py @@ -5,37 +5,69 @@ from uuid import NAMESPACE_DNS, UUID, uuid4, uuid5 from ..abstractions.graph import EntityType, RelationshipType -from ..abstractions.search import AggregateSearchResult +from ..abstractions.search import ( + AggregateSearchResult, + KGCommunityResult, + KGEntityResult, + KGGlobalResult, + KGRelationshipResult, +) logger = logging.getLogger(__name__) -def format_search_results_for_llm( - results: AggregateSearchResult, -) -> str: - formatted_results = "" - i = 0 +def format_search_results_for_llm(results: AggregateSearchResult) -> str: + formatted_results = [] + source_counter = 1 + if results.vector_search_results: - formatted_results += "Vector Search Results:\n" - for i, result in enumerate(results.vector_search_results): - text = result.text - formatted_results += f"Source [{i+1}]:\n{text}\n" + formatted_results.append("Vector Search Results:") + for result in results.vector_search_results: + formatted_results.extend( + (f"Source [{source_counter}]:", f"{result.text}") + ) + source_counter += 1 - i = len(results.vector_search_results) if results.kg_search_results: - formatted_results += "KG Local Results:\n" - for j, kg_result in enumerate(results.kg_search_results): - formatted_results += ( - f"Source [{j+i+1}]: Name - {kg_result.content.name}\n" + formatted_results.append("KG Search Results:") + for kg_result in results.kg_search_results: + formatted_results.extend( + ( + f"Source [{source_counter}]:", + f"Name: {kg_result.content.name}", + ) ) - formatted_results += ( - f"Description - {kg_result.content.description}\n" - ) - findings = kg_result.metadata.get("findings", None) - if findings: - formatted_results += f"Supporting Findings: {findings}\n" - return formatted_results + if isinstance(kg_result.content, KGCommunityResult): + formatted_results.extend( + ( + f"Summary: {kg_result.content.summary}", + f"Rating: {kg_result.content.rating}", + f"Rating Explanation: {kg_result.content.rating_explanation}", + "Findings:", + ) + ) + formatted_results.extend( + f"- {finding}" for finding in kg_result.content.findings + ) + elif isinstance( + kg_result.content, + (KGEntityResult, KGRelationshipResult, KGGlobalResult), + ): + formatted_results.append( + f"Description: {kg_result.content.description}" + ) + + if kg_result.metadata: + formatted_results.append("Metadata:") + formatted_results.extend( + f"- {key}: {value}" + for key, value in kg_result.metadata.items() + ) + + source_counter += 1 + + return "\n".join(formatted_results) def format_search_results_for_stream( @@ -78,6 +110,11 @@ def generate_id_from_label(label: str) -> UUID: return uuid5(NAMESPACE_DNS, label) +def generate_default_user_collection_id(user_id: UUID) -> UUID: + """Generate the default collection ID for a user.""" + return generate_id_from_label(f"{user_id}") + 
+ def generate_user_document_id(filename: str, user_id: UUID) -> UUID: """ Generates a unique document id from a given filename and user id diff --git a/py/tests/conftest.py b/py/tests/conftest.py index a1e29f7dc..84def65c4 100644 --- a/py/tests/conftest.py +++ b/py/tests/conftest.py @@ -47,7 +47,6 @@ def generate_random_vector_entry( vector_data = [random.random() for _ in range(dimension)] metadata = {"key": f"value_id_{id_value}", "raw_key": id_value} return VectorEntry( - fragment_id=generate_id_from_label(f"id_{id_value}"), extraction_id=uuid.uuid4(), document_id=uuid.uuid4(), user_id=uuid.uuid4(), diff --git a/py/tests/core/parsers/conftest.py b/py/tests/core/parsers/conftest.py deleted file mode 100644 index 0a654bba1..000000000 --- a/py/tests/core/parsers/conftest.py +++ /dev/null @@ -1,16 +0,0 @@ -import pytest - -from core.base import ParsingConfig -from core.providers.parsing.unstructured_parsing import ( - UnstructuredParsingProvider, -) - - -@pytest.fixture -def parsing_config(): - return ParsingConfig() - - -@pytest.fixture -def unstructured_parsing_provider(parsing_config): - return UnstructuredParsingProvider(use_api=False, config=parsing_config) diff --git a/py/tests/core/parsers/test_parsers.py b/py/tests/core/parsers/test_parsers.py deleted file mode 100644 index ed1ac5b09..000000000 --- a/py/tests/core/parsers/test_parsers.py +++ /dev/null @@ -1,132 +0,0 @@ -import asyncio -import contextlib -import json -from unittest.mock import MagicMock, patch - -import pytest - -from core.parsers.media.docx_parser import DOCXParser -from core.parsers.media.pdf_parser import PDFParser -from core.parsers.media.ppt_parser import PPTParser -from core.parsers.structured.csv_parser import CSVParser -from core.parsers.structured.json_parser import JSONParser -from core.parsers.structured.xlsx_parser import XLSXParser -from core.parsers.text.html_parser import HTMLParser -from core.parsers.text.md_parser import MDParser -from core.parsers.text.text_parser import TextParser - - -@pytest.fixture(scope="session", autouse=True) -def event_loop_policy(): - asyncio.set_event_loop_policy(asyncio.DefaultEventLoopPolicy()) - - -@pytest.fixture(scope="function", autouse=True) -async def cleanup_tasks(): - yield - tasks = [t for t in asyncio.all_tasks() if t is not asyncio.current_task()] - [task.cancel() for task in tasks] - await asyncio.gather(*tasks, return_exceptions=True) - - -@pytest.mark.asyncio -async def test_text_parser(): - try: - parser = TextParser() - data = "Simple text" - async for result in parser.ingest(data): - assert result == "Simple text" - except asyncio.CancelledError: - pass - - -@pytest.mark.asyncio -async def test_json_parser(): - with contextlib.suppress(asyncio.CancelledError): - parser = JSONParser() - data = json.dumps({"key": "value", "null_key": None}) - async for result in parser.ingest(data): - assert "key: value" in result - assert "null_key" not in result - - -@pytest.mark.asyncio -async def test_html_parser(): - with contextlib.suppress(asyncio.CancelledError): - parser = HTMLParser() - data = "

<html><body><h1>Hello World</h1></body></html>
" - async for result in parser.ingest(data): - assert result.strip() == "Hello World" - - -@pytest.mark.asyncio -@patch("pypdf.PdfReader") -async def test_pdf_parser(mock_pdf_reader): - with contextlib.suppress(asyncio.CancelledError): - parser = PDFParser() - mock_pdf_reader.return_value.pages = [ - MagicMock(extract_text=lambda: "Page text") - ] - data = b"fake PDF data" - async for result in parser.ingest(data): - assert result == "Page text" - - -@pytest.mark.asyncio -@patch("pptx.Presentation") -async def test_ppt_parser(mock_presentation): - with contextlib.suppress(asyncio.CancelledError): - mock_slide = MagicMock() - mock_shape = MagicMock(text="Slide text") - mock_slide.shapes = [mock_shape] - mock_presentation.return_value.slides = [mock_slide] - parser = PPTParser() - data = b"fake PPT data" - async for result in parser.ingest(data): - assert result == "Slide text" - - -@pytest.mark.asyncio -@patch("docx.Document") -async def test_docx_parser(mock_document): - with contextlib.suppress(asyncio.CancelledError): - mock_paragraph = MagicMock(text="Paragraph text") - mock_document.return_value.paragraphs = [mock_paragraph] - parser = DOCXParser() - data = b"fake DOCX data" - async for result in parser.ingest(data): - assert result == "Paragraph text" - - -@pytest.mark.asyncio -async def test_csv_parser(): - with contextlib.suppress(asyncio.CancelledError): - parser = CSVParser() - data = "col1,col2\nvalue1,value2" - async for result in parser.ingest(data): - assert result == "col1, col2" - break - - -@pytest.mark.asyncio -@patch("openpyxl.load_workbook") -async def test_xlsx_parser(mock_load_workbook): - with contextlib.suppress(asyncio.CancelledError): - mock_sheet = MagicMock() - mock_sheet.iter_rows.return_value = [(1, 2), (3, 4)] - mock_workbook = MagicMock(worksheets=[mock_sheet]) - mock_load_workbook.return_value = mock_workbook - parser = XLSXParser() - data = b"fake XLSX data" - async for result in parser.ingest(data): - assert result == "1, 2" - break - - -@pytest.mark.asyncio -async def test_markdown_parser(): - with contextlib.suppress(asyncio.CancelledError): - parser = MDParser() - data = "# Header\nContent" - async for result in parser.ingest(data): - assert result.strip() == "Header\nContent" diff --git a/py/tests/core/providers/database/relational/test_document_db.py b/py/tests/core/providers/database/relational/test_document_db.py index 940cd6370..add3aa7f4 100644 --- a/py/tests/core/providers/database/relational/test_document_db.py +++ b/py/tests/core/providers/database/relational/test_document_db.py @@ -7,7 +7,8 @@ DocumentInfo, DocumentType, IngestionStatus, - RestructureStatus, + KGCreationStatus, + KGEnrichmentStatus, ) @@ -30,7 +31,7 @@ async def test_upsert_documents_overview(temporary_postgres_db_provider): version="1.0", size_in_bytes=1024, ingestion_status=IngestionStatus.PENDING, - restructuring_status=RestructureStatus.PENDING, + kg_extraction_status=KGCreationStatus.PENDING, ) await temporary_postgres_db_provider.relational.upsert_documents_overview( document_info @@ -54,8 +55,8 @@ async def test_upsert_documents_overview(temporary_postgres_db_provider): assert inserted_document.size_in_bytes == document_info.size_in_bytes assert inserted_document.ingestion_status == document_info.ingestion_status assert ( - inserted_document.restructuring_status - == document_info.restructuring_status + inserted_document.kg_extraction_status + == document_info.kg_extraction_status ) # Update the document and verify the changes @@ -88,7 +89,7 @@ async def 
test_delete_from_documents_overview(temporary_postgres_db_provider): version="1.0", size_in_bytes=1024, ingestion_status=IngestionStatus.PENDING, - restructuring_status=RestructureStatus.PENDING, + kg_extraction_status=KGCreationStatus.PENDING, ) await temporary_postgres_db_provider.relational.upsert_documents_overview( document_info @@ -119,7 +120,7 @@ async def test_get_documents_overview(temporary_postgres_db_provider): version="1.0", size_in_bytes=1024, ingestion_status=IngestionStatus.PENDING, - restructuring_status=RestructureStatus.PENDING, + kg_extraction_status=KGCreationStatus.PENDING, ) document_info2 = DocumentInfo( id=UUID("00000000-0000-0000-0000-000000000004"), @@ -131,7 +132,7 @@ async def test_get_documents_overview(temporary_postgres_db_provider): version="1.0", size_in_bytes=2048, ingestion_status=IngestionStatus.SUCCESS, - restructuring_status=RestructureStatus.PENDING, + kg_extraction_status=KGCreationStatus.PENDING, ) await temporary_postgres_db_provider.relational.upsert_documents_overview( [document_info1, document_info2] diff --git a/py/tests/core/providers/kg/test_kg.py b/py/tests/core/providers/kg/test_kg.py deleted file mode 100644 index d82fad50f..000000000 --- a/py/tests/core/providers/kg/test_kg.py +++ /dev/null @@ -1,240 +0,0 @@ -from unittest.mock import AsyncMock, MagicMock, Mock -from uuid import uuid4 - -import pytest - -from core.base import ( - Community, - DocumentFragment, - Entity, - GenerationConfig, - KGExtraction, - Triple, -) -from core.pipes.kg.clustering import KGClusteringPipe -from core.pipes.kg.extraction import AsyncPipe, KGTriplesExtractionPipe - - -@pytest.fixture -def kg_extraction_pipe(): - return KGTriplesExtractionPipe( - config=MagicMock(), - kg_provider=MagicMock(), - database_provider=MagicMock(), - llm_provider=MagicMock(), - prompt_provider=MagicMock(), - chunking_provider=MagicMock(), - ) - - -@pytest.fixture -def document_fragment(): - return DocumentFragment( - id=uuid4(), - collection_ids=[uuid4()], - extraction_id=uuid4(), - document_id=uuid4(), - user_id=uuid4(), - data="Test data", - metadata={}, - ) - - -@pytest.fixture -def kg_extraction_input(): - return DocumentFragment( - document_id=uuid4(), - generation_config=GenerationConfig(), - ) - - -@pytest.mark.asyncio -async def test_extract_kg_success(kg_extraction_pipe, document_fragment): - kg_extraction_pipe.llm_provider.aget_completion = AsyncMock( - return_value=MagicMock( - choices=[ - MagicMock( - message=MagicMock( - content=( - '("entity"$$$$Entity1$$$$Category1$$$$Description1)' - '("relationship"$$$$Entity1$$$$Entity2$$$$Predicate$$$$Description$$$$0.8)' - ) - ) - ) - ] - ) - ) - result = await kg_extraction_pipe.extract_kg( - [document_fragment], GenerationConfig(), 10, [], [] - ) - - assert isinstance(result, KGExtraction) - assert len(result.entities) == 1 - assert len(result.triples) == 1 - assert result.entities["Entity1"].name == "Entity1" - assert result.triples[0].subject == "Entity1" - assert result.triples[0].object == "Entity2" - - -# TODO - Revive extraction testing after recent refactor -# @pytest.mark.asyncio -# async def test_run_logic(kg_extraction_pipe, kg_extraction_input): -# def mock_input_generator(): -# return kg_extraction_input.dict() # AsyncPipe.Input(message=document_fragment.dict()) - -# input_mock = MagicMock() -# input_mock.message = mock_input_generator() - -# kg_extraction_pipe.extract_kg = Mock( -# return_value=KGExtraction( -# fragment_id=document_fragment.id, -# document_id=document_fragment.document_id, -# entities={ -# 
"TestEntity": Entity( -# name="TestEntity", -# category="TestCategory", -# description="TestDescription", -# ) -# }, -# triples=[ -# Triple( -# subject="TestSubject", -# predicate="TestPredicate", -# object="TestObject", -# ) -# ], -# ) -# ) - -# results = [ -# result -# async for result in kg_extraction_pipe._run_logic( -# input_mock, MagicMock(), "run_id" -# ) -# ] - -# # test failing due to issues with mock -# # assert len(results) == 2 -# # for result in results: -# # assert isinstance(result, KGExtraction) -# # assert len(result.entities) == 1 -# # assert len(result.triples) == 1 - - -@pytest.fixture -def mock_kg_provider(mocker): - provider = mocker.Mock() - provider.get_all_entities.return_value = [ - Entity( - name=f"Entity{i}", - category=f"Category{i%2+1}", - description=f"Description{i}", - ) - for i in range(1, 4) - ] - - provider.get_entities.return_value = [ - Entity( - name=f"Entity{i}", - category=f"Category{i%2+1}", - description=f"Description{i}", - ) - for i in range(1, 4) - ] - - provider.get_triples.return_value = [ - Triple( - subject=f"Entity{i}", - predicate=f"Predicate{i%2+1}", - object=f"Entity{i+1}", - ) - for i in range(1, 4) - ] - - provider.get_communities.return_value = [ - Community( - id=f"Community{i}", - level=f"Level{i%2+1}", - short_id=f"Short{i}", - title=f"Title{i}", - entity_ids=[f"Entity{i}"], - relationship_ids=[f"Relationship{i}"], - ) - for i in range(1, 4) - ] - - return provider - - -@pytest.fixture -def mock_embedding_provider(mocker): - provider = mocker.Mock() - provider.get_embeddings.return_value = [ - [0.1 * i, 0.2 * i, 0.3 * i] for i in range(1, 4) - ] - provider.async_get_embedding = AsyncMock(return_value=[0.1, 0.2, 0.3]) - return provider - - -@pytest.fixture -def mock_completion_provider(mocker): - provider = mocker.Mock() - - async def mock_aget_completion(*args, **kwargs): - return mocker.Mock( - choices=[ - mocker.Mock(message=mocker.Mock(content="Cluster summary")) - ] - ) - - provider.aget_completion = mock_aget_completion - return provider - - -@pytest.fixture -def mock_prompt_provider(mocker): - provider = mocker.Mock() - provider.get_message_payload.return_value = mocker.Mock( - task_prompt_name="graphrag_community_reports", - task_inputs={"input_text": "Test input text"}, - ) - provider._get_message_payload.return_value = { - "task_prompt_name": "graphrag_community_reports", - "task_inputs": {"input_text": "Test input text"}, - } - return provider - - -@pytest.fixture -def kg_clustering_pipe( - mocker, - mock_kg_provider, - mock_embedding_provider, - mock_completion_provider, - mock_prompt_provider, -): - return KGClusteringPipe( - kg_provider=mock_kg_provider, - embedding_provider=mock_embedding_provider, - llm_provider=mock_completion_provider, - prompt_provider=mock_prompt_provider, - n_clusters=2, - ) - - -# Test is failing due to a dependency of graspologic failing to install: /hyppo/kgof/fssd.py:4: ModuleNotFoundError -# @pytest.mark.asyncio -# async def test_cluster_kg(kg_clustering_pipe): -# triples = [ -# Triple(subject="Entity1", predicate="relatedTo", object="Entity2"), -# Triple(subject="Entity2", predicate="relatedTo", object="Entity3"), -# Triple(subject="Entity3", predicate="relatedTo", object="Entity1"), -# ] - -# result = [] -# async for community in kg_clustering_pipe.cluster_kg(triples): -# result.append(community) - -# assert len(result) == 1 -# assert result[0]["id"] == "0_0" -# assert result[0]["title"] == "_" diff --git a/py/tests/core/providers/parsing/conftest.py 
b/py/tests/core/providers/parsing/conftest.py deleted file mode 100644 index 0a654bba1..000000000 --- a/py/tests/core/providers/parsing/conftest.py +++ /dev/null @@ -1,16 +0,0 @@ -import pytest - -from core.base import ParsingConfig -from core.providers.parsing.unstructured_parsing import ( - UnstructuredParsingProvider, -) - - -@pytest.fixture -def parsing_config(): - return ParsingConfig() - - -@pytest.fixture -def unstructured_parsing_provider(parsing_config): - return UnstructuredParsingProvider(use_api=False, config=parsing_config) diff --git a/py/tests/core/providers/parsing/test_unstructured_parsing.py b/py/tests/core/providers/parsing/test_unstructured_parsing.py deleted file mode 100644 index c6dd36d14..000000000 --- a/py/tests/core/providers/parsing/test_unstructured_parsing.py +++ /dev/null @@ -1,73 +0,0 @@ -from io import BytesIO -from unittest.mock import AsyncMock, MagicMock, patch - -import httpx -import pytest - -from core import generate_id_from_label -from core.base import Document, DocumentExtraction, DocumentType -from core.providers.parsing.unstructured_parsing import FallbackElement - - -@pytest.mark.asyncio -async def test_parse_fallback(unstructured_parsing_provider): - document = Document( - id=generate_id_from_label("test_parse_fallback"), - collection_ids=[], - user_id=generate_id_from_label("test_user"), - type=DocumentType.PNG, - metadata={}, - ) - chunk_size = 128 - - async def mock_ingest(file_content, chunk_size): - for text in ["text1", "text2"]: - yield text - - mock_parser = AsyncMock() - mock_parser.ingest = mock_ingest - unstructured_parsing_provider.parsers[DocumentType.PNG] = mock_parser - - elements = [ - element - async for element in unstructured_parsing_provider.parse_fallback( - b"test_data", document, chunk_size - ) - ] - - assert len(elements) == 2 - assert isinstance(elements[0], FallbackElement) - assert elements[0].text == "text1" - assert elements[0].metadata == {"chunk_id": 0} - assert isinstance(elements[1], FallbackElement) - assert elements[1].text == "text2" - assert elements[1].metadata == {"chunk_id": 1} - - -@pytest.mark.asyncio -async def test_parse_with_local_unstructured(unstructured_parsing_provider): - document = Document( - id=generate_id_from_label("test_parse_with_local_unstructured"), - collection_ids=[], - user_id=generate_id_from_label("test_user"), - type=DocumentType.DOCX, - metadata={"title": "test_title"}, - ) - - async def mock_post(url, json, **kwargs): - return MagicMock( - json=MagicMock(return_value={"elements": [{"text": "test_text"}]}) - ) - - with patch.object(httpx.AsyncClient, "post", side_effect=mock_post): - extractions = [ - extraction - async for extraction in unstructured_parsing_provider.parse( - BytesIO(b"test_data"), document - ) - ] - - assert len(extractions) == 1 - assert isinstance(extractions[0], DocumentExtraction) - assert extractions[0].data == "test_text" - assert extractions[0].metadata["partitioned_by_unstructured"] is True diff --git a/py/tests/regression/expected_outputs/test_document_management.json b/py/tests/regression/expected_outputs/test_document_management.json index 7114b58d2..eee3aec01 100644 --- a/py/tests/regression/expected_outputs/test_document_management.json +++ b/py/tests/regression/expected_outputs/test_document_management.json @@ -64,7 +64,7 @@ { "message": "Ingestion task queued successfully.", "task_id": "2fe08ebb-7157-452c-811c-7ede2b05e750", - "document_id": "db02076e-989a-59cd-98d5-e24e15a0bd27" + "document_id": "9fbe403b-c11c-5aae-8ade-ef22980c3ad1" }, { 
"message": "Ingestion task queued successfully.", @@ -148,7 +148,7 @@ { "message": "Ingestion task queued successfully.", "task_id": "07acc8f2-48d6-4e1c-8932-3f0c594105bf", - "document_id": "db02076e-989a-59cd-98d5-e24e15a0bd27" + "document_id": "9fbe403b-c11c-5aae-8ade-ef22980c3ad1" }, { "message": "Ingestion task queued successfully.", @@ -238,7 +238,7 @@ "ingestion_attempt_number": null }, { - "id": "db02076e-989a-59cd-98d5-e24e15a0bd27", + "id": "9fbe403b-c11c-5aae-8ade-ef22980c3ad1", "collection_ids": [], "user_id": "2acb499e-8428-543b-bd85-0d9098718220", "type": "txt", @@ -474,7 +474,7 @@ "message": "Update task queued successfully.", "task_id": "1828ac12-0804-47cf-9623-8110324a52b1", "document_ids": [ - "db02076e-989a-59cd-98d5-e24e15a0bd27" + "9fbe403b-c11c-5aae-8ade-ef22980c3ad1" ] } }, @@ -549,7 +549,7 @@ "ingestion_attempt_number": null }, { - "id": "db02076e-989a-59cd-98d5-e24e15a0bd27", + "id": "9fbe403b-c11c-5aae-8ade-ef22980c3ad1", "collection_ids": [], "user_id": "2acb499e-8428-543b-bd85-0d9098718220", "type": "txt", @@ -830,7 +830,7 @@ "ingestion_attempt_number": null }, { - "id": "db02076e-989a-59cd-98d5-e24e15a0bd27", + "id": "9fbe403b-c11c-5aae-8ade-ef22980c3ad1", "collection_ids": [], "user_id": "2acb499e-8428-543b-bd85-0d9098718220", "type": "txt", diff --git a/py/tests/regression/expected_outputs/test_observability.json b/py/tests/regression/expected_outputs/test_observability.json index 9caa47047..cef9c1e32 100644 --- a/py/tests/regression/expected_outputs/test_observability.json +++ b/py/tests/regression/expected_outputs/test_observability.json @@ -17,7 +17,7 @@ "2f576170-c4f9-5141-a910-a0924f341de4", "30f950f0-c692-57c5-b6ec-ff78ccf5ccdc", "57eec3df-cf68-5559-a80d-ae3fb55b9af1", - "db02076e-989a-59cd-98d5-e24e15a0bd27", + "9fbe403b-c11c-5aae-8ade-ef22980c3ad1", "d421207a-d799-5806-8d67-46b2005b15d4", "451adbbd-e24e-5c9b-80a8-f424b6c7accb", "5b1bd54f-4d70-56b9-a017-a618bc75f94c" diff --git a/py/tests/regression/expected_outputs/test_user_management.json b/py/tests/regression/expected_outputs/test_user_management.json index 8ce54e668..174311e69 100644 --- a/py/tests/regression/expected_outputs/test_user_management.json +++ b/py/tests/regression/expected_outputs/test_user_management.json @@ -104,7 +104,7 @@ "7b0f40c5-2ace-5781-ae35-ead99ddee8c5", "c3291abf-8a4e-5d9d-80fd-232ef6fd8526", "d421207a-d799-5806-8d67-46b2005b15d4", - "db02076e-989a-59cd-98d5-e24e15a0bd27", + "9fbe403b-c11c-5aae-8ade-ef22980c3ad1", "716fea3a-826b-5b27-8e59-ffbd1a35455a", "2f576170-c4f9-5141-a910-a0924f341de4", "3e157b3a-8469-51db-90d9-52e7d896b49b" diff --git a/py/tests/regression/observed_outputs/test_document_management.json b/py/tests/regression/observed_outputs/test_document_management.json index a916b77ca..a78253d1b 100644 --- a/py/tests/regression/observed_outputs/test_document_management.json +++ b/py/tests/regression/observed_outputs/test_document_management.json @@ -2,479 +2,1098 @@ "ingest_sample_files": { "results": [ { - "message": "Ingestion task queued successfully.", - "task_id": "234d5f77-3a09-4ee4-873b-dc97db81afe8", + "message": "Ingestion task completed successfully.", "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa" }, { - "message": "Ingestion task queued successfully.", - "task_id": "e361567a-cc8f-47be-914a-0e07286fa3bb", + "message": "Ingestion task completed successfully.", "document_id": "716fea3a-826b-5b27-8e59-ffbd1a35455a" }, { - "message": "Ingestion task queued successfully.", - "task_id": "327685d0-ef5f-4405-a522-bdf8cfc79789", + "message": "Ingestion task 
completed successfully.", "document_id": "52e12576-090f-59db-91f4-6d4b2e29ae6c" }, { - "message": "Ingestion task queued successfully.", - "task_id": "965b2b3f-74aa-4a39-9c36-ab1aad27fb25", + "message": "Ingestion task completed successfully.", "document_id": "3e157b3a-8469-51db-90d9-52e7d896b49b" }, { - "message": "Ingestion task queued successfully.", - "task_id": "1a467abd-31bf-4c21-b05d-dcb36822b050", + "message": "Ingestion task completed successfully.", "document_id": "01d514a4-5cb1-5c86-be8c-0c5c312a02c9" }, { - "message": "Ingestion task queued successfully.", - "task_id": "0ae9a48e-bc5c-4eea-bf86-54bf00b09f8f", + "message": "Ingestion task completed successfully.", "document_id": "c3291abf-8a4e-5d9d-80fd-232ef6fd8526" }, { - "message": "Ingestion task queued successfully.", - "task_id": "4915b7ed-e255-459b-8af0-3a423e09c881", + "message": "Ingestion task completed successfully.", "document_id": "e797da22-8c5d-54e5-bed5-a55954cf6bf9" }, { - "message": "Ingestion task queued successfully.", - "task_id": "96248159-021b-442e-9d14-783a1e80b39f", + "message": "Ingestion task completed successfully.", "document_id": "7b0f40c5-2ace-5781-ae35-ead99ddee8c5" }, { - "message": "Ingestion task queued successfully.", - "task_id": "e886cf3f-362a-4b41-ab98-303a03a2292a", + "message": "Ingestion task completed successfully.", "document_id": "7c1105fc-8f62-5a8f-ac81-fe88f3ec9e4c" }, { - "message": "Ingestion task queued successfully.", - "task_id": "0ae3812e-071b-4b12-ba2d-a425c9d2bacc", + "message": "Ingestion task completed successfully.", "document_id": "2f576170-c4f9-5141-a910-a0924f341de4" }, { - "message": "Ingestion task queued successfully.", - "task_id": "41c6357d-0657-44a1-a0e4-12e99d552b82", + "message": "Ingestion task completed successfully.", "document_id": "30f950f0-c692-57c5-b6ec-ff78ccf5ccdc" }, { - "message": "Ingestion task queued successfully.", - "task_id": "1985e9a6-9e2c-4f0f-9d3c-99f8be6db02b", + "message": "Ingestion task completed successfully.", "document_id": "57eec3df-cf68-5559-a80d-ae3fb55b9af1" }, { - "message": "Ingestion task queued successfully.", - "task_id": "937ea186-fa97-4a08-a13c-240de51f9e9a", - "document_id": "db02076e-989a-59cd-98d5-e24e15a0bd27" + "message": "Ingestion task completed successfully.", + "document_id": "9fbe403b-c11c-5aae-8ade-ef22980c3ad1" }, { - "message": "Ingestion task queued successfully.", - "task_id": "ceb2f270-595d-4bd6-bfd1-bd2925dc5b96", + "message": "Ingestion task completed successfully.", "document_id": "d421207a-d799-5806-8d67-46b2005b15d4" }, { - "message": "Ingestion task queued successfully.", - "task_id": "9c90d5e4-1513-48c3-a598-beaccd8fcf09", + "message": "Ingestion task completed successfully.", "document_id": "451adbbd-e24e-5c9b-80a8-f424b6c7accb" }, { - "message": "Ingestion task queued successfully.", - "task_id": "d88a4951-22ec-4778-b65c-298710b8e937", + "message": "Ingestion task completed successfully.", "document_id": "5b1bd54f-4d70-56b9-a017-a618bc75f94c" } ] }, "reingest_sample_file": { + "results": "{\"detail\":{\"message\":\"Error during ingestion: Document b4ac4dd6-5f27-596e-a55b-7cf242ca30aa was already ingested and is not in a failed state.\",\"error_type\":\"R2RException\"}}" + }, + "documents_overview": { "results": [ { - "message": "Ingestion task queued successfully.", - "task_id": "f0d71ddb-7516-4331-ad7e-c7f0205f6f66", - "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa" - }, - { - "message": "Ingestion task queued successfully.", - "task_id": "d2c55f19-ceb4-4aad-995d-37569419b40e", - "document_id": 
"716fea3a-826b-5b27-8e59-ffbd1a35455a" - }, - { - "message": "Ingestion task queued successfully.", - "task_id": "9e15204c-4f3a-41cc-9f34-0deff009df46", - "document_id": "52e12576-090f-59db-91f4-6d4b2e29ae6c" + "id": "5b1bd54f-4d70-56b9-a017-a618bc75f94c", + "collection_ids": [], + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "type": "html", + "metadata": { + "version": "v0" + }, + "title": "pg_essay_3.html", + "version": "v0", + "size_in_bytes": 166556, + "ingestion_status": "success", + "restructuring_status": "pending", + "created_at": "2024-09-27T17:01:31.874002Z", + "updated_at": "2024-09-27T17:01:31.874010Z", + "ingestion_attempt_number": null }, { - "message": "Ingestion task queued successfully.", - "task_id": "dadbbaf6-559d-48d4-8db4-881f33760af5", - "document_id": "3e157b3a-8469-51db-90d9-52e7d896b49b" + "id": "451adbbd-e24e-5c9b-80a8-f424b6c7accb", + "collection_ids": [], + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "type": "png", + "metadata": { + "version": "v0" + }, + "title": "screen_shot.png", + "version": "v0", + "size_in_bytes": 1055688, + "ingestion_status": "success", + "restructuring_status": "pending", + "created_at": "2024-09-27T17:01:17.143733Z", + "updated_at": "2024-09-27T17:01:17.143741Z", + "ingestion_attempt_number": null }, { - "message": "Ingestion task queued successfully.", - "task_id": "0892bd5e-57a5-47f5-9a0a-8c558db7b4c1", - "document_id": "01d514a4-5cb1-5c86-be8c-0c5c312a02c9" + "id": "d421207a-d799-5806-8d67-46b2005b15d4", + "collection_ids": [], + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "type": "txt", + "metadata": { + "version": "v0" + }, + "title": "yc_companies.txt", + "version": "v0", + "size_in_bytes": 62948, + "ingestion_status": "success", + "restructuring_status": "pending", + "created_at": "2024-09-27T17:01:15.832273Z", + "updated_at": "2024-09-27T17:01:15.832281Z", + "ingestion_attempt_number": null }, { - "message": "Ingestion task queued successfully.", - "task_id": "04b91d55-2b46-40a7-9ddb-da362286f6bc", - "document_id": "c3291abf-8a4e-5d9d-80fd-232ef6fd8526" + "id": "9fbe403b-c11c-5aae-8ade-ef22980c3ad1", + "collection_ids": [], + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "type": "txt", + "metadata": { + "version": "v0" + }, + "title": "aristotle.txt", + "version": "v0", + "size_in_bytes": 97804, + "ingestion_status": "success", + "restructuring_status": "pending", + "created_at": "2024-09-27T17:01:13.749890Z", + "updated_at": "2024-09-27T17:01:13.749901Z", + "ingestion_attempt_number": null }, { - "message": "Ingestion task queued successfully.", - "task_id": "142a0871-1ab2-4f9b-a9c5-b77ff10141ea", - "document_id": "e797da22-8c5d-54e5-bed5-a55954cf6bf9" + "id": "57eec3df-cf68-5559-a80d-ae3fb55b9af1", + "collection_ids": [], + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "type": "html", + "metadata": { + "version": "v0" + }, + "title": "pg_essay_2.html", + "version": "v0", + "size_in_bytes": 166816, + "ingestion_status": "success", + "restructuring_status": "pending", + "created_at": "2024-09-27T17:01:13.111406Z", + "updated_at": "2024-09-27T17:01:13.111416Z", + "ingestion_attempt_number": null }, { - "message": "Ingestion task queued successfully.", - "task_id": "c60fa8db-33c4-4d66-8317-f4615568835d", - "document_id": "7b0f40c5-2ace-5781-ae35-ead99ddee8c5" + "id": "30f950f0-c692-57c5-b6ec-ff78ccf5ccdc", + "collection_ids": [], + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "type": "txt", + "metadata": { + "version": "v0" + }, + "title": "test.txt", + "version": "v0", + 
"size_in_bytes": 28, + "ingestion_status": "success", + "restructuring_status": "pending", + "created_at": "2024-09-27T17:01:12.788282Z", + "updated_at": "2024-09-27T17:01:12.788287Z", + "ingestion_attempt_number": null }, { - "message": "Ingestion task queued successfully.", - "task_id": "154593d2-8498-4b92-b765-005dfa9b0de0", - "document_id": "7c1105fc-8f62-5a8f-ac81-fe88f3ec9e4c" + "id": "2f576170-c4f9-5141-a910-a0924f341de4", + "collection_ids": [], + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "type": "pdf", + "metadata": { + "version": "v0" + }, + "title": "lyft_2021.pdf", + "version": "v0", + "size_in_bytes": 1920404, + "ingestion_status": "success", + "restructuring_status": "pending", + "created_at": "2024-09-27T17:00:49.981071Z", + "updated_at": "2024-09-27T17:00:49.981086Z", + "ingestion_attempt_number": null }, { - "message": "Ingestion task queued successfully.", - "task_id": "e076c320-2e29-4787-92a6-f60afd2db533", - "document_id": "2f576170-c4f9-5141-a910-a0924f341de4" + "id": "7c1105fc-8f62-5a8f-ac81-fe88f3ec9e4c", + "collection_ids": [], + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "type": "html", + "metadata": { + "version": "v0" + }, + "title": "pg_essay_5.html", + "version": "v0", + "size_in_bytes": 165040, + "ingestion_status": "success", + "restructuring_status": "pending", + "created_at": "2024-09-27T17:00:49.315963Z", + "updated_at": "2024-09-27T17:00:49.315980Z", + "ingestion_attempt_number": null }, { - "message": "Ingestion task queued successfully.", - "task_id": "c7bfee83-259d-4663-9dcc-05d8def34632", - "document_id": "30f950f0-c692-57c5-b6ec-ff78ccf5ccdc" + "id": "7b0f40c5-2ace-5781-ae35-ead99ddee8c5", + "collection_ids": [], + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "type": "pdf", + "metadata": { + "version": "v0" + }, + "title": "graphrag.pdf", + "version": "v0", + "size_in_bytes": 2287544, + "ingestion_status": "success", + "restructuring_status": "pending", + "created_at": "2024-09-27T17:00:48.676977Z", + "updated_at": "2024-09-27T17:00:48.676985Z", + "ingestion_attempt_number": null }, { - "message": "Ingestion task queued successfully.", - "task_id": "96875e20-aa60-4ba9-a0c7-013bfd883e68", - "document_id": "57eec3df-cf68-5559-a80d-ae3fb55b9af1" + "id": "e797da22-8c5d-54e5-bed5-a55954cf6bf9", + "collection_ids": [], + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "type": "html", + "metadata": { + "version": "v0" + }, + "title": "pg_essay_4.html", + "version": "v0", + "size_in_bytes": 157484, + "ingestion_status": "success", + "restructuring_status": "pending", + "created_at": "2024-09-27T17:00:48.182144Z", + "updated_at": "2024-09-27T17:00:48.182154Z", + "ingestion_attempt_number": null }, { - "message": "Ingestion task queued successfully.", - "task_id": "706ea773-4345-48cb-a5c8-b4296d7cf906", - "document_id": "db02076e-989a-59cd-98d5-e24e15a0bd27" + "id": "c3291abf-8a4e-5d9d-80fd-232ef6fd8526", + "collection_ids": [], + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "type": "txt", + "metadata": { + "version": "v0" + }, + "title": "aristotle_v2.txt", + "version": "v0", + "size_in_bytes": 3380, + "ingestion_status": "success", + "restructuring_status": "pending", + "created_at": "2024-09-27T17:00:47.523137Z", + "updated_at": "2024-09-27T17:00:47.523153Z", + "ingestion_attempt_number": null }, { - "message": "Ingestion task queued successfully.", - "task_id": "55182343-2d74-43e7-873b-50ec76f4c6cd", - "document_id": "d421207a-d799-5806-8d67-46b2005b15d4" + "id": "01d514a4-5cb1-5c86-be8c-0c5c312a02c9", + 
"collection_ids": [], + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "type": "mp3", + "metadata": { + "version": "v0" + }, + "title": "sample2.mp3", + "version": "v0", + "size_in_bytes": 96608, + "ingestion_status": "success", + "restructuring_status": "pending", + "created_at": "2024-09-27T17:00:46.224925Z", + "updated_at": "2024-09-27T17:00:46.224929Z", + "ingestion_attempt_number": null }, { - "message": "Ingestion task queued successfully.", - "task_id": "b41719da-cb6f-4667-919e-23fd0b7bf094", - "document_id": "451adbbd-e24e-5c9b-80a8-f424b6c7accb" + "id": "3e157b3a-8469-51db-90d9-52e7d896b49b", + "collection_ids": [], + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "type": "pdf", + "metadata": { + "version": "v0" + }, + "title": "uber_2021.pdf", + "version": "v0", + "size_in_bytes": 2507312, + "ingestion_status": "success", + "restructuring_status": "pending", + "created_at": "2024-09-27T17:00:10.909182Z", + "updated_at": "2024-09-27T17:00:10.909191Z", + "ingestion_attempt_number": null }, { - "message": "Ingestion task queued successfully.", - "task_id": "228230e4-4f54-413e-9655-2c9ae06c6754", - "document_id": "5b1bd54f-4d70-56b9-a017-a618bc75f94c" - } - ] - }, - "documents_overview": { - "results": [ - { - "id": "5b1bd54f-4d70-56b9-a017-a618bc75f94c", + "id": "52e12576-090f-59db-91f4-6d4b2e29ae6c", "collection_ids": [], "user_id": "2acb499e-8428-543b-bd85-0d9098718220", - "type": "html", + "type": "mp3", "metadata": { "version": "v0" }, - "title": "pg_essay_3.html", + "title": "sample.mp3", "version": "v0", - "size_in_bytes": 166556, + "size_in_bytes": 162228, "ingestion_status": "success", "restructuring_status": "pending", - "created_at": "2024-09-20T22:50:10.456776Z", - "updated_at": "2024-09-20T22:50:10.456787Z", + "created_at": "2024-09-27T17:00:08.852276Z", + "updated_at": "2024-09-27T17:00:08.852281Z", "ingestion_attempt_number": null }, { - "id": "451adbbd-e24e-5c9b-80a8-f424b6c7accb", + "id": "716fea3a-826b-5b27-8e59-ffbd1a35455a", "collection_ids": [], "user_id": "2acb499e-8428-543b-bd85-0d9098718220", - "type": "png", + "type": "txt", "metadata": { "version": "v0" }, - "title": "screen_shot.png", + "title": "got.txt", "version": "v0", - "size_in_bytes": 1055688, + "size_in_bytes": 12656, "ingestion_status": "success", "restructuring_status": "pending", - "created_at": "2024-09-20T22:50:10.445592Z", - "updated_at": "2024-09-20T22:50:10.445596Z", + "created_at": "2024-09-27T17:00:08.177884Z", + "updated_at": "2024-09-27T17:00:08.177887Z", "ingestion_attempt_number": null }, { - "id": "57eec3df-cf68-5559-a80d-ae3fb55b9af1", + "id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", "collection_ids": [], "user_id": "2acb499e-8428-543b-bd85-0d9098718220", "type": "html", "metadata": { "version": "v0" }, - "title": "pg_essay_2.html", + "title": "pg_essay_1.html", "version": "v0", - "size_in_bytes": 166816, + "size_in_bytes": 175340, "ingestion_status": "success", "restructuring_status": "pending", - "created_at": "2024-09-20T22:50:09.506868Z", - "updated_at": "2024-09-20T22:50:09.506871Z", + "created_at": "2024-09-27T17:00:07.272659Z", + "updated_at": "2024-09-27T17:00:07.272681Z", "ingestion_attempt_number": null + } + ], + "total_entries": 16 + }, + "document_chunks_test": { + "results": [ + { + "extraction_id": "36474213-c9fa-5fb7-b99e-c3720743da53", + "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "collection_ids": [], + "text": "A Project of One's Own", + "metadata": { + "version": "v0", + "document_type": 
"html" + } + }, + { + "extraction_id": "286b3218-517c-50bf-b8ea-1262e8ec6b42", + "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "collection_ids": [], + "text": "June 2021A few days ago, on the way home from school, my nine year old son\ntold me he couldn't wait to get home to write more of the story he\nwas working on. This made me as happy as anything I've heard him", + "metadata": { + "version": "v0", + "document_type": "html" + } + }, + { + "extraction_id": "94aacd72-c343-5467-a57f-e35069b26d05", + "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "collection_ids": [], + "text": "say \u2014 not just because he was excited about his story, but because\nhe'd discovered this way of working. Working on a project of your\nown is as different from ordinary work as skating is from walking.", + "metadata": { + "version": "v0", + "document_type": "html" + } + }, + { + "extraction_id": "db93e935-83e2-5930-84f1-00e60acb2b67", + "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "collection_ids": [], + "text": "It's more fun, but also much more productive.What proportion of great work has been done by people who were\nskating in this sense? If not all of it, certainly a lot.There is something special about working on a project of your own.", + "metadata": { + "version": "v0", + "document_type": "html" + } + }, + { + "extraction_id": "4aa0529e-f7f3-5a0b-b70c-3d5957be6eea", + "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "collection_ids": [], + "text": "I wouldn't say exactly that you're happier. A better word would be\nexcited, or engaged. You're happy when things are going well, but\noften they aren't. When I'm writing an essay, most of the time I'm", + "metadata": { + "version": "v0", + "document_type": "html" + } + }, + { + "extraction_id": "6b216db3-962d-593b-9d8a-ad096df71900", + "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "collection_ids": [], + "text": "worried and puzzled: worried that the essay will turn out badly,\nand puzzled because I'm groping for some idea that I can't see\nclearly enough. Will I be able to pin it down with words? In the", + "metadata": { + "version": "v0", + "document_type": "html" + } + }, + { + "extraction_id": "6c192997-1f46-5646-9060-0c5c4fdb5b0c", + "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "collection_ids": [], + "text": "end I usually can, if I take long enough, but I'm never sure; the\nfirst few attempts often fail.You have moments of happiness when things work out, but they don't\nlast long, because then you're on to the next problem. So why do", + "metadata": { + "version": "v0", + "document_type": "html" + } + }, + { + "extraction_id": "bebcc3b0-1911-58e9-b34d-9844e6c7cebb", + "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "collection_ids": [], + "text": "it at all? Because to the kind of people who like working this way,\nnothing else feels as right. 
You feel as if you're an animal in its\nnatural habitat, doing what you were meant to do \u2014 not always", + "metadata": { + "version": "v0", + "document_type": "html" + } + }, + { + "extraction_id": "dd2b7c32-50ae-541e-82d3-c63eb38dbd63", + "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "collection_ids": [], + "text": "happy, maybe, but awake and alive.Many kids experience the excitement of working on projects of their\nown. The hard part is making this converge with the work you do as\nan adult. And our customs make it harder. We treat \"playing\" and", + "metadata": { + "version": "v0", + "document_type": "html" + } + }, + { + "extraction_id": "3b446288-a384-527b-9ce0-5ae3ff4a6127", + "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "collection_ids": [], + "text": "\"hobbies\" as qualitatively different from \"work\". It's not clear\nto a kid building a treehouse that there's a direct (though long)\nroute from that to architecture or engineering. And instead of", + "metadata": { + "version": "v0", + "document_type": "html" + } + }, + { + "extraction_id": "1f24eedd-a498-57df-98f2-35ff3f545136", + "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "collection_ids": [], + "text": "pointing out the route, we conceal it, by implicitly treating the\nstuff kids do as different from real work.\n[1]Instead of telling kids that their treehouses could be on the path\nto the work they do as adults, we tell them the path goes through", + "metadata": { + "version": "v0", + "document_type": "html" + } + }, + { + "extraction_id": "7f5e1067-018e-5a6d-88f8-69a89977bfee", + "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "collection_ids": [], + "text": "school. And unfortunately schoolwork tends to be very different from\nworking on projects of one's own. It's usually neither a project,\nnor one's own. So as school gets more serious, working on projects", + "metadata": { + "version": "v0", + "document_type": "html" + } + }, + { + "extraction_id": "d94e3ccb-f524-524b-acd1-5c3676c25901", + "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "collection_ids": [], + "text": "of one's own is something that survives, if at all, as a thin thread\noff to the side.It's a bit sad to think of all the high school kids turning their\nbacks on building treehouses and sitting in class dutifully learning", + "metadata": { + "version": "v0", + "document_type": "html" + } + }, + { + "extraction_id": "41159be9-49fa-5dd6-b8ba-2f17fa152514", + "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "collection_ids": [], + "text": "collaboration. And interestingly, it shades into collaboration in\ntwo different ways. One way to collaborate is to share a single\nproject. 
For example, when two mathematicians collaborate on a proof", + "metadata": { + "version": "v0", + "document_type": "html" + } + }, + { + "extraction_id": "ec8bf70e-7cfd-5404-9963-05de778a7027", + "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "collection_ids": [], + "text": "about Darwin or Newton to pass some exam, when the work that made\nDarwin and Newton famous was actually closer in spirit to building\ntreehouses than studying for exams.If I had to choose between my kids getting good grades and", + "metadata": { + "version": "v0", + "document_type": "html" + } + }, + { + "extraction_id": "91a8d83a-d47b-5a74-a99f-af4843371342", + "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "collection_ids": [], + "text": "working on ambitious projects of their own, I'd pick\nthe projects. And not because I'm an indulgent parent, but because\nI've been on the other end and I know which has more predictive\nvalue. When I was picking startups for Y Combinator, I didn't care", + "metadata": { + "version": "v0", + "document_type": "html" + } + }, + { + "extraction_id": "2cfc596a-5770-5448-b1b1-b2ac0dd411f6", + "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "collection_ids": [], + "text": "about applicants' grades. But if they'd worked on projects of their\nown, I wanted to hear all about those.\n[2]It may be inevitable that school is the way it is. I'm not saying\nwe have to redesign it (though I'm not saying we don't), just that", + "metadata": { + "version": "v0", + "document_type": "html" + } + }, + { + "extraction_id": "a77ed9c4-5964-5bad-b6dc-37eed79476ce", + "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "collection_ids": [], + "text": "we should understand what it does to our attitudes to work \u2014 that\nit steers us toward the dutiful plodding kind of work, often using\ncompetition as bait, and away from skating.There are occasionally times when schoolwork becomes a project of", + "metadata": { + "version": "v0", + "document_type": "html" + } + }, + { + "extraction_id": "5294e1b5-4060-52fb-9c64-849ac9f64e1e", + "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "collection_ids": [], + "text": "one's own. Whenever I had to write a paper, that would become a\nproject of my own \u2014 except in English classes, ironically, because\nthe things one has to write in English classes are so\nbogus. And", + "metadata": { + "version": "v0", + "document_type": "html" + } + }, + { + "extraction_id": "208dff84-1585-5b7d-8884-22b9e7866552", + "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "collection_ids": [], + "text": "bogus. And\nwhen I got to college and started taking CS classes, the programs\nI had to write became projects of my own. Whenever I was writing\nor programming, I was usually skating, and that has been true ever", + "metadata": { + "version": "v0", + "document_type": "html" + } + }, + { + "extraction_id": "38d53fb3-23a8-5acd-a380-880597165f89", + "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "collection_ids": [], + "text": "since.So where exactly is the edge of projects of one's own? 
That's an\ninteresting question, partly because the answer is so complicated,\nand partly because there's so much at stake. There turn out to be", + "metadata": { + "version": "v0", + "document_type": "html" + } + }, + { + "extraction_id": "4d486be0-9a77-5ea7-9dca-d9e89b2ebca7", + "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "collection_ids": [], + "text": "two senses in which work can be one's own: 1) that you're doing it\nvoluntarily, rather than merely because someone told you to, and\n2) that you're doing it by yourself.The edge of the former is quite sharp. People who care a lot about", + "metadata": { + "version": "v0", + "document_type": "html" + } + }, + { + "extraction_id": "09c22efb-c142-534a-9447-17494f84c9a3", + "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "collection_ids": [], + "text": "their work are usually very sensitive to the difference between\npulling, and being pushed, and work tends to fall into one category\nor the other. But the test isn't simply whether you're told to do", + "metadata": { + "version": "v0", + "document_type": "html" + } + }, + { + "extraction_id": "e5d1b3d3-139f-5c23-801d-20558174d518", + "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "collection_ids": [], + "text": "something. You can choose to do something you're told to do. Indeed,\nyou can own it far more thoroughly than the person who told you to\ndo it.For example, math homework is for most people something they're", + "metadata": { + "version": "v0", + "document_type": "html" + } + }, + { + "extraction_id": "af6ba0d0-c99c-5222-89ac-c5d8d6b28e7c", + "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "collection_ids": [], + "text": "told to do. But for my father, who was a mathematician, it wasn't.\nMost of us think of the problems in a math book as a way to test\nor develop our knowledge of the material explained in each section.", + "metadata": { + "version": "v0", + "document_type": "html" + } + }, + { + "extraction_id": "55a6064d-e548-5026-9304-380b8f666adb", + "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "collection_ids": [], + "text": "But to my father the problems were the part that mattered, and the\ntext was merely a sort of annotation. Whenever he got a new math\nbook it was to him like being given a puzzle: here was a new set", + "metadata": { + "version": "v0", + "document_type": "html" + } + }, + { + "extraction_id": "fbcce017-8d52-5f53-8285-a11289f21a36", + "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "collection_ids": [], + "text": "of problems to solve, and he'd immediately set about solving all\nof them.The other sense of a project being one's own \u2014 working on it by\noneself \u2014 has a much softer edge. It shades gradually into", + "metadata": { + "version": "v0", + "document_type": "html" + } + }, + { + "extraction_id": "e8a5204a-a1a6-5497-a2ad-af07eb4bace7", + "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "collection_ids": [], + "text": "that takes shape in the course of a conversation between them. The\nother way is when multiple people work on separate projects of their\nown that fit together like a jigsaw puzzle. 
For example, when one", + "metadata": { + "version": "v0", + "document_type": "html" + } + }, + { + "extraction_id": "bfa628c5-9800-54a4-a63e-07a08f2fefc2", + "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "collection_ids": [], + "text": "person writes the text of a book and another does the graphic design.\n[3]These two paths into collaboration can of course be combined. But\nunder the right conditions, the excitement of working on a project", + "metadata": { + "version": "v0", + "document_type": "html" + } + }, + { + "extraction_id": "44b6b2da-5060-5cff-b49d-c2c07e4ab53b", + "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "collection_ids": [], + "text": "of one's own can be preserved for quite a while before disintegrating\ninto the turbulent flow of work in a large organization. Indeed,\nthe history of successful organizations is partly the history of\ntechniques for preserving that excitement.", + "metadata": { + "version": "v0", + "document_type": "html" + } + }, + { + "extraction_id": "cdc83d2a-2dfb-5562-88bd-6854cfcfa560", + "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "collection_ids": [], + "text": "techniques for preserving that excitement.\n[4]The team that made the original Macintosh were a great example of\nthis phenomenon. People like Burrell Smith and Andy Hertzfeld and\nBill Atkinson and Susan Kare were not just following orders. They", + "metadata": { + "version": "v0", + "document_type": "html" + } + }, + { + "extraction_id": "8393b1e4-dc5c-58a4-96e0-b11311bc5ef4", + "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "collection_ids": [], + "text": "were not tennis balls hit by Steve Jobs, but rockets let loose by\nSteve Jobs. There was a lot of collaboration between them, but\nthey all seem to have individually felt the excitement of", + "metadata": { + "version": "v0", + "document_type": "html" + } + }, + { + "extraction_id": "d0a3ea65-fd9f-598e-91cb-ad60ca8cb225", + "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "collection_ids": [], + "text": "working on a project of one's own.In Andy Hertzfeld's book on the Macintosh, he describes how they'd\ncome back into the office after dinner and work late into the night.\nPeople who've never experienced the thrill of working on a project", + "metadata": { + "version": "v0", + "document_type": "html" + } + }, + { + "extraction_id": "90eaa50f-e7f1-5dbe-a5b3-406f524cab69", + "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "collection_ids": [], + "text": "they're excited about can't distinguish this kind of working long\nhours from the kind that happens in sweatshops and boiler rooms,\nbut they're at opposite ends of the spectrum. That's why it's a", + "metadata": { + "version": "v0", + "document_type": "html" + } + }, + { + "extraction_id": "f258c9a9-6d19-5a0f-a6a4-fe90a34e3886", + "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "collection_ids": [], + "text": "mistake to insist dogmatically on \"work/life balance.\" Indeed, the\nmere expression \"work/life\" embodies a mistake: it assumes work and\nlife are distinct. 
For those to whom the word \"work\" automatically", + "metadata": { + "version": "v0", + "document_type": "html" + } + }, + { + "extraction_id": "0a3b3f7e-adec-5c9b-95b3-6439ecffb0bd", + "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "collection_ids": [], + "text": "implies the dutiful plodding kind, they are. But for the skaters,\nthe relationship between work and life would be better represented\nby a dash than a slash. I wouldn't want to work on anything that I didn't", + "metadata": { + "version": "v0", + "document_type": "html" + } + }, + { + "extraction_id": "028bbbdb-ac34-570d-b06d-8d942d9c655d", + "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "collection_ids": [], + "text": "want to take over my life.Of course, it's easier to achieve this level of motivation when\nyou're making something like the Macintosh. It's easy for something\nnew to feel like a project of your own. That's one of the reasons", + "metadata": { + "version": "v0", + "document_type": "html" + } + }, + { + "extraction_id": "420e6cc0-cd53-52a8-9603-b6341df16ca3", + "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "collection_ids": [], + "text": "for the tendency programmers have to rewrite things that don't need\nrewriting, and to write their own versions of things that already\nexist. This sometimes alarms managers, and measured by total number", + "metadata": { + "version": "v0", + "document_type": "html" + } + }, + { + "extraction_id": "ca29fb7e-3f3b-5214-b1ff-8bbfe75ef8e1", + "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "collection_ids": [], + "text": "of characters typed, it's rarely the optimal solution. But it's not\nalways driven simply by arrogance or cluelessness.\nWriting code from scratch is also much more rewarding \u2014 so much\nmore rewarding that a good programmer can end up net ahead, despite", + "metadata": { + "version": "v0", + "document_type": "html" + } + }, + { + "extraction_id": "7fa17848-6703-50d0-87bc-957ea785bf09", + "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "collection_ids": [], + "text": "the shocking waste of characters. Indeed, it may be one of the\nadvantages of capitalism that it encourages such rewriting. A company\nthat needs software to do something can't use the software already", + "metadata": { + "version": "v0", + "document_type": "html" + } + }, + { + "extraction_id": "6806b7f4-8531-530f-a148-d4018961a842", + "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "collection_ids": [], + "text": "written to do it at another company, and thus has to write their\nown, which often turns out better.\n[5]The natural alignment between skating and solving new problems is\none of the reasons the payoffs from startups are so high. Not only", + "metadata": { + "version": "v0", + "document_type": "html" + } + }, + { + "extraction_id": "d07324d2-645c-57c3-bafb-3abfde5607f6", + "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "collection_ids": [], + "text": "is the market price of unsolved problems higher, you also get a\ndiscount on productivity when you work on them. 
In fact, you get a\ndouble increase in productivity: when you're doing a clean-sheet", + "metadata": { + "version": "v0", + "document_type": "html" + } + }, + { + "extraction_id": "7bee3ec0-8bc2-5809-9885-ad77e36c619b", + "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "collection_ids": [], + "text": "design, it's easier to recruit skaters, and they get to spend all\ntheir time skating.Steve Jobs knew a thing or two about skaters from having watched\nSteve Wozniak. If you can find the right people, you only have to", + "metadata": { + "version": "v0", + "document_type": "html" + } + }, + { + "extraction_id": "36f55631-f972-5444-b7d9-df38f149c1ce", + "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "collection_ids": [], + "text": "tell them what to do at the highest level. They'll handle the\ndetails. Indeed, they insist on it. For a project to feel like your\nown, you must have sufficient autonomy. You can't be working to\norder, or slowed down", + "metadata": { + "version": "v0", + "document_type": "html" + } + }, + { + "extraction_id": "eef34619-2797-5024-8873-42a011f9d1b9", + "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "collection_ids": [], + "text": "order, or slowed down\nby bureaucracy.One way to ensure autonomy is not to have a boss at all. There are\ntwo ways to do that: to be the boss yourself, and to work on projects\noutside of work. Though they're at opposite ends of the scale", + "metadata": { + "version": "v0", + "document_type": "html" + } + }, + { + "extraction_id": "512b2629-40f6-5611-b39a-ded0925bba83", + "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "collection_ids": [], + "text": "financially, startups and open source projects have a lot in common,\nincluding the fact that they're often run by skaters. And indeed,\nthere's a wormhole from one end of the scale to the other: one of\nthe best ways to discover", + "metadata": { + "version": "v0", + "document_type": "html" + } + }, + { + "extraction_id": "ed505044-a207-5d2e-b658-eb4356e81901", + "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "collection_ids": [], + "text": "the best ways to discover\nstartup ideas is to work on a project\njust for fun.If your projects are the kind that make money, it's easy to work\non them. It's harder when they're not. And the hardest part, usually,", + "metadata": { + "version": "v0", + "document_type": "html" + } + }, + { + "extraction_id": "8cb9f991-78db-5796-9774-d3fd28e544e6", + "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "collection_ids": [], + "text": "is morale. That's where adults have it harder than kids. Kids just\nplunge in and build their treehouse without worrying about whether\nthey're wasting their time, or how it compares to other treehouses.", + "metadata": { + "version": "v0", + "document_type": "html" + } + }, + { + "extraction_id": "c70d905f-608f-5d7d-bcbe-5a20f591a5e8", + "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "collection_ids": [], + "text": "And frankly we could learn a lot from kids here. 
The high standards\nmost grownups have for \"real\" work do not always serve us well.The most important phase in a project of one's own is at the", + "metadata": { + "version": "v0", + "document_type": "html" + } + }, + { + "extraction_id": "a7a7fc21-f1da-595a-9571-7112607e1e27", + "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "collection_ids": [], + "text": "beginning: when you go from thinking it might be cool to do x to\nactually doing x. And at that point high standards are not merely\nuseless but positively harmful. There are a few people who start", + "metadata": { + "version": "v0", + "document_type": "html" + } + }, + { + "extraction_id": "c1cba552-2381-5b9b-88c9-96bd58029425", + "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "collection_ids": [], + "text": "too many new projects, but far more, I suspect, who are deterred\nby fear of failure from starting projects that would have succeeded\nif they had.But if we couldn't benefit as kids from the knowledge that our", + "metadata": { + "version": "v0", + "document_type": "html" + } + }, + { + "extraction_id": "d66492c5-ce35-5b44-b6ad-1783115ffc74", + "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "collection_ids": [], + "text": "treehouses were on the path to grownup projects, we can at least\nbenefit as grownups from knowing that our projects are on a path\nthat stretches back to treehouses. Remember that careless confidence", + "metadata": { + "version": "v0", + "document_type": "html" + } + }, + { + "extraction_id": "90bde670-7caa-5f09-93ba-ab2b82b20ac3", + "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "collection_ids": [], + "text": "you had as a kid when starting something new? That would be a\npowerful thing to recapture.If it's harder as adults to retain that kind of confidence, we at\nleast tend to be more aware of what we're doing. Kids bounce, or", + "metadata": { + "version": "v0", + "document_type": "html" + } }, { - "id": "db02076e-989a-59cd-98d5-e24e15a0bd27", - "collection_ids": [], + "extraction_id": "08e0d5a1-fee3-5074-972d-b2cc05f05780", + "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", "user_id": "2acb499e-8428-543b-bd85-0d9098718220", - "type": "txt", + "collection_ids": [], + "text": "are herded, from one kind of work to the next, barely realizing\nwhat's happening to them. Whereas we know more about different types\nof work and have more control over which we do. Ideally we can have", "metadata": { - "version": "v0" - }, - "title": "aristotle.txt", - "version": "v0", - "size_in_bytes": 97804, - "ingestion_status": "success", - "restructuring_status": "pending", - "created_at": "2024-09-20T22:50:09.476022Z", - "updated_at": "2024-09-20T22:50:09.476024Z", - "ingestion_attempt_number": null + "version": "v0", + "document_type": "html" + } }, { - "id": "d421207a-d799-5806-8d67-46b2005b15d4", - "collection_ids": [], + "extraction_id": "8c4ab727-e5b9-5301-ae57-b21812cfd5d1", + "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", "user_id": "2acb499e-8428-543b-bd85-0d9098718220", - "type": "txt", + "collection_ids": [], + "text": "the best of both worlds: to be deliberate in choosing to work on\nprojects of our own, and carelessly confident in starting new ones.\nNotes[1]\n\"Hobby\" is a curious word. 
Now it means work that isn't real", "metadata": { - "version": "v0" - }, - "title": "yc_companies.txt", - "version": "v0", - "size_in_bytes": 62948, - "ingestion_status": "success", - "restructuring_status": "pending", - "created_at": "2024-09-20T22:50:09.471883Z", - "updated_at": "2024-09-20T22:50:09.471886Z", - "ingestion_attempt_number": null + "version": "v0", + "document_type": "html" + } }, { - "id": "30f950f0-c692-57c5-b6ec-ff78ccf5ccdc", - "collection_ids": [], + "extraction_id": "2ba7e1e5-59e3-5376-b9dc-0f215279624f", + "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", "user_id": "2acb499e-8428-543b-bd85-0d9098718220", - "type": "txt", + "collection_ids": [], + "text": "work \u2014 work that one is not to be judged by \u2014 but originally it just\nmeant an obsession in a fairly general sense (even a political\nopinion, for example) that one metaphorically rode as a child rides", "metadata": { - "version": "v0" - }, - "title": "test.txt", - "version": "v0", - "size_in_bytes": 28, - "ingestion_status": "success", - "restructuring_status": "pending", - "created_at": "2024-09-20T22:50:09.470416Z", - "updated_at": "2024-09-20T22:50:09.470418Z", - "ingestion_attempt_number": null + "version": "v0", + "document_type": "html" + } }, { - "id": "2f576170-c4f9-5141-a910-a0924f341de4", - "collection_ids": [], + "extraction_id": "37babbc6-03b8-596b-85c5-7cb11461c4f3", + "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", "user_id": "2acb499e-8428-543b-bd85-0d9098718220", - "type": "pdf", + "collection_ids": [], + "text": "a hobby-horse. It's hard to say if its recent, narrower meaning is\na change for the better or the worse. For sure there are lots of\nfalse positives \u2014 lots of projects that end up being important but", "metadata": { - "version": "v0" - }, - "title": "lyft_2021.pdf", - "version": "v0", - "size_in_bytes": 1920404, - "ingestion_status": "success", - "restructuring_status": "pending", - "created_at": "2024-09-20T22:50:09.469252Z", - "updated_at": "2024-09-20T22:50:09.469256Z", - "ingestion_attempt_number": null + "version": "v0", + "document_type": "html" + } }, { - "id": "7c1105fc-8f62-5a8f-ac81-fe88f3ec9e4c", - "collection_ids": [], + "extraction_id": "adc03e1d-89db-5c36-bb16-318eac3cb755", + "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", "user_id": "2acb499e-8428-543b-bd85-0d9098718220", - "type": "html", + "collection_ids": [], + "text": "are dismissed initially as mere hobbies. But on the other hand, the\nconcept provides valuable cover for projects in the early, ugly\nduckling phase.[2]\nTiger parents, as parents so often do, are fighting the last", "metadata": { - "version": "v0" - }, - "title": "pg_essay_5.html", - "version": "v0", - "size_in_bytes": 165040, - "ingestion_status": "parsing", - "restructuring_status": "pending", - "created_at": "2024-09-20T22:50:09.465810Z", - "updated_at": "2024-09-20T22:55:12.531882Z", - "ingestion_attempt_number": null + "version": "v0", + "document_type": "html" + } }, { - "id": "7b0f40c5-2ace-5781-ae35-ead99ddee8c5", - "collection_ids": [], + "extraction_id": "32ff9dd1-fce0-5766-8c3e-0bb8a9068e0e", + "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", "user_id": "2acb499e-8428-543b-bd85-0d9098718220", - "type": "pdf", + "collection_ids": [], + "text": "war. Grades mattered more in the old days when the route to success\nwas to acquire\ncredentials\nwhile ascending some predefined ladder.\nBut it's just as well that their tactics are focused on grades. 
How", "metadata": { - "version": "v0" - }, - "title": "graphrag.pdf", - "version": "v0", - "size_in_bytes": 2287544, - "ingestion_status": "parsing", - "restructuring_status": "pending", - "created_at": "2024-09-20T22:50:09.455913Z", - "updated_at": "2024-09-20T22:55:12.517199Z", - "ingestion_attempt_number": null + "version": "v0", + "document_type": "html" + } }, { - "id": "e797da22-8c5d-54e5-bed5-a55954cf6bf9", - "collection_ids": [], + "extraction_id": "9222c916-8bc4-58ef-8c58-924ed71a35d0", + "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", "user_id": "2acb499e-8428-543b-bd85-0d9098718220", - "type": "html", + "collection_ids": [], + "text": "awful it would be if they invaded the territory of projects, and\nthereby gave their kids a distaste for this kind of work by forcing\nthem to do it. Grades are already a grim, fake world, and aren't", "metadata": { - "version": "v0" - }, - "title": "pg_essay_4.html", - "version": "v0", - "size_in_bytes": 157484, - "ingestion_status": "parsing", - "restructuring_status": "pending", - "created_at": "2024-09-20T22:50:07.036336Z", - "updated_at": "2024-09-20T22:55:12.484462Z", - "ingestion_attempt_number": null + "version": "v0", + "document_type": "html" + } }, { - "id": "c3291abf-8a4e-5d9d-80fd-232ef6fd8526", - "collection_ids": [], + "extraction_id": "00505280-32be-5925-b394-eeddb336ab26", + "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", "user_id": "2acb499e-8428-543b-bd85-0d9098718220", - "type": "txt", + "collection_ids": [], + "text": "harmed much by parental interference, but working on one's own\nprojects is a more delicate, private thing that could be damaged\nvery easily.[3]\nThe complicated, gradual edge between working on one's own", "metadata": { - "version": "v0" - }, - "title": "aristotle_v2.txt", - "version": "v0", - "size_in_bytes": 3380, - "ingestion_status": "parsing", - "restructuring_status": "pending", - "created_at": "2024-09-20T22:50:05.565359Z", - "updated_at": "2024-09-20T22:55:12.516690Z", - "ingestion_attempt_number": null + "version": "v0", + "document_type": "html" + } }, { - "id": "01d514a4-5cb1-5c86-be8c-0c5c312a02c9", - "collection_ids": [], + "extraction_id": "bac4a7ed-732e-5034-8932-1cffa3fb6b23", + "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", "user_id": "2acb499e-8428-543b-bd85-0d9098718220", - "type": "mp3", + "collection_ids": [], + "text": "projects and collaborating with others is one reason there is so\nmuch disagreement about the idea of the \"lone genius.\" In practice\npeople collaborate (or not) in all kinds of different ways, but the", "metadata": { - "version": "v0" - }, - "title": "sample2.mp3", - "version": "v0", - "size_in_bytes": 96608, - "ingestion_status": "chunking", - "restructuring_status": "pending", - "created_at": "2024-09-20T22:50:04.622762Z", - "updated_at": "2024-09-20T22:55:12.486232Z", - "ingestion_attempt_number": null + "version": "v0", + "document_type": "html" + } }, { - "id": "3e157b3a-8469-51db-90d9-52e7d896b49b", - "collection_ids": [], + "extraction_id": "13168f68-b395-58ea-8015-32a52e1b577f", + "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", "user_id": "2acb499e-8428-543b-bd85-0d9098718220", - "type": "pdf", + "collection_ids": [], + "text": "idea of the lone genius is definitely not a myth. There's a core\nof truth to it that goes with a certain way of working.[4]\nCollaboration is powerful too. 
The optimal organization would\ncombine collaboration and ownership in such a way as to do the least", "metadata": { - "version": "v0" - }, - "title": "uber_2021.pdf", - "version": "v0", - "size_in_bytes": 2507312, - "ingestion_status": "parsing", - "restructuring_status": "pending", - "created_at": "2024-09-20T22:50:04.211252Z", - "updated_at": "2024-09-20T22:55:12.509328Z", - "ingestion_attempt_number": null + "version": "v0", + "document_type": "html" + } }, { - "id": "52e12576-090f-59db-91f4-6d4b2e29ae6c", - "collection_ids": [], + "extraction_id": "e879b314-b886-50fd-8503-a3ff208f1796", + "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", "user_id": "2acb499e-8428-543b-bd85-0d9098718220", - "type": "mp3", + "collection_ids": [], + "text": "damage to each. Interestingly, companies and university departments\napproach this ideal from opposite directions: companies insist on\ncollaboration, and occasionally also manage both to recruit skaters", "metadata": { - "version": "v0" - }, - "title": "sample.mp3", - "version": "v0", - "size_in_bytes": 162228, - "ingestion_status": "storing", - "restructuring_status": "pending", - "created_at": "2024-09-20T22:50:02.699362Z", - "updated_at": "2024-09-20T22:55:10.516462Z", - "ingestion_attempt_number": null + "version": "v0", + "document_type": "html" + } }, { - "id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", - "collection_ids": [], + "extraction_id": "8d705d89-4e26-5d28-ba87-68662c801efc", + "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", "user_id": "2acb499e-8428-543b-bd85-0d9098718220", - "type": "html", + "collection_ids": [], + "text": "and allow them to skate, and university departments insist on the\nability to do independent research (which is by custom treated as\nskating, whether it is or not), and the people they hire collaborate\nas much as they choose.[5]", "metadata": { - "version": "v0" - }, - "title": "pg_essay_1.html", - "version": "v0", - "size_in_bytes": 175340, - "ingestion_status": "storing", - "restructuring_status": "pending", - "created_at": "2024-09-20T22:50:02.517969Z", - "updated_at": "2024-09-20T22:55:10.518388Z", - "ingestion_attempt_number": null + "version": "v0", + "document_type": "html" + } }, { - "id": "716fea3a-826b-5b27-8e59-ffbd1a35455a", - "collection_ids": [], + "extraction_id": "b11d3e48-3aa9-5bfd-8912-e7151435778a", + "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", "user_id": "2acb499e-8428-543b-bd85-0d9098718220", - "type": "txt", + "collection_ids": [], + "text": "as much as they choose.[5]\nIf a company could design its software in such a way that the\nbest newly arrived programmers always got a clean sheet, it could\nhave a kind of eternal youth. That might not be impossible. 
If you", "metadata": { - "version": "v0" - }, - "title": "got.txt", - "version": "v0", - "size_in_bytes": 12656, - "ingestion_status": "chunking", - "restructuring_status": "pending", - "created_at": "2024-09-20T22:50:02.516278Z", - "updated_at": "2024-09-20T22:55:10.519709Z", - "ingestion_attempt_number": null - } - ], - "total_entries": 16 - }, - "document_chunks_test": { - "results": [ + "version": "v0", + "document_type": "html" + } + }, { - "fragment_id": "67e7ab57-eaa0-57d8-9276-da273abcdabd", - "extraction_id": "286b3218-517c-50bf-b8ea-1262e8ec6b42", + "extraction_id": "2951ae53-3cd1-5c5e-8898-09b81fe23b64", "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", "user_id": "2acb499e-8428-543b-bd85-0d9098718220", "collection_ids": [], - "text": "June 2021 A few days ago, on the way home from school, my nine year old son", + "text": "had a software backbone defining a game with sufficiently clear\nrules, individual programmers could write their own players.\nThanks to Trevor Blackwell, Paul Buchheit, Andy Hertzfeld, Jessica\nLivingston, and Peter Norvig for reading drafts of this.", "metadata": { "version": "v0", - "chunk_order": 0, - "document_type": "html", - "unstructured_filetype": "text/html", - "unstructured_languages": [ - "eng" - ], - "unstructured_text_as_html": "
June 2021 A few days ago, on the way home from school, my nine year old son
", - "partitioned_by_unstructured": true, - "unstructured_is_continuation": true + "document_type": "html" } } ], - "total_entries": 1 + "total_entries": 67 }, "update_document_test": { "results": { "message": "Update task queued successfully.", - "task_id": "ce9a5927-64b8-4431-acf0-fe12ad2230e5", "document_ids": [ - "db02076e-989a-59cd-98d5-e24e15a0bd27" + "9fbe403b-c11c-5aae-8ade-ef22980c3ad1" ] } }, @@ -493,8 +1112,8 @@ "size_in_bytes": 166556, "ingestion_status": "success", "restructuring_status": "pending", - "created_at": "2024-09-20T22:50:10.456776Z", - "updated_at": "2024-09-20T22:55:14.529464Z", + "created_at": "2024-09-27T17:01:31.874002Z", + "updated_at": "2024-09-27T17:01:31.874010Z", "ingestion_attempt_number": null }, { @@ -510,59 +1129,60 @@ "size_in_bytes": 1055688, "ingestion_status": "success", "restructuring_status": "pending", - "created_at": "2024-09-20T22:50:10.445592Z", - "updated_at": "2024-09-20T22:55:14.539605Z", + "created_at": "2024-09-27T17:01:17.143733Z", + "updated_at": "2024-09-27T17:01:17.143741Z", "ingestion_attempt_number": null }, { - "id": "57eec3df-cf68-5559-a80d-ae3fb55b9af1", + "id": "d421207a-d799-5806-8d67-46b2005b15d4", "collection_ids": [], "user_id": "2acb499e-8428-543b-bd85-0d9098718220", - "type": "html", + "type": "txt", "metadata": { "version": "v0" }, - "title": "pg_essay_2.html", + "title": "yc_companies.txt", "version": "v0", - "size_in_bytes": 166816, + "size_in_bytes": 62948, "ingestion_status": "success", "restructuring_status": "pending", - "created_at": "2024-09-20T22:50:09.506868Z", - "updated_at": "2024-09-20T22:55:14.533120Z", + "created_at": "2024-09-27T17:01:15.832273Z", + "updated_at": "2024-09-27T17:01:15.832281Z", "ingestion_attempt_number": null }, { - "id": "db02076e-989a-59cd-98d5-e24e15a0bd27", + "id": "9fbe403b-c11c-5aae-8ade-ef22980c3ad1", "collection_ids": [], "user_id": "2acb499e-8428-543b-bd85-0d9098718220", "type": "txt", "metadata": { - "version": "v0" + "title": "aristotle_v2.txt", + "version": "v1" }, - "title": "aristotle.txt", - "version": "v0", - "size_in_bytes": 97804, + "title": "aristotle_v2.txt", + "version": "v1", + "size_in_bytes": 2534, "ingestion_status": "success", "restructuring_status": "pending", - "created_at": "2024-09-20T22:50:09.476022Z", - "updated_at": "2024-09-20T22:55:14.531133Z", + "created_at": "2024-09-27T17:01:13.749890Z", + "updated_at": "2024-09-27T17:06:34.291766Z", "ingestion_attempt_number": null }, { - "id": "d421207a-d799-5806-8d67-46b2005b15d4", + "id": "57eec3df-cf68-5559-a80d-ae3fb55b9af1", "collection_ids": [], "user_id": "2acb499e-8428-543b-bd85-0d9098718220", - "type": "txt", + "type": "html", "metadata": { "version": "v0" }, - "title": "yc_companies.txt", + "title": "pg_essay_2.html", "version": "v0", - "size_in_bytes": 62948, + "size_in_bytes": 166816, "ingestion_status": "success", "restructuring_status": "pending", - "created_at": "2024-09-20T22:50:09.471883Z", - "updated_at": "2024-09-20T22:55:14.531772Z", + "created_at": "2024-09-27T17:01:13.111406Z", + "updated_at": "2024-09-27T17:01:13.111416Z", "ingestion_attempt_number": null }, { @@ -578,8 +1198,8 @@ "size_in_bytes": 28, "ingestion_status": "success", "restructuring_status": "pending", - "created_at": "2024-09-20T22:50:09.470416Z", - "updated_at": "2024-09-20T22:55:14.536782Z", + "created_at": "2024-09-27T17:01:12.788282Z", + "updated_at": "2024-09-27T17:01:12.788287Z", "ingestion_attempt_number": null }, { @@ -593,10 +1213,10 @@ "title": "lyft_2021.pdf", "version": "v0", "size_in_bytes": 1920404, - 
"ingestion_status": "parsing", + "ingestion_status": "success", "restructuring_status": "pending", - "created_at": "2024-09-20T22:50:09.469252Z", - "updated_at": "2024-09-20T22:55:14.569269Z", + "created_at": "2024-09-27T17:00:49.981071Z", + "updated_at": "2024-09-27T17:00:49.981086Z", "ingestion_attempt_number": null }, { @@ -612,8 +1232,8 @@ "size_in_bytes": 165040, "ingestion_status": "success", "restructuring_status": "pending", - "created_at": "2024-09-20T22:50:09.465810Z", - "updated_at": "2024-09-20T22:55:12.531882Z", + "created_at": "2024-09-27T17:00:49.315963Z", + "updated_at": "2024-09-27T17:00:49.315980Z", "ingestion_attempt_number": null }, { @@ -629,8 +1249,8 @@ "size_in_bytes": 2287544, "ingestion_status": "success", "restructuring_status": "pending", - "created_at": "2024-09-20T22:50:09.455913Z", - "updated_at": "2024-09-20T22:55:12.517199Z", + "created_at": "2024-09-27T17:00:48.676977Z", + "updated_at": "2024-09-27T17:00:48.676985Z", "ingestion_attempt_number": null }, { @@ -646,8 +1266,8 @@ "size_in_bytes": 157484, "ingestion_status": "success", "restructuring_status": "pending", - "created_at": "2024-09-20T22:50:07.036336Z", - "updated_at": "2024-09-20T22:55:12.484462Z", + "created_at": "2024-09-27T17:00:48.182144Z", + "updated_at": "2024-09-27T17:00:48.182154Z", "ingestion_attempt_number": null }, { @@ -663,8 +1283,8 @@ "size_in_bytes": 3380, "ingestion_status": "success", "restructuring_status": "pending", - "created_at": "2024-09-20T22:50:05.565359Z", - "updated_at": "2024-09-20T22:55:12.516690Z", + "created_at": "2024-09-27T17:00:47.523137Z", + "updated_at": "2024-09-27T17:00:47.523153Z", "ingestion_attempt_number": null }, { @@ -680,8 +1300,8 @@ "size_in_bytes": 96608, "ingestion_status": "success", "restructuring_status": "pending", - "created_at": "2024-09-20T22:50:04.622762Z", - "updated_at": "2024-09-20T22:55:12.486232Z", + "created_at": "2024-09-27T17:00:46.224925Z", + "updated_at": "2024-09-27T17:00:46.224929Z", "ingestion_attempt_number": null }, { @@ -695,10 +1315,10 @@ "title": "uber_2021.pdf", "version": "v0", "size_in_bytes": 2507312, - "ingestion_status": "parsing", + "ingestion_status": "success", "restructuring_status": "pending", - "created_at": "2024-09-20T22:50:04.211252Z", - "updated_at": "2024-09-20T22:55:12.509328Z", + "created_at": "2024-09-27T17:00:10.909182Z", + "updated_at": "2024-09-27T17:00:10.909191Z", "ingestion_attempt_number": null }, { @@ -714,42 +1334,42 @@ "size_in_bytes": 162228, "ingestion_status": "success", "restructuring_status": "pending", - "created_at": "2024-09-20T22:50:02.699362Z", - "updated_at": "2024-09-20T22:55:10.516462Z", + "created_at": "2024-09-27T17:00:08.852276Z", + "updated_at": "2024-09-27T17:00:08.852281Z", "ingestion_attempt_number": null }, { - "id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", + "id": "716fea3a-826b-5b27-8e59-ffbd1a35455a", "collection_ids": [], "user_id": "2acb499e-8428-543b-bd85-0d9098718220", - "type": "html", + "type": "txt", "metadata": { "version": "v0" }, - "title": "pg_essay_1.html", + "title": "got.txt", "version": "v0", - "size_in_bytes": 175340, + "size_in_bytes": 12656, "ingestion_status": "success", "restructuring_status": "pending", - "created_at": "2024-09-20T22:50:02.517969Z", - "updated_at": "2024-09-20T22:55:10.518388Z", + "created_at": "2024-09-27T17:00:08.177884Z", + "updated_at": "2024-09-27T17:00:08.177887Z", "ingestion_attempt_number": null }, { - "id": "716fea3a-826b-5b27-8e59-ffbd1a35455a", + "id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", "collection_ids": [], "user_id": 
"2acb499e-8428-543b-bd85-0d9098718220", - "type": "txt", + "type": "html", "metadata": { "version": "v0" }, - "title": "got.txt", + "title": "pg_essay_1.html", "version": "v0", - "size_in_bytes": 12656, + "size_in_bytes": 175340, "ingestion_status": "success", "restructuring_status": "pending", - "created_at": "2024-09-20T22:50:02.516278Z", - "updated_at": "2024-09-20T22:55:10.519709Z", + "created_at": "2024-09-27T17:00:07.272659Z", + "updated_at": "2024-09-27T17:00:07.272681Z", "ingestion_attempt_number": null } ], @@ -773,8 +1393,8 @@ "size_in_bytes": 166556, "ingestion_status": "success", "restructuring_status": "pending", - "created_at": "2024-09-20T22:50:10.456776Z", - "updated_at": "2024-09-20T22:55:14.529464Z", + "created_at": "2024-09-27T17:01:31.874002Z", + "updated_at": "2024-09-27T17:01:31.874010Z", "ingestion_attempt_number": null }, { @@ -790,59 +1410,60 @@ "size_in_bytes": 1055688, "ingestion_status": "success", "restructuring_status": "pending", - "created_at": "2024-09-20T22:50:10.445592Z", - "updated_at": "2024-09-20T22:55:14.539605Z", + "created_at": "2024-09-27T17:01:17.143733Z", + "updated_at": "2024-09-27T17:01:17.143741Z", "ingestion_attempt_number": null }, { - "id": "57eec3df-cf68-5559-a80d-ae3fb55b9af1", + "id": "d421207a-d799-5806-8d67-46b2005b15d4", "collection_ids": [], "user_id": "2acb499e-8428-543b-bd85-0d9098718220", - "type": "html", + "type": "txt", "metadata": { "version": "v0" }, - "title": "pg_essay_2.html", + "title": "yc_companies.txt", "version": "v0", - "size_in_bytes": 166816, + "size_in_bytes": 62948, "ingestion_status": "success", "restructuring_status": "pending", - "created_at": "2024-09-20T22:50:09.506868Z", - "updated_at": "2024-09-20T22:55:14.533120Z", + "created_at": "2024-09-27T17:01:15.832273Z", + "updated_at": "2024-09-27T17:01:15.832281Z", "ingestion_attempt_number": null }, { - "id": "db02076e-989a-59cd-98d5-e24e15a0bd27", + "id": "9fbe403b-c11c-5aae-8ade-ef22980c3ad1", "collection_ids": [], "user_id": "2acb499e-8428-543b-bd85-0d9098718220", "type": "txt", "metadata": { - "version": "v0" + "title": "aristotle_v2.txt", + "version": "v1" }, - "title": "aristotle.txt", - "version": "v0", - "size_in_bytes": 97804, + "title": "aristotle_v2.txt", + "version": "v1", + "size_in_bytes": 2534, "ingestion_status": "success", "restructuring_status": "pending", - "created_at": "2024-09-20T22:50:09.476022Z", - "updated_at": "2024-09-20T22:55:14.531133Z", + "created_at": "2024-09-27T17:01:13.749890Z", + "updated_at": "2024-09-27T17:06:34.291766Z", "ingestion_attempt_number": null }, { - "id": "d421207a-d799-5806-8d67-46b2005b15d4", + "id": "57eec3df-cf68-5559-a80d-ae3fb55b9af1", "collection_ids": [], "user_id": "2acb499e-8428-543b-bd85-0d9098718220", - "type": "txt", + "type": "html", "metadata": { "version": "v0" }, - "title": "yc_companies.txt", + "title": "pg_essay_2.html", "version": "v0", - "size_in_bytes": 62948, + "size_in_bytes": 166816, "ingestion_status": "success", "restructuring_status": "pending", - "created_at": "2024-09-20T22:50:09.471883Z", - "updated_at": "2024-09-20T22:55:14.531772Z", + "created_at": "2024-09-27T17:01:13.111406Z", + "updated_at": "2024-09-27T17:01:13.111416Z", "ingestion_attempt_number": null }, { @@ -858,8 +1479,8 @@ "size_in_bytes": 28, "ingestion_status": "success", "restructuring_status": "pending", - "created_at": "2024-09-20T22:50:09.470416Z", - "updated_at": "2024-09-20T22:55:14.536782Z", + "created_at": "2024-09-27T17:01:12.788282Z", + "updated_at": "2024-09-27T17:01:12.788287Z", "ingestion_attempt_number": 
null }, { @@ -873,10 +1494,10 @@ "title": "lyft_2021.pdf", "version": "v0", "size_in_bytes": 1920404, - "ingestion_status": "parsing", + "ingestion_status": "success", "restructuring_status": "pending", - "created_at": "2024-09-20T22:50:09.469252Z", - "updated_at": "2024-09-20T22:55:14.569269Z", + "created_at": "2024-09-27T17:00:49.981071Z", + "updated_at": "2024-09-27T17:00:49.981086Z", "ingestion_attempt_number": null }, { @@ -892,8 +1513,8 @@ "size_in_bytes": 165040, "ingestion_status": "success", "restructuring_status": "pending", - "created_at": "2024-09-20T22:50:09.465810Z", - "updated_at": "2024-09-20T22:55:12.531882Z", + "created_at": "2024-09-27T17:00:49.315963Z", + "updated_at": "2024-09-27T17:00:49.315980Z", "ingestion_attempt_number": null }, { @@ -909,8 +1530,8 @@ "size_in_bytes": 2287544, "ingestion_status": "success", "restructuring_status": "pending", - "created_at": "2024-09-20T22:50:09.455913Z", - "updated_at": "2024-09-20T22:55:12.517199Z", + "created_at": "2024-09-27T17:00:48.676977Z", + "updated_at": "2024-09-27T17:00:48.676985Z", "ingestion_attempt_number": null }, { @@ -926,8 +1547,8 @@ "size_in_bytes": 157484, "ingestion_status": "success", "restructuring_status": "pending", - "created_at": "2024-09-20T22:50:07.036336Z", - "updated_at": "2024-09-20T22:55:12.484462Z", + "created_at": "2024-09-27T17:00:48.182144Z", + "updated_at": "2024-09-27T17:00:48.182154Z", "ingestion_attempt_number": null }, { @@ -943,8 +1564,8 @@ "size_in_bytes": 3380, "ingestion_status": "success", "restructuring_status": "pending", - "created_at": "2024-09-20T22:50:05.565359Z", - "updated_at": "2024-09-20T22:55:12.516690Z", + "created_at": "2024-09-27T17:00:47.523137Z", + "updated_at": "2024-09-27T17:00:47.523153Z", "ingestion_attempt_number": null }, { @@ -960,8 +1581,8 @@ "size_in_bytes": 96608, "ingestion_status": "success", "restructuring_status": "pending", - "created_at": "2024-09-20T22:50:04.622762Z", - "updated_at": "2024-09-20T22:55:12.486232Z", + "created_at": "2024-09-27T17:00:46.224925Z", + "updated_at": "2024-09-27T17:00:46.224929Z", "ingestion_attempt_number": null }, { @@ -975,10 +1596,10 @@ "title": "uber_2021.pdf", "version": "v0", "size_in_bytes": 2507312, - "ingestion_status": "parsing", + "ingestion_status": "success", "restructuring_status": "pending", - "created_at": "2024-09-20T22:50:04.211252Z", - "updated_at": "2024-09-20T22:55:12.509328Z", + "created_at": "2024-09-27T17:00:10.909182Z", + "updated_at": "2024-09-27T17:00:10.909191Z", "ingestion_attempt_number": null }, { @@ -994,8 +1615,8 @@ "size_in_bytes": 162228, "ingestion_status": "success", "restructuring_status": "pending", - "created_at": "2024-09-20T22:50:02.699362Z", - "updated_at": "2024-09-20T22:55:10.516462Z", + "created_at": "2024-09-27T17:00:08.852276Z", + "updated_at": "2024-09-27T17:00:08.852281Z", "ingestion_attempt_number": null }, { @@ -1011,8 +1632,8 @@ "size_in_bytes": 12656, "ingestion_status": "success", "restructuring_status": "pending", - "created_at": "2024-09-20T22:50:02.516278Z", - "updated_at": "2024-09-20T22:55:10.519709Z", + "created_at": "2024-09-27T17:00:08.177884Z", + "updated_at": "2024-09-27T17:00:08.177887Z", "ingestion_attempt_number": null } ], diff --git a/py/tests/regression/observed_outputs/test_observability.json b/py/tests/regression/observed_outputs/test_observability.json index 458eeb497..e1cd0032a 100644 --- a/py/tests/regression/observed_outputs/test_observability.json +++ b/py/tests/regression/observed_outputs/test_observability.json @@ -24,7 +24,7 @@ 
"57eec3df-cf68-5559-a80d-ae3fb55b9af1", "01d514a4-5cb1-5c86-be8c-0c5c312a02c9", "d421207a-d799-5806-8d67-46b2005b15d4", - "db02076e-989a-59cd-98d5-e24e15a0bd27", + "9fbe403b-c11c-5aae-8ade-ef22980c3ad1", "52e12576-090f-59db-91f4-6d4b2e29ae6c", "7c1105fc-8f62-5a8f-ac81-fe88f3ec9e4c", "2f576170-c4f9-5141-a910-a0924f341de4" diff --git a/py/tests/regression/observed_outputs/test_retrieval.json b/py/tests/regression/observed_outputs/test_retrieval.json index 8ea8e66f9..6cbf4eefd 100644 --- a/py/tests/regression/observed_outputs/test_retrieval.json +++ b/py/tests/regression/observed_outputs/test_retrieval.json @@ -3,212 +3,132 @@ "results": { "vector_search_results": [ { - "fragment_id": "392ab9b4-c4bc-5894-8edf-332fcd9245bb", - "extraction_id": "cd49a88d-92e5-59f1-8331-3d3d3ecb7f3a", + "extraction_id": "eb0c96ea-1f7c-548c-88ec-a4b25d7dd3c6", "document_id": "3e157b3a-8469-51db-90d9-52e7d896b49b", "user_id": "2acb499e-8428-543b-bd85-0d9098718220", "collection_ids": [], - "score": 0.23822619091116293, - "text": "Finance Leases\n\n2022 2023 2024 2025 2026 Thereafter Total undiscounted lease payments Less: imputed interest\n\n$\n\n280 $ 312 264 214 198 2,067 3,335 (1,506) 1,829 $\n\n140 60 34 9 \u2014 1 244 (10) 234\n\nTotal lease liabilities\n\n$\n\nAs of December 31, 2021, we had additional operating leases and finance leases, primarily for corporate offices and servers, that have not yet commenced of $421 million and $19 million, respectively. These operating and finance leases will commence between fiscal year 2022 and fiscal year 2023 with lease terms of 2 years to 13 years.\n\nMission Bay 1 & 2\n\nIn 2015, we entered into a joint venture (\u201cJV\u201d) agreement with a real estate developer (\u201cJV Partner\u201d) to develop land (\u201cthe Land\u201d) in San Francisco to construct our new headquarters (the \u201cHeadquarters\u201d). The Headquarters consists of two adjacent office buildings totaling approximately 423,000 rentable square feet. In connection with the JV arrangement, we acquired a 49% interest in the JV, the principal asset of which was the Land.", + "score": 0.33923701410073015, + "text": "Francs, any day except for (i) a Saturday, (ii) a Sunday or", "metadata": { "version": "v0", - "chunk_order": 759, "document_type": "pdf", - "unstructured_filetype": "application/pdf", - "unstructured_languages": [ - "eng" - ], - "unstructured_page_number": 109, - "partitioned_by_unstructured": true, "associated_query": "What is the capital of France?" } }, { - "fragment_id": "fd8508db-c444-5ed9-afce-67340354fb1e", - "extraction_id": "7f16fa20-9bc1-5841-ba74-95cdbb27e9fb", - "document_id": "2f576170-c4f9-5141-a910-a0924f341de4", + "extraction_id": "42985330-c438-5ee7-b54d-49128e8fb77d", + "document_id": "3e157b3a-8469-51db-90d9-52e7d896b49b", "user_id": "2acb499e-8428-543b-bd85-0d9098718220", "collection_ids": [], - "score": 0.21572618390761744, - "text": "(c)\n\nCommuting and Corporate Housing Expenses. During your Employment, the Company shall cover the cost of your reasonable and substantiated expenses for travel between your primary residence and the Company\u2019s headquarters in San Francisco and corporate housing in the San Francisco Bay Area, up to a pre-tax maximum of $200,000 per year in the aggregate. 
All expense reimbursements shall be made in accordance with the Company\u2019s expense reimbursement policy.\n\n(d)", + "score": 0.3349560397915816, + "text": "Francs and any other foreign currency reasonably requested by", "metadata": { "version": "v0", - "chunk_order": 971, "document_type": "pdf", - "unstructured_filetype": "application/pdf", - "unstructured_languages": [ - "eng" - ], - "unstructured_page_number": 170, - "partitioned_by_unstructured": true, "associated_query": "What is the capital of France?" } }, { - "fragment_id": "76d010b2-1498-531a-bf89-66aa17331203", - "extraction_id": "f31920df-e1db-5a2c-9b8f-9c7b845a21c1", - "document_id": "3e157b3a-8469-51db-90d9-52e7d896b49b", + "extraction_id": "2ea5816a-bd3c-5216-9ecb-f366b1d3fb07", + "document_id": "2f576170-c4f9-5141-a910-a0924f341de4", "user_id": "2acb499e-8428-543b-bd85-0d9098718220", "collection_ids": [], - "score": 0.1947217732667923, - "text": "45-2647441 (I.R.S. Employer Identification No.)\n\n1515 3rd Street San Francisco, California 94158 (Address of principal executive offices, including zip code) (415) 612-8582 (Registrant\u2019s telephone number, including area code) ____________________________________________\n\nSecurities registered pursuant to Section 12(b) of the Act:\n\nTitle of each class Common Stock, par value $0.00001 per share", + "score": 0.254553869638078, + "text": "Francisco and corporate housing in the San Francisco Bay Area, up to a pre-tax maximum of $200,000 per year in theaggregate. All expense reimbursements shall be made in accordance with the Companys expense reimbursement policy.\n(e)", "metadata": { "version": "v0", - "chunk_order": 1, "document_type": "pdf", - "unstructured_filetype": "application/pdf", - "unstructured_languages": [ - "eng" - ], - "unstructured_page_number": 1, - "partitioned_by_unstructured": true, "associated_query": "What is the capital of France?" } }, { - "fragment_id": "c96ee688-6e36-5abb-b066-d87779be1cf6", - "extraction_id": "eb08b70d-2e82-5de3-90ee-98537a761ea8", - "document_id": "2f576170-c4f9-5141-a910-a0924f341de4", + "extraction_id": "3c2ddada-4b84-5b90-9bb0-fe30a7875266", + "document_id": "3e157b3a-8469-51db-90d9-52e7d896b49b", "user_id": "2acb499e-8428-543b-bd85-0d9098718220", "collection_ids": [], - "score": 0.19264961779117584, - "text": "Item 2. Properties.\n\nOur corporate headquarters are located in San Francisco, California, and consist of approximately 420,000 square feet under lease agreements through May 31,\n\n2030. We maintain additional offices in multiple locations in the U.S. and internationally in Montreal, Canada, Munich, Germany and Minsk, Belarus.\n\nWe lease all of our facilities and do not own any real property. We believe our facilities are adequate and suitable for our current needs and that, should it be\n\nneeded, suitable additional or alternative space will be available to accommodate our operations.\n\n53\n\nItem 3. Legal Proceedings.\n\nSee discussion under the heading Legal Proceedings in Note 9 to the consolidated financial statements included in Part II, Item 8 of this report.\n\nItem 4. Mine Safety Disclosures.\n\nNot applicable.\n\n54\n\nPART II\n\nItem 5. 
Market for Registrant\u2019s Common Equity, Related Stockholder Matters and Issuer Purchases of Equity Securities.", + "score": 0.25307297706604004, + "text": "British Pounds , Euros, Australian Dollars, Canadian Dollars, Hong Kong Dollars , Japanese Yen", "metadata": { "version": "v0", - "chunk_order": 434, "document_type": "pdf", - "unstructured_filetype": "application/pdf", - "unstructured_languages": [ - "eng" - ], - "unstructured_page_number": 53, - "partitioned_by_unstructured": true, "associated_query": "What is the capital of France?" } }, { - "fragment_id": "cfc07310-2da0-5cfb-ba9b-b3c669084882", - "extraction_id": "376938c5-338f-5c56-b599-efcf94ace67a", + "extraction_id": "4851d734-5431-5bb8-81d7-fb98f7ca10cf", "document_id": "3e157b3a-8469-51db-90d9-52e7d896b49b", "user_id": "2acb499e-8428-543b-bd85-0d9098718220", "collection_ids": [], - "score": 0.18162215163575735, - "text": "Property and equipment, net\n\n$\n\nWe capitalized $76 million and $55 million in internal-use software costs during the years ended December 31, 2020 and 2021, respectively, which is included in property and equipment, net on the consolidated balance sheets. Amortization of capitalized software development costs was $22 million, $55 million, and $69 million for the years ended December 31, 2019, 2020 and 2021, respectively.\n\nAmounts in construction in progress represent buildings, leasehold improvements, assets under construction, and other assets not placed in service.", + "score": 0.2385799139738083, + "text": "capital, capital expenditures, expansion plans, and other investments, which may in turn limit our ability to implement our business strategy, heighten ourvulnerability", "metadata": { "version": "v0", - "chunk_order": 754, "document_type": "pdf", - "unstructured_filetype": "application/pdf", - "unstructured_languages": [ - "eng" - ], - "unstructured_page_number": 107, - "partitioned_by_unstructured": true, "associated_query": "What is the capital of France?" } }, { - "fragment_id": "2654f646-222c-50af-bd1c-c7311e6a9dc9", - "extraction_id": "b25d210b-1b58-578a-b038-34f76d77f377", + "extraction_id": "6267e755-622a-551a-ae68-aba8960110aa", "document_id": "3e157b3a-8469-51db-90d9-52e7d896b49b", "user_id": "2acb499e-8428-543b-bd85-0d9098718220", "collection_ids": [], - "score": 0.18122072927828292, - "text": "\u201cCanadian BA Rate Borrowing\u201d refers to a Borrowing bearing interest at a rate determined by reference to the Canadian BA Rate.\n\n10\n\n\u201cCanadian BA Rate Loan\u201d refers to a Loan bearing interest at a rate determined by reference to the Canadian BA Rate.\n\n\u201cCanadian Dollars\u201d means the lawful currency of Canada.\n\n\u201cCapital Lease Obligations\u201d of any Person means the obligations of such Person to pay rent or other amounts under any lease of (or", + "score": 0.23678594906413897, + "text": "executive, legislative, judicial, taxing, regulatory or administrative powers or functions of or pertaining to government (including any supra-\nnational bodies such as the European Union or the European Central Bank).", "metadata": { "version": "v0", - "chunk_order": 1085, "document_type": "pdf", - "unstructured_filetype": "application/pdf", - "unstructured_languages": [ - "eng" - ], - "unstructured_page_number": 182, - "partitioned_by_unstructured": true, "associated_query": "What is the capital of France?" 
} }, { - "fragment_id": "309f729d-78eb-569a-837c-50367c20e898", - "extraction_id": "dfc368a6-efaf-5f4d-a20b-0fd6059a5f35", - "document_id": "2f576170-c4f9-5141-a910-a0924f341de4", + "extraction_id": "90bff76d-ba56-57ed-87f9-e7d701b9980d", + "document_id": "3e157b3a-8469-51db-90d9-52e7d896b49b", "user_id": "2acb499e-8428-543b-bd85-0d9098718220", "collection_ids": [], - "score": 0.1806427240371704, - "text": "principal payments on finance lease obligations for $35.5 million.\n\nCash provided by financing activities was $512.6 million for the year ended December 31, 2020, which primarily consisted of proceeds from issuance of our 2025\n\nNotes of $734.1 million offset by the purchase of the Capped Calls for $132.7 million.\n\nLiquidity and Capital Resources", + "score": 0.23538486659526825, + "text": "or , Canadian Dollars, Japanese Yen, British Pounds and Swiss Francs", "metadata": { "version": "v0", - "chunk_order": 531, "document_type": "pdf", - "unstructured_filetype": "application/pdf", - "unstructured_languages": [ - "eng" - ], - "unstructured_page_number": 71, - "partitioned_by_unstructured": true, "associated_query": "What is the capital of France?" } }, { - "fragment_id": "a0a98796-ca8a-5c8b-9063-a15064a914d4", - "extraction_id": "411a2659-a240-5c20-b11a-305b2b66987d", + "extraction_id": "7c1d2cdf-bf13-5818-b6d8-539eb5e19d5d", "document_id": "3e157b3a-8469-51db-90d9-52e7d896b49b", "user_id": "2acb499e-8428-543b-bd85-0d9098718220", "collection_ids": [], - "score": 0.16736936076696907, - "text": "Depreciation and Amortization\n\nDepreciation and amortization expenses primarily consist of depreciation on buildings, site improvements, computer and network equipment, software, leasehold improvements, furniture and fixtures, and amortization of intangible assets. Depreciation includes expenses associated with buildings, site improvements, computer and network equipment, leased vehicles, and furniture, fixtures, as well as leasehold improvements. Amortization includes expenses associated with our capitalized internal-use software and acquired intangible assets.\n\nAs our business recovers from the impacts of COVID-19, we would anticipate depreciation and amortization expenses to increase as we continue to build out\n\nour network infrastructure and building locations.", + "score": 0.22707748413085938, + "text": "RFR means, for any RFR Loan denominated in (a) British Pounds, SONIA and (b) Swiss Francs, SARON. \nRFR Borrowing means, as to any Borrowing, the RFR Loans comprising such Borrowing.", "metadata": { "version": "v0", - "chunk_order": 457, "document_type": "pdf", - "unstructured_filetype": "application/pdf", - "unstructured_languages": [ - "eng" - ], - "unstructured_page_number": 55, - "partitioned_by_unstructured": true, "associated_query": "What is the capital of France?" 
} }, { - "fragment_id": "b31fdb40-3d2e-54ba-8064-321fd1420f38", - "extraction_id": "ff6c949e-ff32-5c38-983c-eba10f3a9609", - "document_id": "2f576170-c4f9-5141-a910-a0924f341de4", + "extraction_id": "1180d5aa-8038-593b-bf9e-037f4432d6ce", + "document_id": "3e157b3a-8469-51db-90d9-52e7d896b49b", "user_id": "2acb499e-8428-543b-bd85-0d9098718220", "collection_ids": [], - "score": 0.1666801671171001, - "text": "Our future capital requirements will depend on many factors, including, but not limited to our growth, our ability to maintain profitability on an Adjusted EBITDA basis, our ability to attract and retain drivers and riders on our platform, the continuing market acceptance of our offerings, the timing and extent of spending to support our efforts to develop our platform, actual insurance payments for which we have made reserves, measures we take in response to the COVID-19 pandemic, our ability to maintain demand for and confidence in the safety of our platform during and following the COVID-19 pandemic, and the expansion of sales and marketing activities. As noted above, we expect to see continued suppression of demand for our platform and the resultant negative impacts on revenue for so long as the travel restrictions and other social distancing measures in response to COVID-19 remain in place. Further, we may in the future enter into arrangements to acquire or invest in businesses, products, services and", + "score": 0.22615296996551615, + "text": "RFR Loan means a Loan that bears interest at a rate based on the Adjusted Daily Simple RFR. For the avoidance of doubt, only Loans denominated in British Pounds and Swiss Francs shall bear interest at a rate based on the Adjusted Daily Simple RFR.", "metadata": { "version": "v0", - "chunk_order": 538, "document_type": "pdf", - "unstructured_filetype": "application/pdf", - "unstructured_languages": [ - "eng" - ], - "unstructured_page_number": 72, - "partitioned_by_unstructured": true, "associated_query": "What is the capital of France?" } }, { - "fragment_id": "ff0eb83c-ea67-51e2-8201-4211aa9d591a", - "extraction_id": "69e31e99-75df-5850-b71b-2b973d4cf244", - "document_id": "3e157b3a-8469-51db-90d9-52e7d896b49b", + "extraction_id": "3c4406dc-4e9f-5bb9-bec3-8bdfe05c09d1", + "document_id": "2f576170-c4f9-5141-a910-a0924f341de4", "user_id": "2acb499e-8428-543b-bd85-0d9098718220", "collection_ids": [], - "score": 0.1657480697932172, - "text": "(1)\n\n(2)\n\nFor the year ended December 31, 2019, the increase in the valuation allowance was primarily attributable to a step-up in the tax basis of intellectual property rights, an increase in U.S. federal, state and Netherlands deferred tax assets resulting from the loss from operations, and tax credits generated during the year.\n\nFor the year ended December 31, 2020, the increase in the valuation allowance was primarily attributable to an increase in tax rate in Netherlands, an increase\n\nin U.S. federal, state and Netherlands deferred tax assets resulting from the loss from operations, and tax credits generated during the year.", + "score": 0.2248732879682359, + "text": "None.\nItem 2. Properties.\nOur\n corporate headquarters are located in San Francisco, California, and consist of approximately 420,000 square feet under lease agreements through May 31,2030. 
We maintain addition", "metadata": { "version": "v0", - "chunk_order": 969, "document_type": "pdf", - "unstructured_filetype": "application/pdf", - "unstructured_languages": [ - "eng" - ], - "unstructured_page_number": 148, - "partitioned_by_unstructured": true, "associated_query": "What is the capital of France?" } } @@ -219,14 +139,14 @@ "basic_rag": { "results": { "completion": { - "id": "chatcmpl-A9gpFrelYOJObG7VkyoUz23HCc0r2", + "id": "chatcmpl-AC8ifwyo0TwipK2PuidzP204Mbm11", "choices": [ { "finish_reason": "stop", "index": 0, "logprobs": null, "message": { - "content": "Uber's profit in 2020 was actually a net loss. The net loss attributable to Uber Technologies, Inc. for the year ended December 31, 2020, was $6.8 billion [3].", + "content": "Uber's profit in 2020 was not a profit but a net loss. Specifically, the net loss attributable to Uber Technologies, Inc. in 2020 was $6,768 million [3], [4].", "refusal": null, "role": "assistant", "function_call": null, @@ -234,15 +154,15 @@ } } ], - "created": 1726872937, + "created": 1727456817, "model": "gpt-4o-2024-05-13", "object": "chat.completion", "service_tier": null, - "system_fingerprint": "fp_3537616b13", + "system_fingerprint": "fp_057232b607", "usage": { "completion_tokens": 44, - "prompt_tokens": 2320, - "total_tokens": 2364, + "prompt_tokens": 712, + "total_tokens": 756, "completion_tokens_details": { "reasoning_tokens": 0 } @@ -251,212 +171,132 @@ "search_results": { "vector_search_results": [ { - "fragment_id": "07aa09c5-81a8-5a48-953a-532064a446f8", - "extraction_id": "d3060c36-85dc-5e8d-b8ff-cfe4c1753ccc", + "extraction_id": "4e3d65e9-bf2d-581e-a936-63c7b6b62cde", "document_id": "3e157b3a-8469-51db-90d9-52e7d896b49b", "user_id": "2acb499e-8428-543b-bd85-0d9098718220", "collection_ids": [], - "score": 0.7445549521115464, - "text": "Revenue was $17.5 billion, or up 57% year-over-year, reflecting the overall growth in our Delivery business and an increase in Freight revenue attributable to the acquisition of Transplace in the fourth quarter of 2021 as well as growth in the number of shippers and carriers on the network combined with an increase in volumes with our top shippers.\n\nNet loss attributable to Uber Technologies, Inc. was $496 million, a 93% improvement year-over-year, driven by a $1.6 billion pre-tax gain on the sale of our ATG Business to Aurora, a $1.6 billion pre-tax net benefit relating to Uber\u2019s equity investments, as well as reductions in our fixed cost structure and increased variable cost efficiencies. Net loss attributable to Uber Technologies, Inc. also included $1.2 billion of stock-based compensation expense.", + "score": 0.7555597570204893, + "text": "Net\n loss attributable to Uber Technologies, Inc. was $496 million, a 93% improvement year-over-year, driven by a $1.6 billion pre-tax gain on the sale of ourATG", "metadata": { "version": "v0", - "chunk_order": 445, "document_type": "pdf", - "unstructured_filetype": "application/pdf", - "unstructured_languages": [ - "eng" - ], - "unstructured_page_number": 53, - "partitioned_by_unstructured": true, "associated_query": "What was Uber's profit in 2020?" 
} }, { - "fragment_id": "2044e305-c042-5f0d-b05d-a2b97181f7a8", - "extraction_id": "4329441a-5faf-5e9d-801f-ebd753ee1bd3", + "extraction_id": "05c6bc4e-6a70-50c6-b3a4-cfd07530d58f", "document_id": "3e157b3a-8469-51db-90d9-52e7d896b49b", "user_id": "2acb499e-8428-543b-bd85-0d9098718220", "collection_ids": [], - "score": 0.7071289420127869, - "text": "Total costs and expenses Loss from operations\n\nInterest expense Other income (expense), net Loss before income taxes and loss from equity method investments Provision for (benefit from) income taxes Loss from equity method investments Net loss including non-controlling interests\n\nLess: net loss attributable to non-controlling interests, net of tax\n\n100 %\n\n46 % 16 % 32 % 20 % 24 % 5 % 144 % (44)% (4)% (15)% (62)% (2)% \u2014 % (61)% \u2014 % (61)%\n\n100 %\n\n54 % 11 % 27 % 12 % 13 % 5 % 122 % (22)% (3)% 19 % (6)% (3)% \u2014 % (3)% \u2014 % (3)%\n\nNet loss attributable to Uber Technologies, Inc.\n\n(1)\n\nTotals of percentage of revenues may not foot due to rounding.\n\nComparison of the Years Ended December 31, 2020 and 2021\n\nRevenue\n\nYear Ended December 31,\n\n(In millions, except percentages)\n\n2020\n\n2021\n\n2020 to 2021 % Change\n\nRevenue\n\n$\n\n11,139 $\n\n17,455\n\n57 %", + "score": 0.6704964041709948, + "text": "ciencies. Net loss attributable to Uber Technologies, Inc. also included $1.2 billion of stock-based compensation expense.Adjusted", "metadata": { "version": "v0", - "chunk_order": 463, "document_type": "pdf", - "unstructured_filetype": "application/pdf", - "unstructured_languages": [ - "eng" - ], - "unstructured_page_number": 57, - "partitioned_by_unstructured": true, "associated_query": "What was Uber's profit in 2020?" } }, { - "fragment_id": "3840834b-7c74-5417-9252-9080e609fb2f", - "extraction_id": "cf934fe1-926d-5525-a230-30946961cf28", + "extraction_id": "6f160bdf-b5af-5725-9084-5285e8ac6f14", "document_id": "3e157b3a-8469-51db-90d9-52e7d896b49b", "user_id": "2acb499e-8428-543b-bd85-0d9098718220", "collection_ids": [], - "score": 0.6809690201571295, - "text": "Year Ended December 31, 2020\n\nRevenue Costs and expenses Cost of revenue, exclusive of depreciation and amortization shown separately below Operations and support Sales and marketing Research and development General and administrative Depreciation and amortization\n\nTotal costs and expenses Loss from operations\n\nInterest expense Other income (expense), net Loss before income taxes and loss from equity method investments Provision for (benefit from) income taxes Loss from equity method investments Net loss including non-controlling interests\n\nLess: net loss attributable to non-controlling interests, net of tax\n\n$\n\n13,000 $\n\n6,061 2,302 4,626 4,836 3,299 472 21,596 (8,596) (559) 722 (8,433) 45 (34) (8,512) (6) (8,506) $\n\n11,139 $\n\n5,154 1,819 3,583 2,205 2,666 575 16,002 (4,863) (458) (1,625) (6,946) (192) (34) (6,788) (20) (6,768) $\n\nNet loss attributable to Uber Technologies, Inc. Net loss per share attributable to Uber Technologies, Inc. 
common stockholders:\n\n$\n\nBasic\n\n$\n\n(6.81) $\n\n(3.86) $", + "score": 0.663426180873742, + "text": "ments(34) (37) Net loss including non-controlling interests\n(6,788) (570) Less: net loss attributab\nle to non-controlling interests, net of tax(20) (74) Net loss attributable to Uber Technologies, Inc.\n$ (6,768) $ (496) 54", "metadata": { "version": "v0", - "chunk_order": 574, "document_type": "pdf", - "unstructured_filetype": "application/pdf", - "unstructured_languages": [ - "eng" - ], - "unstructured_page_number": 77, - "partitioned_by_unstructured": true, "associated_query": "What was Uber's profit in 2020?" } }, { - "fragment_id": "e2a37b27-0644-59e4-9746-37d48592a299", - "extraction_id": "6b86ac2f-ce33-5126-83e6-a8731ea677c8", + "extraction_id": "6492ad57-81b5-509b-9b29-257222d2ab27", "document_id": "3e157b3a-8469-51db-90d9-52e7d896b49b", "user_id": "2acb499e-8428-543b-bd85-0d9098718220", "collection_ids": [], - "score": 0.6805637085605776, - "text": "Less: net loss attributable to non-controlling interests, net of tax\n\n$\n\n11,139 $\n\n5,154 1,819 3,583 2,205 2,666 575 16,002 (4,863) (458) (1,625) (6,946) (192) (34) (6,788) (20) (6,768) $\n\n17,455\n\n9,351 1,877 4,789 2,054 2,316 902 21,289 (3,834) (483) 3,292 (1,025) (492) (37) (570) (74) (496)\n\nNet loss attributable to Uber Technologies, Inc.\n\n$\n\n54\n\nThe following table sets forth the components of our consolidated statements of operations for each of the periods presented as a percentage of revenue\n\n(1)\n\n:\n\nYear Ended December 31, 2021 2020\n\nRevenue Costs and expenses Cost of revenue, exclusive of depreciation and amortization shown separately below Operations and support Sales and marketing Research and development General and administrative Depreciation and amortization", + "score": 0.6612654530247788, + "text": "(8,512) (6,788) (570) Less: net loss attributab\nle to non-controlling interests, net of tax(6) (20) (74) Net loss attributable to Uber Technologies, Inc.\n$ (8,506) $ (6,768) $ (496) Net loss per share attributable to Uber Technologies, Inc. co", "metadata": { "version": "v0", - "chunk_order": 462, "document_type": "pdf", - "unstructured_filetype": "application/pdf", - "unstructured_languages": [ - "eng" - ], - "unstructured_page_number": 56, - "partitioned_by_unstructured": true, "associated_query": "What was Uber's profit in 2020?" } }, { - "fragment_id": "de30c3c9-cdfd-5872-bdaf-4859bef5c3a8", - "extraction_id": "33bc6d8b-9fdc-5df7-be1d-fa7de176a0b5", + "extraction_id": "47051b83-0003-57ac-af18-c14632f085ce", "document_id": "3e157b3a-8469-51db-90d9-52e7d896b49b", "user_id": "2acb499e-8428-543b-bd85-0d9098718220", "collection_ids": [], - "score": 0.6537506580352783, - "text": "The Uber Service activities are performed to satisfy our sole performance obligation in the transaction, which is to connect Drivers and Merchants with end-\n\nusers to facilitate the completion of a successful transaction.\n\nIn 2020, we began charging Mobility end-users a fee to use the platform in certain markets. In these transactions, in addition to a performance obligation to Drivers, we also have a performance obligation to end-users, which is to connect end-users to Drivers in the marketplace. We recognize revenue when a trip is complete. We present revenue on a net basis for these transactions, as we do not control the service provided by Drivers to end-users. 
For the years ended December 31, 2020 and 2021, we recognized total revenue of $323 million and $336 million, respectively, associated with these fees charged to end-users.", + "score": 0.6534081721172037, + "text": "$ 722 $ (1,625) $ 3,292 During\n the year ended December 31, 2020, gain on business divestitures, net represented a $154 million gain on the sale of our Uber Eats India operationsto Zomato", "metadata": { "version": "v0", - "chunk_order": 642, "document_type": "pdf", - "unstructured_filetype": "application/pdf", - "unstructured_languages": [ - "eng" - ], - "unstructured_page_number": 90, - "partitioned_by_unstructured": true, "associated_query": "What was Uber's profit in 2020?" } }, { - "fragment_id": "4b6ee3eb-daca-5930-bafe-946cad56cdcc", - "extraction_id": "eb2bc121-0b00-5f70-8eb6-549e1fb1ed72", + "extraction_id": "f72d0e3f-554f-5ff6-b4a0-ff785eda07b5", "document_id": "3e157b3a-8469-51db-90d9-52e7d896b49b", "user_id": "2acb499e-8428-543b-bd85-0d9098718220", "collection_ids": [], - "score": 0.6387766599655151, - "text": "Other income (expense), net\n\n$\n\nDuring the year ended December 31, 2020, gain on business divestitures, net represented a $154 million gain on the sale of our Uber Eats India operations to Zomato recognized in the first quarter of 2020 and a $77 million gain on the sale of our European Freight Business to sennder GmbH (\u201cSennder\u201d) recognized in the fourth quarter of 2020, partially offset by a $27 million loss on the sale of our JUMP operations to Lime recognized in the second quarter of 2020.\n\n(1)\n\nDuring the year ended December 31, 2021, gain on business divestitures, net represented a $1.6 billion gain on the sale of our ATG Business to Aurora", + "score": 0.651171585847642, + "text": "The following table presents a reconciliation of net loss attributable to Uber Technologies, Inc., the most directly comparable GAAP financial measure, toAdjusted EBITDA for each of the periods ind\nicated:Year Ended December 31,\n(In millions)", "metadata": { "version": "v0", - "chunk_order": 799, "document_type": "pdf", - "unstructured_filetype": "application/pdf", - "unstructured_languages": [ - "eng" - ], - "unstructured_page_number": 118, - "partitioned_by_unstructured": true, "associated_query": "What was Uber's profit in 2020?" } }, { - "fragment_id": "af5f93d3-8b2f-5e71-a358-0dd56c2f68ac", - "extraction_id": "acf12622-2e6e-5234-9768-ba448294a81d", - "document_id": "2f576170-c4f9-5141-a910-a0924f341de4", + "extraction_id": "cfb55758-8051-5ef4-a48b-7a2448593635", + "document_id": "3e157b3a-8469-51db-90d9-52e7d896b49b", "user_id": "2acb499e-8428-543b-bd85-0d9098718220", "collection_ids": [], - "score": 0.632179401387013, - "text": "2019\n\n100.0 %\n\n60.2 17.6 41.6 22.5 32.8 174.7 (74.7) \u2014 2.8 (71.9) 0.1 (72.0)%\n\n2019 to 2020 % Change\n\n(35) %\n\nsecond quarter of 2021. These increases were offset by investments in driver supply by increasing driver incentives recorded as a reduction to revenue by $942.9 million in 2021 as compared to the prior year as rider demand outpaced driver supply during certain periods of the pandemic recovery in 2021. 
Revenue in 2020 was also higher in the first quarter of 2020 prior to the implementation of shelter-in-place orders and other travel restrictions across North America beginning March 2020.\n\nWe expect to see continued recovery in demand for our platform and the resulting positive impacts on revenue as there are more widespread immunity levels, more communities reopen and other restrictive travel and social distancing measures in response to COVID-19 are eased. However, we cannot predict the impact of COVID variants and the longer term impact of the pandemic on consumer behavior.\n\nCost of Revenue\n\n2021", + "score": 0.6217785557904453, + "text": "See the section titled Reconciliations of Non-GAAP Financial Measures for our definition and a reconciliation of net loss attributable toUber Technologies, Inc. to Adjusted EBITDA.\nYear Ended December 31,\n(In millions, except percentages)", "metadata": { "version": "v0", - "chunk_order": 493, "document_type": "pdf", - "unstructured_filetype": "application/pdf", - "unstructured_languages": [ - "eng" - ], - "unstructured_page_number": 63, - "partitioned_by_unstructured": true, "associated_query": "What was Uber's profit in 2020?" } }, { - "fragment_id": "d5379124-e7ff-509f-b47f-a79152eec2d4", - "extraction_id": "2562b865-e4df-5376-9e70-927be9afbb7e", + "extraction_id": "e27535ac-c2f0-5a79-94d6-346074607528", "document_id": "3e157b3a-8469-51db-90d9-52e7d896b49b", "user_id": "2acb499e-8428-543b-bd85-0d9098718220", "collection_ids": [], - "score": 0.63012705682194, - "text": "For additional discussion, see the risk factor titled \u201c\u2014If we are unable to attract or maintain a critical mass of Drivers, consumers, merchants, shippers, and carriers, whether as a result of competition or other factors, our platform will become less appealing to platform users, and our financial results would be adversely impacted.\u201d included in Part I, Item 1A of this Annual Report on Form 10-K as well our 2021 ESG Report and our 2021 People and Culture Report. The information in these reports is not a part of this Form 10-K.\n\nAdditional Information\n\nWe were founded in 2009 and incorporated as Ubercab, Inc., a Delaware corporation, in July 2010. In February 2011, we changed our name to Uber\n\nTechnologies, Inc. Our principal executive offices are located at 1515 3rd Street, San Francisco, California 94158, and our telephone number is (415) 612-8582.\n\n10", + "score": 0.6196261459700446, + "text": "rial respects, the financial condition and results of operations of Uber Technologies, Inc.Date:\nFebruary 24, 2022 By: /s/ Nelson Chai Nelson Chai\nChief Financial Officer\n(Principal Financial Offic\ner)", "metadata": { "version": "v0", - "chunk_order": 77, "document_type": "pdf", - "unstructured_filetype": "application/pdf", - "unstructured_languages": [ - "eng" - ], - "unstructured_page_number": 12, - "partitioned_by_unstructured": true, "associated_query": "What was Uber's profit in 2020?" } }, { - "fragment_id": "98a93be5-13ba-5bd6-9a18-e7ceef0fae88", - "extraction_id": "8ab931e3-8f47-5598-90b8-928f387ec256", + "extraction_id": "e972d08a-7dc8-533b-b821-0eff80d7a81e", "document_id": "3e157b3a-8469-51db-90d9-52e7d896b49b", "user_id": "2acb499e-8428-543b-bd85-0d9098718220", "collection_ids": [], - "score": 0.6285917009601995, - "text": "Uber Technologies, Inc. (\u201cUber,\u201d \u201cwe,\u201d \u201cour,\u201d or \u201cus\u201d) was incorporated in Delaware in July 2010, and is headquartered in San Francisco, California. 
Uber is a technology platform that uses a massive network, leading technology, operational excellence and product expertise to power movement from point A to point B. Uber develops and operates proprietary technology applications supporting a variety of offerings on its platform (\u201cplatform(s)\u201d or \u201cPlatform(s)\u201d). Uber connects consumers (\u201cRider(s)\u201d) with independent providers of ride services (\u201cMobility Driver(s)\u201d) for ridesharing services, and connects Riders and other consumers (\u201cEaters\u201d) with restaurants, grocers and other stores (collectively, \u201cMerchants\u201d) with delivery service providers (\u201cCouriers\u201d) for meal preparation, grocery and other delivery services. Riders and Eaters are collectively referred to as \u201cend-user(s)\u201d or \u201cconsumer(s).\u201d Mobility Drivers and Couriers are collectively referred to as \u201cDriver(s).\u201d Uber also connects consumers with public", + "score": 0.611637363737102, + "text": "rial respects, the financial condition and results of operations of Uber Technologies, Inc.Date:\nFebruary 24, 2022 By: /s/ Dara Khosrowshahi Dara Khosrowshahi\nChief Executive Officer \nand Director(Principal Executive Officer)\nI,", "metadata": { "version": "v0", - "chunk_order": 592, "document_type": "pdf", - "unstructured_filetype": "application/pdf", - "unstructured_languages": [ - "eng" - ], - "unstructured_page_number": 84, - "partitioned_by_unstructured": true, "associated_query": "What was Uber's profit in 2020?" } }, { - "fragment_id": "d87084d1-c52a-5a4b-96ad-9fc1cb98bfc5", - "extraction_id": "8361bf60-bce2-56c2-b982-376a75e47d58", + "extraction_id": "82a9e673-6970-5f82-b3f7-cf27f06fd6d3", "document_id": "3e157b3a-8469-51db-90d9-52e7d896b49b", "user_id": "2acb499e-8428-543b-bd85-0d9098718220", "collection_ids": [], - "score": 0.6283430678639979, - "text": "Year Ended December 31, 2020\n\n1,000 49 1,189 (27) \u2014 (138) (34) 8,939 (4) 3,824\n\n247 125 2,628 (527) (891) (224) 38 1,379 (92) (4,327)\n\n8,209 34 12,067 $\n\n12,067 (349) 7,391 $\n\n332 $ 133\n\n412 $ 82\n\n14,224 4,229 \u2014 251 9 \u2014 \u2014\n\n\u2014 \u2014 \u2014 196 3,898 171 1,634\n\n2021\n\n675 107 1,484 (27) (307) (226) 101 1,780 (69) 65\n\n7,391 349 7,805\n\n449 87\n\n\u2014 \u2014 232 184 1,868 1,018 \u2014\n\nUBER TECHNOLOGIES, INC.\n\nNOTES TO CONSOLIDATED FINANCIAL STATEMENTS\n\nNote 1 \u2013 Description of Business and Summary of Significant Accounting Policies\n\nDescription of Business", + "score": 0.6114085779895093, + "text": "ments % %Net loss including non-controlling interests\n(61) %(3) %Less: net loss attributab\nle to non-controlling interests, net of tax % %Net loss attributable to Uber Technologies, Inc.\n(61) %(3) % Totals of percentage of", "metadata": { "version": "v0", - "chunk_order": 591, "document_type": "pdf", - "unstructured_filetype": "application/pdf", - "unstructured_languages": [ - "eng" - ], - "unstructured_page_number": 83, - "partitioned_by_unstructured": true, "associated_query": "What was Uber's profit in 2020?" } } @@ -468,14 +308,14 @@ "hybrid_rag": { "results": { "completion": { - "id": "chatcmpl-A9gpGNU67ZRKhvfOaZSOwJnzbVP2S", + "id": "chatcmpl-AC8ihJ9JWaUV4gxNiljwFM3ryCQaN", "choices": [ { "finish_reason": "stop", "index": 0, "logprobs": null, "message": { - "content": "John Snow is Ned Stark\u2019s bastard son. Since Catelyn is not his mother, he is not a proper member of the Stark family, and he often feels himself an outsider. 
He is also a highly capable swordsman and thinker, with a knack for piercing observations [1].", + "content": "The provided context does not contain any information specifically about John Snow. Therefore, I cannot provide an answer based on the given context.", "refusal": null, "role": "assistant", "function_call": null, @@ -483,15 +323,15 @@ } } ], - "created": 1726872938, + "created": 1727456819, "model": "gpt-4o-2024-05-13", "object": "chat.completion", "service_tier": null, - "system_fingerprint": "fp_3537616b13", + "system_fingerprint": "fp_e375328146", "usage": { - "completion_tokens": 57, - "prompt_tokens": 1426, - "total_tokens": 1483, + "completion_tokens": 26, + "prompt_tokens": 520, + "total_tokens": 546, "completion_tokens_details": { "reasoning_tokens": 0 } @@ -500,122 +340,120 @@ "search_results": { "vector_search_results": [ { - "fragment_id": "7cbdab86-1689-5779-81bd-62f7eb3ab36d", - "extraction_id": "866f85a0-b3d6-5fc5-9ca0-dbd2373eac58", - "document_id": "716fea3a-826b-5b27-8e59-ffbd1a35455a", + "extraction_id": "eb2bc121-0b00-5f70-8eb6-549e1fb1ed72", + "document_id": "3e157b3a-8469-51db-90d9-52e7d896b49b", "user_id": "2acb499e-8428-543b-bd85-0d9098718220", "collection_ids": [], - "score": 0.2625822126865387, - "text": "Eddard (Ned) Stark\n\nThe Lord of Winterfell and new Hand of the King. A devoted father and dutiful lord, he is best characterized by his strong sense of honor, and he strives to always do what is right, regardless of his personal feelings.\n\nCatelyn (Cat) Tully\n\nNed\u2019s wife and Lady Stark of Winterfell. She is intelligent, strong, and fiercely devoted to her family, leading her to seek out the person responsible for trying to kill her son Bran.\n\nDaenerys Stormborn Targaryen\n\nThe Dothraki khaleesi (queen) and Targaryen princess. She and her brother are the only surviving members of the Targaryen family, and she grows from a frightened girl to a confident ruler, while still maintaining her kindness, over the course of the novel.\n\nJon Snow\n\nNed Stark\u2019s bastard son. Since Catelyn is not his mother, he is not a proper member of the Stark family, and he often feels himself an outsider. He is also a highly capable swordsman and thinker, with a knack for piercing observations.", + "score": 0.28816498588532347, + "text": "to economic, social, weather, and regulatory conditions or other circumstances in each of these large metropolitan areas. Outbreaks of contagiousdiseases", "metadata": { "version": "v0", - "chunk_order": 0, - "document_type": "txt", - "unstructured_filetype": "text/plain", - "unstructured_languages": [ - "eng" - ], - "partitioned_by_unstructured": true, + "document_type": "pdf", "associated_query": "Who is John Snow?" } }, { - "fragment_id": "8e563fb1-4665-53a8-8a83-63a1f88e2aea", - "extraction_id": "f6bc23b5-bc80-5e49-9b55-25e9abe97073", - "document_id": "716fea3a-826b-5b27-8e59-ffbd1a35455a", + "extraction_id": "0c3d6bf6-4c46-5360-9316-763136f6f223", + "document_id": "c3291abf-8a4e-5d9d-80fd-232ef6fd8526", "user_id": "2acb499e-8428-543b-bd85-0d9098718220", "collection_ids": [], - "score": 0.2610678750594818, - "text": "Jeor Mormont (Commander Mormont)\n\nLord Commander of the Night\u2019s Watch at Castle Black. Commander Mormont is tough, old, and wise, and his men call him \u201cThe Old Bear.\u201d\n\nMaester Aemon\n\nThe chief man of learning at Castle Black. Despite his blind white eyes, Maester Aemon sees and speaks the truth in cryptic ways. 
Though few people realize it, Aemon is one of the few surviving members of the Targaryen family, but he has always put his vows to the Night\u2019s Watch ahead of any family loyalties.\n\nSamwell (Sam) Tarly\n\nA new recruit to the Night\u2019s Watch who is fat and cowardly but very smart. Sam loves to read and eat but hates to fight, and he quickly becomes one of Jon Snow\u2019s closest companions at the Wall.\n\nSer Allister Thorne\n\nCastle Black\u2019s resentful master-at-arms. He hard on the new recruits to the Night\u2019s Watch and seems to enjoy making them suffer, causing Jon to rebel against him. During Robert\u2019s rebellion against the former king, he was a Targaryen loyalist.", + "score": 0.28385223246501545, + "text": "which set the groundwork for the development of modern science.", "metadata": { "version": "v0", - "chunk_order": 7, "document_type": "txt", - "unstructured_filetype": "text/plain", - "unstructured_languages": [ - "eng" - ], - "partitioned_by_unstructured": true, "associated_query": "Who is John Snow?" } }, { - "fragment_id": "eaf48cfe-592e-55fa-9f07-613a4f221c45", - "extraction_id": "fdf6127b-e623-58bc-a50b-b7e7b040c03a", - "document_id": "716fea3a-826b-5b27-8e59-ffbd1a35455a", + "extraction_id": "cd4e458e-9536-5e9c-a6c9-8a025b6d990a", + "document_id": "9fbe403b-c11c-5aae-8ade-ef22980c3ad1", "user_id": "2acb499e-8428-543b-bd85-0d9098718220", "collection_ids": [], - "score": 0.2574058771133423, - "text": "Varys (The Spider)\n\nThe Red Keep\u2019s master of whispers and a eunuch. His role in the court is to run a network of spies and keep the king informed, and he often uses what he knows to manipulate those around him, including the king.\n\nRobert Baratheon\n\nThe corpulent king of Westeros. He loves to fight, drink, and sleep with women, and he hates the duties of ruling. He and Ned are long-time friends, and he was engaged to Ned\u2019s sister until she died.\n\nSer Jorah Mormont\n\nAn exiled knight who serves unofficially as Daenerys\u2019s chief advisor. Though he was exiled by Ned Stark for selling slaves, he is intelligent, valiant, and a great fighter. He swears allegiance to Viserys as true king of Westeros, but he also feeds information about the Targaryens back to Varys.", + "score": 0.28305502038866215, + "text": "which set the groundwork for the development of modern science.", "metadata": { - "version": "v0", - "chunk_order": 3, + "title": "aristotle_v2.txt", + "version": "v1", "document_type": "txt", - "unstructured_filetype": "text/plain", - "unstructured_languages": [ - "eng" - ], - "partitioned_by_unstructured": true, "associated_query": "Who is John Snow?" } }, { - "fragment_id": "3cb1c2db-01e4-5ea8-a39f-31f5949637f8", - "extraction_id": "02b64e7c-5aa5-5380-8fa0-3d8b64866aa8", - "document_id": "716fea3a-826b-5b27-8e59-ffbd1a35455a", + "extraction_id": "07f109ee-7744-58cc-b0ac-0d98e979f674", + "document_id": "3e157b3a-8469-51db-90d9-52e7d896b49b", "user_id": "2acb499e-8428-543b-bd85-0d9098718220", "collection_ids": [], - "score": 0.20913409943187866, - "text": "Aerys II Targaryen\n\nKing of Westeros before Robert Baratheon. He was known as The Mad King because of his cruelty. Aerys murdered Ned\u2019s older brother, Brandon Stark, in the Red Keep\u2019s throne room. At the end of the war that followed, Jaime Lannister slew Aerys in the same room.\n\nRhaegar Targaryen\n\nThe heir to Aerys and older brother of Daenerys and Viserys. 
Rhaegar kidnapped Lyanna Stark, Robert\u2019s betrothed, helping to set in motion the events that led to Robert\u2019s Rebellion. The war effectively ended when Robert slew Rhaegar with his warhammer on the Trident River.\n\nJon Arryn\n\nThe recently deceased Lord of the Eyrie and Hand of the King. Jon Arryn fostered Ned Stark and Robert Baratheon at the Eyrie. When Robert became king, Jon Arryn served as his Hand until his murder.", + "score": 0.25786875141449306, + "text": "/or can directly impact platform users;\npublic health concerns or emergencies, such as the current COVID-19 pandemic and other highly communicable diseases or viruses, outbreaks ofwhich have from time to t", "metadata": { "version": "v0", - "chunk_order": 10, - "document_type": "txt", - "unstructured_filetype": "text/plain", - "unstructured_languages": [ - "eng" - ], - "partitioned_by_unstructured": true, + "document_type": "pdf", "associated_query": "Who is John Snow?" } }, { - "fragment_id": "5d09c80f-ba83-5204-a6b3-f08831e150b0", - "extraction_id": "f86a905c-8d82-52ff-ad72-a800ca3af6f4", - "document_id": "716fea3a-826b-5b27-8e59-ffbd1a35455a", + "extraction_id": "8892b315-9e2c-5333-8c98-1b50a0e596c4", + "document_id": "5b1bd54f-4d70-56b9-a017-a618bc75f94c", "user_id": "2acb499e-8428-543b-bd85-0d9098718220", "collection_ids": [], - "score": 0.1912464201450348, - "text": "Illyrio Mopatis\n\nAn obese merchant from the Free Cities who helps Daenerys and Viserys Targaryen. Illyrio is very rich and very well-informed. He is quick to please, especially when there is a possibility that his kindness will help him avoid trouble or gain greater fortune in the future.\n\nSer Barristan Selmy\n\nLord Commander of the Kingsguard. He has served kings Jaehaerys, Aerys II, and Robert. Though he has grown old, Barristan \u201cThe Bold\u201d is a formidable fighter. He is, and has always been, an honorable knight.\n\nRenly Baratheon\n\nThe youngest of the three Baratheon brothers. Renly is lighthearted and opportunistic, and unexpectedly ambitious. He serves on Robert\u2019s royal council.\n\nStannis Baratheon\n\nThe middle brother of the three Baratheons. Stannis does not appear in A Game of Thrones, but as the brother of the king, he is a potential heir to the throne. Stannis does not seem to be well-liked.", + "score": 0.24598884630336215, + "text": "completely as soon as it was discovered \u2014 that whatever the church thought\nof the heliocentric model, astronomers must have been convinced as\nsoon as Copernicus proposed it. Far, in fact, from it. Copernicus", "metadata": { "version": "v0", - "chunk_order": 8, - "document_type": "txt", - "unstructured_filetype": "text/plain", - "unstructured_languages": [ - "eng" - ], - "partitioned_by_unstructured": true, + "document_type": "html", "associated_query": "Who is John Snow?" } }, { - "fragment_id": "d143b410-f9a4-5f3b-bb46-fb412eda8201", - "extraction_id": "37f940e2-18f4-50f5-93aa-cec422fc9211", - "document_id": "716fea3a-826b-5b27-8e59-ffbd1a35455a", + "extraction_id": "59c4fd2c-1bcf-52ef-b2d1-81856e409a50", + "document_id": "5b1bd54f-4d70-56b9-a017-a618bc75f94c", "user_id": "2acb499e-8428-543b-bd85-0d9098718220", "collection_ids": [], - "score": 0.1862841248512268, - "text": "Sandor (The Hound) Clegane\n\nPrince Joff\u2019s unofficial bodyguard. Proud that he is not a knight, The Hound appears to have no scruples whatsoever and does what Joffrey orders, however cruel or unjust, without question. 
His face is scarred on one side by extensive burning inflicted by his brother, Gregor.\n\nRobb Stark\n\nThe eldest Stark son and thus heir to Ned Stark. Though just fourteen, he is mature beyond his age as well as being brave and dutiful like his father.\n\nMaester Luwin\n\nCounselor to Ned, Catelyn, and Robb. Luwin is old and wise, and his advice proves indispensible to the Starks.\n\nTheon Greyjoy\n\nThe Starks\u2019s ward and Robb\u2019s best friend. Ned Stark took the young Theon, now nineteen, as a ward after putting down a rebellion led by the Greyjoy family, and Theon consequently grew up with the Stark children as something like a brother.", + "score": 0.24115355471011057, + "text": "Copernican Revolution, where you can see him at work developing the\nidea.[4]\nThis is one reason people with a touch of Asperger's may have\nan advantage in discovering new ideas. They're always flying on\ninstruments.[5]", + "metadata": { + "version": "v0", + "document_type": "html", + "associated_query": "Who is John Snow?" + } + }, + { + "extraction_id": "ebe211aa-6385-5965-a6c8-637a97bfa672", + "document_id": "3e157b3a-8469-51db-90d9-52e7d896b49b", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "collection_ids": [], + "score": 0.23118500523656615, + "text": "r long-term shifts in the travel patterns of consumers away from metropolitan areas, due to health concerns regarding epidemicsor", + "metadata": { + "version": "v0", + "document_type": "pdf", + "associated_query": "Who is John Snow?" + } + }, + { + "extraction_id": "06db6a29-d8cd-53a1-b32b-afb9369ff4f6", + "document_id": "d421207a-d799-5806-8d67-46b2005b15d4", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "collection_ids": [], + "score": 0.22753206163262585, + "text": "https://www.ycombinator.com/companies/datacurve\nhttps://www.ycombinator.com/companies/ruby-card\nhttps://www.ycombinator.com/companies/schemeflow\nhttps://www.ycombinator.com/companies/zentail\nhttps://www.ycombinator.com/companies/truemetrics", "metadata": { "version": "v0", - "chunk_order": 5, "document_type": "txt", - "unstructured_filetype": "text/plain", - "unstructured_languages": [ - "eng" - ], - "partitioned_by_unstructured": true, + "associated_query": "Who is John Snow?" + } + }, + { + "extraction_id": "48fe0852-91df-57ac-9b32-24aecc637109", + "document_id": "5b1bd54f-4d70-56b9-a017-a618bc75f94c", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "collection_ids": [], + "score": 0.22600733029754994, + "text": "published the heliocentric model in 1532, but it wasn't till the\nmid seventeenth century that the balance of scientific opinion\nshifted in its favor.\n[5]Few understand how feeble new ideas look when they first appear.", + "metadata": { + "version": "v0", + "document_type": "html", "associated_query": "Who is John Snow?" } } @@ -630,7 +468,7 @@ "choices": [ { "message": { - "content": "[{\"fragment_id\": \"94684f2d-fe60-5ba3-b1e8-0a921841bac9\", \"extraction_id\": \"fde39a49-00fc-5622-addd-13eb9c3bad4b\", \"document_id\": \"2f576170-c4f9-5141-a910-a0924f341de4\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.6675721804840684, \"text\": \"Total Stockholders\\u2019 Equity (Deficit) 1,676,163\\n\\n5,184\\n\\n\\u2014\\n\\n(26,298)\\n\\n28,637\\n\\n(1) 721,710 (2,038) (1,009,359) 1,393,998\\n\\nLyft, Inc. 
Consolidated Statements of Cash Flows (in thousands)\\n\\n2021\\n\\nCash flows from operating activities Net loss Adjustments to reconcile net loss to net cash used in operating activities\\n\\n$\\n\\n(1,009,359)\\n\\nDepreciation and amortization Stock-based compensation Amortization of premium on marketable securities Accretion of discount on marketable securities Amortization of debt discount and issuance costs Deferred income tax from convertible senior notes Loss on sale and disposal of assets, net Gain on divestiture Other Changes in operating assets and liabilities, net effects of acquisition\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 572, \"document_type\": \"pdf\", \"unstructured_filetype\": \"application/pdf\", \"unstructured_languages\": [\"eng\"], \"unstructured_page_number\": 82, \"partitioned_by_unstructured\": true, \"associated_query\": \"What was Lyft's profit in 2020?\"}}, {\"fragment_id\": \"0d5c5803-8846-59d4-8ae3-3696b718f162\", \"extraction_id\": \"fa3d2549-593a-5a80-88a2-b2d031d79771\", \"document_id\": \"2f576170-c4f9-5141-a910-a0924f341de4\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.6647443571534382, \"text\": \"79\\n\\n2019 3,615,960\\n\\n2,176,469 636,116 1,505,640 814,122 1,186,093 6,318,440 (2,702,480) \\u2014 102,595 (2,599,885) 2,356 (2,602,241)\\n\\n(11.44)\\n\\n227,498\\n\\n81,321 75,212 971,941 72,046 398,791\\n\\nLyft, Inc. Consolidated Statements of Comprehensive Loss (in thousands)\\n\\nNet loss Other comprehensive income (loss)\\n\\n$\\n\\nYear Ended December 31, 2020 (1,752,857) $\\n\\n2021 (1,009,359) $\\n\\nForeign currency translation adjustment Unrealized gain (loss) on marketable securities, net of taxes\\n\\nOther comprehensive income (loss)\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 567, \"document_type\": \"pdf\", \"unstructured_filetype\": \"application/pdf\", \"unstructured_languages\": [\"eng\"], \"unstructured_page_number\": 79, \"partitioned_by_unstructured\": true, \"associated_query\": \"What was Lyft's profit in 2020?\"}}, {\"fragment_id\": \"5d663f0a-c9a9-580f-818b-5ca0a1ca73f2\", \"extraction_id\": \"12916d2c-0691-528c-86aa-6784c1f35c55\", \"document_id\": \"2f576170-c4f9-5141-a910-a0924f341de4\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.6628832616140414, \"text\": \"Overview\\n\\nLyft, Inc (the \\u201cCompany\\u201d or \\u201cLyft\\u201d) started a movement to revolutionize transportation. In 2012, we launched our peer-to-peer marketplace for on-demand ridesharing and have continued to pioneer innovations aligned with our mission. Today, Lyft is one of the largest multimodal transportation networks in the United States and Canada.\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 16, \"document_type\": \"pdf\", \"unstructured_filetype\": \"application/pdf\", \"unstructured_languages\": [\"eng\"], \"unstructured_page_number\": 5, \"partitioned_by_unstructured\": true, \"associated_query\": \"What was Lyft's profit in 2020?\"}}, {\"fragment_id\": \"7133acca-f147-5f43-b2e0-71228282fda0\", \"extraction_id\": \"212d8290-564d-5039-93cc-00cea31a1771\", \"document_id\": \"2f576170-c4f9-5141-a910-a0924f341de4\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.6603014862717096, \"text\": \"Revenues from Contracts with Customers (ASC 606)\\n\\nWe generate substantially all our revenue from our ridesharing marketplace that connects drivers and riders. 
We recognize revenue from fees paid by drivers for use of our Lyft Platform offerings in accordance with ASC 606 as described in Note 2 of the notes to our consolidated financial statements. Drivers enter into terms of service (\\u201cToS\\u201d) with us in order to use our Lyft Driver App.\\n\\n58\\n\\n2019 to 2020 % Change\\n\\n19.0% (1.8)% (6.7)% 2.3%\\n\\nWe provide a service to drivers to complete a successful transportation service for riders. This service includes on-demand lead generation that assists drivers to find, receive and fulfill on-demand requests from riders seeking transportation services and related collection activities using our Lyft Platform. As a result, our single performance obligation in the transaction is to connect drivers with riders to facilitate the completion of a successful transportation service for riders.\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 459, \"document_type\": \"pdf\", \"unstructured_filetype\": \"application/pdf\", \"unstructured_languages\": [\"eng\"], \"unstructured_page_number\": 58, \"partitioned_by_unstructured\": true, \"associated_query\": \"What was Lyft's profit in 2020?\"}}, {\"fragment_id\": \"1a76991f-fa85-59b2-b522-700d47b2d809\", \"extraction_id\": \"0773cd62-b39f-517f-b6f1-be788b38374d\", \"document_id\": \"2f576170-c4f9-5141-a910-a0924f341de4\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.6477650017878516, \"text\": \"Corporate Information\\n\\nWe were incorporated in 2007 as Bounder Web, Inc., a Delaware corporation. In 2008, we changed our name to Zimride, Inc. We founded Lyft in 2012 and\\n\\nchanged our name to Lyft, Inc. in 2013 when we sold the assets related to our Zimride operations.\\n\\n13\\n\\nAvailable Information\\n\\nOur website is located at www.lyft.com, and our investor relations website is located at investor.lyft.com. Copies of our Annual Report on Form 10-K, Quarterly Reports on Form 10-Q, Current Reports on Form 8-K and amendments to these reports filed or furnished pursuant to Section 13(a) or 15(d) of the Exchange Act, as amended, are available free of charge on our investor relations website as soon as reasonably practicable after we file such material electronically with or furnish it to the Securities and Exchange Commission (the \\u201cSEC\\u201d). The SEC also maintains a website that contains our SEC filings at www.sec.gov.\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 82, \"document_type\": \"pdf\", \"unstructured_filetype\": \"application/pdf\", \"unstructured_languages\": [\"eng\"], \"unstructured_page_number\": 13, \"partitioned_by_unstructured\": true, \"associated_query\": \"What was Lyft's profit in 2020?\"}}, {\"fragment_id\": \"17673edc-6fb7-577d-9bca-457c5745382d\", \"extraction_id\": \"bde94416-baaa-573a-9bc7-86ddf28535b1\", \"document_id\": \"2f576170-c4f9-5141-a910-a0924f341de4\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.6406270265579269, \"text\": \"We generate substantially all of our revenue from our ridesharing marketplace that connects drivers and riders. We collect service fees and commissions from drivers for their use of our ridesharing marketplace. As drivers accept more rider leads and complete more rides, we earn more revenue. 
We also generate revenue from riders renting Light Vehicles, drivers renting vehicles through Express Drive, Lyft Rentals renters, Lyft Driver Center and Lyft Auto Care users, and by making our ridesharing marketplace available to organizations through our Lyft Business offerings, such as our Concierge and Corporate Business Travel programs. In the second quarter of 2021, we began generating revenues from licensing and data access agreements, primarily with third-party autonomous vehicle companies.\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 20, \"document_type\": \"pdf\", \"unstructured_filetype\": \"application/pdf\", \"unstructured_languages\": [\"eng\"], \"unstructured_page_number\": 5, \"partitioned_by_unstructured\": true, \"associated_query\": \"What was Lyft's profit in 2020?\"}}, {\"fragment_id\": \"8f6299b7-4582-5bac-8c74-7ca57714aefa\", \"extraction_id\": \"310e9e1f-25d3-5287-a905-5446f661d6da\", \"document_id\": \"2f576170-c4f9-5141-a910-a0924f341de4\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.6395331787506555, \"text\": \"Revenue Recognition\\n\\nThe Company generates its revenue from its multimodal transportation networks that offer access to a variety of transportation options through the Lyft Platform and mobile-based applications. Substantially all of the Company\\u2019s revenue is generated from its ridesharing marketplace that connects drivers and riders and is recognized in accordance with Accounting Standards Codification Topic 606 (\\u201cASC 606\\u201d). In addition, the Company generates revenue in accordance with ASC 606 from licensing and data access, primarily with third-party autonomous vehicle companies. The Company also generates rental revenue from Flexdrive, its network of Light Vehicles and Lyft Rentals, which is recognized in accordance with Accounting Standards Codification Topic 842 (\\u201cASC 842\\u201d).\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 591, \"document_type\": \"pdf\", \"unstructured_filetype\": \"application/pdf\", \"unstructured_languages\": [\"eng\"], \"unstructured_page_number\": 86, \"partitioned_by_unstructured\": true, \"associated_query\": \"What was Lyft's profit in 2020?\"}}, {\"fragment_id\": \"ff837ea0-0062-59ca-bb4f-aa7a1c9cecd0\", \"extraction_id\": \"41e4db8a-0478-5015-8263-cde0618ec626\", \"document_id\": \"2f576170-c4f9-5141-a910-a0924f341de4\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.6308713775353904, \"text\": \"Light Vehicle Rider and Lyft Rentals Renter Incentives\\n\\nIncentives offered to Light Vehicle riders and Lyft Rentals renters were not material for the years ended December 31, 2021 and 2020.\\n\\nFor the years ended December 31, 2021, 2020 and 2019, in relation to the driver, rider, Light Vehicle riders and Lyft Rentals renters incentive programs, the Company recorded $1.3 billion, $390.8 million and $560.3 million as a reduction to revenue and $64.7 million, $135.0 million and $381.5 million as sales and marketing expense, respectively.\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 611, \"document_type\": \"pdf\", \"unstructured_filetype\": \"application/pdf\", \"unstructured_languages\": [\"eng\"], \"unstructured_page_number\": 89, \"partitioned_by_unstructured\": true, \"associated_query\": \"What was Lyft's profit in 2020?\"}}, {\"fragment_id\": \"c24d144b-c22d-5c08-876f-a03e43620aa4\", \"extraction_id\": \"2a4caab0-6193-5263-8eab-c7763e8f38e8\", \"document_id\": 
\"2f576170-c4f9-5141-a910-a0924f341de4\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.6294938325882002, \"text\": \"Software Development Costs\\n\\nThe Company incurs costs related to developing the Lyft Platform and related support systems. The Company capitalizes development costs related to the Lyft Platform and related support systems once the preliminary project stage is complete and it is probable that the project will be completed and the software will be used to perform the function intended. The Company capitalized $16.2 million and $12.8 million of software development costs during the year ended December 31, 2021 and 2020, respectively. For the year ended December 31, 2019, capitalized software development costs was not material.\\n\\nInsurance Reserves\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 649, \"document_type\": \"pdf\", \"unstructured_filetype\": \"application/pdf\", \"unstructured_languages\": [\"eng\"], \"unstructured_page_number\": 94, \"partitioned_by_unstructured\": true, \"associated_query\": \"What was Lyft's profit in 2020?\"}}, {\"fragment_id\": \"83985ff5-653f-53eb-b137-f616b4292f51\", \"extraction_id\": \"0919e3d5-03b0-5d54-b5f3-7f6ad4534412\", \"document_id\": \"2f576170-c4f9-5141-a910-a0924f341de4\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.6223346365889701, \"text\": \"32.1\\u2020\\n\\nCertifications of Principal Executive Officer and Principal Financial Officer pursuant to 18 U.S.C. Section 1350, as adopted pursuant to Section 906 of the Sarbanes-Oxley Act of 2002.\\n\\n101\\n\\nThe following financial information from Lyft, Inc.\\u2019s Annual Report on Form 10-K for the fiscal year ended December 31, 2021 formatted in Inline XBRL (eXtensible Business Reporting Language): (i) Consolidated Statements of Operations for the fiscal years ended December 31, 2021, 2020 and 2019; (ii) Consolidated Statements of Comprehensive Income (Loss) for the fiscal years ended December 31, 2021, 2020, and 2019; (iii) Consolidated Balance Sheets as of December 31, 2021 and 2020; (iv) Consolidated Statements of Cash Flows for the fiscal years ended December 31, 2021, 2020, and 2019; (v) Consolidated Statements of Redeemable Convertible Preferred Stock and Stockholders\\u2019 Equity for the fiscal years ended December 31, 2021, 2020, and 2019; and (vi) Notes to the Consolidated Financial Statements.\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 817, \"document_type\": \"pdf\", \"unstructured_filetype\": \"application/pdf\", \"unstructured_languages\": [\"eng\"], \"unstructured_page_number\": 127, \"partitioned_by_unstructured\": true, \"associated_query\": \"What was Lyft's profit in 2020?\"}}]Lyft's profit in 2020 was a net loss of $1,752,857,000 [2]." 
+ "content": "[{\"extraction_id\": \"dccb91ff-5f99-5d05-ac2c-f816666722b3\", \"document_id\": \"2f576170-c4f9-5141-a910-a0924f341de4\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.7521053104337878, \"text\": \"Lyft, Inc.Consolidated Statements of Operations\\n(in thousands, except for per share data\\n) Year Ended December 31,\\n2021\\n2020 2019 Revenue\\n$ 3,208,323 $ 2,364,681 $ 3,615,960 Costs and expenses\\nCost of revenue\", \"metadata\": {\"version\": \"v0\", \"document_type\": \"pdf\", \"associated_query\": \"What was Lyft's profit in 2020?\"}}, {\"extraction_id\": \"b10d9872-12b7-595e-a52a-3aec28c9979c\", \"document_id\": \"2f576170-c4f9-5141-a910-a0924f341de4\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.7355356970110133, \"text\": \"of Lyft rideshare and fleet data. For the year ended December 31, 2021, the Company recognized a $119.3 million pre-tax gain for the divestiture of certainassets\", \"metadata\": {\"version\": \"v0\", \"document_type\": \"pdf\", \"associated_query\": \"What was Lyft's profit in 2020?\"}}, {\"extraction_id\": \"0bcc564d-4acc-5b46-bc0a-7b3789772c46\", \"document_id\": \"2f576170-c4f9-5141-a910-a0924f341de4\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.7130040355327264, \"text\": \"Lyft, Inc.Consolidated Statements of Cash Flows\\n(in thousands)\\nYear Ended December 31,\\n2021\\n2020 2019 Cash flows from operating activities\\nNet loss\", \"metadata\": {\"version\": \"v0\", \"document_type\": \"pdf\", \"associated_query\": \"What was Lyft's profit in 2020?\"}}, {\"extraction_id\": \"932fc155-6aaf-58f5-ad30-516acd19c22a\", \"document_id\": \"2f576170-c4f9-5141-a910-a0924f341de4\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.6921069209511935, \"text\": \"Lyft, Inc.Consolidated Statements of Comprehensive Loss\\n(in thousands) \\nYear Ended December 31,\\n2021\\n2020 2019 Net loss\\n$ (1,009,359) $ (1,752,857) $ (2,602,241) Other comprehensive incom\\ne (loss)Foreign currency transla\", \"metadata\": {\"version\": \"v0\", \"document_type\": \"pdf\", \"associated_query\": \"What was Lyft's profit in 2020?\"}}, {\"extraction_id\": \"6b9c7827-ceea-575d-9f0c-f36acbe3e67e\", \"document_id\": \"2f576170-c4f9-5141-a910-a0924f341de4\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.6723841223258826, \"text\": \"101\\nThe following financial in formation from Lyft, Inc.s Annual Report on Form10-K for the fiscal year \\nended December 31, 2021 formatted in Inline XBRL(eXtensible Business Reporting Languag\\ne): (i) Consolidated Statements ofOperations for the fisca\", \"metadata\": {\"version\": \"v0\", \"document_type\": \"pdf\", \"associated_query\": \"What was Lyft's profit in 2020?\"}}, {\"extraction_id\": \"71e00c81-feef-5e8b-9811-5dc70880b351\", \"document_id\": \"2f576170-c4f9-5141-a910-a0924f341de4\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.6665983001411494, \"text\": \"Lyft, Inc.Consolidated Balance Sheets\\n(in thousands, except for share and per share da\\nta) December 31,\\n2021\\n2020 Assets\\nCurrent assets\\nCash and cash equivalents\\n$ 457,325 $ 319,734 Short-term investments\", \"metadata\": {\"version\": \"v0\", \"document_type\": \"pdf\", \"associated_query\": \"What was Lyft's profit in 2020?\"}}, {\"extraction_id\": \"d3c64d50-2c28-5e4b-bc15-d19e3ebd28b3\", 
\"document_id\": \"2f576170-c4f9-5141-a910-a0924f341de4\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.6555785742057231, \"text\": \"Lyft rideshare and fleet data. The Company will receive, in total, approximately $515 million in cash in connection with this transaction, with $165 million paid upfrontand\", \"metadata\": {\"version\": \"v0\", \"document_type\": \"pdf\", \"associated_query\": \"What was Lyft's profit in 2020?\"}}, {\"extraction_id\": \"6b24b706-621b-5585-bd7c-c36d02c2ca31\", \"document_id\": \"2f576170-c4f9-5141-a910-a0924f341de4\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.6316942415601919, \"text\": \"Lyft, Inc.Notes to Consolidated Financial Statements\\n1. Description of Business and Basis of Presentation\\nOrganization and Description of Business\\nLyft,\", \"metadata\": {\"version\": \"v0\", \"document_type\": \"pdf\", \"associated_query\": \"What was Lyft's profit in 2020?\"}}, {\"extraction_id\": \"8a038741-12a0-5772-a92a-fd6fc28ed543\", \"document_id\": \"2f576170-c4f9-5141-a910-a0924f341de4\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.6263277602801762, \"text\": \"ride in the Lyft App. This update resulted in a 0.01% increase, or an additional 927 Active Riders in the fourth quarter of 2020. Prior to the fourth quarter of 2020, allConcierge riders were exc\", \"metadata\": {\"version\": \"v0\", \"document_type\": \"pdf\", \"associated_query\": \"What was Lyft's profit in 2020?\"}}, {\"extraction_id\": \"e15d31fb-7cf0-5271-a5c4-f404dd8c13b9\", \"document_id\": \"2f576170-c4f9-5141-a910-a0924f341de4\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.6254095881689086, \"text\": \"We\\n have audited the accompanying consolidated balance sheets of Lyft, Inc. and its subsidiaries (the Company) as of December 31, 2021 and 2020, and therelated consolidated statements of\", \"metadata\": {\"version\": \"v0\", \"document_type\": \"pdf\", \"associated_query\": \"What was Lyft's profit in 2020?\"}}]Lyft did not make a profit in 2020. Instead, the company reported a net loss of $1,752,857,000 for the year ended December 31, 2020 [4]." 
      }
    }
  ]
diff --git a/py/tests/regression/observed_outputs/test_user_management.json b/py/tests/regression/observed_outputs/test_user_management.json
index 9b334e838..334326110 100644
--- a/py/tests/regression/observed_outputs/test_user_management.json
+++ b/py/tests/regression/observed_outputs/test_user_management.json
@@ -1,15 +1,15 @@
{
  "register_user": {
    "results": {
-      "id": "05f95944-be0f-5913-b04c-0eaa5cf45a39",
-      "email": "test_9e56903b@example.com",
+      "id": "763b61e9-906f-5d22-bc64-d6359bd03fa5",
+      "email": "test_4c8d38b7@example.com",
      "is_active": true,
      "is_superuser": false,
-      "created_at": "2024-09-20T22:58:45.796169Z",
-      "updated_at": "2024-09-20T22:58:45.796169Z",
+      "created_at": "2024-09-27T18:10:40.666080Z",
+      "updated_at": "2024-09-27T18:10:40.666080Z",
      "is_verified": false,
      "collection_ids": [],
-      "hashed_password": "$2b$12$febFeILwiLIv21raMwdHGuNNl3zKXbxymHlnqMw/BUEvtAzdHa1OK",
+      "hashed_password": "$2b$12$Hs4sNysr/D.9FCfTt5VgDe3uwOM38yrfQC4kXG6B/OnhQwkBV/uGu",
      "verification_code_expiry": null,
      "name": null,
      "bio": null,
@@ -19,26 +19,26 @@
  "login_user": {
    "results": {
      "access_token": {
-        "token": "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiJ0ZXN0XzllNTY5MDNiQGV4YW1wbGUuY29tIiwiZXhwIjoxNzI2ODc2NzI2LjA2ODg5NiwidG9rZW5fdHlwZSI6ImFjY2VzcyJ9.smZcUPGlas4ql_OFpxa9uaZwIEagcERBMuemGIAerh0",
+        "token": "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiJ0ZXN0XzRjOGQzOGI3QGV4YW1wbGUuY29tIiwiZXhwIjoxNzI3NDY0MjQwLjk0MDY4MiwidG9rZW5fdHlwZSI6ImFjY2VzcyJ9.hgdGN36hMp-Seed4dXY3uhbATa2FyMG1ikPFe1FrS6U",
        "token_type": "access"
      },
      "refresh_token": {
-        "token": "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiJ0ZXN0XzllNTY5MDNiQGV4YW1wbGUuY29tIiwiZXhwIjoxNzI3NDc3OTI2LCJ0b2tlbl90eXBlIjoicmVmcmVzaCJ9.u_TcrN_i0KTWRPwkb1-jsbKjJbsu8T6I3h0m96pwIx4",
+        "token": "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiJ0ZXN0XzRjOGQzOGI3QGV4YW1wbGUuY29tIiwiZXhwIjoxNzI4MDY1NDQwLCJ0b2tlbl90eXBlIjoicmVmcmVzaCJ9.3PiG-JsbdcwZzbmF6CnJ93GhDZT3EOjgso_xymAZzvk",
        "token_type": "refresh"
      }
    }
  },
  "user_info": {
    "results": {
-      "id": "05f95944-be0f-5913-b04c-0eaa5cf45a39",
-      "email": "test_9e56903b@example.com",
+      "id": "763b61e9-906f-5d22-bc64-d6359bd03fa5",
+      "email": "test_4c8d38b7@example.com",
      "is_active": true,
      "is_superuser": false,
-      "created_at": "2024-09-20T22:58:45.796169Z",
-      "updated_at": "2024-09-20T22:58:45.796169Z",
+      "created_at": "2024-09-27T18:10:40.666080Z",
+      "updated_at": "2024-09-27T18:10:40.666080Z",
      "is_verified": true,
      "collection_ids": [],
-      "hashed_password": "$2b$12$febFeILwiLIv21raMwdHGuNNl3zKXbxymHlnqMw/BUEvtAzdHa1OK",
+      "hashed_password": "$2b$12$Hs4sNysr/D.9FCfTt5VgDe3uwOM38yrfQC4kXG6B/OnhQwkBV/uGu",
      "verification_code_expiry": null,
      "name": null,
      "bio": null,
@@ -52,12 +52,12 @@
  },
  "update_profile": {
    "results": {
-      "id": "05f95944-be0f-5913-b04c-0eaa5cf45a39",
-      "email": "test_9e56903b@example.com",
+      "id": "763b61e9-906f-5d22-bc64-d6359bd03fa5",
+      "email": "test_4c8d38b7@example.com",
      "is_active": true,
      "is_superuser": false,
-      "created_at": "2024-09-20T22:58:45.796169Z",
-      "updated_at": "2024-09-20T22:58:46.618261Z",
+      "created_at": "2024-09-27T18:10:40.666080Z",
+      "updated_at": "2024-09-27T18:10:41.536066Z",
      "is_verified": true,
      "collection_ids": [],
      "hashed_password": null,
@@ -70,11 +70,11 @@
  "refresh_token": {
    "results": {
      "access_token": {
-        "token": "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiJ0ZXN0XzllNTY5MDNiQGV4YW1wbGUuY29tIiwiZXhwIjoxNzI2ODc2NzI2LjY0MzM0NSwidG9rZW5fdHlwZSI6ImFjY2VzcyJ9.UYcp5cwnOvEtCNm-nqQzI8TWw7mmVn_cSEcEDtTltLI",
+        "token": "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiJ0ZXN0XzRjOGQzOGI3QGV4YW1wbGUuY29tIiwiZXhwIjoxNzI3NDY0MjQxLjU1MjMwNywidG9rZW5fdHlwZSI6ImFjY2VzcyJ9.jphB-DO_Ny2sm_AEI05kvrGkcJrI7rL_OX-JBdZ2kUs",
        "token_type": "access"
      },
      "refresh_token": {
-        "token": "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiJ0ZXN0XzllNTY5MDNiQGV4YW1wbGUuY29tIiwiZXhwIjoxNzI3NDc3OTI2LCJ0b2tlbl90eXBlIjoicmVmcmVzaCJ9.u_TcrN_i0KTWRPwkb1-jsbKjJbsu8T6I3h0m96pwIx4",
+        "token": "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiJ0ZXN0XzRjOGQzOGI3QGV4YW1wbGUuY29tIiwiZXhwIjoxNzI4MDY1NDQxLCJ0b2tlbl90eXBlIjoicmVmcmVzaCJ9.QgucJVlZsP2MTVqvUrOma6wv-V2fmKq9FkVleKsVvMM",
        "token_type": "refresh"
      }
    }
@@ -87,37 +87,21 @@
      "is_superuser": true,
      "is_active": true,
      "is_verified": true,
-      "created_at": "2024-09-20T22:49:55.902324Z",
-      "updated_at": "2024-09-20T22:49:55.902324Z",
+      "created_at": "2024-09-27T18:10:22.934012Z",
+      "updated_at": "2024-09-27T18:10:22.934012Z",
      "collection_ids": [],
-      "num_files": 15,
-      "total_size_in_bytes": 8862496,
-      "document_ids": [
-        "3e157b3a-8469-51db-90d9-52e7d896b49b",
-        "30f950f0-c692-57c5-b6ec-ff78ccf5ccdc",
-        "7b0f40c5-2ace-5781-ae35-ead99ddee8c5",
-        "451adbbd-e24e-5c9b-80a8-f424b6c7accb",
-        "e797da22-8c5d-54e5-bed5-a55954cf6bf9",
-        "c3291abf-8a4e-5d9d-80fd-232ef6fd8526",
-        "5b1bd54f-4d70-56b9-a017-a618bc75f94c",
-        "716fea3a-826b-5b27-8e59-ffbd1a35455a",
-        "57eec3df-cf68-5559-a80d-ae3fb55b9af1",
-        "01d514a4-5cb1-5c86-be8c-0c5c312a02c9",
-        "d421207a-d799-5806-8d67-46b2005b15d4",
-        "db02076e-989a-59cd-98d5-e24e15a0bd27",
-        "52e12576-090f-59db-91f4-6d4b2e29ae6c",
-        "7c1105fc-8f62-5a8f-ac81-fe88f3ec9e4c",
-        "2f576170-c4f9-5141-a910-a0924f341de4"
-      ]
+      "num_files": 0,
+      "total_size_in_bytes": 0,
+      "document_ids": []
    },
    {
-      "user_id": "05f95944-be0f-5913-b04c-0eaa5cf45a39",
-      "email": "test_9e56903b@example.com",
+      "user_id": "763b61e9-906f-5d22-bc64-d6359bd03fa5",
+      "email": "test_4c8d38b7@example.com",
      "is_superuser": false,
      "is_active": true,
      "is_verified": true,
-      "created_at": "2024-09-20T22:58:45.796169Z",
-      "updated_at": "2024-09-20T22:58:46.618261Z",
+      "created_at": "2024-09-27T18:10:40.666080Z",
+      "updated_at": "2024-09-27T18:10:41.536066Z",
      "collection_ids": [],
      "num_files": 0,
      "total_size_in_bytes": 0,
@@ -133,7 +117,7 @@
  },
  "delete_account": {
    "results": {
-      "message": "User account 24d5aa60-d6c5-5360-af03-f178e12b8722 deleted successfully."
+      "message": "User account dd9da79c-d3c2-56d5-b26e-598db8c124c0 deleted successfully."
    }
  }
}
diff --git a/py/tests/regression/runner.py b/py/tests/regression/runner.py
index 355a3ffa3..4d1701b1c 100644
--- a/py/tests/regression/runner.py
+++ b/py/tests/regression/runner.py
@@ -20,8 +20,8 @@ def __init__(
        self.client = R2RClient(base_url=base_url)
        self.tests: List[BaseTest] = []
        self.test_order = [
-            "TestDocumentManagement",
-            "TestRetrieval",
+            # "TestDocumentManagement",
+            # "TestRetrieval",
            "TestUserManagement",
            # "TestObservability",
            "TestGroupManagement",
diff --git a/py/tests/regression/test_cases/base.py b/py/tests/regression/test_cases/base.py
index 48120f59a..91555644b 100644
--- a/py/tests/regression/test_cases/base.py
+++ b/py/tests/regression/test_cases/base.py
@@ -116,7 +116,6 @@ def _custom_diff(
                "actual_results": actual_results,
            }
        else:
-
            deep_diff = DeepDiff(
                expected_results,
                actual_results,
diff --git a/py/tests/regression/test_cases/test_document_management.py b/py/tests/regression/test_cases/test_document_management.py
index 7079408a7..a4a44e048 100644
--- a/py/tests/regression/test_cases/test_document_management.py
+++ b/py/tests/regression/test_cases/test_document_management.py
@@ -6,9 +6,8 @@ class TestDocumentManagement(BaseTest):
-    CHUNKS_FILE_ID = "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa"
-    UPDATE_FILE_ID = "db02076e-989a-59cd-98d5-e24e15a0bd27"
+    UPDATE_FILE_ID = "9fbe403b-c11c-5aae-8ade-ef22980c3ad1"
    DELETE_FILE_ID = "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa"

    def __init__(self, client):
diff --git a/py/tests/regression/test_cases/test_graph_creation.py b/py/tests/regression/test_cases/test_graph_creation.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/services/unstructured/main.py b/services/unstructured/main.py
index 476c047d2..c76327936 100644
--- a/services/unstructured/main.py
+++ b/services/unstructured/main.py
@@ -1,13 +1,14 @@
-from fastapi import FastAPI, HTTPException
-from pydantic import BaseModel
-from typing import List, Dict
-from io import BytesIO
import asyncio
+import base64
import concurrent.futures
+import logging
import os
-import base64
+from io import BytesIO
+from typing import Dict, List, Optional
+
+from fastapi import FastAPI, HTTPException
+from pydantic import BaseModel
from unstructured.partition.auto import partition
-import logging

logger = logging.getLogger(__name__)

@@ -16,7 +17,8 @@
class PartitionRequestModel(BaseModel):
    file_content: bytes
-    chunking_config: Dict
+    ingestion_config: Dict
+    filename: Optional[str] = None


class PartitionResponseModel(BaseModel):
@@ -28,10 +30,10 @@ class PartitionResponseModel(BaseModel):
)


-def run_partition(file_content: str, chunking_config: Dict) -> List[Dict]:
+def run_partition(file_content: str, filename: str, ingestion_config: Dict) -> List[Dict]:
    file_content_bytes = base64.b64decode(file_content)
    file_io = BytesIO(file_content_bytes)
-    elements = partition(file=file_io, **chunking_config)
+    elements = partition(file=file_io, file_filename=filename, **ingestion_config)
    return [element.to_dict() for element in elements]


@@ -49,7 +51,8 @@ async def partition_endpoint(request: PartitionRequestModel):
        executor,
        run_partition,
        request.file_content,
-        request.chunking_config,
+        request.filename,
+        request.ingestion_config,
    )
    logger.info(f"Partitioning completed")
    return PartitionResponseModel(elements=elements)
diff --git a/templates/recommendation_platform/README.md b/templates/recommendation_platform/README.md
index 8b5238bdd..59c9fbff7 100644
--- a/templates/recommendation_platform/README.md
+++ b/templates/recommendation_platform/README.md
@@ -71,7 +71,7 @@ with open(input_csv_path, "r") as csvfile:
    # Ingest the temporary file using the R2R client with a custom chunk size
    client.ingest_files(
-        [temp_filename], chunking_config={"chunk_size": 2048}
+        [temp_filename], ingestion_config={"chunk_size": 2048}
    )

    os.remove(temp_filename)
diff --git a/templates/recommendation_platform/python-backend/r2r-ingestion.py b/templates/recommendation_platform/python-backend/r2r-ingestion.py
index f6588d9ff..202de96b2 100644
--- a/templates/recommendation_platform/python-backend/r2r-ingestion.py
+++ b/templates/recommendation_platform/python-backend/r2r-ingestion.py
@@ -33,7 +33,7 @@
# Ingest the temporary file using the R2R client
client.ingest_files(
-    [temp_filename], chunking_config={"chunk_size": 2048}
+    [temp_filename], ingestion_config={"provider": "r2r", "chunk_size": 2048}
)

os.remove(temp_filename)
diff --git a/templates/recommendation_platform/python-backend/r2r_ingestion.py b/templates/recommendation_platform/python-backend/r2r_ingestion.py
index 73c61cf8e..c67b7833c 100644
--- a/templates/recommendation_platform/python-backend/r2r_ingestion.py
+++ b/templates/recommendation_platform/python-backend/r2r_ingestion.py
@@ -33,7 +33,7 @@
# Ingest the temporary file using the R2R client with a custom chunk size
client.ingest_files(
-    [temp_filename], chunking_config={"chunk_size": 4096}
+    [temp_filename], ingestion_config={"provider": "r2r", "chunk_size": 4096}
)

os.remove(temp_filename)
diff --git a/templates/ycombinator_graphrag/python-backend/main.py b/templates/ycombinator_graphrag/python-backend/main.py
index 38cfab6b0..d45eb2510 100644
--- a/templates/ycombinator_graphrag/python-backend/main.py
+++ b/templates/ycombinator_graphrag/python-backend/main.py
@@ -3,6 +3,7 @@
import time
import uuid

+import yaml
from datasets import load_dataset

args = argparse.ArgumentParser()
@@ -58,10 +59,10 @@ def wait_till_ready(status_var, status_value):
            break
        else:
            # if at least one says failed, exit
-            if "failure" in status_counts or "enrichment_failure" in status_counts:
+            if "failed" in status_counts or "enrichment_failure" in status_counts:
                print(f"At least one document has failed {status_var} => {status_value}")
                for document in documents_overview:
-                    if document.get(status_var) == "failure":
+                    if document.get(status_var) == "failed":
                        print(document.get("id"), document.get("status"))
                exit(1)
        time.sleep(10)
@@ -80,15 +81,15 @@ def create_graph():
    print("Creating graph...")
    entity_types = ["ORGANIZATION", "GEO", "PERSON", "INDUSTRY_SECTOR", "PRODUCT", "COMPETITOR", "TECHNOLOGY", "ACQUISITION", "INVESTOR", ]
    documents_overview = client.documents_overview(limit=1000)['results']
-    document_ids = [document.get("id") for document in documents_overview if document.get("restructuring_status") in ["pending", "failure", "enrichment_failure"]]
+    document_ids = [document.get("id") for document in documents_overview if document.get("kg_extraction_status") in ["pending", "failed", "enrichment_failure"]]
    client.create_graph(document_ids = document_ids)
-    wait_till_ready("restructuring_status", "success")
+    wait_till_ready("kg_extraction_status", "success")

def enrich_graph():
    print("Enriching graph...")
    client.enrich_graph()
-    wait_till_ready("restructuring_status", "enriched")
+    wait_till_ready("kg_extraction_status", "enriched")

def update_prompts():
    print("Updating prompts...")
diff --git a/templates/ycombinator_graphrag/python-backend/prompts.yaml b/templates/ycombinator_graphrag/python-backend/prompts.yaml
index de652edc8..1506c1f7d 100644
--- a/templates/ycombinator_graphrag/python-backend/prompts.yaml
+++ b/templates/ycombinator_graphrag/python-backend/prompts.yaml
@@ -1,4 +1,4 @@
-graphrag_triplet_extraction_zero_shot:
+graphrag_triples_extraction_few_shot:
  template: >
    -Goal-
    Given a text document, identify all entities and their entity types from the text and all relationships among the identified entities.
diff --git a/templates/ycombinator_graphrag/web-app/types.ts b/templates/ycombinator_graphrag/web-app/types.ts
index c3af4f926..c71d090a1 100644
--- a/templates/ycombinator_graphrag/web-app/types.ts
+++ b/templates/ycombinator_graphrag/web-app/types.ts
@@ -354,7 +354,7 @@ export interface KGSearchResult {
  method: 'local' | 'global';
  content: any;
  result_type: 'entity' | 'relationship' | 'community' | 'global';
-  fragment_ids: string[];
+  extraction_ids: string[];
  document_ids: string[];
  metadata: Record;
}